BASKER_INLINE int Basker<Int, Entry, Exe_Space>::SetThreads(Int nthreads) { //Need to test if power of 2. if((nthreads != 1) && (nthreads%2 != 0)) { BASKER_ASSERT(0==1, "Number of thread error"); //Set default 1 num_threads = 1; return BASKER_ERROR; } //Next test if Kokkos has that many threads! //This is a common mistake in mpi-based apps #ifdef KOKKOS_HAVE_OPENMP int check_value = Kokkos::OpenMP::max_hardware_threads(); if(nthreads > check_value) { BASKER_ASSERT(0==1, "Number of thread not aval in Kokkos"); num_threads = 1; return BASKER_ERROR; } #else nthreads = 1; #endif num_threads = nthreads; return BASKER_SUCCESS; }//end SetThreads()
BASKER_INLINE int Basker<Int, Entry, Exe_Space>::upper_tri_solve ( BASKER_MATRIX &M, ENTRY_1DARRAY x, ENTRY_1DARRAY y ) { const Int bcol = M.scol; const Int brow = M.srow; //printf("Upper Tri Solve, scol: %d ncol: %d \n", // M.scol, M.ncol); //end over all columns for(Int k = M.ncol; k >= 1; k--) { //printf("Upper Tri Solve: k: %d \n", k); #ifdef BASKER_DEBUG_SOLVE_RHS BASKER_ASSERT(M.val[M.col_ptr[k]-1]!=0.0,"UpperPivot\n"); #endif //if(M.val[M.col_ptr[k]-1]==0.0) if(M.val(M.col_ptr(k)-1)==0) { printf("Upper pivot: %d %f \n", M.row_idx[M.col_ptr[k]-1], M.val[M.col_ptr[k]-1]); return -1; } //printf("TEST, k: %d out: %f in: %f pivot: %f\n", // k, y[k+bcol-1], x[k+bcol-1], // M.val[M.col_ptr[k]-1]); //Comeback and do with and entry divide //y[k+bcol-1] = x[k+bcol-1] / M.val[M.col_ptr[k]-1]; y(k+brow-1) = x(k+bcol-1) / M.val(M.col_ptr(k)-1); //for(Int i = M.col_ptr[k]-2; i >= M.col_ptr[k-1]; i--) for(Int i = M.col_ptr(k)-2; i >= M.col_ptr(k-1); --i) { //Int j = M.row_idx[i]; const Int j = M.row_idx(i); // printf("Updating row_idx: %d %f %f \n", // j, x[j], M.val[i]*y[k+bcol-1]); //x[j] -= M.val[i]*y[k+bcol-1]; x(j+brow) -= M.val(i) * y(k+bcol-1); } }//end over all columns return 0; }//end upper_tri_solve
BASKER_INLINE void BaskerMatrix<Int,Entry,Exe_Space>::malloc_union_bit() { BASKER_ASSERT(nrow > 0, "matrix_malloc_union_bit"); MALLOC_BOOL_1DARRAY(union_bit, nrow); }//end malloc_union_bit
BASKER_INLINE void BaskerMatrix<Int, Entry, Exe_Space>::init_col() { BASKER_ASSERT(ncol >= 0, "INIT_COL, ncol > 0"); MALLOC_INT_1DARRAY(col_ptr, ncol+1); for(Int i = 0; i < ncol+1; ++i) { col_ptr(i) = (Int) BASKER_MAX_IDX; } }//end init_col()
BASKER_INLINE void BaskerMatrix<Int,Entry,Exe_Space>::malloc_perm(Int n) { BASKER_ASSERT(n > 0, "matrix malloc_perm"); MALLOC_INT_1DARRAY(lpinv,n); //Fix later. //NDE determine what the issue is... init_perm(); }//end init_perm(Int)
BASKER_INLINE void BaskerMatrix<Int, Entry, Exe_Space>::init_vectors ( Int _m, Int _n, Int _nnz ) { nrow = _m; ncol = _n; nnz = _nnz; mnnz = _nnz; if(ncol >= 0) { BASKER_ASSERT((ncol+1)>0, "matrix init_vector ncol"); MALLOC_INT_1DARRAY(col_ptr,ncol+1); } if(nnz > 0) { BASKER_ASSERT(nnz > 0, "matrix init_vector nnz"); MALLOC_INT_1DARRAY(row_idx,nnz); MALLOC_ENTRY_1DARRAY(val,nnz); #ifdef BASKER_INC_LVL MALLOC_INT_1DARRAY(inc_lvl, nnz); #endif } else if(nnz==0) { BASKER_ASSERT((nnz+1)>0, "nnz+1 init_vector "); MALLOC_INT_1DARRAY(row_idx, nnz+1); row_idx(0) = (Int) 0; MALLOC_ENTRY_1DARRAY(val, nnz+1); val(0) = (Entry) 0; #ifdef BASKER_INC_LVL MALLOC_INT_1DARRAY(inc_lvl, nnz+1); #endif } }//end init_vectors(Int, Int, Int)
BASKER_INLINE int Basker<Int,Entry, Exe_Space>::find_btf(BASKER_MATRIX &M) { Int nblks = 0; strong_component(M,nblks,order_btf_array,btf_tabs); btf_flag = BASKER_TRUE; #ifdef BASKER_DEBUG_ORDER_BTF printf("BTF nblks returned: %d \n", nblks); BASKER_ASSERT(nblks>1, "NOT ENOUGH BTF BLOCKS"); #endif #ifdef BASKER_DEBUG_ORDER_BTF if(nblks<2) { printf("BTF did not find enough blks\n"); } #endif #ifdef BASKER_DEBUG_ORDER_BTF /* printf("\nBTF perm: \n"); for(Int i=0; i <M.nrow; i++) { printf("%d, ", order_btf_array(i)); //printf("%d, ", btf_perm(i)); } */ printf("\n\nBTF tabs: \n"); for(Int i=0; i < nblks+1; i++) { printf("%d, ", btf_tabs(i)); } printf("\n"); #endif permute_col(M, order_btf_array); permute_row(M, order_btf_array); break_into_parts(M, nblks, btf_tabs); btf_nblks = nblks; //#ifdef BASKER_DEBUG_ORDER_BTF printf("------------BTF CUT: %d --------------\n", btf_tabs(btf_tabs_offset)); //#endif return 0; }//end find BTF
void BaskerMatrix<Int,Entry,Exe_Space>::init_pend() { if(ncol > 0) { BASKER_ASSERT((ncol+1)>0, "matrix init_pend") MALLOC_INT_1DARRAY(pend,ncol+1); for(Int i =0; i < ncol+1; ++i) { pend(i) = BASKER_MAX_IDX; } } }//end init_pend()
BASKER_FINLINE void Basker<Int, Entry,Exe_Space>::csymamd_order ( BASKER_MATRIX &M, INT_1DARRAY p, INT_1DARRAY cmember ) { amd_flag = BASKER_TRUE; //Debug, #ifdef BASKER_DEBUG_ORDER_AMD printf("cmember: \n"); for(Int i = 0; i < M.ncol; ++i) { printf("(%d, %d), ", i, cmember(i)); } printf("\n"); #endif //If doing iluk, we will not want this. //See amd blk notes if(Options.incomplete == BASKER_TRUE) { for(Int i = 0; i < M.ncol; i++) { p(i) = i; } //printf("Short csym \n"); return; } INT_1DARRAY temp_p; BASKER_ASSERT(M.ncol > 0, "AMD perm not long enough"); MALLOC_INT_1DARRAY(temp_p, M.ncol+1); init_value(temp_p, M.ncol+1, (Int) 0); my_amesos_csymamd(M.ncol, &(M.col_ptr(0)), &(M.row_idx(0)), &(temp_p(0)), &(cmember(0))); for(Int i = 0; i < M.ncol; ++i) { p(temp_p(i)) = i; } }//end csymamd()
BASKER_FINLINE void Basker<Int, Entry,Exe_Space>::csymamd_order ( BASKER_MATRIX &M, INT_1DARRAY p, INT_1DARRAY cmember ) { amd_flag = BASKER_TRUE; //Debug, #ifdef BASKER_DEBUG_ORDER_AMD printf("cmember: \n"); for(Int i = 0; i < M.ncol; ++i) { printf("(%d, %d), ", i, cmember(i)); } printf("\n"); #endif INT_1DARRAY temp_p; BASKER_ASSERT(M.ncol > 0, "AMD perm not long enough"); MALLOC_INT_1DARRAY(temp_p, M.ncol+1); init_value(temp_p, M.ncol+1, (Int) 0); my_amesos_csymamd(M.ncol, &(M.col_ptr(0)), &(M.row_idx(0)), &(temp_p(0)), &(cmember(0))); for(Int i = 0; i < M.ncol; ++i) { p(temp_p(i)) = i; } }//end csymamd()
BASKER_INLINE int Basker<Int,Entry,Exe_Space>::nfactor_domain_error ( INT_1DARRAY threads_start ) { Int nthread_remalloc = 0; for(Int ti = 0; ti < num_threads; ti++) { //Note: jdb we can make this into a switch if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) { threads_start(ti) = BASKER_MAX_IDX; continue; }//end if NOERROR if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) { printf("ERROR THREAD: %d DOMBLK SINGULAR: %d\n", ti, thread_array(ti).error_blk); return BASKER_ERROR; }//end if SINGULAR if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) { printf("ERROR THREADS: %d DOMBLK NOMALLOC: %d\n", ti, thread_array(ti).error_blk); return BASKER_ERROR; }//end if NOMALLOC if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) { BASKER_ASSERT(thread_array(ti).error_blk > 0, "nfactor_dom_error error_blk"); printf("ERROR THREADS: %d DOMBLK MALLOC: %d \n", ti, thread_array(ti).error_blk); //Resize L BASKER_MATRIX &L = LL(thread_array(ti).error_blk)(0); REALLOC_INT_1DARRAY(L.row_idx, L.nnz, thread_array(ti).error_info); REALLOC_ENTRY_1DARRAY(L.val, L.nnz, thread_array(ti).error_info); L.nnz = thread_array(ti).error_info; //clean up workspace if(L.w_fill == BASKER_TRUE) { //Clear workspace for(Int i = 0; i < L.iws_size*L.iws_mult; ++i) { L.iws(i) = (Int) 0; } for(Int i = 0; i < L.ews_size*L.ews_mult; ++i) { L.ews(i) = (Entry) 0; } //Clear perm for(Int i = L.srow; i < L.srow+L.nrow; ++i) { gperm(i) = BASKER_MAX_IDX; } } //Resize U BASKER_MATRIX &U = LU(thread_array(ti).error_blk)(0); REALLOC_INT_1DARRAY(U.row_idx, U.nnz, thread_array(ti).error_info); REALLOC_ENTRY_1DARRAY(U.val, U.nnz, thread_array(ti).error_info); U.nnz = thread_array(ti).error_info; threads_start(ti) = thread_array(ti).error_blk; //Reset thread_array(ti).error_type = BASKER_ERROR_NOERROR; thread_array(ti).error_blk = BASKER_MAX_IDX; thread_array(ti).error_info = BASKER_MAX_IDX; nthread_remalloc++; }//if REMALLOC }//for all threads if(nthread_remalloc == 0) { return BASKER_SUCCESS; } else { return nthread_remalloc; } //Should never be here BASKER_ASSERT(0==1, "nfactor_diag_error, should never"); return BASKER_SUCCESS; }//end nfactor_domain_error
BASKER_INLINE int Basker<Int,Entry,Exe_Space>::nfactor_diag_error ( INT_1DARRAY threads_start ) { Int nthread_remalloc = 0; for(Int ti = 0; ti < num_threads; ti++) { //Note: jdb we can make this into a switch if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) { threads_start(ti) = BASKER_MAX_IDX; continue; }//end if NOERROR if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) { printf("ERROR THREAD: %d DIAGBLK SINGULAR: %d\n", ti, thread_array(ti).error_blk); return BASKER_ERROR; }//end if SINGULAR if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) { printf("ERROR THREADS: %d DIAGBLK NOMALLOC: %d\n", ti, thread_array(ti).error_blk); return BASKER_ERROR; }//end if NOMALLOC if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) { BASKER_ASSERT(thread_array(ti).error_blk > 0, "nfactor_diag_error error_blk"); printf("ERROR THREADS: %d DIAGBLK MALLOC: %d \n", ti, thread_array(ti).error_blk); //Clean the workspace printf("test: %d %d \n", thread_array(ti).iws_size*thread_array(ti).iws_mult, thread_array(ti).ews_size*thread_array(ti).ews_mult); for(Int i = 0; i < thread_array(ti).iws_size*thread_array(ti).iws_mult; i++) { thread_array(ti).iws(i) = (Int) 0; } for(Int i = 0; i < thread_array(ti).ews_size*thread_array(ti).ews_mult; i++) { thread_array(ti).ews(i) = (Entry) 0; } //Resize L BASKER_MATRIX &L = LBTF(thread_array(ti).error_blk); REALLOC_INT_1DARRAY(L.row_idx, L.nnz, thread_array(ti).error_info); REALLOC_ENTRY_1DARRAY(L.val, L.nnz, thread_array(ti).error_info); L.nnz = thread_array(ti).error_info; for(Int i = 0; i < L.ncol; i++) { L.col_ptr(i) = 0; } for(Int i = L.srow; i < (L.srow+L.nrow); i++) { gperm(i) = BASKER_MAX_IDX; } //Resize U BASKER_MATRIX &U = UBTF(thread_array(ti).error_blk); REALLOC_INT_1DARRAY(U.row_idx, U.nnz, thread_array(ti).error_info); REALLOC_ENTRY_1DARRAY(U.val, U.nnz, thread_array(ti).error_info); U.nnz = thread_array(ti).error_info; for(Int i = 0; i < U.ncol; i++) { U.col_ptr(i) = 0; } printf("Setting thread start(%d) %d \n", ti, thread_array(ti).error_blk); threads_start(ti) = thread_array(ti).error_blk; //Reset thread_array(ti).error_type = BASKER_ERROR_NOERROR; thread_array(ti).error_blk = BASKER_MAX_IDX; thread_array(ti).error_info = BASKER_MAX_IDX; nthread_remalloc++; }//if REMALLOC }//for all threads if(nthread_remalloc == 0) { return BASKER_SUCCESS; } else { return nthread_remalloc; } //Should never be here BASKER_ASSERT(0==1, "nfactor_diag_error, should never"); return BASKER_SUCCESS; }//end nfactor_diag_error
BASKER_INLINE int Basker<Int,Entry, Exe_Space>::find_btf2 ( BASKER_MATRIX &M ) { Int nblks = 0; strong_component(M,nblks,order_btf_array,btf_tabs); btf_nblks = nblks; btf_flag = BASKER_TRUE; //#ifdef BASKER_DEBUG_ORDER_BTF printf("BTF nblks returned: %d \n", nblks); BASKER_ASSERT(nblks>1, "NOT ENOUGH BTF BLOCKS"); //#endif #ifdef BASKER_DEBUG_ORDER_BTF if(nblks<2) { printf("BTF did not find enough blks\n"); } #endif #ifdef BASKER_DEBUG_ORDER_BTF /* printf("\nBTF perm: \n"); for(Int i=0; i <M.nrow; i++) { printf("%d, ", order_btf_array(i)); //printf("%d, ", btf_perm(i)); } */ printf("num_threads: %d \n", num_threads); printf("\n\nBTF tabs: \n"); for(Int i=0; i < nblks+1; i++) { printf("%d, ", btf_tabs(i)); } printf("\n"); #endif permute_col(M, order_btf_array); permute_row(M, order_btf_array); MALLOC_INT_1DARRAY(order_blk_amd_array, M.ncol); init_value(order_blk_amd_array, M.ncol, (Int)0); MALLOC_INT_1DARRAY(btf_blk_nnz, nblks+1); init_value(btf_blk_nnz, nblks+1, (Int) 0); MALLOC_INT_1DARRAY(btf_blk_work, nblks+1); init_value(btf_blk_work, nblks+1, (Int) 0); //Find AMD blk ordering, get nnz, and get work btf_blk_amd( M, order_blk_amd_array, btf_blk_nnz, btf_blk_work); #ifdef BASKER_DEBUG_ORDER_BTF printf("blk_perm:\n"); for(Int i = 0; i < M.ncol; i++) { printf("(%d,%d) ", i, order_blk_amd_array(i)); } printf("\n"); printf("id/blk_size/blk_nnz/work: \n"); for(Int i = 0; i < nblks; i++) { printf("(%d, %d, %d, %d) ", i, btf_tabs(i+1)-btf_tabs(i), btf_blk_nnz(i), btf_blk_work(i)); } printf("\n"); #endif //printMTX("A_BEFORE.mtx", M); //printVec("AMD.txt", order_blk_amd_array, M.ncol); permute_col(M, order_blk_amd_array); permute_row(M, order_blk_amd_array); sort_matrix(M); //changed col to row, error. //print to see issue //printMTX("A_AMD.mtx", M); break_into_parts2(M, nblks, btf_tabs); //find schedule find_btf_schedule(M, nblks, btf_tabs); #ifdef BASKER_DEBUG_ORDER_BTF printf("------------BTF CUT: %d --------------\n", btf_tabs(btf_tabs_offset)); #endif return 0; }//end find BTF(nnz)
BASKER_INLINE int Basker<Int,Entry,Exe_Space>::lower_tri_solve ( BASKER_MATRIX &M, ENTRY_1DARRAY x, ENTRY_1DARRAY y ) { const Int bcol = M.scol; const Int brow = M.scol; //M.info(); //printf("Lower-Tri-Solve-Test, [%d %d %d %d] \n", // M.srow, M.nrow, M.scol, M.ncol); for(Int k = 0; k < M.ncol; ++k) { //Test if zero pivot value #ifdef BASKER_DEBUG_SOLVE_RHS BASKER_ASSERT(M.val[M.col_ptr[k]]!=0.0, "LOWER PIVOT 0"); #endif if(M.val[M.col_ptr[k]] == 0.0) { printf("Lower Pivot: %d %f \n", M.row_idx[M.col_ptr[k]], M.val[M.col_ptr[k]]); return -1; } //printf("Lower tri. k: %d out: %f in: %f piv: %f \n", // k+bcol, y[k+bcol], x[k+bcol], M.val[M.col_ptr[k]]); //Replace with Entry divide in future //y[k+bcol] = x[k+bcol] / M.val[M.col_ptr[k]]; y(k+brow) = x(k+bcol) / M.val(M.col_ptr(k)); //for(Int i = M.col_ptr[k]+1; i < M.col_ptr[k+1]; i++) for(Int i = M.col_ptr(k)+1; i < M.col_ptr(k+1); ++i) { //Int j = gperm[M.row_idx[i]]; const Int j = gperm(M.row_idx(i)+brow); #ifdef BASKER_DEBUG_SOLVE_RHS BASKER_ASSERT(j != BASKER_MAX_IDX,"Using nonperm\n"); #endif //x[j] -= M.val[i]*y[k+bcol]; //printf("gperm: %d x(%d) y(i) \n", // M.row_idx(i) + brow, j, k+bcol); x(j) -= M.val(i)*y(k+bcol); }//over all nnz in a column }//over each column return 0; }//end lower_tri_solve
BASKER_INLINE int Basker<Int,Entry,Exe_Space>::test_solve() { ENTRY_1DARRAY x_known; ENTRY_1DARRAY x; ENTRY_1DARRAY y; #ifdef BASKER_DEBUG_SOLVE_RHS printf("test_solve called \n"); printf("Global pivot permuation\n"); printVec(gperm, gn); printf("\n"); printf("Global pivot permutation inverse\n"); printVec(gpermi, gn); printf("\n"); #endif BASKER_ASSERT(gn > 0, "solve testsolve gn"); MALLOC_ENTRY_1DARRAY(x_known, gn); init_value(x_known, gn , (Entry)1.0); //temp for(Int i = 0; i < gn; i++) { //x_known(i) = (Entry)(i+1); x_known(i) = (Entry) 1.0; } //JDB: used for other test //permute(x_known, order_csym_array, gn); MALLOC_ENTRY_1DARRAY(x, gn); init_value(x, gn, (Entry) 0.0); BASKER_ASSERT(gm > 0, "solve testsolve gm"); MALLOC_ENTRY_1DARRAY(y, gm); init_value(y, gm, (Entry) 0.0); if(btf_nblks > 0) { sort_matrix(BTF_C); //printMTX("C_BEFORE_SOLVE.mtx", BTF_C); } if(Options.btf == BASKER_TRUE) { //printf("btf_tabs_offset: %d ", btf_tabs_offset); //printf("btf_nblks: %d \n", btf_nblks); if(btf_tabs_offset != 0) { //printf("BTF_A spmv\n"); spmv(BTF_A, x_known,y); if(btf_nblks> 1) { //printf("btf_B spmv \n"); spmv(BTF_B, x_known, y); } } if(btf_nblks > 1) { //printf("btf_c spmv \n"); spmv(BTF_C, x_known, y); } //return -1; } else { //printf("other\n"); //spmv(BTF_A, x_known,y); } //printf("\n Before Test Points \n"); //printf("i: %d x: %f y: %f \n", 0, x_known(0), y(0)); //if(gn > 24) // { // printf("i: %d x: %f y: %f \n", 24, x_known(24), y(24)); // } //pivot permuation //printVec("gperm.csc", gpermi, gn); for(Int i = 0; i < gn; i++) { x(gpermi(i)) = y(i); } for(Int i = 0; i < gn; i++) { y(i) = x(i); x(i) = 0; } #ifdef BASKER_DEBUG_SOLVE_RHS printf("\n\n"); //printf("Known Solution: \n"); //for(Int i = 0; i < gn; i++) // { // printf("%f, " , x_known(i)); // } printf("\n\n"); printf("RHS: \n"); for(Int i =0; i < gm; i++) { printf("%d %f,\n ", i, y(i)); } printf("\n\n"); #endif if(Options.btf == BASKER_FALSE) { //printf("before serial solve\n"); if(btf_tabs_offset != 0) { serial_solve(y,x); } //printf("After serial solve\n"); //printf("i: %d x: %f y: %f \n", 0, x(0), y(0)); //printf("i: %d x: %f y: %f \n", 24, x(24), y(24)); } else { //A\y -> y //serial_btf_solve(y,x); //printf("before btf serial solve\n"); serial_btf_solve(y,x); //printf("After btf solve\n"); //printf("i: %d x: %f y: %f \n", 0, x(0), y(0)); //printf("i: %d x: %f y: %f \n", 24, x(24), y(24)); } Entry diff =0.0; for(Int i = 0; i < gn; i++) { diff += (x_known(i) - x(i)); } diff = diff/(Entry) gn; #ifdef BASKER_DEBUG_SOLVE_RHS printf("\n\n"); printf("Solve Compare: \n"); for(Int i = 0; i < gn; i++) { printf("%d %f %f \n", i, x_known(i), x(i)); } printf("\n\n"); #endif printf("\n Test Points \n"); printf("i: %d x: %f %f \n", 0, x_known(0), x(0)); if(gn > 24) { printf("i: %d x: %f %f \n", 10, x_known(10), x(10)); printf("i: %d x: %f %f \n", 24, x_known(24), x(24)); } printf("\n"); printf("TEST_SOLVE: ||x-x||/||x| = %e", diff); printf("\n"); if((diff > -1e-2) && (diff < 1e-2)) { printf("TEST PASSED \n"); } return 0; }//end test_solve
BASKER_INLINE void BaskerMatrix<Int,Entry,Exe_Space>::convert2D ( BASKER_MATRIX &M, BASKER_BOOL alloc, Int kid ) { if(nnz == 0) { for(Int i = 0; i < ncol+1; i++) { col_ptr(i) = 0; } MALLOC_INT_1DARRAY(row_idx, 1); row_idx(0) = (Int) 0; MALLOC_ENTRY_1DARRAY(val, 1); val(0) = (Entry) 0; return; } //info(); //We could check some flag ?? //We assume a pre-scan has already happened if(alloc == BASKER_TRUE) { //printf("ALLOC\n"); if(nnz > 0) { BASKER_ASSERT(nnz > 0, "matrix row nnz 2"); MALLOC_INT_1DARRAY(row_idx, nnz); } else if(nnz ==0) { BASKER_ASSERT((nnz+1)>0, "matrix row nnz 3"); MALLOC_INT_1DARRAY(row_idx, nnz+1); } } //init_value(row_idx, nnz, (Int) 0); //printf("clear row: %d \n", nnz); for(Int i = 0; i < nnz; ++i) { //printf("clear row_idx(%d) \n", i); row_idx(i) = 0; } if(alloc == BASKER_TRUE) { if(nnz > 0) { BASKER_ASSERT(nnz > 0, "matrix nnz 4"); MALLOC_ENTRY_1DARRAY(val, nnz); } else if(nnz == 0) { BASKER_ASSERT((nnz+1) > 0, "matrix nnz 5"); MALLOC_ENTRY_1DARRAY(val, nnz+1); } } //init_value(val, nnz, (Entry) 0); for(Int i = 0; i < nnz; ++i) { val(i) = 0; } Int temp_count = 0; for(Int k = scol; k < scol+ncol; ++k) { //note col_ptr[k-scol] contains the starting index if(col_ptr(k-scol) == BASKER_MAX_IDX) { col_ptr(k-scol) = temp_count; //printf("continue called, k: %d \n", k); continue; } for(Int i = col_ptr(k-scol); i < M.col_ptr(k+1); i++) { Int j = M.row_idx(i); //printf("i: %d j:%d \n", i,j); if(j >= srow+nrow) { break; } //printf("writing row_dix: %d i: %d val: %d nnz: %d srow: %d nrow: %d \n", // temp_count, i, j, nnz, // srow, nrow); //BASKER_ASSERT(temp_count < nnz, "2DConvert, too many values"); //row_idx[temp_count] = j; if(j-srow <0) { std::cout << "kid: " << kid << " j: " << j << " srow: " << srow << " k: " << k << " idx: " << i << std::endl; BASKER_ASSERT(0==1, "j-srow NO"); } row_idx(temp_count) = j-srow; val(temp_count) = M.val(i); temp_count++; } col_ptr(k-scol) = temp_count; } //col_ptr[0] = 0; //NO!!1 //Slide over the col counts for(Int i = ncol; i > 0; i--) { col_ptr(i) = col_ptr(i-1); } col_ptr(0) = (Int) 0; //info(); //print(); }//end convert2d(Matrix)
static inline int my_strong_component ( long &n, long *col_ptr, long *row_idx, long &nblks, long *perm, long *perm_in, long *CC ) { typedef long l_Int; //l_Int p[n]; //output row_per l_Int *p = new l_Int[n]; //l_Int r[n+1]; //comp_tabs l_Int *r = new l_Int[n+1]; //We will want to add option to use q in the future //l_Int work[n*4]; l_Int *work = new l_Int[n*4]; //printf("before amesos call \n"); /* nblks = amesos_btf_l_strongcomp(M.ncol,&(M.col_ptr[0]), &(M.row_idx[0]), &(perm_in[0]), p, r, work); */ nblks = amesos_btf_l_strongcomp(n, col_ptr, row_idx, perm_in, p, r, work); //printf("after amesos call \n"); #ifdef BASKER_DEBUG_ORDER_BTF printf("\nBTF perm: \n"); for(Int i=0; i <n; i++) { printf("%d, ", p[i]); } printf("\n\nBTF tabs: <right> \n"); for(l_Int i=0; i < nblks+1; i++) { printf("%d, ", r[i]); } printf("\n"); #endif BASKER_ASSERT(n > 0, "M.nrow btf"); //MALLOC_INT_1DARRAY(perm,M.nrow); for(l_Int i = 0; i < n; i++) { perm[p[i]] = i; } BASKER_ASSERT((nblks+1) > 0, "nblks+1 btf"); //MALLOC_INT_1DARRAY(CC, nblks+1); for(l_Int i = 0; i < nblks+1; i++) { CC[i] = r[i]; } delete [] p; delete [] r; delete [] work; return 0; }//strong_component<long int, Entry, Exe_Space>
//=========strong componenet=========== static inline int my_strong_component ( int &n, int *col_ptr, int *row_idx, int &nblks, int *perm, int *perm_in, int *CC ) { typedef int l_Int; l_Int *p = new l_Int[n]; l_Int *r = new l_Int[n+1]; //We will want to add option to use q in the future l_Int *work = new l_Int[n*4]; //printf("before amesos call \n"); /* nblks = amesos_btf_strongcomp(M.ncol,&(M.col_ptr[0]), &(M.row_idx[0]), &(perm_in[0]), p, r, work); */ nblks = amesos_btf_strongcomp(n, col_ptr, row_idx, perm_in, p, r, work); //printf("after amesos call \n"); #ifdef BASKER_DEBUG_ORDER_BTF printf("\nBTF perm: \n"); //for(Int i=0; i <M.nrow; i++) for(Int i=0; i < n; i++) { printf("%d, ", p[i]); } printf("\n\nBTF tabs: <right> \n"); for(Int i=0; i < nblks+1; i++) { printf("%d, ", r[i]); } printf("\n"); #endif //BASKER_ASSERT(M.nrow > 0, "M.nrow btf"); BASKER_ASSERT(n > 0, "M.nrow btf"); //MALLOC_INT_1DARRAY(perm,M.nrow); // MALLOC_INT_1DARRAY(perm, n); for(l_Int i = 0; i < n; i++) { perm[p[i]] = i; } BASKER_ASSERT((nblks+1) > 0, "nblks+1 btf"); //MALLOC_INT_1DARRAY(CC, nblks+1); for(l_Int i = 0; i < nblks+1; i++) { CC[i] = r[i]; } delete [] p; delete [] r; delete [] work; return 0; }
BASKER_INLINE int Basker<Int, Entry,Exe_Space>::break_into_parts2 ( BASKER_MATRIX &M, Int nblks, INT_1DARRAY btf_tabs ) { #ifdef BASKER_DEBUG_ORDER_BTF printf("break_into_parts2 called \n"); printf("nblks: %d \n", nblks); #endif Options.btf = BASKER_TRUE; //Alg. // A -> [BTF_A BTF_B] // [0 BTF_C] //1. Run backward through the btf_tabs to find size C //2. Form A,B,C based on size in 1. //Short circuit, //If nblks == 1, than only BTF_A exists if(nblks == 1) { //#ifdef BASKER_DEBUG_ORDER_BTF printf("Short Circuit part_call \n"); //#endif BTF_A = A; //Options.btf = BASKER_FALSE; btf_tabs_offset = 1; return 0; } //Step 1. //Find total work estimate Int total_work_estimate = 0; for(Int b = 0; b < nblks; b++) { total_work_estimate += btf_blk_work(b); } //Set a class variable to use later btf_total_work = total_work_estimate; //printf("Total work estimate: %d \n", // total_work_estimate); //printf("num_threads: %d epsilon: %f \n", // num_threads, // ((double)1/num_threads) + // ((double)BASKER_BTF_IMBALANCE)); Int break_size = ceil((double)total_work_estimate*( ((double)1/num_threads) + ((double)BASKER_BTF_IMBALANCE))); printf("Break size: %d \n", break_size); Int t_size = 0; Int scol = M.ncol; Int blk_idx = nblks; BASKER_BOOL move_fwd = BASKER_TRUE; while(move_fwd==BASKER_TRUE) { //printf("------TEST blk_idx: %d \n", // blk_idx); Int blk_work = btf_blk_work(blk_idx-1); Int blk_size = btf_tabs(blk_idx) - btf_tabs(blk_idx-1); #ifdef BASKER_DEBUG_ORDER_BTF printf(" \n move_fwd loop \n"); BASKER_ASSERT(blk_idx>=0, "btf blk idx off"); BASKER_ASSERT(blk_work>=0, "btk_work wrong"); BASKER_ASSERT(blk_size>0, "btf blk size wrong"); printf("blk_idx: %d blk_work: %d break_size: %d \n", blk_idx, blk_work, break_size); #endif //Should be end //if(((blk_work < break_size) || // (blk_size < BASKER_BTF_SMALL)) && // (blk_idx > 1)) //Continue to be in btf if(((blk_work < break_size) && (blk_idx > 1))) { #ifdef BASKER_DEBUG_ORDER_BTF printf("first choice \n"); #endif t_size = t_size+blk_size; blk_idx = blk_idx-1; scol = btf_tabs[blk_idx]; } //break due to size else if(blk_work >= break_size) { printf("break due to size\n"); move_fwd = BASKER_FALSE; } //break due to end else if(blk_idx == 1) { printf("break last blk\n"); blk_idx = 0; t_size = t_size + blk_size; scol = btf_tabs[blk_idx]; move_fwd = BASKER_FALSE; } //should not be called else { BASKER_ASSERT(1==0, "btf order break"); move_fwd = BASKER_FALSE; } }//end while(move_fwd) //#ifdef BASKER_DEBUG_ORDER_BTF printf("Done finding BTF2 cut. Cut size: %d scol: %d \n", t_size, scol); printf("Done finding BTF2 cut. blk_idx: %d \n", blk_idx); //BASKER_ASSERT(t_size > 0, "BTF CUT SIZE NOT BIG ENOUGH\n"); BASKER_ASSERT((scol >= 0) && (scol < M.ncol), "SCOL\n"); //#endif //Comeback and change btf_tabs_offset = blk_idx; //2. Move into Blocks if(btf_tabs_offset != 0) { //--Move A into BTF_A; BTF_A.set_shape(0, scol, 0, scol); BTF_A.nnz = M.col_ptr(scol); #ifdef BASKER_DEBUG_ORDER_BTF printf("Init BTF_A. ncol: %d nnz: %d \n", scol, BTF_A.nnz); #endif if(BTF_A.v_fill == BASKER_FALSE) { BASKER_ASSERT(BTF_A.ncol >= 0, "BTF_A, col_ptr"); MALLOC_INT_1DARRAY(BTF_A.col_ptr, BTF_A.ncol+1); BASKER_ASSERT(BTF_A.nnz > 0, "BTF_A, nnz"); MALLOC_INT_1DARRAY(BTF_A.row_idx, BTF_A.nnz); MALLOC_ENTRY_1DARRAY(BTF_A.val, BTF_A.nnz); BTF_A.fill(); } Int annz = 0; for(Int k = 0; k < scol; ++k) { #ifdef BASKER_DEBUG_ORDER_BTF printf("copy column: %d into A_BTF, [%d %d] \n", k, M.col_ptr(k), M.col_ptr(k+1)); #endif for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i) { //printf("annz: %d i: %d \n", annz, i); BTF_A.row_idx(annz) = M.row_idx(i); BTF_A.val(annz) = M.val(i); annz++; } BTF_A.col_ptr(k+1) = annz; } }//no A //Fill in B and C at the same time INT_1DARRAY cws; BASKER_ASSERT((M.ncol-scol+1) > 0, "BTF_SIZE MALLOC"); MALLOC_INT_1DARRAY(cws, M.ncol-scol+1); init_value(cws, M.ncol-scol+1, (Int)M.ncol); BTF_B.set_shape(0 , scol, scol, M.ncol-scol); BTF_C.set_shape(scol, M.ncol-scol, scol, M.ncol-scol); #ifdef BASKER_DEBUG_ORDER_BTF printf("Set Shape BTF_B: %d %d %d %d \n", BTF_B.srow, BTF_B.nrow, BTF_B.scol, BTF_B.ncol); printf("Set Shape BTF_C: %d %d %d %d \n", BTF_C.srow, BTF_C.nrow, BTF_C.scol, BTF_C.nrow); #endif //Scan and find nnz //We can do this much better!!!! Int bnnz = 0; Int cnnz = 0; for(Int k = scol; k < M.ncol; ++k) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Scanning nnz, k: %d \n", k); #endif for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i) { if(M.row_idx(i) < scol) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz to Upper, %d %d \n", scol, M.row_idx(i)); #endif bnnz++; } else { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz to Lower, %d %d \n", scol, M.row_idx(i)); #endif cnnz++; } }//over all nnz in k }//over all k #ifdef BASKER_DEBUG_ORDER_BTF printf("BTF_B nnz: %d \n", bnnz); printf("BTF_C nnz: %d \n", cnnz); #endif BTF_B.nnz = bnnz; BTF_C.nnz = cnnz; //Malloc need space if((BTF_B.v_fill == BASKER_FALSE) && (BTF_B.nnz > 0)) //if(BTF_B.v_fill == BASKER_FALSE) { BASKER_ASSERT(BTF_B.ncol >= 0, "BTF_B ncol"); MALLOC_INT_1DARRAY(BTF_B.col_ptr, BTF_B.ncol+1); BASKER_ASSERT(BTF_B.nnz > 0, "BTF_B.nnz"); MALLOC_INT_1DARRAY(BTF_B.row_idx, BTF_B.nnz); MALLOC_ENTRY_1DARRAY(BTF_B.val, BTF_B.nnz); BTF_B.fill(); } if(BTF_C.v_fill == BASKER_FALSE) { BASKER_ASSERT(BTF_C.ncol >= 0, "BTF_C.ncol"); MALLOC_INT_1DARRAY(BTF_C.col_ptr, BTF_C.ncol+1); BASKER_ASSERT(BTF_C.nnz > 0, "BTF_C.nnz"); MALLOC_INT_1DARRAY(BTF_C.row_idx, BTF_C.nnz); MALLOC_ENTRY_1DARRAY(BTF_C.val, BTF_C.nnz); BTF_C.fill(); } //scan again (Very bad!!!) bnnz = 0; cnnz = 0; for(Int k = scol; k < M.ncol; ++k) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Scanning nnz, k: %d \n", k); #endif for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i) { if(M.row_idx(i) < scol) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz to Upper, %d %d \n", scol, M.row_idx[i]); #endif BASKER_ASSERT(BTF_B.nnz > 0, "BTF B uninit"); //BTF_B.row_idx[bnnz] = M.row_idx[i]; //Note: do not offset because B srow = 0 BTF_B.row_idx(bnnz) = M.row_idx(i); BTF_B.val(bnnz) = M.val(i); bnnz++; } else { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz Lower,k: %d %d %d %f \n", k, scol, M.row_idx[i], M.val(i)); #endif //BTF_C.row_idx[cnnz] = M.row_idx[i]; BTF_C.row_idx(cnnz) = M.row_idx(i)-scol; BTF_C.val(cnnz) = M.val(i); cnnz++; } }//over all nnz in k if(BTF_B.nnz > 0) { BTF_B.col_ptr(k-scol+1) = bnnz; } BTF_C.col_ptr(k-scol+1) = cnnz; }//over all k #ifdef BASKER_DEBUG_ORDER_BTF printf("After BTF_B nnz: %d \n", bnnz); printf("After BTF_C nnz: %d \n", cnnz); #endif //printf("\n\n"); //printf("DEBUG\n"); //BTF_C.print(); //printf("\n\n"); return 0; }//end break_into_parts2 (based on imbalance)
BASKER_INLINE int Basker<Int, Entry,Exe_Space>::break_into_parts ( BASKER_MATRIX &M, Int nblks, INT_1DARRAY btf_tabs ) { #ifdef BASKER_DEBUG_ORDER_BTF printf("break_into_parts called \n"); printf("nblks: %d \n", nblks); #endif Options.btf = BASKER_TRUE; //Alg. // A -> [BTF_A BTF_B] // [0 BTF_C] //1. Run backward through the btf_tabs to find size C //2. Form A,B,C based on size in 1. //Short circuit, //If nblks == 1, than only BTF_A exists if(nblks == 1) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Short Circuit part_call \n"); #endif BTF_A = A; //Options.btf = BASKER_FALSE; btf_tabs_offset = 1; return 0; } //Step 1. Int t_size = 0; Int scol = M.ncol; Int blk_idx = nblks; BASKER_BOOL move_fwd = BASKER_TRUE; while(move_fwd==BASKER_TRUE) { Int blk_size = btf_tabs(blk_idx)- btf_tabs(blk_idx-1); #ifdef BASKER_DEBUG_ORDER_BTF printf("move_fwd loop \n"); BASKER_ASSERT(blk_idx>=0, "btf blk idx off"); BASKER_ASSERT(blk_size>0, "btf blk size wrong"); printf("blk_idx: %d blk_size: %d \n", blk_idx, blk_size); std::cout << blk_size << std::endl; #endif if((blk_size < Options.btf_large) && ((((double)t_size+blk_size)/(double)M.ncol) < Options.btf_max_percent)) { #ifdef BASKER_DEBUG_ORDER_BTF printf("first choice \n"); printf("blksize test: %d %d %d \n", blk_size, Options.btf_large, BASKER_BTF_LARGE); printf("blkpercent test: %f %f %f \n", ((double)t_size+blk_size)/(double)M.ncol, Options.btf_max_percent, (double) BASKER_BTF_MAX_PERCENT); #endif t_size = t_size+blk_size; blk_idx = blk_idx-1; scol = btf_tabs[blk_idx]; } else { //printf("second choice \n"); //#ifdef BASKER_DEBUG_ORDER_BTF printf("Cut: blk_size: %d percent: %f \n", blk_size, ((double)t_size+blk_size)/(double)M.ncol); if((((double)t_size+blk_size)/(double)M.ncol) == 1.0) { blk_idx = 0; t_size = t_size + blk_size; scol = btf_tabs[blk_idx]; } //#endif move_fwd = BASKER_FALSE; } }//end while(move_fwd) #ifdef BASKER_DEBUG_ORDER_BTF printf("Done finding BTF cut. Cut size: %d scol: %d \n", t_size, scol); //BASKER_ASSERT(t_size > 0, "BTF CUT SIZE NOT BIG ENOUGH\n"); BASKER_ASSERT((scol >= 0) && (scol < M.ncol), "SCOL\n"); #endif //Comeback and change btf_tabs_offset = blk_idx; //2. Move into Blocks if(btf_tabs_offset != 0) { //--Move A into BTF_A; BTF_A.set_shape(0, scol, 0, scol); BTF_A.nnz = M.col_ptr(scol); #ifdef BASKER_DEBUG_ORDER_BTF printf("Init BTF_A. ncol: %d nnz: %d \n", scol, BTF_A.nnz); #endif if(BTF_A.v_fill == BASKER_FALSE) { BASKER_ASSERT(BTF_A.ncol >= 0, "BTF_A, col_ptr"); MALLOC_INT_1DARRAY(BTF_A.col_ptr, BTF_A.ncol+1); BASKER_ASSERT(BTF_A.nnz > 0, "BTF_A, nnz"); MALLOC_INT_1DARRAY(BTF_A.row_idx, BTF_A.nnz); MALLOC_ENTRY_1DARRAY(BTF_A.val, BTF_A.nnz); BTF_A.fill(); } Int annz = 0; for(Int k = 0; k < scol; ++k) { #ifdef BASKER_DEBUG_ORDER_BTF printf("copy column: %d into A_BTF, [%d %d] \n", k, M.col_ptr(k), M.col_ptr(k+1)); #endif for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i) { //printf("annz: %d i: %d \n", annz, i); BTF_A.row_idx(annz) = M.row_idx(i); BTF_A.val(annz) = M.val(i); annz++; } BTF_A.col_ptr(k+1) = annz; } }//no A //Fill in B and C at the same time INT_1DARRAY cws; BASKER_ASSERT((M.ncol-scol+1) > 0, "BTF_SIZE MALLOC"); MALLOC_INT_1DARRAY(cws, M.ncol-scol+1); init_value(cws, M.ncol-scol+1, (Int)M.ncol); BTF_B.set_shape(0 , scol, scol, M.ncol-scol); BTF_C.set_shape(scol, M.ncol-scol, scol, M.ncol-scol); #ifdef BASKER_DEBUG_ORDER_BTF printf("Set Shape BTF_B: %d %d %d %d \n", BTF_B.srow, BTF_B.nrow, BTF_B.scol, BTF_B.ncol); printf("Set Shape BTF_C: %d %d %d %d \n", BTF_C.srow, BTF_C.nrow, BTF_C.scol, BTF_C.nrow); #endif //Scan and find nnz //We can do this much better!!!! Int bnnz = 0; Int cnnz = 0; for(Int k = scol; k < M.ncol; ++k) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Scanning nnz, k: %d \n", k); #endif for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i) { if(M.row_idx(i) < scol) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz to Upper, %d %d \n", scol, M.row_idx(i)); #endif bnnz++; } else { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz to Lower, %d %d \n", scol, M.row_idx(i)); #endif cnnz++; } }//over all nnz in k }//over all k #ifdef BASKER_DEBUG_ORDER_BTF printf("BTF_B nnz: %d \n", bnnz); printf("BTF_C nnz: %d \n", cnnz); #endif BTF_B.nnz = bnnz; BTF_C.nnz = cnnz; //Malloc need space if((BTF_B.v_fill == BASKER_FALSE) && (BTF_B.nnz > 0)) //if(BTF_B.v_fill == BASKER_FALSE) { BASKER_ASSERT(BTF_B.ncol >= 0, "BTF_B ncol"); MALLOC_INT_1DARRAY(BTF_B.col_ptr, BTF_B.ncol+1); BASKER_ASSERT(BTF_B.nnz > 0, "BTF_B.nnz"); MALLOC_INT_1DARRAY(BTF_B.row_idx, BTF_B.nnz); MALLOC_ENTRY_1DARRAY(BTF_B.val, BTF_B.nnz); BTF_B.fill(); } if(BTF_C.v_fill == BASKER_FALSE) { BASKER_ASSERT(BTF_C.ncol >= 0, "BTF_C.ncol"); MALLOC_INT_1DARRAY(BTF_C.col_ptr, BTF_C.ncol+1); BASKER_ASSERT(BTF_C.nnz > 0, "BTF_C.nnz"); MALLOC_INT_1DARRAY(BTF_C.row_idx, BTF_C.nnz); MALLOC_ENTRY_1DARRAY(BTF_C.val, BTF_C.nnz); BTF_C.fill(); } //scan again (Very bad!!!) bnnz = 0; cnnz = 0; for(Int k = scol; k < M.ncol; ++k) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Scanning nnz, k: %d \n", k); #endif for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i) { if(M.row_idx(i) < scol) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz to Upper, %d %d \n", scol, M.row_idx[i]); #endif BASKER_ASSERT(BTF_B.nnz > 0, "BTF B uninit"); //BTF_B.row_idx[bnnz] = M.row_idx[i]; //Note: do not offset because B srow = 0 BTF_B.row_idx(bnnz) = M.row_idx(i); BTF_B.val(bnnz) = M.val(i); bnnz++; } else { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz Lower,k: %d %d %d %f \n", k, scol, M.row_idx[i], M.val(i)); #endif //BTF_C.row_idx[cnnz] = M.row_idx[i]; BTF_C.row_idx(cnnz) = M.row_idx(i)-scol; BTF_C.val(cnnz) = M.val(i); cnnz++; } }//over all nnz in k if(BTF_B.nnz > 0) { BTF_B.col_ptr(k-scol+1) = bnnz; } BTF_C.col_ptr(k-scol+1) = cnnz; }//over all k #ifdef BASKER_DEBUG_ORDER_BTF printf("After BTF_B nnz: %d \n", bnnz); printf("After BTF_C nnz: %d \n", cnnz); #endif //printf("\n\n"); //printf("DEBUG\n"); //BTF_C.print(); //printf("\n\n"); return 0; }//end break_into_parts