BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::neg_spmv_perm
(
 BASKER_MATRIX &M,
 ENTRY_1DARRAY x,
 ENTRY_1DARRAY y
)
{
  // Permuted "y -= M*x" update for one matrix block.
  //
  // For every stored entry nz in every local column col of M this performs
  //   y(gperm(M.row_idx(nz) + M.srow)) -= M.val(nz) * x(col + M.scol)
  // i.e. the row index is shifted by the block's starting row and then
  // mapped through the global permutation gperm before y is updated.
  // x is only read; y is modified in place.  Always returns 0.

  //Add checks
  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("SPMV. scol: %d ncol: %d \n", M.scol, M.ncol);
  #endif

  const Int col_offset = M.scol;
  const Int row_offset = M.srow;

  for(Int col = 0; col < M.ncol; ++col)
  {
    // Entries of local column col live in [col_ptr(col), col_ptr(col+1)).
    const Int nz_end = M.col_ptr(col+1);
    for(Int nz = M.col_ptr(col); nz < nz_end; ++nz)
    {
      // Local row -> global row -> permuted position in y.
      const Int global_row = M.row_idx(nz) + row_offset;
      // x is re-read on every entry on purpose: x and y are passed by
      // value as array handles and nothing here rules out aliasing.
      y(gperm(global_row)) -= M.val(nz)*x(col+col_offset);
    }
  }

  return 0;
}//neg_spmv
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::nfactor_domain_error
(
 INT_1DARRAY threads_start
)
{
  // Inspects the per-thread error state left in thread_array after the
  // domain-block numeric factorization and prepares a retry where possible.
  //
  // Per thread ti (visited in increasing order):
  //   NOERROR  : threads_start(ti) is set to BASKER_MAX_IDX.
  //   SINGULAR : a message is printed and BASKER_ERROR is returned at once.
  //   NOMALLOC : a message is printed and BASKER_ERROR is returned at once.
  //   REMALLOC : row_idx/val of LL(error_blk)(0) and LU(error_blk)(0) are
  //              reallocated to error_info entries, L's workspace and the
  //              matching gperm range are cleared when L.w_fill is set,
  //              threads_start(ti) is set to error_blk and the thread's
  //              error fields are reset.
  //
  // Returns BASKER_ERROR on the first SINGULAR/NOMALLOC thread,
  // BASKER_SUCCESS when no thread asked for a reallocation, and otherwise
  // the number of threads whose blocks were resized.
  //
  // NOTE(review): an early BASKER_ERROR return leaves threads_start
  // untouched for the threads that were not visited yet.
  // NOTE(review): the printf calls use %d; this presumably assumes the
  // printed fields are int-sized -- confirm for wider Int types.

  Int nthread_remalloc = 0;

  for(Int ti = 0; ti < num_threads; ti++)
  {
    //Note: jdb we can make this into a switch
    if(thread_array(ti).error_type == BASKER_ERROR_NOERROR)
    {
      threads_start(ti) = BASKER_MAX_IDX;
      continue;
    }//end if NOERROR

    if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR)
    {
      printf("ERROR THREAD: %d DOMBLK SINGULAR: %d\n", ti, thread_array(ti).error_blk);
      return BASKER_ERROR;
    }//end if SINGULAR

    if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC)
    {
      printf("ERROR THREADS: %d DOMBLK NOMALLOC: %d\n", ti, thread_array(ti).error_blk);
      return BASKER_ERROR;
    }//end if NOMALLOC

    if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC)
    {
      BASKER_ASSERT(thread_array(ti).error_blk > 0, "nfactor_dom_error error_blk");
      printf("ERROR THREADS: %d DOMBLK MALLOC: %d \n", ti, thread_array(ti).error_blk);

      //Resize L: grow the index/value storage to the requested size
      BASKER_MATRIX &L = LL(thread_array(ti).error_blk)(0);
      REALLOC_INT_1DARRAY(L.row_idx, L.nnz, thread_array(ti).error_info);
      REALLOC_ENTRY_1DARRAY(L.val, L.nnz, thread_array(ti).error_info);
      L.nnz = thread_array(ti).error_info;

      //Clean up workspace so the block can be factored again
      if(L.w_fill == BASKER_TRUE)
      {
        //Clear integer and entry workspace
        for(Int i = 0; i < L.iws_size*L.iws_mult; ++i)
        {
          L.iws(i) = (Int) 0;
        }
        for(Int i = 0; i < L.ews_size*L.ews_mult; ++i)
        {
          L.ews(i) = (Entry) 0;
        }
        //Clear perm for the rows covered by this block
        for(Int i = L.srow; i < L.srow+L.nrow; ++i)
        {
          gperm(i) = BASKER_MAX_IDX;
        }
      }

      //Resize U to the same requested size
      BASKER_MATRIX &U = LU(thread_array(ti).error_blk)(0);
      REALLOC_INT_1DARRAY(U.row_idx, U.nnz, thread_array(ti).error_info);
      REALLOC_ENTRY_1DARRAY(U.val, U.nnz, thread_array(ti).error_info);
      U.nnz = thread_array(ti).error_info;

      //Record which block this thread has to restart from
      threads_start(ti) = thread_array(ti).error_blk;

      //Reset the thread's error state
      thread_array(ti).error_type = BASKER_ERROR_NOERROR;
      thread_array(ti).error_blk = BASKER_MAX_IDX;
      thread_array(ti).error_info = BASKER_MAX_IDX;

      nthread_remalloc++;
    }//if REMALLOC
  }//for all threads

  if(nthread_remalloc == 0)
  {
    return BASKER_SUCCESS;
  }

  // At least one thread was resized: report how many.
  return nthread_remalloc;
}//end nfactor_domain_error
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::nfactor_diag_error
(
 INT_1DARRAY threads_start
)
{
  // Inspects the per-thread error state left in thread_array after the
  // diagonal-block numeric factorization and prepares a retry where
  // possible.
  //
  // Per thread ti (visited in increasing order):
  //   NOERROR  : threads_start(ti) is set to BASKER_MAX_IDX.
  //   SINGULAR : a message is printed and BASKER_ERROR is returned at once.
  //   NOMALLOC : a message is printed and BASKER_ERROR is returned at once.
  //   REMALLOC : the thread's iws/ews workspace is zeroed, row_idx/val of
  //              LBTF(error_blk) and UBTF(error_blk) are reallocated to
  //              error_info entries, their col_ptr entries 0..ncol-1 are
  //              zeroed, gperm is cleared over L's row range,
  //              threads_start(ti) is set to error_blk and the thread's
  //              error fields are reset.
  //
  // Returns BASKER_ERROR on the first SINGULAR/NOMALLOC thread,
  // BASKER_SUCCESS when no thread asked for a reallocation, and otherwise
  // the number of threads whose blocks were resized.
  //
  // NOTE(review): an early BASKER_ERROR return leaves threads_start
  // untouched for the threads that were not visited yet.
  // NOTE(review): the printf calls use %d; this presumably assumes the
  // printed fields are int-sized -- confirm for wider Int types.

  Int nthread_remalloc = 0;

  for(Int ti = 0; ti < num_threads; ti++)
  {
    //Note: jdb we can make this into a switch
    if(thread_array(ti).error_type == BASKER_ERROR_NOERROR)
    {
      threads_start(ti) = BASKER_MAX_IDX;
      continue;
    }//end if NOERROR

    if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR)
    {
      printf("ERROR THREAD: %d DIAGBLK SINGULAR: %d\n", ti, thread_array(ti).error_blk);
      return BASKER_ERROR;
    }//end if SINGULAR

    if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC)
    {
      printf("ERROR THREADS: %d DIAGBLK NOMALLOC: %d\n", ti, thread_array(ti).error_blk);
      return BASKER_ERROR;
    }//end if NOMALLOC

    if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC)
    {
      BASKER_ASSERT(thread_array(ti).error_blk > 0, "nfactor_diag_error error_blk");
      printf("ERROR THREADS: %d DIAGBLK MALLOC: %d \n", ti, thread_array(ti).error_blk);

      //Clean the per-thread workspace
      printf("test: %d %d \n", thread_array(ti).iws_size*thread_array(ti).iws_mult, thread_array(ti).ews_size*thread_array(ti).ews_mult);
      for(Int i = 0; i < thread_array(ti).iws_size*thread_array(ti).iws_mult; i++)
      {
        thread_array(ti).iws(i) = (Int) 0;
      }
      for(Int i = 0; i < thread_array(ti).ews_size*thread_array(ti).ews_mult; i++)
      {
        thread_array(ti).ews(i) = (Entry) 0;
      }

      //Resize L: grow the index/value storage to the requested size
      BASKER_MATRIX &L = LBTF(thread_array(ti).error_blk);
      REALLOC_INT_1DARRAY(L.row_idx, L.nnz, thread_array(ti).error_info);
      REALLOC_ENTRY_1DARRAY(L.val, L.nnz, thread_array(ti).error_info);
      L.nnz = thread_array(ti).error_info;
      // NOTE(review): only entries 0..ncol-1 are reset; col_ptr(ncol) is
      // left as it was -- confirm the refactorization rewrites it.
      for(Int i = 0; i < L.ncol; i++)
      {
        L.col_ptr(i) = 0;
      }
      //Clear perm for the rows covered by this block
      for(Int i = L.srow; i < (L.srow+L.nrow); i++)
      {
        gperm(i) = BASKER_MAX_IDX;
      }

      //Resize U to the same requested size
      BASKER_MATRIX &U = UBTF(thread_array(ti).error_blk);
      REALLOC_INT_1DARRAY(U.row_idx, U.nnz, thread_array(ti).error_info);
      REALLOC_ENTRY_1DARRAY(U.val, U.nnz, thread_array(ti).error_info);
      U.nnz = thread_array(ti).error_info;
      for(Int i = 0; i < U.ncol; i++)
      {
        U.col_ptr(i) = 0;
      }

      //Record which block this thread has to restart from
      printf("Setting thread start(%d) %d \n", ti, thread_array(ti).error_blk);
      threads_start(ti) = thread_array(ti).error_blk;

      //Reset the thread's error state
      thread_array(ti).error_type = BASKER_ERROR_NOERROR;
      thread_array(ti).error_blk = BASKER_MAX_IDX;
      thread_array(ti).error_info = BASKER_MAX_IDX;

      nthread_remalloc++;
    }//if REMALLOC
  }//for all threads

  if(nthread_remalloc == 0)
  {
    return BASKER_SUCCESS;
  }

  // At least one thread was resized: report how many.
  return nthread_remalloc;
}//end nfactor_diag_error
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::spmv_BTF
(
 Int tab,
 BASKER_MATRIX &M,
 ENTRY_1DARRAY x,
 ENTRY_1DARRAY y
)
{
  // Off-diagonal update for BTF block `tab`.
  //
  // Walks the local columns [btf_tabs(tab) - M.scol, btf_tabs(tab+1) - M.scol)
  // of M.  For each stored entry the row index is mapped through gperm;
  // entries whose mapped row is above the row limit are skipped, all others
  // apply
  //   y(mapped_row + M.srow) -= M.val(nz) * x(col + M.scol).
  // The row limit is btf_tabs(tab) when tab > 0 and M.srow - 1 otherwise.
  // Always returns 0.

  const Int first_col = btf_tabs(tab) - M.scol;
  const Int last_col  = btf_tabs(tab+1) - M.scol;
  const Int row_shift = M.srow;

  Int row_limit = row_shift-1;
  if(tab > 0)
  {
    row_limit = btf_tabs(tab);
  }

  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("BTF_UPDATE, TAB: %d [%d %d] [%d %d] \n", tab, row_shift, row_limit, first_col, last_col);
  #endif

  //loop over each column of the block
  for(Int col = first_col; col < last_col; ++col)
  {
    const Int nz_end = M.col_ptr(col+1);
    for(Int nz = M.col_ptr(col); nz < nz_end; ++nz)
    {
      const Int mapped_row = gperm(M.row_idx(nz));

      // Entries past the limit are skipped one by one rather than ending
      // the column early (the original notes a break fails for 1 column).
      if(mapped_row <= row_limit)
      {
        #ifdef BASKER_DEBUG_SOLVE_RHS
        printf("BTF_UPDATE-val, j: %d y: %f x: %f, val: %f \n", mapped_row, y[mapped_row], x[col+M.scol], M.val[nz]);
        #endif

        //for now just do a single function with zero
        y(mapped_row+row_shift) -= M.val(nz)*x(col+M.scol);
      }
    }//over all nnz in column
  }

  return 0;
}//end spmv_BTF();
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::lower_tri_solve
(
 BASKER_MATRIX &M,
 ENTRY_1DARRAY x,
 ENTRY_1DARRAY y
)
{
  // Column-oriented forward substitution with block M.
  //
  // For each local column the first stored entry is used as the pivot:
  //   y(col + M.scol) = x(col + M.scol) / pivot
  // and every remaining entry of the column updates the right-hand side
  //   x(gperm(row_idx + M.scol)) -= val * y(col + M.scol).
  // Note that x is modified in place as well as y.
  //
  // Returns 0 on success, or -1 (after printing the pivot) as soon as a
  // pivot equal to 0.0 is found.
  //
  // NOTE(review): the row shift is read from M.scol, not M.srow; the two
  // presumably coincide for the blocks this is called on -- confirm.
  const Int col_shift = M.scol;
  const Int row_shift = M.scol;

  for(Int col = 0; col < M.ncol; ++col)
  {
    // Position of the column's first stored entry (the pivot).
    const Int diag_nz = M.col_ptr(col);

    //Test if zero pivot value
    #ifdef BASKER_DEBUG_SOLVE_RHS
    BASKER_ASSERT(M.val[diag_nz]!=0.0, "LOWER PIVOT 0");
    #endif

    if(M.val[diag_nz] == 0.0)
    {
      printf("Lower Pivot: %d %f \n", M.row_idx[diag_nz], M.val[diag_nz]);
      return -1;
    }

    //Replace with Entry divide in future
    y(col+row_shift) = x(col+col_shift) / M.val(diag_nz);

    // Propagate the freshly solved value to the rows below the pivot.
    const Int nz_end = M.col_ptr(col+1);
    for(Int nz = diag_nz+1; nz < nz_end; ++nz)
    {
      const Int target = gperm(M.row_idx(nz)+row_shift);

      #ifdef BASKER_DEBUG_SOLVE_RHS
      BASKER_ASSERT(target != BASKER_MAX_IDX,"Using nonperm\n");
      #endif

      // y is re-read each time on purpose; x and y are array handles and
      // nothing here rules out aliasing.
      x(target) -= M.val(nz)*y(col+col_shift);
    }//over all nnz in a column
  }//over each column

  return 0;
}//end lower_tri_solve
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::test_solve()
{
  // Self-check of the solve path against a known solution of all ones.
  //
  // Steps:
  //   1. x_known (length gn) is filled with 1.0.
  //   2. When Options.btf is set, y (length gm) accumulates the spmv
  //      products of BTF_A / BTF_B / BTF_C with x_known, depending on
  //      btf_tabs_offset and btf_nblks.
  //   3. y is permuted through gperm (x is used as scratch and re-zeroed).
  //   4. serial_solve or serial_btf_solve is called with (y, x).
  //   5. The mean absolute difference between x_known and x is printed and
  //      "TEST PASSED" is printed when it is below 1e-2.
  //
  // Always returns 0; the outcome is only reported through printf.
  //
  // NOTE(review): with Options.btf == BASKER_FALSE no spmv is performed
  // (the call is commented out), so y stays zero before the solve.
  // NOTE(review): the three arrays are allocated here and not explicitly
  // freed; presumably the array type releases its storage -- confirm.

  ENTRY_1DARRAY x_known;
  ENTRY_1DARRAY x;
  ENTRY_1DARRAY y;

  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("test_solve called \n");
  printf("Global pivot permuation\n");
  printVec(gperm, gn);
  printf("\n");
  printf("Global pivot permutation inverse\n");
  printVec(gpermi, gn);
  printf("\n");
  #endif

  BASKER_ASSERT(gn > 0, "solve testsolve gn");
  MALLOC_ENTRY_1DARRAY(x_known, gn);
  init_value(x_known, gn , (Entry)1.0);

  //temp
  for(Int i = 0; i < gn; i++)
  {
    x_known(i) = (Entry) 1.0;
  }
  //JDB: used for other test
  //permute(x_known, order_csym_array, gn);

  MALLOC_ENTRY_1DARRAY(x, gn);
  init_value(x, gn, (Entry) 0.0);
  BASKER_ASSERT(gm > 0, "solve testsolve gm");
  MALLOC_ENTRY_1DARRAY(y, gm);
  init_value(y, gm, (Entry) 0.0);

  if(btf_nblks > 0)
  {
    sort_matrix(BTF_C);
  }

  // Build the right-hand side y from the known solution.
  if(Options.btf == BASKER_TRUE)
  {
    if(btf_tabs_offset != 0)
    {
      spmv(BTF_A, x_known,y);
      if(btf_nblks> 1)
      {
        spmv(BTF_B, x_known, y);
      }
    }
    if(btf_nblks > 1)
    {
      spmv(BTF_C, x_known, y);
    }
  }
  else
  {
    //spmv(BTF_A, x_known,y);
  }

  // Apply the pivot permutation: x(gperm(i)) = y(i).
  // A target index outside [0, gn) is reported and the write is skipped,
  // because x only has gn entries.
  for(Int i = 0; i < gn; i++)
  {
    bool in_range = true;
    if(gperm(i) < 0)
    {
      printf("error: %d %d \n", i, gperm(i));
      in_range = false;
    }
    if(gperm(i) >= gn)
    {
      printf("serror: %d %d \n", i, gperm(i));
      in_range = false;
    }
    if(in_range)
    {
      x(gperm(i)) = y(i);
    }
  }
  // Move the permuted values back into y and clear x for the solve.
  for(Int i = 0; i < gn; i++)
  {
    y(i) = x(i);
    x(i) = 0;
  }

  printf("RHS:\n");
  printf("i: %d rhs: %g \n", 0, y(0));
  // y has gm entries; only sample positions 10 and 24 when they exist.
  if(gm > 24)
  {
    printf("i: %d rhs: %g \n", 10, y(10));
    printf("i: %d rhs: %g \n", 24, y(24));
  }
  printf("\n");

  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("\n\n");
  printf("\n\n");
  printf("RHS: \n");
  for(Int i =0; i < gm; i++)
  {
    printf("%d %f,\n ", i, y(i));
  }
  printf("\n\n");
  #endif

  if(Options.btf == BASKER_FALSE)
  {
    printf("before serial solve\n");
    if(btf_tabs_offset != 0)
    {
      serial_solve(y,x);
    }
  }
  else
  {
    //A\y -> y
    printf("before btf serial solve\n");
    serial_btf_solve(y,x);
  }

  // Mean absolute error.  Absolute values are accumulated so that errors of
  // opposite sign cannot cancel each other and hide a wrong solution.
  Entry diff =0.0;
  for(Int i = 0; i < gn; i++)
  {
    const Entry err = x_known(i) - x(i);
    diff += (err < (Entry) 0.0) ? -err : err;
  }
  diff = diff/(Entry) gn;

  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("\n\n");
  printf("Solve Compare: \n");
  for(Int i = 0; i < gn; i++)
  {
    printf("%d %f %f \n", i, x_known(i), x(i));
  }
  printf("\n\n");
  #endif

  printf("\n Test Points \n");
  printf("i: %d x: %f %f \n", 0, x_known(0), x(0));
  if(gn > 24)
  {
    printf("i: %d x: %f %f \n", 10, x_known(10), x(10));
    printf("i: %d x: %f %f \n", 24, x_known(24), x(24));
  }
  printf("\n");
  printf("TEST_SOLVE: ||x-x||/||x| = %e", diff);
  printf("\n");

  // diff is non-negative (or NaN, which fails this comparison).
  if(diff < 1e-2)
  {
    printf("TEST PASSED \n");
  }

  return 0;
}//end test_solve