// Examines the error state recorded by each thread after the diagonal-block
// factorization and decides whether the factorization can be retried.
//
// threads_start  per-thread output: set to BASKER_MAX_IDX for a thread with
//                no error, or to the failing block index for a thread whose
//                block ran out of space (BASKER_ERROR_REMALLOC).
//
// Returns BASKER_ERROR as soon as any thread reports SINGULAR or NOMALLOC,
// BASKER_SUCCESS when no thread needed a reallocation, and otherwise the
// number of threads whose block storage was resized.
//
// For a REMALLOC thread the workspace is zeroed, the L and U storage of the
// failing block is resized to thread_array(ti).error_info entries, their
// column pointers are reset, gperm is cleared for the rows of L, and the
// thread's error fields are reset.
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::nfactor_diag_error
(
 INT_1DARRAY threads_start
)
{
  Int nthread_remalloc = 0;

  for(Int ti = 0; ti < num_threads; ti++)
  {
    //Note: jdb we can make this into a switch
    if(thread_array(ti).error_type == BASKER_ERROR_NOERROR)
    {
      threads_start(ti) = BASKER_MAX_IDX;
      continue;
    }//end if NOERROR

    if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR)
    {
      // Int is a template parameter, so cast explicitly to match the format.
      printf("ERROR THREAD: %lld DIAGBLK SINGULAR: %lld\n",
             static_cast<long long>(ti),
             static_cast<long long>(thread_array(ti).error_blk));
      return BASKER_ERROR;
    }//end if SINGULAR

    if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC)
    {
      printf("ERROR THREADS: %lld DIAGBLK NOMALLOC: %lld\n",
             static_cast<long long>(ti),
             static_cast<long long>(thread_array(ti).error_blk));
      return BASKER_ERROR;
    }//end if NOMALLOC

    if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC)
    {
      // Block index 0 is a valid diagonal block, so only negative
      // indices are rejected here.
      BASKER_ASSERT(thread_array(ti).error_blk >= 0,
                    "nfactor_diag_error error_blk");

      const Int err_blk = thread_array(ti).error_blk;
      const Int new_nnz = thread_array(ti).error_info;

      printf("ERROR THREADS: %lld DIAGBLK MALLOC: %lld \n",
             static_cast<long long>(ti),
             static_cast<long long>(err_blk));

      //Clean the workspace
      const Int iws_len =
        thread_array(ti).iws_size*thread_array(ti).iws_mult;
      const Int ews_len =
        thread_array(ti).ews_size*thread_array(ti).ews_mult;

      printf("test: %lld %lld \n",
             static_cast<long long>(iws_len),
             static_cast<long long>(ews_len));

      for(Int i = 0; i < iws_len; i++)
      {
        thread_array(ti).iws(i) = (Int) 0;
      }
      for(Int i = 0; i < ews_len; i++)
      {
        thread_array(ti).ews(i) = (Entry) 0;
      }

      //Resize L
      BASKER_MATRIX &L = LBTF(err_blk);
      REALLOC_INT_1DARRAY(L.row_idx, L.nnz, new_nnz);
      REALLOC_ENTRY_1DARRAY(L.val, L.nnz, new_nnz);
      L.nnz = new_nnz;
      for(Int i = 0; i < L.ncol; i++)
      {
        L.col_ptr(i) = 0;
      }

      // Clear the permutation entries for the rows covered by this block.
      for(Int i = L.srow; i < (L.srow+L.nrow); i++)
      {
        gperm(i) = BASKER_MAX_IDX;
      }

      //Resize U
      BASKER_MATRIX &U = UBTF(err_blk);
      REALLOC_INT_1DARRAY(U.row_idx, U.nnz, new_nnz);
      REALLOC_ENTRY_1DARRAY(U.val, U.nnz, new_nnz);
      U.nnz = new_nnz;
      for(Int i = 0; i < U.ncol; i++)
      {
        U.col_ptr(i) = 0;
      }

      printf("Setting thread start(%lld) %lld \n",
             static_cast<long long>(ti),
             static_cast<long long>(err_blk));
      threads_start(ti) = err_blk;

      //Reset
      thread_array(ti).error_type = BASKER_ERROR_NOERROR;
      thread_array(ti).error_blk  = BASKER_MAX_IDX;
      thread_array(ti).error_info = BASKER_MAX_IDX;

      nthread_remalloc++;
    }//if REMALLOC

  }//for all threads

  if(nthread_remalloc == 0)
  {
    return BASKER_SUCCESS;
  }

  // Non-zero count tells the caller how many threads must be rerun.
  return nthread_remalloc;
}//end nfactor_diag_error
// Serial solve over the BTF (block triangular form) structure.
//
// y  on entry holds the values copied into x; it is zeroed and then used as
//    the second work vector by the triangular solves.
// x  receives the copy of y at the start and is updated in place.
//
// Steps, in order:
//   1. x <- y, y <- 0 for all gn entries.
//   2. For each block b from (btf_nblks - btf_tabs_offset) - 1 down to 0:
//      lower_tri_solve with LBTF(b), upper_tri_solve with UBTF(b), then
//      spmv_BTF with BTF_C to update x.
//   3. If btf_tabs_offset != 0, apply neg_spmv with BTF_B.
//   4. serial_forward_solve(x, y) followed by serial_backward_solve(y, x).
//   5. Copy y into x for indices btf_tabs(btf_tabs_offset) .. gn-1.
//
// Always returns 0.
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::serial_btf_solve
(
 ENTRY_1DARRAY y,
 ENTRY_1DARRAY x
)
{
  for(Int i = 0; i < gn; ++i)
  {
    x(i) = y(i);
    y(i) = (Entry) 0.0;
  }

  //Start in C and go backwards
  //In first level, only due U\L\x->y
  for(Int b = (btf_nblks-btf_tabs_offset)-1; b >= 0; b--)
  {
    #ifdef BASKER_DEBUG_SOLVE_RHS
    // Int is a template parameter, so cast explicitly to match the format.
    printf("\n\n btf b: %lld \n", static_cast<long long>(b));
    #endif

    //---Lower solve
    BASKER_MATRIX &LC = LBTF(b);
    //L\x -> y
    lower_tri_solve(LC,x,y);

    BASKER_MATRIX &UC = UBTF(b);
    //U\x -> y
    upper_tri_solve(UC,x,y);

    #ifdef BASKER_DEBUG_SOLVE_RHS
    printf("Before spmv\n");
    printf("Inner Vector y print\n");
    printVec(y, gn);
    printf("Inner Vector x print\n");
    printVec(x, gn);
    printf("\n");
    #endif

    //-----Update
    //x = BTF_C*y;
    // The guard "if(b > btf_tabs_offset)" was disabled in the original,
    // so the update runs for every block.
    spmv_BTF(b+btf_tabs_offset, BTF_C, y, x);

    #ifdef BASKER_DEBUG_SOLVE_RHS
    printf("After spmv\n");
    printf("Inner Vector y print\n");
    printVec(y, gn);
    printf("Inner Vector x print\n");
    printVec(x, gn);
    #endif
  }

  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("Done, BTF-C Solve \n");
  printf("\n x \n");
  printVec(x, gn);
  printf("\n y \n");
  printVec(y, gn);
  printf("\n\n");
  #endif

  //Update B
  //BTF_B*y -> x
  if(btf_tabs_offset != 0)
  {
    neg_spmv(BTF_B,y,x);
  }

  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("Done, SPMV BTF_B UPDATE \n");
  printf("\n x \n");
  printVec(x, gn);
  printf("\n y \n");
  printVec(y, gn);
  printf("\n\n");
  #endif

  //now do the forward backward solve
  //L\x ->y
  serial_forward_solve(x,y);

  //U\y->x
  serial_backward_solve(y,x);

  //copy lower part down
  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("copying lower starting: %lld \n",
         static_cast<long long>(btf_tabs(btf_tabs_offset)));
  #endif
  for(Int i = btf_tabs(btf_tabs_offset); i < gn; ++i)
  {
    x(i) = y(i);
  }

  //Comeback and fix
  return 0;
}//end serial_btf_solve