// Subtracting sparse matrix-vector update.
//
// For every stored entry (j,k) of M this performs
//     y(j + M.srow) -= M.val * x(k + M.scol)
// M is traversed column by column through col_ptr / row_idx / val; the
// indices stored in M are shifted by M.srow (rows) and M.scol (columns)
// before they are used to address y and x.
//
// No shape or bounds checks are made.  Always returns 0.
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::neg_spmv
(
 BASKER_MATRIX &M,
 ENTRY_1DARRAY x,
 ENTRY_1DARRAY y
)
{
  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("SPMV. scol: %d ncol: %d \n", M.scol, M.ncol);
  #endif

  const Int row_shift = M.srow;
  const Int col_shift = M.scol;

  for(Int col = 0; col < M.ncol; ++col)
  {
    const Int nz_end = M.col_ptr(col+1);
    for(Int nz = M.col_ptr(col); nz < nz_end; ++nz)
    {
      // x is re-read on every entry on purpose: x and y may be the
      // same array, so the value must not be cached across updates.
      const Int row = M.row_idx(nz);
      y(row + row_shift) -= M.val(nz) * x(col + col_shift);
    }
  }

  return 0;
}//neg_spmv
// Accumulating sparse matrix-vector update.
//
// For every stored entry (j,k) of M this performs
//     y(j + M.srow) += M.val * x(k + M.scol)
// The indices stored in M are shifted by M.srow / M.scol before being
// used to address y and x.
//
// No shape or bounds checks are made.  Always returns 0.
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::spmv
(
 BASKER_MATRIX &M,
 ENTRY_1DARRAY x,
 ENTRY_1DARRAY y
)
{
  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("SPMV. scol: %d ncol: %d nnz: %d \n", M.scol, M.ncol, M.nnz);
  M.info();
  #endif

  const Int row_shift = M.srow;
  const Int col_shift = M.scol;

  for(Int col = 0; col < M.ncol; ++col)
  {
    const Int nz_end = M.col_ptr(col+1);
    for(Int nz = M.col_ptr(col); nz < nz_end; ++nz)
    {
      // x is re-read for each entry because x and y may alias.
      const Int row = M.row_idx(nz);
      y(row + row_shift) += M.val(nz) * x(col + col_shift);
    }
  }

  return 0;
}//spmv
// Computes a column ordering for M via my_amesos_csymamd and stores its
// inverse in p.
//
//   M       : matrix whose col_ptr / row_idx arrays are handed to the
//             ordering routine.
//   p       : output, length >= M.ncol.  On return p(order(i)) = i, where
//             order is the vector produced by my_amesos_csymamd.
//   cmember : passed through unchanged to my_amesos_csymamd
//             (presumably the constraint-set membership of each column --
//             confirm against the wrapper).
//
// Side effect: sets amd_flag to BASKER_TRUE.
// When Options.incomplete is BASKER_TRUE the ordering routine is skipped
// and p is set to the identity.
BASKER_FINLINE
void Basker<Int, Entry,Exe_Space>::csymamd_order
(
 BASKER_MATRIX &M,
 INT_1DARRAY p,
 INT_1DARRAY cmember
)
{
  amd_flag = BASKER_TRUE;

  //Debug,
  #ifdef BASKER_DEBUG_ORDER_AMD
  printf("cmember: \n");
  for(Int i = 0; i < M.ncol; ++i)
  {
    printf("(%d, %d), ", i, cmember(i));
  }
  printf("\n");
  #endif

  //If doing iluk, we will not want this ordering: use the natural one.
  if(Options.incomplete == BASKER_TRUE)
  {
    for(Int i = 0; i < M.ncol; i++)
    {
      p(i) = i;
    }
    return;
  }

  INT_1DARRAY temp_p;
  BASKER_ASSERT(M.ncol > 0, "AMD perm not long enough");
  MALLOC_INT_1DARRAY(temp_p, M.ncol+1);
  init_value(temp_p, M.ncol+1, (Int) 0);

  my_amesos_csymamd(M.ncol, &(M.col_ptr(0)), &(M.row_idx(0)),
                    &(temp_p(0)), &(cmember(0)));

  //Store the inverse of the returned ordering
  for(Int i = 0; i < M.ncol; ++i)
  {
    p(temp_p(i)) = i;
  }

  //Release the scratch ordering, as the other routines in this file do
  //for their temporaries.
  FREE_INT_1DARRAY(temp_p);
}//end csymamd()
// Backward substitution with the block M.
//
// Columns are processed from the last to the first.  For column k
// (1-based in the loop) the pivot is taken to be the LAST stored entry
// of the column, i.e. M.val(M.col_ptr(k)-1).  The solution entry is
// written to y, and the remaining entries of the column are used to
// update x in place.
//
// Returns 0 on success, -1 (after printing a message) when a pivot
// compares equal to zero.
//
// NOTE(review): the solution is written to y(k + M.srow - 1) but read
// back as y(k + M.scol - 1); these coincide only when M.srow == M.scol.
// Confirm that callers only pass such blocks.
BASKER_INLINE
int Basker<Int, Entry, Exe_Space>::upper_tri_solve
(
 BASKER_MATRIX &M,
 ENTRY_1DARRAY x,
 ENTRY_1DARRAY y
)
{
  const Int col_shift = M.scol;
  const Int row_shift = M.srow;

  Int k = M.ncol;
  while(k >= 1)
  {
    // Position of the pivot: last entry of (0-based) column k-1.
    const Int piv = M.col_ptr(k) - 1;

    #ifdef BASKER_DEBUG_SOLVE_RHS
    BASKER_ASSERT(M.val(piv)!=0.0,"UpperPivot\n");
    #endif

    if(M.val(piv)==0)
    {
      printf("Upper pivot: %d %f \n",
             M.row_idx(piv),
             M.val(piv));
      return -1;
    }

    y(k+row_shift-1) = x(k+col_shift-1) / M.val(piv);

    // Walk the rest of the column upwards, eliminating the new
    // solution entry from x.
    const Int col_begin = M.col_ptr(k-1);
    for(Int nz = piv - 1; nz >= col_begin; --nz)
    {
      const Int row = M.row_idx(nz);
      x(row+row_shift) -= M.val(nz) * y(k+col_shift-1);
    }

    k--;
  }//end over all columns

  return 0;
}//end upper_tri_solve
// Thin wrapper around amesos_amd.
//
// Hands M.ncol together with M's col_ptr and row_idx arrays to
// amesos_amd, which writes its result into p.  NULL is passed for the
// fifth argument and a local scratch array of AMD_INFO doubles for the
// sixth; the contents of that array are discarded.  The return value of
// amesos_amd is ignored.
BASKER_FINLINE
void Basker<Int,Entry,Exe_Space>::amd_order
(
 BASKER_MATRIX &M,
 INT_1DARRAY p
)
{
  double info_scratch[AMD_INFO];

  amesos_amd(M.ncol,
             &(M.col_ptr(0)),
             &(M.row_idx(0)),
             &(p(0)),
             NULL,
             info_scratch);
}//end amd_order()
// Runs BaskerSSWrapper<Int>::my_strong_component on the pattern of M.
//
//   M     : input matrix; only ncol, col_ptr and row_idx are read here.
//   nblks : output, filled in by my_strong_component.
//   perm  : output, allocated here with M.ncol entries and filled in by
//           my_strong_component.
//   CC    : output, allocated here with M.ncol+1 entries and filled in
//           by my_strong_component (presumably the block boundaries --
//           confirm against the wrapper).
//
// An identity permutation of length M.ncol is supplied as the wrapper's
// input permutation.  Always returns 0.
BASKER_INLINE
int Basker<Int, Entry, Exe_Space>::strong_component
(
 BASKER_MATRIX &M,
 Int &nblks,
 INT_1DARRAY &perm,
 INT_1DARRAY &CC
)
{
  INT_1DARRAY perm_in;
  MALLOC_INT_1DARRAY(perm_in, M.ncol);
  MALLOC_INT_1DARRAY(perm, M.ncol);
  //JDB:Note, this needs to be changed just fixed for int/long
  MALLOC_INT_1DARRAY(CC, M.ncol+1);

  //Identity input permutation
  for(Int i = 0; i < M.ncol; i++)
  {
    perm_in(i) = i;
  }

  BaskerSSWrapper<Int>::my_strong_component(M.ncol,
                                            &(M.col_ptr(0)),
                                            &(M.row_idx(0)),
                                            nblks,
                                            &(perm(0)),
                                            &(perm_in(0)),
                                            &(CC(0)));

  //perm_in is only needed for the call above; release it like the
  //other scratch arrays in this file.
  FREE_INT_1DARRAY(perm_in);

  #ifdef BASKER_DEBUG_ORDER_BTF
  FILE *fp;
  fp = fopen("btf.txt", "w");
  //Do not write through a null handle if the file cannot be opened
  if(fp != NULL)
  {
    for(Int i = 0; i < M.ncol; i++)
    {
      fprintf(fp, "%d \n", perm(i));
    }
    fclose(fp);
  }
  #endif

  return 0;
}//end strong_component <long int>
// Computes a column ordering for M via my_amesos_csymamd and stores its
// inverse in p.
//
//   M       : matrix whose col_ptr / row_idx arrays are handed to the
//             ordering routine.
//   p       : output, length >= M.ncol.  On return p(order(i)) = i, where
//             order is the vector produced by my_amesos_csymamd.
//   cmember : passed through unchanged to my_amesos_csymamd
//             (presumably the constraint-set membership of each column --
//             confirm against the wrapper).
//
// Side effect: sets amd_flag to BASKER_TRUE.
BASKER_FINLINE
void Basker<Int, Entry,Exe_Space>::csymamd_order
(
 BASKER_MATRIX &M,
 INT_1DARRAY p,
 INT_1DARRAY cmember
)
{
  amd_flag = BASKER_TRUE;

  //Debug,
  #ifdef BASKER_DEBUG_ORDER_AMD
  printf("cmember: \n");
  for(Int i = 0; i < M.ncol; ++i)
  {
    printf("(%d, %d), ", i, cmember(i));
  }
  printf("\n");
  #endif

  INT_1DARRAY temp_p;
  BASKER_ASSERT(M.ncol > 0, "AMD perm not long enough");
  MALLOC_INT_1DARRAY(temp_p, M.ncol+1);
  init_value(temp_p, M.ncol+1, (Int) 0);

  my_amesos_csymamd(M.ncol, &(M.col_ptr(0)), &(M.row_idx(0)),
                    &(temp_p(0)), &(cmember(0)));

  //Store the inverse of the returned ordering
  for(Int i = 0; i < M.ncol; ++i)
  {
    p(temp_p(i)) = i;
  }

  //Release the scratch ordering, as the other routines in this file do
  //for their temporaries.
  FREE_INT_1DARRAY(temp_p);
}//end csymamd()
// Subtracting update restricted to the columns of one BTF block.
//
//   tab : index into btf_tabs selecting the block; the columns visited
//         are [btf_tabs(tab) - M.scol, btf_tabs(tab+1) - M.scol).
//   M   : matrix holding the entries.
//   x,y : for every visited entry whose permuted row
//         j = gperm(M.row_idx(i)) does not exceed the row limit,
//         y(j + M.srow) -= M.val(i) * x(k + M.scol).
//
// The row limit is btf_tabs(tab) when tab > 0 and M.srow - 1 otherwise.
// Entries beyond the limit are skipped (not treated as end-of-column).
// Always returns 0.
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::spmv_BTF
(
 Int tab,
 BASKER_MATRIX &M,
 ENTRY_1DARRAY x,
 ENTRY_1DARRAY y
)
{
  const Int row_shift = M.srow;
  const Int first_col = btf_tabs(tab)   - M.scol;
  const Int end_col   = btf_tabs(tab+1) - M.scol;
  const Int row_limit = (tab > 0) ? btf_tabs(tab) : (row_shift - 1);

  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("BTF_UPDATE, TAB: %d [%d %d] [%d %d] \n",
         tab, row_shift, row_limit, first_col, end_col);
  #endif

  for(Int col = first_col; col < end_col; ++col)
  {
    for(Int nz = M.col_ptr(col); nz < M.col_ptr(col+1); ++nz)
    {
      const Int j = gperm(M.row_idx(nz));

      // A plain skip, not a break: later entries of the column may
      // still fall inside the limit.
      if(j <= row_limit)
      {
        #ifdef BASKER_DEBUG_SOLVE_RHS
        printf("BTF_UPDATE-val, j: %d y: %f x: %f, val: %f \n",
               j, y[j], x[col+M.scol], M.val[nz]);
        #endif

        y(j+row_shift) -= M.val(nz)*x(col+M.scol);
      }
    }//over all nnz in the column
  }

  return 0;
}//end spmv_BTF();
// Forward substitution with the block M.
//
// Columns are processed first to last.  The pivot of column k is taken
// to be the FIRST stored entry of the column, M.val(M.col_ptr(k)).  The
// solution entry is written to y(k + M.scol); the remaining entries of
// the column update x at the row given by gperm(row_idx + M.scol).
//
// Returns 0 on success, -1 (after printing a message) when a pivot
// compares equal to zero.
//
// NOTE(review): both the row and the column offset are taken from
// M.scol (M.srow is never read).  Confirm this is intended.
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::lower_tri_solve
(
 BASKER_MATRIX &M,
 ENTRY_1DARRAY x,
 ENTRY_1DARRAY y
)
{
  const Int col_shift = M.scol;
  const Int row_shift = M.scol;

  for(Int col = 0; col < M.ncol; ++col)
  {
    const Int piv = M.col_ptr(col);

    //Test if zero pivot value
    #ifdef BASKER_DEBUG_SOLVE_RHS
    BASKER_ASSERT(M.val(piv)!=0.0, "LOWER PIVOT 0");
    #endif

    if(M.val(piv) == 0.0)
    {
      printf("Lower Pivot: %d %f \n",
             M.row_idx(piv),
             M.val(piv));
      return -1;
    }

    y(col+row_shift) = x(col+col_shift) / M.val(piv);

    // Eliminate the new solution entry from the remaining rows.
    const Int col_end = M.col_ptr(col+1);
    for(Int nz = piv + 1; nz < col_end; ++nz)
    {
      const Int target = gperm(M.row_idx(nz)+row_shift);

      #ifdef BASKER_DEBUG_SOLVE_RHS
      BASKER_ASSERT(target != BASKER_MAX_IDX,"Using nonperm\n");
      #endif

      x(target) -= M.val(nz)*y(col+col_shift);
    }//over all nnz in a column
  }//over each column

  return 0;
}//end lower_tri_solve
// Permutes the columns of M in place.
//
//   M   : compressed-column matrix; col_ptr, row_idx and val are
//         overwritten with the permuted matrix.
//   col : column j of the input becomes column col(j) of the output.
//
// The permuted matrix is assembled in three scratch arrays and copied
// back at the end.  Nothing is done when M has no columns or no
// nonzeros.  Always returns 0.
BASKER_INLINE
int Basker<Int, Entry, Exe_Space>::permute_col
(
 BASKER_MATRIX &M,
 INT_1DARRAY col
)
{
  const bool nothing_to_do = (M.ncol == 0) || (M.nnz == 0);
  if(nothing_to_do)
  {
    return 0;
  }

  Int n   = M.ncol;
  Int nnz = M.nnz;

  // Scratch storage for the permuted matrix.
  INT_1DARRAY   new_ptr;
  INT_1DARRAY   new_idx;
  ENTRY_1DARRAY new_val;
  MALLOC_INT_1DARRAY(new_ptr, n+1);
  init_value(new_ptr, n+1, (Int)0);
  MALLOC_INT_1DARRAY(new_idx, nnz);
  init_value(new_idx, nnz, (Int)0);
  MALLOC_ENTRY_1DARRAY(new_val, nnz);
  init_value(new_val, nnz, (Entry)0.0);

  // Length of every output column, stored one slot to the right ...
  for(Int src = 0; src < n; src++)
  {
    const Int dst = col(src);
    new_ptr(dst+1) = M.col_ptr(src+1) - M.col_ptr(src);
  }

  // ... so that a running sum turns lengths into start offsets.
  new_ptr(0) = 0;
  for(Int c = 1; c <= n; c++)
  {
    new_ptr(c) = new_ptr(c) + new_ptr(c-1);
  }

  // Move each input column to its destination slot.
  for(Int src = 0; src < n; src++)
  {
    Int out = new_ptr(col(src));
    for(Int in = M.col_ptr(src); in < M.col_ptr(src+1); in++)
    {
      new_idx(out) = M.row_idx(in);
      new_val(out) = M.val(in);
      out++;
    }
  }

  // Copy the permuted matrix back into M.
  for(Int c = 0; c <= n; c++)
  {
    M.col_ptr(c) = new_ptr(c);
  }
  for(Int e = 0; e < nnz; e++)
  {
    M.row_idx(e) = new_idx(e);
    M.val(e)     = new_val(e);
  }

  FREE_INT_1DARRAY(new_ptr);
  FREE_INT_1DARRAY(new_idx);
  FREE_ENTRY_1DARRAY(new_val);

  return 0;
}//end permute_col(int)
// Fills this block (rows [srow, srow+nrow), columns [scol, scol+ncol))
// with the matching entries of M.
//
//   M     : source matrix.
//   alloc : when BASKER_TRUE, row_idx and val are (re)allocated with
//           nnz entries before being filled.
//   kid   : only used in the diagnostic message printed when a row
//           index below srow is encountered.
//
// Preconditions visible from the code (presumably established by an
// earlier pre-scan -- confirm against the caller):
//   * nnz holds the number of entries this block will receive;
//   * col_ptr(c) holds the position in M of the first entry of local
//     column c that belongs to this block, or BASKER_MAX_IDX when the
//     column contributes nothing.
//
// On return col_ptr is a regular column-pointer array with
// col_ptr(0) == 0, and row_idx holds row indices relative to srow.
// When nnz == 0, col_ptr is zeroed and row_idx / val are allocated with
// a single zero entry regardless of alloc.
BASKER_INLINE
void BaskerMatrix<Int,Entry,Exe_Space>::convert2D
(
 BASKER_MATRIX &M,
 BASKER_BOOL alloc,
 Int kid
)
{
  //Empty block: zero column pointers and one dummy entry
  if(nnz == 0)
  {
    for(Int i = 0; i < ncol+1; i++)
    {
      col_ptr(i) = 0;
    }

    MALLOC_INT_1DARRAY(row_idx, 1);
    row_idx(0) = (Int) 0;
    MALLOC_ENTRY_1DARRAY(val, 1);
    val(0) = (Entry) 0;
    return;
  }

  //nnz != 0 from here on, so only the nnz > 0 allocation case remains.
  if((alloc == BASKER_TRUE) && (nnz > 0))
  {
    BASKER_ASSERT(nnz > 0, "matrix row nnz 2");
    MALLOC_INT_1DARRAY(row_idx, nnz);
    BASKER_ASSERT(nnz > 0, "matrix nnz 4");
    MALLOC_ENTRY_1DARRAY(val, nnz);
  }

  for(Int i = 0; i < nnz; ++i)
  {
    row_idx(i) = 0;
  }
  for(Int i = 0; i < nnz; ++i)
  {
    val(i) = 0;
  }

  Int temp_count = 0;
  for(Int k = scol; k < scol+ncol; ++k)
  {
    //col_ptr(k-scol) holds the starting position in M for this column
    if(col_ptr(k-scol) == BASKER_MAX_IDX)
    {
      //Column has no entry in this block
      col_ptr(k-scol) = temp_count;
      continue;
    }

    for(Int i = col_ptr(k-scol); i < M.col_ptr(k+1); i++)
    {
      Int j = M.row_idx(i);

      //Stop at the first row past the bottom of this block
      if(j >= srow+nrow)
      {
        break;
      }

      if(j-srow <0)
      {
        std::cout << "kid: " << kid
                  << " j: " << j
                  << " srow: " << srow
                  << " k: " << k
                  << " idx: " << i
                  << std::endl;
        BASKER_ASSERT(0==1, "j-srow NO");
      }

      row_idx(temp_count) = j-srow;
      val(temp_count)     = M.val(i);
      temp_count++;
    }

    //Temporarily store the END count of the column in its own slot
    col_ptr(k-scol) = temp_count;
  }

  //Slide the end counts one slot to the right so that col_ptr becomes
  //an ordinary column-pointer array starting at 0.
  for(Int i = ncol; i > 0; i--)
  {
    col_ptr(i) = col_ptr(i-1);
  }
  col_ptr(0) = (Int) 0;
}//end convert2d(Matrix)
BASKER_INLINE int Basker<Int, Entry,Exe_Space>::break_into_parts2 ( BASKER_MATRIX &M, Int nblks, INT_1DARRAY btf_tabs ) { #ifdef BASKER_DEBUG_ORDER_BTF printf("break_into_parts2 called \n"); printf("nblks: %d \n", nblks); #endif Options.btf = BASKER_TRUE; //Alg. // A -> [BTF_A BTF_B] // [0 BTF_C] //1. Run backward through the btf_tabs to find size C //2. Form A,B,C based on size in 1. //Short circuit, //If nblks == 1, than only BTF_A exists if(nblks == 1) { //#ifdef BASKER_DEBUG_ORDER_BTF printf("Short Circuit part_call \n"); //#endif BTF_A = A; //Options.btf = BASKER_FALSE; btf_tabs_offset = 1; return 0; } //Step 1. //Find total work estimate Int total_work_estimate = 0; for(Int b = 0; b < nblks; b++) { total_work_estimate += btf_blk_work(b); } //Set a class variable to use later btf_total_work = total_work_estimate; //printf("Total work estimate: %d \n", // total_work_estimate); //printf("num_threads: %d epsilon: %f \n", // num_threads, // ((double)1/num_threads) + // ((double)BASKER_BTF_IMBALANCE)); Int break_size = ceil((double)total_work_estimate*( ((double)1/num_threads) + ((double)BASKER_BTF_IMBALANCE))); printf("Break size: %d \n", break_size); Int t_size = 0; Int scol = M.ncol; Int blk_idx = nblks; BASKER_BOOL move_fwd = BASKER_TRUE; while(move_fwd==BASKER_TRUE) { //printf("------TEST blk_idx: %d \n", // blk_idx); Int blk_work = btf_blk_work(blk_idx-1); Int blk_size = btf_tabs(blk_idx) - btf_tabs(blk_idx-1); #ifdef BASKER_DEBUG_ORDER_BTF printf(" \n move_fwd loop \n"); BASKER_ASSERT(blk_idx>=0, "btf blk idx off"); BASKER_ASSERT(blk_work>=0, "btk_work wrong"); BASKER_ASSERT(blk_size>0, "btf blk size wrong"); printf("blk_idx: %d blk_work: %d break_size: %d \n", blk_idx, blk_work, break_size); #endif //Should be end //if(((blk_work < break_size) || // (blk_size < BASKER_BTF_SMALL)) && // (blk_idx > 1)) //Continue to be in btf if(((blk_work < break_size) && (blk_idx > 1))) { #ifdef BASKER_DEBUG_ORDER_BTF printf("first choice \n"); #endif t_size = 
t_size+blk_size; blk_idx = blk_idx-1; scol = btf_tabs[blk_idx]; } //break due to size else if(blk_work >= break_size) { printf("break due to size\n"); move_fwd = BASKER_FALSE; } //break due to end else if(blk_idx == 1) { printf("break last blk\n"); blk_idx = 0; t_size = t_size + blk_size; scol = btf_tabs[blk_idx]; move_fwd = BASKER_FALSE; } //should not be called else { BASKER_ASSERT(1==0, "btf order break"); move_fwd = BASKER_FALSE; } }//end while(move_fwd) //#ifdef BASKER_DEBUG_ORDER_BTF printf("Done finding BTF2 cut. Cut size: %d scol: %d \n", t_size, scol); printf("Done finding BTF2 cut. blk_idx: %d \n", blk_idx); //BASKER_ASSERT(t_size > 0, "BTF CUT SIZE NOT BIG ENOUGH\n"); BASKER_ASSERT((scol >= 0) && (scol < M.ncol), "SCOL\n"); //#endif //Comeback and change btf_tabs_offset = blk_idx; //2. Move into Blocks if(btf_tabs_offset != 0) { //--Move A into BTF_A; BTF_A.set_shape(0, scol, 0, scol); BTF_A.nnz = M.col_ptr(scol); #ifdef BASKER_DEBUG_ORDER_BTF printf("Init BTF_A. ncol: %d nnz: %d \n", scol, BTF_A.nnz); #endif if(BTF_A.v_fill == BASKER_FALSE) { BASKER_ASSERT(BTF_A.ncol >= 0, "BTF_A, col_ptr"); MALLOC_INT_1DARRAY(BTF_A.col_ptr, BTF_A.ncol+1); BASKER_ASSERT(BTF_A.nnz > 0, "BTF_A, nnz"); MALLOC_INT_1DARRAY(BTF_A.row_idx, BTF_A.nnz); MALLOC_ENTRY_1DARRAY(BTF_A.val, BTF_A.nnz); BTF_A.fill(); } Int annz = 0; for(Int k = 0; k < scol; ++k) { #ifdef BASKER_DEBUG_ORDER_BTF printf("copy column: %d into A_BTF, [%d %d] \n", k, M.col_ptr(k), M.col_ptr(k+1)); #endif for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i) { //printf("annz: %d i: %d \n", annz, i); BTF_A.row_idx(annz) = M.row_idx(i); BTF_A.val(annz) = M.val(i); annz++; } BTF_A.col_ptr(k+1) = annz; } }//no A //Fill in B and C at the same time INT_1DARRAY cws; BASKER_ASSERT((M.ncol-scol+1) > 0, "BTF_SIZE MALLOC"); MALLOC_INT_1DARRAY(cws, M.ncol-scol+1); init_value(cws, M.ncol-scol+1, (Int)M.ncol); BTF_B.set_shape(0 , scol, scol, M.ncol-scol); BTF_C.set_shape(scol, M.ncol-scol, scol, M.ncol-scol); #ifdef 
BASKER_DEBUG_ORDER_BTF printf("Set Shape BTF_B: %d %d %d %d \n", BTF_B.srow, BTF_B.nrow, BTF_B.scol, BTF_B.ncol); printf("Set Shape BTF_C: %d %d %d %d \n", BTF_C.srow, BTF_C.nrow, BTF_C.scol, BTF_C.nrow); #endif //Scan and find nnz //We can do this much better!!!! Int bnnz = 0; Int cnnz = 0; for(Int k = scol; k < M.ncol; ++k) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Scanning nnz, k: %d \n", k); #endif for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i) { if(M.row_idx(i) < scol) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz to Upper, %d %d \n", scol, M.row_idx(i)); #endif bnnz++; } else { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz to Lower, %d %d \n", scol, M.row_idx(i)); #endif cnnz++; } }//over all nnz in k }//over all k #ifdef BASKER_DEBUG_ORDER_BTF printf("BTF_B nnz: %d \n", bnnz); printf("BTF_C nnz: %d \n", cnnz); #endif BTF_B.nnz = bnnz; BTF_C.nnz = cnnz; //Malloc need space if((BTF_B.v_fill == BASKER_FALSE) && (BTF_B.nnz > 0)) //if(BTF_B.v_fill == BASKER_FALSE) { BASKER_ASSERT(BTF_B.ncol >= 0, "BTF_B ncol"); MALLOC_INT_1DARRAY(BTF_B.col_ptr, BTF_B.ncol+1); BASKER_ASSERT(BTF_B.nnz > 0, "BTF_B.nnz"); MALLOC_INT_1DARRAY(BTF_B.row_idx, BTF_B.nnz); MALLOC_ENTRY_1DARRAY(BTF_B.val, BTF_B.nnz); BTF_B.fill(); } if(BTF_C.v_fill == BASKER_FALSE) { BASKER_ASSERT(BTF_C.ncol >= 0, "BTF_C.ncol"); MALLOC_INT_1DARRAY(BTF_C.col_ptr, BTF_C.ncol+1); BASKER_ASSERT(BTF_C.nnz > 0, "BTF_C.nnz"); MALLOC_INT_1DARRAY(BTF_C.row_idx, BTF_C.nnz); MALLOC_ENTRY_1DARRAY(BTF_C.val, BTF_C.nnz); BTF_C.fill(); } //scan again (Very bad!!!) 
bnnz = 0; cnnz = 0; for(Int k = scol; k < M.ncol; ++k) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Scanning nnz, k: %d \n", k); #endif for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i) { if(M.row_idx(i) < scol) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz to Upper, %d %d \n", scol, M.row_idx[i]); #endif BASKER_ASSERT(BTF_B.nnz > 0, "BTF B uninit"); //BTF_B.row_idx[bnnz] = M.row_idx[i]; //Note: do not offset because B srow = 0 BTF_B.row_idx(bnnz) = M.row_idx(i); BTF_B.val(bnnz) = M.val(i); bnnz++; } else { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz Lower,k: %d %d %d %f \n", k, scol, M.row_idx[i], M.val(i)); #endif //BTF_C.row_idx[cnnz] = M.row_idx[i]; BTF_C.row_idx(cnnz) = M.row_idx(i)-scol; BTF_C.val(cnnz) = M.val(i); cnnz++; } }//over all nnz in k if(BTF_B.nnz > 0) { BTF_B.col_ptr(k-scol+1) = bnnz; } BTF_C.col_ptr(k-scol+1) = cnnz; }//over all k #ifdef BASKER_DEBUG_ORDER_BTF printf("After BTF_B nnz: %d \n", bnnz); printf("After BTF_C nnz: %d \n", cnnz); #endif //printf("\n\n"); //printf("DEBUG\n"); //BTF_C.print(); //printf("\n\n"); return 0; }//end break_into_parts2 (based on imbalance)
BASKER_INLINE int Basker<Int, Entry,Exe_Space>::break_into_parts ( BASKER_MATRIX &M, Int nblks, INT_1DARRAY btf_tabs ) { #ifdef BASKER_DEBUG_ORDER_BTF printf("break_into_parts called \n"); printf("nblks: %d \n", nblks); #endif Options.btf = BASKER_TRUE; //Alg. // A -> [BTF_A BTF_B] // [0 BTF_C] //1. Run backward through the btf_tabs to find size C //2. Form A,B,C based on size in 1. //Short circuit, //If nblks == 1, than only BTF_A exists if(nblks == 1) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Short Circuit part_call \n"); #endif BTF_A = A; //Options.btf = BASKER_FALSE; btf_tabs_offset = 1; return 0; } //Step 1. Int t_size = 0; Int scol = M.ncol; Int blk_idx = nblks; BASKER_BOOL move_fwd = BASKER_TRUE; while(move_fwd==BASKER_TRUE) { Int blk_size = btf_tabs(blk_idx)- btf_tabs(blk_idx-1); #ifdef BASKER_DEBUG_ORDER_BTF printf("move_fwd loop \n"); BASKER_ASSERT(blk_idx>=0, "btf blk idx off"); BASKER_ASSERT(blk_size>0, "btf blk size wrong"); printf("blk_idx: %d blk_size: %d \n", blk_idx, blk_size); std::cout << blk_size << std::endl; #endif if((blk_size < Options.btf_large) && ((((double)t_size+blk_size)/(double)M.ncol) < Options.btf_max_percent)) { #ifdef BASKER_DEBUG_ORDER_BTF printf("first choice \n"); printf("blksize test: %d %d %d \n", blk_size, Options.btf_large, BASKER_BTF_LARGE); printf("blkpercent test: %f %f %f \n", ((double)t_size+blk_size)/(double)M.ncol, Options.btf_max_percent, (double) BASKER_BTF_MAX_PERCENT); #endif t_size = t_size+blk_size; blk_idx = blk_idx-1; scol = btf_tabs[blk_idx]; } else { //printf("second choice \n"); //#ifdef BASKER_DEBUG_ORDER_BTF printf("Cut: blk_size: %d percent: %f \n", blk_size, ((double)t_size+blk_size)/(double)M.ncol); if((((double)t_size+blk_size)/(double)M.ncol) == 1.0) { blk_idx = 0; t_size = t_size + blk_size; scol = btf_tabs[blk_idx]; } //#endif move_fwd = BASKER_FALSE; } }//end while(move_fwd) #ifdef BASKER_DEBUG_ORDER_BTF printf("Done finding BTF cut. 
Cut size: %d scol: %d \n", t_size, scol); //BASKER_ASSERT(t_size > 0, "BTF CUT SIZE NOT BIG ENOUGH\n"); BASKER_ASSERT((scol >= 0) && (scol < M.ncol), "SCOL\n"); #endif //Comeback and change btf_tabs_offset = blk_idx; //2. Move into Blocks if(btf_tabs_offset != 0) { //--Move A into BTF_A; BTF_A.set_shape(0, scol, 0, scol); BTF_A.nnz = M.col_ptr(scol); #ifdef BASKER_DEBUG_ORDER_BTF printf("Init BTF_A. ncol: %d nnz: %d \n", scol, BTF_A.nnz); #endif if(BTF_A.v_fill == BASKER_FALSE) { BASKER_ASSERT(BTF_A.ncol >= 0, "BTF_A, col_ptr"); MALLOC_INT_1DARRAY(BTF_A.col_ptr, BTF_A.ncol+1); BASKER_ASSERT(BTF_A.nnz > 0, "BTF_A, nnz"); MALLOC_INT_1DARRAY(BTF_A.row_idx, BTF_A.nnz); MALLOC_ENTRY_1DARRAY(BTF_A.val, BTF_A.nnz); BTF_A.fill(); } Int annz = 0; for(Int k = 0; k < scol; ++k) { #ifdef BASKER_DEBUG_ORDER_BTF printf("copy column: %d into A_BTF, [%d %d] \n", k, M.col_ptr(k), M.col_ptr(k+1)); #endif for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i) { //printf("annz: %d i: %d \n", annz, i); BTF_A.row_idx(annz) = M.row_idx(i); BTF_A.val(annz) = M.val(i); annz++; } BTF_A.col_ptr(k+1) = annz; } }//no A //Fill in B and C at the same time INT_1DARRAY cws; BASKER_ASSERT((M.ncol-scol+1) > 0, "BTF_SIZE MALLOC"); MALLOC_INT_1DARRAY(cws, M.ncol-scol+1); init_value(cws, M.ncol-scol+1, (Int)M.ncol); BTF_B.set_shape(0 , scol, scol, M.ncol-scol); BTF_C.set_shape(scol, M.ncol-scol, scol, M.ncol-scol); #ifdef BASKER_DEBUG_ORDER_BTF printf("Set Shape BTF_B: %d %d %d %d \n", BTF_B.srow, BTF_B.nrow, BTF_B.scol, BTF_B.ncol); printf("Set Shape BTF_C: %d %d %d %d \n", BTF_C.srow, BTF_C.nrow, BTF_C.scol, BTF_C.nrow); #endif //Scan and find nnz //We can do this much better!!!! 
Int bnnz = 0; Int cnnz = 0; for(Int k = scol; k < M.ncol; ++k) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Scanning nnz, k: %d \n", k); #endif for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i) { if(M.row_idx(i) < scol) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz to Upper, %d %d \n", scol, M.row_idx(i)); #endif bnnz++; } else { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz to Lower, %d %d \n", scol, M.row_idx(i)); #endif cnnz++; } }//over all nnz in k }//over all k #ifdef BASKER_DEBUG_ORDER_BTF printf("BTF_B nnz: %d \n", bnnz); printf("BTF_C nnz: %d \n", cnnz); #endif BTF_B.nnz = bnnz; BTF_C.nnz = cnnz; //Malloc need space if((BTF_B.v_fill == BASKER_FALSE) && (BTF_B.nnz > 0)) //if(BTF_B.v_fill == BASKER_FALSE) { BASKER_ASSERT(BTF_B.ncol >= 0, "BTF_B ncol"); MALLOC_INT_1DARRAY(BTF_B.col_ptr, BTF_B.ncol+1); BASKER_ASSERT(BTF_B.nnz > 0, "BTF_B.nnz"); MALLOC_INT_1DARRAY(BTF_B.row_idx, BTF_B.nnz); MALLOC_ENTRY_1DARRAY(BTF_B.val, BTF_B.nnz); BTF_B.fill(); } if(BTF_C.v_fill == BASKER_FALSE) { BASKER_ASSERT(BTF_C.ncol >= 0, "BTF_C.ncol"); MALLOC_INT_1DARRAY(BTF_C.col_ptr, BTF_C.ncol+1); BASKER_ASSERT(BTF_C.nnz > 0, "BTF_C.nnz"); MALLOC_INT_1DARRAY(BTF_C.row_idx, BTF_C.nnz); MALLOC_ENTRY_1DARRAY(BTF_C.val, BTF_C.nnz); BTF_C.fill(); } //scan again (Very bad!!!) 
bnnz = 0; cnnz = 0; for(Int k = scol; k < M.ncol; ++k) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Scanning nnz, k: %d \n", k); #endif for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i) { if(M.row_idx(i) < scol) { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz to Upper, %d %d \n", scol, M.row_idx[i]); #endif BASKER_ASSERT(BTF_B.nnz > 0, "BTF B uninit"); //BTF_B.row_idx[bnnz] = M.row_idx[i]; //Note: do not offset because B srow = 0 BTF_B.row_idx(bnnz) = M.row_idx(i); BTF_B.val(bnnz) = M.val(i); bnnz++; } else { #ifdef BASKER_DEBUG_ORDER_BTF printf("Adding nnz Lower,k: %d %d %d %f \n", k, scol, M.row_idx[i], M.val(i)); #endif //BTF_C.row_idx[cnnz] = M.row_idx[i]; BTF_C.row_idx(cnnz) = M.row_idx(i)-scol; BTF_C.val(cnnz) = M.val(i); cnnz++; } }//over all nnz in k if(BTF_B.nnz > 0) { BTF_B.col_ptr(k-scol+1) = bnnz; } BTF_C.col_ptr(k-scol+1) = cnnz; }//over all k #ifdef BASKER_DEBUG_ORDER_BTF printf("After BTF_B nnz: %d \n", bnnz); printf("After BTF_C nnz: %d \n", cnnz); #endif //printf("\n\n"); //printf("DEBUG\n"); //BTF_C.print(); //printf("\n\n"); return 0; }//end break_into_parts
// Orders every BTF block of M independently and records per-block
// nnz / work estimates.
//
//   M        : matrix whose diagonal blocks (as delimited by the member
//              btf_tabs) are ordered.
//   p        : output permutation over all columns of M.
//   btf_nnz  : output, one value per block (taken from the l_nnz
//              result of BaskerSSWrapper<Int>::amd_order, or a closed
//              form for tiny blocks).
//   btf_work : output, one value per block (taken from the lu_work
//              result of amd_order, or blk_size^3 for tiny blocks).
//
// Blocks with fewer than 3 columns are not sent to AMD.  For larger
// blocks only the entries with row >= btf_tabs(b) are extracted into a
// temporary pattern, which is then ordered; the block-local result is
// inverted and shifted by btf_tabs(b) when stored into p.
//
// When Options.incomplete is BASKER_TRUE, p is the identity and every
// block gets nnz = work = 1.
void Basker<Int,Entry,Exe_Space>::btf_blk_amd
(
 BASKER_MATRIX &M,
 INT_1DARRAY p,
 INT_1DARRAY btf_nnz,
 INT_1DARRAY btf_work
)
{
  if(Options.incomplete == BASKER_TRUE)
  {
    //AMD on incomplete ILUK seems to lead to a zero on the diagonal,
    //therefore simply return the natural ordering.
    for(Int c = 0; c < M.ncol; c++)
    {
      p(c) = c;
    }

    //Make up work of 1, since BTF is not supported in our iluk
    for(Int blk = 0; blk < btf_nblks; blk++)
    {
      btf_nnz(blk)  = 1;
      btf_work(blk) = 1;
    }
    return;
  }

  //Scratch pattern, sized for the worst case and reused for each block
  INT_1DARRAY temp_col;
  MALLOC_INT_1DARRAY(temp_col, M.ncol+1);
  INT_1DARRAY temp_row;
  MALLOC_INT_1DARRAY(temp_row, M.nnz);

  for(Int b = 0; b < btf_nblks; b++)
  {
    const Int blk_begin = btf_tabs(b);
    const Int blk_end   = btf_tabs(b+1);
    Int blk_size        = blk_end - blk_begin;

    //Tiny block: no ordering, closed-form estimates
    if(blk_size < 3)
    {
      for(Int ii = 0; ii < blk_size; ++ii)
      {
        p(ii+blk_begin) = blk_begin+ii-M.scol;
      }
      btf_work(b) = blk_size*blk_size*blk_size;
      btf_nnz(b)  = (.5*(blk_size*blk_size) + blk_size);
      continue;
    }

    INT_1DARRAY tempp;
    MALLOC_INT_1DARRAY(tempp, blk_size+1);

    //Fill in temp matrix with the block's entries, rows made local
    Int nnz = 0;
    temp_col(0) = 0;
    for(Int k = blk_begin; k < blk_end; k++)
    {
      for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); i++)
      {
        if(M.row_idx(i) >= blk_begin)
        {
          temp_row(nnz) = M.row_idx(i) - blk_begin;
          nnz++;
        }
      }// end over all row_idx

      temp_col(k - blk_begin + 1) = nnz;
    }//end over all columns k

    #ifdef BASKER_DEBUG_ORDER_AMD
    printf("col_ptr: ");
    for(Int i = 0 ; i < blk_size+1; i++)
    {
      printf("%d, ", temp_col(i));
    }
    printf("\n");
    printf("row_idx: ");
    for(Int i = 0; i < nnz; i++)
    {
      printf("%d, ", temp_row(i));
    }
    printf("\n");
    #endif

    double l_nnz   = 0;
    double lu_work = 0;
    BaskerSSWrapper<Int>::amd_order(blk_size, &(temp_col(0)),
                                    &(temp_row(0)),&(tempp(0)),
                                    l_nnz, lu_work);

    btf_nnz(b)  = l_nnz;
    btf_work(b) = lu_work;

    #ifdef BASKER_DEBUG_ORDER_AMD
    printf("blk: %d order: \n", b);
    for(Int ii = 0; ii < blk_size; ii++)
    {
      printf("%d, ", tempp(ii));
    }
    #endif

    //Add to the bigger perm vector (inverse of the block ordering)
    for(Int ii = 0; ii < blk_size; ii++)
    {
      p(tempp(ii)+blk_begin) = ii+blk_begin;
    }

    FREE_INT_1DARRAY(tempp);
  }//over all blk_tabs

  #ifdef BASKER_DEBUG_AMD_ORDER
  printf("blk amd final order\n");
  for(Int ii = 0; ii < M.ncol; ii++)
  {
    printf("%d, ", p(ii));
  }
  printf("\n");
  #endif

  FREE_INT_1DARRAY(temp_col);
  FREE_INT_1DARRAY(temp_row);
}//end blk_amd()
// Orders the BTF blocks b = btf_tabs_offset .. btf_nblks-1 of M
// independently and writes the result into p.
//
//   M : matrix whose diagonal blocks (as delimited by the member
//       btf_tabs) are ordered.
//   p : output permutation; only the positions belonging to the
//       processed blocks are written.
//
// Blocks with fewer than 3 columns are not sent to AMD.  For larger
// blocks only the entries with row >= btf_tabs(b) are extracted into a
// temporary pattern, which is ordered with
// BaskerSSWrapper<Int>::amd_order; the block-local result is inverted
// and shifted by btf_tabs(b) when stored into p.
void Basker<Int,Entry,Exe_Space>::blk_amd(BASKER_MATRIX &M, INT_1DARRAY p)
{
  //Scratch pattern, sized for the worst case and reused for each block
  INT_1DARRAY temp_col;
  MALLOC_INT_1DARRAY(temp_col, M.ncol+1);
  INT_1DARRAY temp_row;
  MALLOC_INT_1DARRAY(temp_row, M.nnz);

  for(Int b = btf_tabs_offset; b < btf_nblks; b++)
  {
    const Int blk_begin = btf_tabs(b);
    const Int blk_end   = btf_tabs(b+1);
    Int blk_size        = blk_end - blk_begin;

    //Tiny block: no ordering is computed
    if(blk_size < 3)
    {
      for(Int ii = 0; ii < blk_size; ++ii)
      {
        p(ii+blk_begin) = blk_begin+ii-M.scol;
      }
      continue;
    }

    INT_1DARRAY tempp;
    MALLOC_INT_1DARRAY(tempp, blk_size+1);

    //Fill in temp matrix with the block's entries, rows made local
    Int nnz = 0;
    temp_col(0) = 0;
    for(Int k = blk_begin; k < blk_end; k++)
    {
      for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); i++)
      {
        if(M.row_idx(i) >= blk_begin)
        {
          temp_row(nnz) = M.row_idx(i) - blk_begin;
          nnz++;
        }
      }// end over all row_idx

      temp_col(k - blk_begin + 1) = nnz;
    }//end over all columns k

    #ifdef BASKER_DEBUG_ORDER_AMD
    printf("col_ptr: ");
    for(Int i = 0 ; i < blk_size+1; i++)
    {
      printf("%d, ", temp_col(i));
    }
    printf("\n");
    printf("row_idx: ");
    for(Int i = 0; i < nnz; i++)
    {
      printf("%d, ", temp_row(i));
    }
    printf("\n");
    #endif

    BaskerSSWrapper<Int>::amd_order(blk_size, &(temp_col(0)),
                                    &(temp_row(0)),&(tempp(0)));

    #ifdef BASKER_DEBUG_ORDER_AMD
    printf("blk: %d order: \n", b);
    for(Int ii = 0; ii < blk_size; ii++)
    {
      printf("%d, ", tempp(ii));
    }
    #endif

    //Add to the bigger perm vector (inverse of the block ordering)
    for(Int ii = 0; ii < blk_size; ii++)
    {
      p(tempp(ii)+blk_begin) = ii+blk_begin;
    }

    FREE_INT_1DARRAY(tempp);
  }//over all blk_tabs

  #ifdef BASKER_DEBUG_AMD_ORDER
  printf("blk amd final order\n");
  for(Int ii = 0; ii < M.ncol; ii++)
  {
    printf("%d, ", p(ii));
  }
  printf("\n");
  #endif

  FREE_INT_1DARRAY(temp_col);
  FREE_INT_1DARRAY(temp_row);
}//end blk_amd()