/* Sort the n rows of m in place, c being the number of entries handed to the
   comparison for each row.

   The ordering is the one selected by the file-level function pointer
   `compare`: two adjacent rows are exchanged when compare(first,second,c)
   is negative.

   For three or more rows, split_mat() partitions the range and returns an
   index; the rows before that index and the rows after it are then sorted
   independently. The part before the index is handled by a recursive call,
   the part after it by continuing the loop on the shifted range. */
local void sort_matrix (entry** m,lie_Index n,lie_Index c)
{ while (n>=3)
  { lie_Index pivot=split_mat(m,n,c);
    sort_matrix(m,pivot,c);      /* rows in front of the split index */
    m+=pivot+1; n-=pivot+1;      /* carry on with the rows behind it */
  }
  /* Ranges of 0 or 1 rows need no work; a pair needs at most one swap. */
  if (n==2 && (*compare)(m[0],m[1],c)<0) swap_rows(m,m+1);
}
BASKER_INLINE
int Basker<Int,Entry, Exe_Space>::InitMatrix
(
 Int nrow,
 Int ncol,
 Int nnz,
 Int *col_ptr,
 Int *row_idx,
 Entry *val
)
{
  // Load the caller's arrays (dimensions, nonzero count, column pointers,
  // row indices, values) into the member matrix A.
  A.init_matrix("Original Matrix",
                nrow, ncol, nnz,
                col_ptr, row_idx, val);

  // A is the full matrix, so both of its offsets start at zero.
  A.srow = 0;
  A.scol = 0;

  sort_matrix(A);

  // Record that a matrix has been loaded.
  matrix_flag = true;

  // This path always reports success.
  return 0;
}//end InitMatrix(Int, Int, Int, Int*, Int*, Entry*)
BASKER_INLINE
int Basker<Int, Entry, Exe_Space>::Symbolic
(
 Int nrow,
 Int ncol,
 Int nnz,
 Int *col_ptr,
 Int *row_idx,
 Entry *val
)
{
  // Runs three one-shot stages in order: load the matrix, compute the
  // ordering, run the symbolic factorization. Each stage is protected by
  // its own flag; if a stage has already run, a message is printed and
  // BASKER_ERROR is returned. Stages completed before the failing one keep
  // their effects. Returns 0 when all three stages ran.

  //---- Stage 1: load A ----
  if(matrix_flag == BASKER_TRUE)
    {
      printf("YOU CANNOT RERUN SYMBOLIC\n");
      return BASKER_ERROR;
    }
  A.init_matrix("Original Matrix",
                nrow, ncol, nnz,
                col_ptr, row_idx, val);
  A.scol = 0;
  A.srow = 0;
  sort_matrix(A);
  matrix_flag = BASKER_TRUE;

  //---- Stage 2: ordering (always the BTF ordering) ----
  if(order_flag == BASKER_TRUE)
    {
      printf("YOU CANNOT RERUN ORDER\n");
      return BASKER_ERROR;
    }
  printf("btf_order called \n");
  btf_order2();
  // The thread barrier is only set up when btf_tabs_offset is non-zero.
  if(btf_tabs_offset != 0)
    {
      basker_barrier.init(num_threads, 16, tree.nlvls );
    }
  order_flag = BASKER_TRUE;

  //---- Stage 3: symbolic factorization ----
  if(symb_flag == BASKER_TRUE)
    {
      printf("YOU CANNOT RERUN SFACTOR\n");
      return BASKER_ERROR;
    }
  sfactor();
  symb_flag = BASKER_TRUE;

  return 0;
}//end Symbolic()
/* Sort the rows of matrix m in place under the ordering named by criterion.

   The comparison is installed in the file-level pointer `compare` (obtained
   from set_ordering() for this column count and `defaultgrp`) before the
   recursive row sort is started, because sort_matrix() reads `compare`. */
void Qksortmat(matrix* m,cmpfn_tp criterion)
{ entry** rows; lie_Index n_rows,n_cols;
  compare=set_ordering(criterion,m->ncols,defaultgrp);
  rows=m->elm; n_rows=m->nrows; n_cols=m->ncols;
  sort_matrix(rows,n_rows,n_cols);
}
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::test_solve()
{
  // Self-check of the solve path.
  //
  // Builds a known solution x_known = (1,...,1), forms a right-hand side y
  // by applying the stored BTF blocks to it with spmv, applies the pivot
  // permutation gpermi to y, solves, and compares the computed x against
  // x_known. Progress and the error figure are printed; "TEST PASSED" is
  // printed when the figure is below 1e-2 in magnitude.
  //
  // Returns 0 in every case; the outcome is reported only through the
  // printed output.
  //
  // NOTE(review): the permutation loops index y (allocated with gm entries)
  // up to gn, so this presumably assumes gm == gn -- confirm.

  ENTRY_1DARRAY x_known;
  ENTRY_1DARRAY x;
  ENTRY_1DARRAY y;

  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("test_solve called \n");
  printf("Global pivot permuation\n");
  printVec(gperm, gn);
  printf("\n");
  printf("Global pivot permutation inverse\n");
  printVec(gpermi, gn);
  printf("\n");
  #endif

  // Known solution: all ones.
  BASKER_ASSERT(gn > 0, "solve testsolve gn");
  MALLOC_ENTRY_1DARRAY(x_known, gn);
  init_value(x_known, gn , (Entry)1.0);
  for(Int i = 0; i < gn; i++)
    {
      x_known(i) = (Entry) 1.0;
    }

  // Computed solution, starts at zero.
  MALLOC_ENTRY_1DARRAY(x, gn);
  init_value(x, gn, (Entry) 0.0);

  // Right-hand side, starts at zero.
  BASKER_ASSERT(gm > 0, "solve testsolve gm");
  MALLOC_ENTRY_1DARRAY(y, gm);
  init_value(y, gm, (Entry) 0.0);

  if(btf_nblks > 0)
    {
      sort_matrix(BTF_C);
    }

  // Build y from the blocks that exist. When Options.btf is false no
  // product is formed and y stays zero.
  if(Options.btf == BASKER_TRUE)
    {
      if(btf_tabs_offset != 0)
        {
          spmv(BTF_A, x_known,y);
          if(btf_nblks> 1)
            {
              spmv(BTF_B, x_known, y);
            }
        }
      if(btf_nblks > 1)
        {
          spmv(BTF_C, x_known, y);
        }
    }
  else
    {
      // intentionally empty
    }

  // Apply the pivot permutation: scatter y through gpermi into x, then
  // copy back into y and reset x for the solve.
  for(Int i = 0; i < gn; i++)
    {
      x(gpermi(i)) = y(i);
    }
  for(Int i = 0; i < gn; i++)
    {
      y(i) = x(i);
      x(i) = 0;
    }

  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("\n\n");
  printf("\n\n");
  printf("RHS: \n");
  for(Int i =0; i < gm; i++)
    {
      printf("%d %f,\n ", i, y(i));
    }
  printf("\n\n");
  #endif

  // Solve with the routine matching the BTF option.
  if(Options.btf == BASKER_FALSE)
    {
      if(btf_tabs_offset != 0)
        {
          serial_solve(y,x);
        }
    }
  else
    {
      serial_btf_solve(y,x);
    }

  // Error figure: mean absolute deviation of x from x_known. Since x_known
  // is all ones this equals ||x_known - x||_1 / ||x_known||_1.
  // The magnitudes are accumulated (not the signed differences) so that
  // errors of opposite sign cannot cancel and hide a wrong solution.
  Entry diff =0.0;
  for(Int i = 0; i < gn; i++)
    {
      const Entry err_i = x_known(i) - x(i);
      diff += (err_i < (Entry) 0.0) ? -err_i : err_i;
    }
  diff = diff/(Entry) gn;

  #ifdef BASKER_DEBUG_SOLVE_RHS
  printf("\n\n");
  printf("Solve Compare: \n");
  for(Int i = 0; i < gn; i++)
    {
      printf("%d %f %f \n", i, x_known(i), x(i));
    }
  printf("\n\n");
  #endif

  // Spot checks of a few entries.
  printf("\n Test Points \n");
  printf("i: %d x: %f %f \n", 0, x_known(0), x(0));
  if(gn > 24)
    {
      printf("i: %d x: %f %f \n", 10, x_known(10), x(10));
      printf("i: %d x: %f %f \n", 24, x_known(24), x(24));
    }
  printf("\n");
  printf("TEST_SOLVE: ||x-x||/||x| = %e", diff);
  printf("\n");

  if((diff > -1e-2) && (diff < 1e-2))
    {
      printf("TEST PASSED \n");
    }

  return 0;
}//end test_solve
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::btf_order()
{
  // Computes and applies the orderings on A, in this sequence:
  //   1. matching ordering on the whole matrix,
  //   2. BTF ordering, which splits A into BTF_A / BTF_B / BTF_C,
  //   3. nested-dissection partition of BTF_A,
  //   4. thread tree set-up,
  //   5. constrained symamd ordering of BTF_A,
  //   6. conversion of BTF_A into the 2D block structure,
  // and finally a block AMD ordering of BTF_C.
  // Steps 3-6 run only when btf_tabs_offset is non-zero; the BTF_B / BTF_C
  // work runs only when there is more than one BTF block.
  // The matrices are re-sorted after every permutation because later steps
  // assume sorted columns. Always returns 0.

  //---- 1. Matching ordering on the whole matrix ----
  sort_matrix(A);
  match_ordering(0);
  sort_matrix(A);

  //---- 2. BTF ordering: A -> [BTF_A, BTF_C; 0, BTF_B] ----
  printf("outter num_threads:%d \n", num_threads);
  MALLOC_INT_1DARRAY(btf_schedule, num_threads+1);
  init_value(btf_schedule, num_threads+1, 0);
  find_btf(A);

  if(btf_tabs_offset != 0)
    {
      //---- 3. Nested dissection on BTF_A ----
      sort_matrix(BTF_A);
      scotch_partition(BTF_A);

      // BTF_B needs the same row permutation.
      if(btf_nblks > 1)
        {
          permute_row(BTF_B, part_tree.permtab);
        }

      // The move into the 2D structure assumes sorted columns.
      sort_matrix(BTF_A);
      if(btf_nblks > 1)
        {
          sort_matrix(BTF_B);
          sort_matrix(BTF_C);
        }

      //---- 4. Tree structure fitted to the thread count ----
      init_tree_thread();

      //---- 5. Constrained symamd on BTF_A ----
      // cmember(col) holds the index of the tree block owning that column.
      INT_1DARRAY cmember;
      MALLOC_INT_1DARRAY(cmember, BTF_A.ncol+1);
      init_value(cmember,BTF_A.ncol+1,(Int) 0);
      for(Int blk = 0; blk < tree.nblks; ++blk)
        {
          for(Int col = tree.col_tabs(blk);
              col < tree.col_tabs(blk+1);
              ++col)
            {
              cmember(col) = blk;
            }
        }

      MALLOC_INT_1DARRAY(order_csym_array, BTF_A.ncol+1);
      init_value(order_csym_array, BTF_A.ncol+1,(Int) 0);
      csymamd_order(BTF_A, order_csym_array, cmember);

      // Apply the same permutation to columns and rows of BTF_A.
      permute_col(BTF_A, order_csym_array);
      sort_matrix(BTF_A);
      permute_row(BTF_A, order_csym_array);
      sort_matrix(BTF_A);

      if(btf_nblks > 1)
        {
          permute_row(BTF_B, order_csym_array);
          sort_matrix(BTF_B);
          sort_matrix(BTF_C);
        }

      //---- 6. Move to the 2D structure ----
      // Shapes of the views / submatrices.
      matrix_to_views_2D(BTF_A);
      // Starting points of A for the submatrices.
      find_2D_convert(BTF_A);

      // Fill the submatrices.
      #ifdef BASKER_KOKKOS
      kokkos_order_init_2D<Int,Entry,Exe_Space> iO(this);
      Kokkos::parallel_for(TeamPolicy(num_threads,1), iO);
      Kokkos::fence();
      #else
      // Comeback
      #endif
    }//end if btf_tabs_offset != 0

  //---- Ordering of BTF_C ----
  if(btf_nblks > 1)
    {
      sort_matrix(BTF_C);

      MALLOC_INT_1DARRAY(order_c_csym_array, BTF_C.ncol+1);
      init_value(order_c_csym_array, BTF_C.ncol+1,(Int) 0);

      printf("BEFORE \n");
      blk_amd(BTF_C, order_c_csym_array);
      printf("After perm\n");

      permute_col(BTF_C, order_c_csym_array);
      sort_matrix(BTF_C);
      permute_row(BTF_C, order_c_csym_array);
      sort_matrix(BTF_C);

      // BTF_B takes the column permutation chosen for BTF_C.
      if(btf_tabs_offset != 0)
        {
          permute_col(BTF_B, order_c_csym_array);
          sort_matrix(BTF_B);
        }
    }

  return 0;
}//end btf_order
BASKER_INLINE
int Basker<Int,Entry, Exe_Space>::find_btf2
(
 BASKER_MATRIX &M
)
{
  // Finds the BTF (block triangular form) of M and applies it.
  //
  // Sequence: strong_component() yields the block count, the permutation
  // order_btf_array and the block boundaries btf_tabs; M is permuted with
  // that ordering; btf_blk_amd() then produces a per-block AMD ordering
  // together with per-block nonzero and work figures; M is permuted again,
  // sorted, split with break_into_parts2() and a schedule is computed with
  // find_btf_schedule().
  //
  // Sets btf_nblks and btf_flag. Prints the block count, the thread count
  // and the block boundaries unconditionally. Always returns 0.

  Int nblks = 0;
  strong_component(M,nblks,order_btf_array,btf_tabs);

  btf_nblks = nblks;
  btf_flag = BASKER_TRUE;

  printf("BTF nblks returned: %d \n", nblks);

  #ifdef BASKER_DEBUG_ORDER_BTF
  if(nblks<2)
    {
      printf("BTF did not find enough blks\n");
    }
  #endif

  printf("num_threads: %d \n", num_threads);
  printf("\n\nBTF tabs: \n");
  for(Int t = 0; t < nblks+1; t++)
    {
      printf("%d, ", btf_tabs(t));
    }
  printf("\n");

  // Apply the BTF permutation to both columns and rows.
  permute_col(M, order_btf_array);
  permute_row(M, order_btf_array);

  // Work arrays for the per-block AMD pass.
  MALLOC_INT_1DARRAY(order_blk_amd_array, M.ncol);
  init_value(order_blk_amd_array, M.ncol, (Int)0);
  MALLOC_INT_1DARRAY(btf_blk_nnz, nblks+1);
  init_value(btf_blk_nnz, nblks+1, (Int) 0);
  MALLOC_INT_1DARRAY(btf_blk_work, nblks+1);
  init_value(btf_blk_work, nblks+1, (Int) 0);

  // Per-block AMD ordering, nonzero counts and work figures.
  btf_blk_amd( M, order_blk_amd_array, btf_blk_nnz, btf_blk_work);

  #ifdef BASKER_DEBUG_ORDER_BTF
  printf("blk_perm:\n");
  for(Int c = 0; c < M.ncol; c++)
    {
      printf("(%d,%d) ", c, order_blk_amd_array(c));
    }
  printf("\n");
  printf("id/blk_size/blk_nnz/work: \n");
  for(Int b = 0; b < nblks; b++)
    {
      printf("(%d, %d, %d, %d) ",
             b, btf_tabs(b+1)-btf_tabs(b),
             btf_blk_nnz(b), btf_blk_work(b));
    }
  printf("\n");
  #endif

  // Apply the block AMD permutation, then restore sorted order.
  permute_col(M, order_blk_amd_array);
  permute_row(M, order_blk_amd_array);
  sort_matrix(M);

  // Split M along the block boundaries.
  break_into_parts2(M, nblks, btf_tabs);

  // Find the schedule.
  find_btf_schedule(M, nblks, btf_tabs);

  #ifdef BASKER_DEBUG_ORDER_BTF
  printf("------------BTF CUT: %d --------------\n",
         btf_tabs(btf_tabs_offset));
  #endif

  return 0;
}//end find_btf2
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::Factor
(
 Int nrow,
 Int ncol,
 Int nnz,
 Int *col_ptr,
 Int *row_idx,
 Entry *val
)
{
  // Numeric factorization.
  //
  // Loads the values of the given matrix into A (copied directly, or
  // transposed when Options.transpose is set), sorts A, copies the values
  // into the factorization structure (sfactor_copy2), runs the numeric
  // factorization (factor_notoken, or factor_inc_lvl when
  // Options.incomplete is set) and allocates the work arrays used later by
  // the solve, followed by permute_composition_for_solve().
  //
  // Returns 0 on success and BASKER_ERROR as soon as one of the steps
  // reports BASKER_ERROR. On success factor_flag is set.
  //
  // Side effect: when Options.same_pattern is set while pivoting is still
  // enabled, a warning is printed and Options.no_pivot is forced on.

  int err = 0;

  if (Options.verbose == BASKER_TRUE)
    {
      std::cout << "Basker Factor Called" << std::endl;
      std::cout << "Matrix: " << nrow << " " << ncol << " " << nnz
                << std::endl;
    }

  if((Options.same_pattern == BASKER_TRUE) &&
     (Options.no_pivot == BASKER_FALSE))
    {
      printf("Warning: Same Pattern will not allow pivoting\n");
      Options.no_pivot = BASKER_TRUE;
    }

  if(Options.transpose == BASKER_FALSE)
    {
      // Keep the status of the copy. It used to be discarded, which left
      // the error check after the load unable to ever trigger, so a failed
      // copy went on to factor whatever A already contained.
      err = A.copy_values(nrow, ncol, nnz, col_ptr, row_idx, val);
    }
  else
    {
      // Transpose the input and store the result in A.
      matrix_transpose(0, nrow, 0, ncol, nnz, col_ptr, row_idx, val, A);
    }

  sort_matrix(A);

  if(Options.verbose_matrix_out == BASKER_TRUE)
    {
      printMTX("A_Factor.mtx", A);
    }
  matrix_flag = BASKER_TRUE;

  // Loading failed: stop before touching the factorization.
  if(err == BASKER_ERROR)
    {
      return BASKER_ERROR;
    }

  err = sfactor_copy2();
  if (Options.verbose == BASKER_TRUE)
    {
      printf("Basker Copy Structure Done \n");
    }
  if(err == BASKER_ERROR)
    {
      return BASKER_ERROR;
    }

  if(Options.incomplete == BASKER_FALSE)
    {
      err = factor_notoken(0);
    }
  else
    {
      err = factor_inc_lvl(0);
    }
  if(err == BASKER_ERROR)
    {
      return BASKER_ERROR;
    }

  if(Options.verbose == BASKER_TRUE)
    {
      printf("Basker Factor Done \n");
    }

  // Work arrays used by the solve (basker_solve_rhs).
  MALLOC_ENTRY_1DARRAY(x_view_ptr_copy, gn);
  MALLOC_ENTRY_1DARRAY(y_view_ptr_copy, gm);
  MALLOC_INT_1DARRAY(perm_inv_comp_array , gm); //y
  MALLOC_INT_1DARRAY(perm_comp_array, gn); //x

  MALLOC_INT_1DARRAY(perm_comp_iworkspace_array, gn);
  MALLOC_ENTRY_1DARRAY(perm_comp_fworkspace_array, gn);

  permute_composition_for_solve();

  factor_flag = BASKER_TRUE;

  return 0;
}//end Factor()
BASKER_INLINE
int Basker<Int, Entry, Exe_Space>::Symbolic
(
 Int nrow,
 Int ncol,
 Int nnz,
 Int *col_ptr,
 Int *row_idx,
 Entry *val
)
{
  // Symbolic phase: load the matrix, compute the ordering, and run the
  // symbolic factorization.
  //
  // Each of the three stages is guarded by its own flag (matrix_flag,
  // order_flag, symb_flag). If a stage has already run, a message is
  // printed and BASKER_ERROR is returned; stages completed earlier in the
  // same call keep their effects. Returns 0 when all stages ran.
  // Progress messages are printed when Options.verbose is set.

  if(Options.verbose == BASKER_TRUE)
    {
      std::cout << "Basker Symbolic" << std::endl;
      std::cout << "Matrix: " << nrow << " " << ncol << " " << nnz
                << std::endl;
    }

  //---- Stage 1: load A ----
  if(matrix_flag == BASKER_TRUE)
    {
      printf("YOU CANNOT RERUN SYMBOLIC\n");
      return BASKER_ERROR;
    }
  else
    {
      if(Options.transpose == BASKER_FALSE)
        {
          A.init_matrix("Original Matrix",
                        nrow, ncol, nnz, col_ptr, row_idx, val);
          A.scol = 0;
          A.srow = 0;
        }
      else
        {
          // Transpose the input and store the result in A.
          matrix_transpose(0, nrow, 0, ncol, nnz, col_ptr, row_idx, val, A);
        }
      sort_matrix(A);

      if(Options.verbose == BASKER_TRUE)
        {
          printf("Basker Matrix Loaded \n");
        }
      if(Options.verbose_matrix_out == BASKER_TRUE)
        {
          printMTX("A_Symbolic.mtx", A);
        }

      matrix_flag = BASKER_TRUE;
    }

  //---- Stage 2: ordering (always the BTF ordering) ----
  if(order_flag == BASKER_TRUE)
    {
      printf("YOU CANNOT RERUN ORDER\n");
      return BASKER_ERROR;
    }
  else
    {
      // The timer object that used to be created here was never read, so
      // it has been dropped.
      btf_order2();

      if(Options.verbose == BASKER_TRUE)
        {
          printf("Basker Ordering Found \n");
        }

      // The thread barrier is only needed for the BTF path with a
      // non-zero btf_tabs_offset.
      if((Options.btf == BASKER_TRUE) && (btf_tabs_offset != 0))
        {
          basker_barrier.init(num_threads, 16, tree.nlvls );
        }
      order_flag = BASKER_TRUE;

      if(Options.verbose == BASKER_TRUE)
        {
          printf("Basker P2P Thread Barriers Init\n");
        }
    }

  //---- Stage 3: symbolic factorization ----
  if(symb_flag == BASKER_TRUE)
    {
      printf("YOU CANNOT RERUN SFACTOR\n");
      return BASKER_ERROR;
    }
  else
    {
      if(Options.incomplete == BASKER_FALSE)
        {
          sfactor();
        }
      else
        {
          sfactor_inc();
        }

      if(Options.verbose == BASKER_TRUE)
        {
          printf("Basker Nonzero Counts Found \n");
        }
      symb_flag = BASKER_TRUE;
    }

  if(Options.verbose == BASKER_TRUE)
    {
      printf("Basker Symbolic Done \n");
    }

  return 0;
}//end Symbolic()
//unsigned long long fm_elim(int rows, int cols, int** A, int* c) unsigned long long fm_elim(Arena* arena, int rows, int cols, fix_p** A, fix_p* c) { //1 int r; int s; fix_p** t; fix_p* q; int n1; int n2; int i; int j; long br; long Br; int qj_lt_0; int s_p; fix_p** old_t; fix_p* old_q; int k; int l; int curr_row; fix_p tjr; fix_p* qj; fix_p* tj; r = cols; s = rows; t = check_out_matrix_copy(arena, A, rows, cols); q = check_out_vector_copy(arena, c, rows); printf("\n\nNew system\n"); // print_system(s, r, t, q); // printf("\n"); while(1){ //2 sort_matrix(s, r-1, &n1, &n2, t, q); printf("s: %d\nn1: %d\nn2: %d\n", s, n1, n2); printf("\n"); printf("After sort:\n"); print_system(s, r, t, q); printf("\n"); // 3 qj = q; for(j = 0; j < n2; j++){ tjr = fix_p_div(65536, t[j][r-1]); //65536 = 1 *qj = fix_p_mul(*qj, tjr); qj++; tj = t[j]; for(i = 0; i < r; i++){ *tj = fix_p_mul(*tj, tjr); tj++; } } printf("After div:\n"); print_system(s, r, t, q); printf("\n"); // printf("br: %f\tBr: %f\n", fix_p2double(br), fix_p2double(Br)); // printf("r == %d\n", r); // 5 if(r == 1){ // printf("r == 1\n"); // 4 br = 0; // printf("brs:\n"); if(n2 > n1){ for(j = n1; j < n2; j++){ if(q[j] > br){ br = q[j]; } } }else{ br = LONG_MIN; } Br = LONG_MAX; // printf("\nBrs:\n"); if(n1 > 0){ for(j = 0; j < n1; j++){ if(q[j] < Br){ Br = q[j]; } } }else{ Br = LONG_MAX; } qj_lt_0 = 0; for(i = n2; i < s; i++){ if(q[i] < 0){ qj_lt_0 = 1; break; } } // printf("br > Br: %d\nqj_lt_0: %d\n", br > Br, qj_lt_0); if(br > Br || qj_lt_0){ // printf("br > Br || qj_lt_0\n"); hand_back_matrix(arena, &t); hand_back_vector(arena, &q); return 0; }else{ // printf("not br > Br || qj_lt_0\n"); hand_back_matrix(arena, &t); hand_back_vector(arena, &q); return 1; } } // 6 s_p = s - n2 + n1 * (n2 - n1); if(s_p == 0){ // printf("s_p == 0\n"); // print_system(s, r, t, q); for(j = 0; j < n1; j++){ fix_p sum = 0; for(i = 0; i < r; i++){ sum += t[j][i]; } if(sum != q[j]){ // printf("i:%d j:%d sum:%f q:%f\n", 
i,j,fix_p2double(sum),fix_p2double(q[j])); // free_up_t_q(s,t,q); hand_back_matrix(arena, &t); hand_back_vector(arena, &q); return 0; } // printf("+ %f\n",fix_p2double(q[j])); } hand_back_matrix(arena, &t); hand_back_vector(arena, &q); return 1; } // printf("\n"); // 7 old_t = t; old_q = q; // t = init_matrix(s_p, r - 1); // q = init_vector(s_p); t = check_out_matrix(arena, s_p); q = check_out_vector(arena, s_p); // printf("s_p: %d\n", s_p); // printf("\n"); // print_system(s, r, old_t, old_q); // printf("\n"); curr_row = 0; fix_p old_qk; fix_p* old_tk; fix_p* old_tl; for(k = 0; k < n1; k++){ old_qk = old_q[k]; old_tk = old_t[k]; for(l = n1; l < n2; l++){ old_tl = old_t[l]; for(i = 0; i < r-1; i++){ // printf("%ld\n",t[curr_row][i]); // printf("k:%d i:%d l:%d\n", k ,i, l); t[curr_row][i] = old_tk[i] - old_tl[i]; } q[curr_row] = old_qk - old_q[l]; curr_row++; } } for(j = n2; j < s; j++){ for(i = 0; i < r-1; i++){ t[curr_row][i] = old_t[j][i]; } q[curr_row] = old_q[j]; curr_row++; } hand_back_matrix(arena, &old_t); hand_back_vector(arena, &old_q); r = r - 1; s = s_p; // printf("\n"); // print_system(s, r, t, q); // printf("\n"); } }