/*
 * Multisort benchmark driver.
 *
 * Usage: <prog> <vector size in K> <seq sort size in K> <seq merge size in K>
 *
 * Reads the three sizes (in units of 1024 elements) into the file-level
 * N, MIN_SORT_SIZE and MIN_MERGE_SIZE, allocates the data and scratch
 * vectors, times initialization and the sort, verifies the result with
 * check_solution() and prints the timings.
 *
 * Returns 1 on a usage or allocation error, 0 otherwise (including when
 * the sort check fails, which is only reported on stdout).
 */
int main(int argc, char **argv)
{
    int success;

    if (argc != 4) {
        fprintf(stderr, "Usage: %s <vector size in K> <seq sort size in K> <seq merge size in K>\n", argv[0]);
        return 1;
    }

    N = atol(argv[1]) * 1024L;
    MIN_SORT_SIZE = atol(argv[2]) * 1024L;
    MIN_MERGE_SIZE = atol(argv[3]) * 1024L;

    T *data = malloc(N * sizeof *data);
    T *tmp = malloc(N * sizeof *tmp);
    if (data == NULL || tmp == NULL) {
        /* Do not hand NULL buffers to initialize()/touch(). */
        fprintf(stderr, "Out of memory\n");
        free(data);
        free(tmp);
        return 1;
    }

    double init_time = omp_get_wtime();
    initialize(N, data);
    touch(N, tmp);
    init_time = omp_get_wtime() - init_time;

    double sort_time = omp_get_wtime();
    multisort(N, data, tmp);
    sort_time = omp_get_wtime() - sort_time;

    success = check_solution(N, data);
    if (!success)
        printf ("SORTING FAILURE\n");
    else
        printf ("SORTING SUCCESS\n");

    /*
     * This statement runs outside any parallel region, where
     * omp_get_num_threads() always yields 1; omp_get_max_threads()
     * reports the team size a parallel region would get.
     */
    fprintf(stdout, "Multisort program (using %d threads)\n", omp_get_max_threads());
    fprintf(stdout, " Initialization time in seconds = %g\n", init_time);
    fprintf(stdout, " Multisort time in seconds = %g\n", sort_time);
    fprintf(stdout, "\n");

    free(tmp);
    free(data);
    return 0;
}
int main(int argc, char **argv) { char **grid; if (error_check(argc, argv)) { print_error(); return (0); } grid = set_grid_parameters(argv); if (grid == NULL) print_error(); sudoku(grid); if (grid[9] == ERROR) print_error(); else { grid[9] = UNIQUE; if (sudoku(grid) && check_solution(grid)) print_grid(grid); else print_error(); } free(grid); return (0); }
/*
 * Drives the solving strategies over the board, cheapest first.
 *
 * Each pass tries alone(), singleton(), pairs() and
 * bad_possible_elimination() in that order. As soon as one of them
 * reports a non-zero result the pass restarts from the top. After each
 * strategy that reports nothing, the board is tested with
 * check_solution(); a result of 0 ends the search. When no strategy
 * makes a change and the board is still not accepted,
 * recursive_solution() is run once from cell (0, 0).
 *
 * Always returns 0.
 */
int solve_sudoku(sudoku_board board, int side)
{
    int eliminated = 0;

    for (;;) {
        if (alone(board, side))
            continue;
        if (check_solution(board, side) == 0)
            return 0;

        if (singleton(board, side, 0, 0, side, side))
            continue;
        if (check_solution(board, side) == 0)
            return 0;

        if (pairs(board, side, 0, 0, side, side))
            continue;
        if (check_solution(board, side) == 0)
            return 0;

        eliminated = bad_possible_elimination(board, side);
        if (eliminated)
            continue;
        if (check_solution(board, side) == 0)
            return 0;

        /* Nothing else made a change: fall back to the recursive search. */
        recursive_solution(board, side, 0, 0);
        return 0;
    }
}
/*
 * Backtracking search starting at cell (row, column).
 *
 * Returns 0 as soon as check_solution() accepts the board (returns 0).
 * A cell that already holds a value is skipped via next(). For an empty
 * cell every candidate 1..side allowed by possible_value() is tried in
 * turn with try_value(); the first candidate for which next() returns 0
 * wins. When all candidates fail the cell is reset with try_value(.., 0)
 * and 1 is returned so the caller can try its own next candidate.
 */
int recursive_solution(sudoku_board board, int side, int row, int column)
{
    int candidate;

    if (check_solution(board, side) == 0)
        return 0;

    if (get_value(&board[row][column]))
        return next(board, side, row, column);

    for (candidate = 1; candidate <= side; ++candidate) {
        if (!possible_value(board, side, row, column, candidate))
            continue;

        try_value(&board[row][column], candidate);
        if (next(board, side, row, column) == 0)
            return 0;
    }

    /* Every candidate failed: undo the tentative value and report failure. */
    try_value(&board[row][column], 0);
    return 1;
}
/*
 * Advances the recursive search to the cell after (row, column) in
 * row-major order.
 *
 * At the last cell of the board there is nothing left to visit, so the
 * result of check_solution() is returned directly; otherwise the result
 * of recursive_solution() on the following cell is returned.
 */
int next(sudoku_board board, int side, int row, int column)
{
    const int last = side - 1;

    if (column != last)
        return recursive_solution(board, side, row, column + 1);

    if (row != last)
        return recursive_solution(board, side, row + 1, 0);

    return check_solution(board, side);
}
int testing_cgetmi(int argc, char **argv){ PLASMA_Complex32_t *A, *B; int m, n, mb, nb; int i, ret, size; /* Check for number of arguments*/ if (argc != 4){ USAGE("GETMI", "M N MB NB ntdbypb with \n", " - M : the number of rows of the matrix \n" " - N : the number of columns of the matrix \n" " - MB : the number of rows of each block \n" " - NB : the number of columns of each block \n"); return -1; } m = atoi(argv[0]); n = atoi(argv[1]); mb = atoi(argv[2]); nb = atoi(argv[3]); size = m*n*sizeof(PLASMA_Complex32_t); A = (PLASMA_Complex32_t *)malloc(size); B = (PLASMA_Complex32_t *)malloc(size); LAPACKE_clarnv_work(1, ISEED, m*n, A); for(i=0; i<6; i++) { memcpy(B, A, size); printf(" - TESTING CGETMI (%4s) ...", formatstr[i]); ret = PLASMA_cgetmi( m, n, A, format[i], mb, nb ); if (ret != PLASMA_SUCCESS) { printf("Failed\n"); continue; } if ( check_solution(m, n, mb, nb, B, A, (int (*)(int, int, int, int, int, int))formatmap[i]) == 0 ) printf("............ PASSED !\n"); else printf("... FAILED !\n"); } free( A ); free( B ); return 0; }
/*
** Intersection test against cylinder number `i` (presumably a shadow
** test, going by the name - confirm with the callers).
**
** Stores in st->a, st->b and st->c the coefficients of a quadratic built
** from the direction (l_x, l_z), the point (p_x, p_z) and the cylinder's
** x, z and rayon fields, and its discriminant in st->d.
**
** Returns 1 only when the discriminant is strictly positive,
** check_solution(st) is non-zero and st->t < st->l; returns 0 otherwise.
*/
int	ombre_check_cylindre(t_ray *st, int i)
{
	st->a = pow(st->l_x, 2) + pow(st->l_z, 2);
	st->b = 2 * (st->l_x * (st->p_x - st->info.cylindre[i].x)
			+ st->l_z * (st->p_z - st->info.cylindre[i].z));
	st->c = pow(st->p_x - st->info.cylindre[i].x, 2)
		+ pow(st->p_z - st->info.cylindre[i].z, 2)
		- pow(st->info.cylindre[i].rayon, 2);
	st->d = pow(st->b, 2) - (4 * st->a * st->c);
	if (st->d <= 0 || check_solution(st) == 0)
		return (0);
	return (st->t < st->l);
}
/*
 * Generic backtracking step.
 *
 * a[] holds the partial solution and k is the index of its last filled
 * position. When check_solution() accepts the partial solution it is
 * handed to process_solution(); otherwise the position k+1 is filled
 * with each candidate produced by construct_candidates() in turn and the
 * search recurses.
 */
void backtrack(int a[], int k, int n)
{
    int options[N];
    int option_count;
    int idx;

    if (check_solution(a, k, n)) {
        process_solution(a, k);
        return;
    }

    k += 1;
    construct_candidates(a, k, options, &option_count);

    idx = 0;
    while (idx < option_count) {
        a[k] = options[idx];
        backtrack(a, k, n);
        ++idx;
    }
}
/*
 * Solves the problem by enumerating combinations.
 *
 * Starting from the all-zero index combination, next_combination() is
 * applied number_of_combinations(sats) - 1 times. Each combination that
 * check_solution() accepts is scored as total_reward / total_occurrences;
 * whenever the score beats the file-level `max`, `max` is updated and the
 * combination is copied into `solution`.
 *
 * The combination counter and, for accepted combinations, the score are
 * printed to stdout. Always returns 0.
 */
int solve(Satellite *sats, int *combination, int *solution)
{
    /*
     * The counter has the same type as the combination count so the
     * comparison below cannot overflow a plain int.
     */
    long long int i;
    long long int combs;
    int k;
    int n;
    int valid;
    float r, num;

    combs = number_of_combinations(sats);
    get_golden_index_max(sats);

    /* Start from the first combination (index 0 for every satellite). */
    for (k = 0; k < nsats; k++) {
        combination[k] = 0;
    }

    for (i = 1; i < combs; i++) {
        printf("%lld ", i);
        next_combination(sats, combination);

        valid = check_solution(sats, combination);
        if (!valid)
            continue;

        n = total_occurrences(sats, combination);
        r = total_reward(sats, combination);
        num = r / n;
        printf("%.2f\n", num);

        if (num > max) {
            max = num;
            copy_solution(combination, solution);
        }
    }
    return 0;
}
/* main()
 *
 * Program entry point. The puzzle to check is read from the file named by
 * the single optional argument, or from stdin when no argument is given.
 * A 9x9 two-dimensional array of ints is created to hold the solution,
 * filled by read_in_solution() and examined by check_solution(). If
 * control returns from the check, the array is released and the program
 * calls exit(0) to signal that the puzzle was correct.
 *
 * More than one argument, or a file that cannot be opened, is a checked
 * runtime error (assert).
 */
int main(int argc, char *argv[])
{
        UArray2_T solution = UArray2_new(9, 9, sizeof(int));
        FILE *srcfile;

        assert(argc <= 2);

        srcfile = (argc == 1) ? stdin : fopen(argv[1], "rb");
        assert(srcfile != NULL);

        read_in_solution(solution, srcfile);
        fclose(srcfile);

        check_solution(solution);

        UArray2_free(&solution);
        exit(0);
}
int main () { int cores = 2; int N = 10; int LDA = 10; int NRHS = 5; int LDB = 10; int info; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*(sizeof*A1)); PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDA*N*(sizeof*A2)); PLASMA_Complex32_t *B1 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*(sizeof*B1)); PLASMA_Complex32_t *B2 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*(sizeof*B2)); PLASMA_Complex32_t *L; int *IPIV; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); exit(0); } /*Plasma Initialize*/ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Initialize A1 and A2 Matrix */ LAPACKE_clarnv_work(IONE, ISEED, LDAxN, A1); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; /* Initialize B1 and B2 */ LAPACKE_clarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* PLASMA CGESV */ info = PLASMA_Alloc_Workspace_cgesv_incpiv(N, &L, &IPIV); info = PLASMA_cgesv_incpiv(N, NRHS, A2, LDA, L, IPIV, B2, LDB); /* Check the factorization and the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in CGESV example ! \n"); else printf("-- Run of CGESV example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(IPIV); free(L); PLASMA_Finalize(); exit(0); }
/*
 * Entry point of the work-server mining client.
 *
 * Parses the command line, creates the prefix context and the server
 * context, opens the requested OpenCL device(s) and then loops forever:
 * pick the most valuable public-key batch offered by the server, (re)start
 * the search threads on it, wait up to `interval` seconds for a solution,
 * and hand any result to check_solution().
 *
 * Returns 1 on any setup error. The main loop has no exit of its own
 * other than a failure to start the worker threads.
 */
int main(int argc, char **argv)
{
	const char *url = NULL;
	const char *credit_addr = NULL;
	int opt;
	int platformidx = -1, deviceidx = -1;
	char *pend;
	int verbose = 1;
	int interval = 90;
	int nthreads = 0;
	int worksize = 0;
	int nrows = 0, ncols = 0;
	int invsize = 0;
	int verify_mode = 0;
	int safe_mode = 0;
	char *devstrs[MAX_DEVS];
	int ndevstrs = 0;
	vg_context_t *vcp = NULL;
	vg_ocl_context_t *vocp = NULL;
	int res;
	int thread_started = 0;
	pubkeybatch_t *active_pkb = NULL;
	float active_pkb_value = 0;
	server_context_t *scp = NULL;
	pubkeybatch_t *pkb;
	int was_sleeping = 0;
	struct timeval tv;
	struct timespec sleepy;

	/* Lock/condition pair used further down to wait for a solution. */
	pthread_mutex_init(&soln_lock, NULL);
	pthread_cond_init(&soln_cond, NULL);

	if (argc == 1) {
		usage(argv[0]);
		return 1;
	}

	while ((opt = getopt(argc, argv, "u:a:vqp:d:w:t:g:b:VD:Sh?i:")) != -1) {
		switch (opt) {
		case 'u':
			/* Server URL (mandatory, checked after parsing) */
			url = optarg;
			break;
		case 'a':
			/* Reward address (mandatory, checked after parsing) */
			credit_addr = optarg;
			break;
		case 'v':
			verbose = 2;
			break;
		case 'q':
			verbose = 0;
			break;
		case 'i':
			/* Wait interval in seconds; values below 10 are rejected */
			interval = atoi(optarg);
			if (interval < 10) {
				fprintf(stderr, "Invalid interval '%s'\n", optarg);
				return 1;
			}
			break;
		case 'p':
			platformidx = atoi(optarg);
			break;
		case 'd':
			deviceidx = atoi(optarg);
			break;
		case 'w':
			worksize = atoi(optarg);
			if (worksize == 0) {
				fprintf(stderr, "Invalid work size '%s'\n", optarg);
				return 1;
			}
			break;
		case 't':
			nthreads = atoi(optarg);
			if (nthreads == 0) {
				fprintf(stderr, "Invalid thread count '%s'\n", optarg);
				return 1;
			}
			break;
		case 'g':
			/* Grid size given as <ncols>x<nrows>; both must be non-zero */
			nrows = 0;
			ncols = strtol(optarg, &pend, 0);
			if (pend && *pend == 'x') {
				nrows = strtol(pend+1, NULL, 0);
			}
			if (!nrows || !ncols) {
				fprintf(stderr, "Invalid grid size '%s'\n", optarg);
				return 1;
			}
			break;
		case 'b':
			invsize = atoi(optarg);
			if (!invsize) {
				fprintf(stderr, "Invalid modular inverse size '%s'\n", optarg);
				return 1;
			}
			/* x & (x - 1) is non-zero unless x has a single bit set */
			if (invsize & (invsize - 1)) {
				fprintf(stderr, "Modular inverse size must be " "a power of 2\n");
				return 1;
			}
			break;
		case 'V':
			verify_mode = 1;
			break;
		case 'S':
			safe_mode = 1;
			break;
		case 'D':
			/* Device string; may be given up to MAX_DEVS times */
			if (ndevstrs >= MAX_DEVS) {
				fprintf(stderr, "Too many OpenCL devices (limit %d)\n", MAX_DEVS);
				return 1;
			}
			devstrs[ndevstrs++] = optarg;
			break;
		default:
			usage(argv[0]);
			return 1;
		}
	}

#if OPENSSL_VERSION_NUMBER < 0x10000000L
	/* Complain about older versions of OpenSSL */
	if (verbose > 0) {
		fprintf(stderr, "WARNING: Built with " OPENSSL_VERSION_TEXT "\n" "WARNING: Use OpenSSL 1.0.0d+ for best performance\n");
	}
#endif

	/*
	 * NOTE(review): the handle returned here is discarded; presumably the
	 * call is only meant to force libcurl's global initialization before
	 * other threads exist - confirm, and consider curl_global_init().
	 */
	curl_easy_init();

	vcp = vg_prefix_context_new(0, 128, 0);
	vcp->vc_verbose = verbose;
	vcp->vc_output_match = output_match_work_complete;
	vcp->vc_output_timing = vg_output_timing_console;

	if (!url) {
		fprintf(stderr, "ERROR: No server URL specified\n");
		return 1;
	}
	if (!credit_addr) {
		fprintf(stderr, "ERROR: No reward address specified\n");
		return 1;
	}
	if (!vg_b58_decode_check(credit_addr, NULL, 0)) {
		fprintf(stderr, "ERROR: Invalid reward address specified\n");
		return 1;
	}

	scp = server_context_new(url, credit_addr);
	scp->verbose = verbose;

	/* Get the initial bounty list, abort on failure */
	if (server_context_getwork(scp))
		return 1;

	/* Set up OpenCL */
	/* From here until the main loop, res counts the devices opened. */
	res = 0;
	if (ndevstrs) {
		for (opt = 0; opt < ndevstrs; opt++) {
			vocp = vg_ocl_context_new_from_devstr(vcp, devstrs[opt], safe_mode, verify_mode);
			if (!vocp) {
				fprintf(stderr, "Could not open device '%s', ignoring\n", devstrs[opt]);
			} else {
				res++;
			}
		}
	} else {
		vocp = vg_ocl_context_new(vcp, platformidx, deviceidx, safe_mode, verify_mode, worksize, nthreads, nrows, ncols, invsize);
		if (vocp)
			res++;
	}
	if (!res) {
		/* No usable device: list what is available and give up. */
		vg_ocl_enumerate_devices();
		return 1;
	}

	if (verbose > 1)
		dump_work(&scp->items);

	while (1) {
		/* Refill the work list when it has run empty. */
		if (avl_root_empty(&scp->items))
			server_context_getwork(scp);

		pkb = most_valuable_pkb(scp);

		/* If the work item is the same as the one we're executing, keep it */
		if (pkb && active_pkb && server_pubkeybatch_equal(scp, active_pkb, pkb))
			pkb = active_pkb;

		if (thread_started && (!active_pkb || (pkb != active_pkb))) {
			/* If a thread is running, stop it */
			vg_context_stop_threads(vcp);
			thread_started = 0;
			if (active_pkb) {
				check_solution(scp, active_pkb);
				active_pkb = NULL;
			}
			vg_context_clear_all_patterns(vcp);
			if (verbose > 1)
				dump_work(&scp->items);
		}

		if (!pkb) {
			/* Print the notice once per idle period, not on every wake-up. */
			if (!was_sleeping) {
				fprintf(stderr, "No work available, sleeping\n");
				was_sleeping = 1;
			}
		} else if (!active_pkb) {
			/* New batch: load all of its patterns and start searching. */
			workitem_t *wip;
			was_sleeping = 0;
			active_pkb_value = 0;
			vcp->vc_pubkey_base = pkb->pubkey;
			for (wip = workitem_avl_first(&pkb->items); wip != NULL; wip = workitem_avl_next(wip)) {
				fprintf(stderr, "Searching for pattern: \"%s\" " "Reward: %f Value: %f BTC/Gkey\n", wip->pattern, wip->reward, wip->value);
				vcp->vc_addrtype = wip->addrtype;
				if (!vg_context_add_patterns(vcp, &wip->pattern, 1)) {
					fprintf(stderr, "WARNING: could not add pattern\n");
				} else {
					active_pkb_value += wip->value;
				}
				assert(vcp->vc_npatterns);
			}
			fprintf(stderr, "\nTotal value for current work: %f BTC/Gkey\n", active_pkb_value);
			res = vg_context_start_threads(vcp);
			if (res)
				return 1;
			thread_started = 1;
			active_pkb = pkb;
		}

		/* Wait for something to happen */
		/* Absolute deadline: now + interval seconds. */
		gettimeofday(&tv, NULL);
		sleepy.tv_sec = tv.tv_sec;
		sleepy.tv_nsec = tv.tv_usec * 1000;
		sleepy.tv_sec += interval;

		pthread_mutex_lock(&soln_lock);
		res = 0;
		/* Skip the wait when soln_private_key is already set. */
		if (!soln_private_key)
			res = pthread_cond_timedwait(&soln_cond, &soln_lock, &sleepy);
		pthread_mutex_unlock(&soln_lock);

		if (res == 0) {
			/* NOTE(review): active_pkb may be NULL on this path - confirm
			 * check_solution() tolerates that. */
			if (check_solution(scp, active_pkb))
				active_pkb = NULL;
		} else if (res == ETIMEDOUT) {
			/* NOTE(review): presumably drops every queued batch except the
			 * active one so fresh work is fetched next pass - verify
			 * against free_pkb_tree(). */
			free_pkb_tree(&scp->items, active_pkb);
		}
	}

	return 0;
}
/*
 * Trial-and-error elimination of candidates.
 *
 * For every empty cell and every candidate still stored in its
 * possible[] array, a scratch board is built from the values already
 * placed, the candidate is placed on it, and the scratch board is tested
 * with check_solution():
 *   - result 0: the candidate is committed to the real board and -1 is
 *     returned immediately;
 *   - result 1: the candidate is removed from the real cell and counted;
 *   - anything else: nothing is concluded.
 *
 * Returns -1 in the first case, otherwise the number of candidates
 * removed (0 when nothing changed).
 */
int bad_possible_elimination(sudoku_board board, int side){
    int i, j, k, n, m,aa=0, abc, alone_ok=0;
    sudoku_board test_board;
    for (i=0; i<side; ++i) {//ROW
        for (j=0; j<side; ++j) {//COLUMN
            if (get_value(&board[i][j])==0) {
                for (k=0; k<side; ++k) {//POSSIBLE
                    if (board[i][j].possible[k]){
                        /* Copy every value already placed onto a fresh scratch board. */
                        test_board = create_board(side);
                        for (n=0; n<side; ++n)
                            for (m=0; m<side; ++m){
                                if (get_value(&board[n][m]))
                                    found_value(test_board, side, n, m, get_value(&board[n][m]));
                            }
                        /* Place the candidate under test. */
                        found_value(test_board, side, i, j, board[i][j].possible[k]);
                        alone_ok=0;
                        /* Also place, on the scratch board, the candidates of cells in
                         * this row and this column whose `possibles` count is 1. */
                        for (n=0; n<side; ++n) {
                            if ((board[i][n].possibles)==1) {
                                for (m=0; m<side; ++m) {
                                    if (board[i][n].possible[m]) {
                                        alone_ok = 1;
                                        found_value(test_board, side, i, n, board[i][n].possible[m]);
                                    }
                                }
                            }
                            if ((board[n][j].possibles)==1) {
                                for (m=0; m<side; ++m) {
                                    if (board[n][j].possible[m]) {
                                        alone_ok = 1;
                                        found_value(test_board, side, n, j, board[n][j].possible[m]);
                                    }
                                }
                            }
                        }
                        if (singleton(test_board, side, i, j, i+1, j+1))
                            alone_ok = 1;
                        if (pairs(test_board, side, i, j, i+1, j+1))
                            alone_ok = 1;
                        if (alone_ok) {
                            /* NOTE(review): this propagation loop runs on `board`, not on
                             * `test_board` - confirm that is intended. */
                            while (1){
                                if (alone(board, side)) continue;
                                if (singleton(board, side, 0, 0, side, side)) continue;
                                if (pairs(board, side, 0, 0, side, side)) continue;
                                break;
                            }
                        }
                        abc = check_solution(test_board, side);
                        switch (abc) {
                            case 0:
                                /* Scratch board accepted: commit the candidate. */
                                found_value(board, side, i, j, board[i][j].possible[k]);
                                delete_board(test_board, side);
                                return -1;
                            case 1:
                                /* Scratch board rejected: drop this candidate. */
                                delete_possible(&board[i][j], board[i][j].possible[k], side);
                                ++aa;
                                break;
                            default:
                                break;
                        }
                        delete_board(test_board, side);
                    }
                }
            }
        }
    }
    return aa;
}
/*
 * Runs the pivoting loop of the transportation solver on `basis`.
 *
 * After init_basis() has built the initial basis, the loop repeats while
 * find_entering_arc() reports an entering arc (i_enter >= 0): the cycle
 * closed by the arc is located with find_cycle() and the duals are
 * updated on the smaller side of the tree, temporarily detaching the
 * subtree when it holds more than half of the nodes.
 *
 * Returns the non-zero status of init_basis() if it fails, otherwise the
 * status of check_solution().
 */
int optimize_transport(BASIS *basis)
{
    int enter_i, enter_j;   /* end points of the entering arc */
    int arc;                /* index of the entering arc */
    int root;               /* root of the subtree to update */
    int parent;
    int orient;
    int half;
    int rc;
    double delta;           /* dual change from find_entering_arc() */
    double signed_delta;    /* delta with the sign chosen by orient */
    int *family;
    int *pred;
    int *brother;
    int *son;

    rc = init_basis(basis);   /* construct initial basis */
    if (rc != 0)
        return rc;

    /* Local shortcuts into the basis structure. */
    family = basis->family;
    pred = basis->pred;
    brother = basis->brother;
    son = basis->son;
    half = basis->no_nodes / 2;

    for (delta = find_entering_arc(basis, &enter_i, &enter_j, &arc);
         enter_i >= 0;
         delta = find_entering_arc(basis, &enter_i, &enter_j, &arc)) {
        basis->pivot++;
#ifdef PRINT
        printf("+-----------------------------------+\n");
        printf("| pivot %3d |\n", basis->pivot);
        printf("+-----------------------------------+\n");
#endif
        root = find_cycle(basis, enter_i, enter_j, arc, &orient);
#ifdef PRINT
        printf("\nentering arc : %d --> %d \n", enter_i, enter_j);
        printf(" arcindex : %d\n", arc);
        printf(" redcost : %8.4lf", delta);
#endif
        signed_delta = (orient > 0) ? delta : -delta;

        if (family[root] <= half) {
            /* The subtree is the smaller part: update it directly. */
            update_dual(basis, root, signed_delta);
        } else {
            /* Detach the subtree, update the rest of the tree with the
             * opposite sign, then re-attach the subtree. */
            basis->rerooted++;
            parent = pred[root];
            son[parent] = brother[root];
            update_dual(basis, 0, -signed_delta);
            brother[root] = son[parent];
            son[parent] = root;
        }
#ifdef PRINT
        print_basis(basis);
#endif
    }

    return check_solution(basis);
}
// Rebuild the solution_t data structure using 'x' void solution_reset (instance_t *inst, solution_t *s, int *x) { int i, j, k, l; // Empty all heaps for (j = 0; j < inst->m; j++) { s->besti1[j] = s->besti2[j] = -1; for (i = 0; i < inst->n; i++) s->heap[j][i] = s->heap_inv[j][i] = -1; } // Compute total opening costs and heap size s->heap_size = 0; s->total_cost = 0.0; if (NULL != x) { for (s->heap_size = i = 0; i < inst->n; i++) { s->x[i] = x[i]; if (x[i] == 1) { ++s->heap_size; s->total_cost += inst->f[i]; } } } // Populate heaps, compute move costs and total solution cost if (NULL == x || s->heap_size == 0) { // Special case where all locations are closed s->total_cost = inst->ub; for (i = 0; i < inst->n; i++) { s->x[i] = 0; s->flip_gain[i] = s->total_cost - inst->f[i]; for (j = 0; j < inst->m; j++) s->flip_gain[i] -= inst->c[i][j]; } } else { // General case // Populate heaps by simply adding all open locations in increasing service costs, // as this verifies the heap property. for (j = 0; j < inst->m; j++) { for (i = k = l = 0; k < inst->n && l < s->heap_size; k++) { i = inst->inc[j][k]; if (x[i] == 1) { s->heap[j][l] = i; s->heap_inv[j][i] = l; if (l == 0) { s->besti1[j] = i; s->total_cost += inst->c[i][j]; } else if (l == 1) s->besti2[j] = i; ++l; } } assert(l == s->heap_size); } // Compute move costs for (i = 0; i < inst->n; i++) { if (s->x[i] && s->heap_size == 1) { for (s->flip_gain[i] = - inst->ub + inst->f[i], j = 0; j < inst->m; j++) s->flip_gain[i] += inst->c[i][j]; } else if (s->x[i]) { assert(s->heap_size >= 2); for (s->flip_gain[i] = inst->f[i], j = 0; j < inst->m; j++) if (s->besti1[j] == i) s->flip_gain[i] -= inst->c[s->besti2[j]][j] - inst->c[i][j]; } else { assert(s->x[i] == 0 && s->heap_size >= 1); for (s->flip_gain[i] = -inst->f[i], j = 0; j < inst->m; j++) if (inst->c[s->besti1[j]][j] > inst->c[i][j]) s->flip_gain[i] += inst->c[s->besti1[j]][j] - inst->c[i][j]; } } } check_solution(inst, s); }
// Performs one iteration of the primal process
//
// Works on primal->sol in place. First forces the locations fixed by
// primal->improving_partial_x to their required state, then selects one
// location to flip (a best improving move if any, otherwise a random
// non-tabu or free one), updates its tabu tenure and performs the flip.
// primal->best_z and primal->best_x are refreshed whenever the current
// total cost drops below the best known one.
// Returns early, without a move, when no location is free.
void primal_run (primal_t *primal) {
  instance_t *inst = primal->inst;
  solution_t *s = primal->sol;
  int i, w, n, aspiration, n_best, n_free, n_diver;
  double best_gain;

  check_solution(inst, s);

  //% Algorithm 1 Step 4 (cont'd)
  // Perform moves to conform with the improving partial solution
  // (a tabu value of INFINITY marks a location as fixed)
  for (i = 0; i < inst->n; i++)
    if (primal->improving_partial_x[i] != -1 && primal->tabu[i] != INFINITY) {
      primal->tabu[i] = INFINITY;
      if (s->x[i] != primal->improving_partial_x[i]) {
        solution_flip(inst, s, i);
        check_solution(inst, s);
        ++primal->n_moves;
        if (s->total_cost < primal->best_z) {
          primal->n_moves_at_last_improvement = primal->n_moves;
          primal->best_z = s->total_cost;
          memcpy(primal->best_x, s->x, inst->n * sizeof(int));
        }
      }
    }

  // Perturb tabu state using the guiding solution
  // (only once more than inst->request_period moves have passed since the
  // last improvement)
  if (primal->n_moves_at_last_improvement + inst->request_period < primal->n_moves) {
    primal->n_moves_at_last_improvement = primal->n_moves;
    for (i = 0; i < inst->n; i++)
      if (primal->tabu[i] != INFINITY && s->x[i] == primal->guiding_x[i])
        primal->tabu[i] = ((double) primal->n_moves) + primal->tabu_length;
  }

  // Analyze search state
  //% Algorithm 1 step 1
  // NOTE: 'aspiration' is never reset inside the loop, so once it is set
  // it also applies to every later index i.
  for (aspiration = 0, best_gain = -INFINITY, n_best = n_free = n_diver = 0, i = 0; i < inst->n; i++)
    if (primal->tabu[i] == INFINITY) {
      assert(s->x[i] == primal->improving_partial_x[i]);
    } else {
      ++n_free; // count the number of unfixed locations
      if ((primal->n_moves == 0 || (s->total_cost - s->flip_gain[i] <= primal->best_z - bound_eps)) && s->flip_gain[i] != INFINITY)
        aspiration = 1; // aspiration criterion is satisfied
      if ((double) primal->n_moves > primal->tabu[i] || aspiration) {
        ++n_diver; // count the number of non-tabu locations
        if (s->flip_gain[i] > best_gain + bound_eps && best_gain != INFINITY)
          n_best = 1, best_gain = s->flip_gain[i]; // store the best location to perform a move on
        else if (fabs(s->flip_gain[i] - best_gain) < 2.0 * bound_eps || (s->flip_gain[i] == INFINITY && best_gain == INFINITY))
          ++n_best; // count the number of best locations
      }
    }

  // Select the 1OPT move to perform
  // In each branch n is a random rank and w becomes the index of the
  // n-th location (counting from 0) that satisfies the branch's filter.
  w = -1;
  if (best_gain <= bound_eps) { // if there exist no improving moves
    if (n_free == 0) { // if there exist no moves
      //% Algorithm 1 step 1(d)
      return;
    }
    if (n_diver == 0) { // if there exist no non-tabu moves
      // select one location at random
      //% Algorithm 1 step 1(c)
      n = RngStream_RandInt(primal->rng, 0, n_free - 1);
      for (i = 0; i < inst->n && w == -1; i++)
        if (primal->tabu[i] != INFINITY && n-- <= 0)
          w = i;
    } else { // there exist non-tabu moves
      // select one location at random
      //% Algorithm 1 step 1(b)
      n = RngStream_RandInt(primal->rng, 0, n_diver - 1);
      for (i = 0; i < inst->n && w == -1; i++)
        if (((double) primal->n_moves > primal->tabu[i] || aspiration) && n-- <= 0)
          w = i;
    }
  } else { // there exists at least one improving move
    // select one of the best at random
    //% Algorithm 1 step 1(a)
    assert(n_best > 0);
    n = RngStream_RandInt(primal->rng, 0, n_best - 1);
    for (w = -1, i = 0; i < inst->n && w == -1; i++)
      if (((double) primal->n_moves > primal->tabu[i] || aspiration) && (fabs(s->flip_gain[i] - best_gain) < 2.0 * bound_eps || (s->flip_gain[i] == INFINITY && best_gain == INFINITY)) && n-- <= 0)
        w = i;
  }

  // check that in all cases we have found something
  assert(w >= 0);
  assert(w < inst->n);
  assert(primal->tabu[w] != INFINITY);

  // Update search state
  //% Algorithm 1 step 2
  tabu_tenure(primal, w, (s->flip_gain[w] > bound_eps));

  //% Algorithm 1 step 3(a,b,c)
  solution_flip(inst, s, w);
  check_solution(inst, s);

  //% Algorithm 1 step 3(d)
  if (s->total_cost < primal->best_z) { // Update best known solution
    primal->n_moves_at_last_improvement = primal->n_moves;
    primal->best_z = s->total_cost;
    memcpy(primal->best_x, s->x, inst->n * sizeof(int));
  }
}
int main (int argc, char *argv[]) { int i, n, ib, nb, nz, nv, celldim, phydim; int nn, type, *elems = 0, idata[5]; cgsize_t ne; char *p, basename[33], title[65]; float value, *var; SOLUTION *sol; FILE *fp; if (argc < 2) print_usage (usgmsg, NULL); ib = 0; basename[0] = 0; while ((n = getargs (argc, argv, options)) > 0) { switch (n) { case 'a': ascii = 1; break; case 'b': ib = atoi (argarg); break; case 'B': strncpy (basename, argarg, 32); basename[32] = 0; break; case 'w': weighting = 1; break; case 'S': usesol = atoi (argarg); break; } } if (argind > argc - 2) print_usage (usgmsg, "CGNSfile and/or Tecplotfile not given"); if (!file_exists (argv[argind])) FATAL (NULL, "CGNSfile does not exist or is not a file"); /* open CGNS file */ printf ("reading CGNS file from %s\n", argv[argind]); nb = open_cgns (argv[argind], 1); if (!nb) FATAL (NULL, "no bases found in CGNS file"); if (*basename && 0 == (ib = find_base (basename))) FATAL (NULL, "specified base not found"); if (ib > nb) FATAL (NULL, "base index out of range"); cgnsbase = ib ? 
ib : 1; if (cg_base_read (cgnsfn, cgnsbase, basename, &celldim, &phydim)) FATAL (NULL, NULL); if (celldim != 3 || phydim != 3) FATAL (NULL, "cell and physical dimension must be 3"); printf (" using base %d - %s\n", cgnsbase, basename); if (NULL == (p = strrchr (argv[argind], '/')) && NULL == (p = strrchr (argv[argind], '\\'))) strncpy (title, argv[argind], sizeof(title)); else strncpy (title, ++p, sizeof(title)); title[sizeof(title)-1] = 0; if ((p = strrchr (title, '.')) != NULL) *p = 0; read_zones (); if (!nZones) FATAL (NULL, "no zones in the CGNS file"); /* verify dimensions fit in an integer */ for (nz = 0; nz < nZones; nz++) { if (Zones[nz].nverts > CG_MAX_INT32) FATAL(NULL, "zone size too large to write with integers"); if (Zones[nz].type == CGNS_ENUMV(Unstructured)) { count_elements (nz, &ne, &type); if (ne > CG_MAX_INT32) FATAL(NULL, "too many elements to write with integers"); } } nv = 3 + check_solution (); /* open Tecplot file */ printf ("writing %s Tecplot data to <%s>\n", ascii ? "ASCII" : "binary", argv[++argind]); if (NULL == (fp = fopen (argv[argind], ascii ? 
"w+" : "w+b"))) FATAL (NULL, "couldn't open Tecplot output file"); /* write file header */ if (ascii) fprintf (fp, "TITLE = \"%s\"\n", title); else { fwrite ("#!TDV75 ", 1, 8, fp); i = 1; write_ints (fp, 1, &i); write_string (fp, title); } /* write variables */ if (ascii) { fprintf (fp, "VARIABLES = \"X\", \"Y\", \"Z\""); if (usesol) { sol = Zones->sols; for (n = 0; n < sol->nflds; n++) fprintf (fp, ",\n\"%s\"", sol->flds[n].name); } } else { write_ints (fp, 1, &nv); write_string (fp, "X"); write_string (fp, "Y"); write_string (fp, "Z"); if (usesol) { sol = Zones->sols; for (n = 0; n < sol->nflds; n++) write_string (fp, sol->flds[n].name); } } /* write zones */ if (!ascii) { for (nz = 0; nz < nZones; nz++) { if (Zones[nz].type == CGNS_ENUMV(Structured)) { idata[0] = 0; /* BLOCK */ idata[1] = -1; /* color not specified */ idata[2] = (int)Zones[nz].dim[0]; idata[3] = (int)Zones[nz].dim[1]; idata[4] = (int)Zones[nz].dim[2]; } else { count_elements (nz, &ne, &type); idata[0] = 2; /* FEBLOCK */ idata[1] = -1; /* color not specified */ idata[2] = (int)Zones[nz].dim[0]; idata[3] = (int)ne; idata[4] = type; } value = 299.0; write_floats (fp, 1, &value); write_string (fp, Zones[nz].name); write_ints (fp, 5, idata); } value = 357.0; write_floats (fp, 1, &value); } for (nz = 0; nz < nZones; nz++) { printf (" zone %d...", nz+1); fflush (stdout); read_zone_grid (nz+1); ne = 0; type = 2; nn = (int)Zones[nz].nverts; var = (float *) malloc (nn * sizeof(float)); if (NULL == var) FATAL (NULL, "malloc failed for temp float array"); if (Zones[nz].type == CGNS_ENUMV(Unstructured)) elems = volume_elements (nz, &ne, &type); if (ascii) { if (Zones[nz].type == CGNS_ENUMV(Structured)) fprintf (fp, "\nZONE T=\"%s\", I=%d, J=%d, K=%d, F=BLOCK\n", Zones[nz].name, (int)Zones[nz].dim[0], (int)Zones[nz].dim[1], (int)Zones[nz].dim[2]); else fprintf (fp, "\nZONE T=\"%s\", N=%d, E=%d, F=FEBLOCK, ET=%s\n", Zones[nz].name, nn, (int)ne, type == 2 ? 
"TETRAHEDRON" : "BRICK"); } else { value = 299.0; write_floats (fp, 1, &value); i = 0; write_ints (fp, 1, &i); i = 1; for (n = 0; n < nv; n++) write_ints (fp, 1, &i); } for (n = 0; n < nn; n++) var[n] = (float)Zones[nz].verts[n].x; write_floats (fp, nn, var); for (n = 0; n < nn; n++) var[n] = (float)Zones[nz].verts[n].y; write_floats (fp, nn, var); for (n = 0; n < nn; n++) var[n] = (float)Zones[nz].verts[n].z; write_floats (fp, nn, var); if (usesol) { read_solution_field (nz+1, usesol, 0); sol = &Zones[nz].sols[usesol-1]; if (sol->location != CGNS_ENUMV(Vertex)) cell_vertex_solution (nz+1, usesol, weighting); for (nv = 0; nv < sol->nflds; nv++) { for (n = 0; n < nn; n++) var[n] = (float)sol->flds[nv].data[n]; write_floats (fp, nn, var); } } free (var); if (Zones[nz].type == CGNS_ENUMV(Unstructured)) { if (!ascii) { i = 0; write_ints (fp, 1, &i); } nn = 1 << type; for (i = 0, n = 0; n < ne; n++, i += nn) write_ints (fp, nn, &elems[i]); free (elems); } puts ("done"); } fclose (fp); cg_close (cgnsfn); return 0; }
int testing_dsyr2k(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 7 ){ USAGE("SYR2K", "alpha beta M N LDA LDB LDC", " - alpha : alpha coefficient\n" " - beta : beta coefficient\n" " - N : number of columns and rows of matrix C and number of row of matrix A and B\n" " - K : number of columns of matrix A and B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n" " - LDC : leading dimension of matrix C\n"); return -1; } double alpha = (double) atol(argv[0]); double beta = (double) atol(argv[1]); int N = atoi(argv[2]); int K = atoi(argv[3]); int LDA = atoi(argv[4]); int LDB = atoi(argv[5]); int LDC = atoi(argv[6]); int NKmax = max(N, K); int NminusOne = N - 1; double eps; int info_solution; int info, u, t; size_t LDAxK = LDA*NKmax; size_t LDBxK = LDB*NKmax; size_t LDCxN = LDC*N; double *A = (double *)malloc(LDAxK*sizeof(double)); double *B = (double *)malloc(LDBxK*sizeof(double)); double *C = (double *)malloc(LDCxN*sizeof(double)); double *Cinit = (double *)malloc(LDCxN*sizeof(double)); double *Cfinal = (double *)malloc(LDCxN*sizeof(double)); double *WORK = (double *)malloc(2*LDC*sizeof(double)); double *D = (double *) malloc(LDC *sizeof(double)); /* Check if unable to allocate memory */ if ( (!A) || (!B) || (!Cinit) || (!Cfinal) || (!D) ){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_dlamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA DSYR2K ROUTINE ------- \n"); printf(" Size of the Matrix C %d by %d\n", N, K); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING DSYR2K */ /* Initialize A,B */ LAPACKE_dlarnv_work(IONE, ISEED, LDAxK, A); LAPACKE_dlarnv_work(IONE, ISEED, LDBxK, B); /* Initialize C */ 
LAPACKE_dlarnv_work(IONE, ISEED, LDC, D); dlagsy(&N, &NminusOne, D, C, &LDC, ISEED, WORK, &info); free(D); free(WORK); for (u=0; u<2; u++) { for (t=0; t<2; t++) { memcpy(Cinit, C, LDCxN*sizeof(double)); memcpy(Cfinal, C, LDCxN*sizeof(double)); /* PLASMA DSYR2K */ PLASMA_dsyr2k(uplo[u], trans[t], N, K, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(uplo[u], trans[t], N, K, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING DSYR2K (%5s, %s) ........... PASSED !\n", uplostr[u], transstr[t]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING DSYR2K (%5s, %s) ... FAILED !\n", uplostr[u], transstr[t]); printf("************************************************\n"); } } } free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }
int main () { int cores = 2; int N = 10; int LDA = 10; int NRHS = 5; int LDB = 10; int info; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A1)); PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A2)); PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B1)); PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B2)); PLASMA_desc *L; int *IPIV; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); return EXIT_SUCCESS; } /*Plasma Initialize*/ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Initialize A1 and A2 Matrix */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; /* Initialize B1 and B2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* Allocate L and IPIV */ info = PLASMA_Alloc_Workspace_zgetrf_incpiv(N, N, &L, &IPIV); /* LU factorization of the matrix A */ info = PLASMA_zgetrf_incpiv(N, N, A2, LDA, L, IPIV); /* Solve the problem */ info = PLASMA_ztrsmpl(N, NRHS, A2, LDA, L, IPIV, B2, LDB); info = PLASMA_ztrsm(PlasmaLeft, PlasmaUpper, PlasmaNoTrans, PlasmaNonUnit, N, NRHS, (PLASMA_Complex64_t)1.0, A2, LDA, B2, LDB); /* Check the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in ZGETRS example ! \n"); else printf("-- Run of ZGETRS example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(IPIV); free(L); PLASMA_Finalize(); return EXIT_SUCCESS; }
/*
 * Least-squares example driver.
 *
 * Builds an m x n matrix A and an m x 1 right-hand side b with Skylark's
 * uniform generator from a context seeded with 23234, wraps A in a regression
 * problem, and then solves the same problem with a series of solvers, timing
 * each one with boost::mpi::timer and printing one report line on rank 0:
 *
 *   1. exact solvers: QR, SNE, SNE through the "computed" matrix wrapper
 *      (problem1), SVD, and iterative LSQR;
 *   2. sketch-and-solve with CWT and FJLT (the JLT variant is inside #if 0);
 *   3. sketch-accelerated solvers: Simplified Blendenpik with FJLT and CWT
 *      (JLT variant inside #if 0), Blendenpik, and LSRN.
 *
 * After every solve, check_solution() is called with (problem, b, x, r) and
 * writes res, resAtr and resFac, which are what the report lines print
 * (presumably the residual norm, ||A' r|| and a relative residual factor, as
 * the printed labels suggest; check_solution is defined elsewhere).
 *
 * Order matters here:
 *   - r starts as a copy of b; right after the QR solve it is overwritten by
 *     the Gemv call with alpha = -1.0 and beta = 1.0, i.e. r <- r - A*x, and
 *     that r is what every later check_solution() call receives.
 *   - res_opt is re-assigned from res after the QR, SNE, SNE (computed) and
 *     SVD solves; the sketch-based reports print res / res_opt.
 *   - The same `context` object is passed to every sketch-based solver in
 *     sequence.
 *
 * NOTE(review): the check after the "computed" SNE solve passes `problem`,
 * not `problem1` -- confirm that is intended.
 * NOTE(review): m, n and t are not declared in this function; presumably
 * file-level constants -- verify.
 *
 * Returns 0.
 */
int main(int argc, char** argv) { double res, resAtr, resFac; El::Initialize(argc, argv); bmpi::communicator world; int rank = world.rank(); skybase::context_t context(23234); // Setup problem and righthand side // Using Skylark's uniform generator (as opposed to Elemental's) // will insure the same A and b are generated regardless of the number // of processors. matrix_type A = skyutil::uniform_matrix_t<matrix_type>::generate(m, n, El::DefaultGrid(), context); matrix_type b = skyutil::uniform_matrix_t<matrix_type>::generate(m, 1, El::DefaultGrid(), context); regression_problem_type problem(m, n, A); boost::mpi::timer timer; double telp; sol_type x(n,1); rhs_type r(b); // Using QR timer.restart(); exact_solver_type<skyalg::qr_l2_solver_tag> exact_solver(problem); exact_solver.solve(b, x); telp = timer.elapsed(); check_solution(problem, b, x, r, res, resAtr, resFac); if (rank == 0) std::cout << "Exact (QR):\t\t\t||r||_2 = " << boost::format("%.2f") % res << "\t\t\t\t\t\t\t||A' * r||_2 = " << boost::format("%.2e") % resAtr << "\t\tTime: " << boost::format("%.2e") % telp << " sec" << std::endl; double res_opt = res; skybase::Gemv(El::NORMAL, -1.0, problem.input_matrix, x, 1.0, r); // Using SNE (semi-normal equations) timer.restart(); exact_solver_type<skyalg::sne_l2_solver_tag>(problem).solve(b, x); telp = timer.elapsed(); check_solution(problem, b, x, r, res, resAtr, resFac); if (rank == 0) std::cout << "Exact (SNE):\t\t\t||r||_2 = " << boost::format("%.2f") % res << "\t\t\t\t\t\t\t||A' * r||_2 = " << boost::format("%.2e") % resAtr << "\t\tTime: " << boost::format("%.2e") % telp << " sec" << std::endl; res_opt = res; // Again, using SNE, only with the computed interface (example; to be removed.) 
cmatrix CA(A); regression_problem_type1 problem1(m, n, CA); timer.restart(); exact_solver_type1<skyalg::sne_l2_solver_tag>(problem1).solve(b, x); telp = timer.elapsed(); check_solution(problem, b, x, r, res, resAtr, resFac); if (rank == 0) std::cout << "Exact (SNE) (COMPUTED):\t\t\t||r||_2 = " << boost::format("%.2f") % res << "\t\t\t\t\t\t\t||A' * r||_2 = " << boost::format("%.2e") % resAtr << "\t\tTime: " << boost::format("%.2e") % telp << " sec" << std::endl; res_opt = res; // Using SVD timer.restart(); exact_solver_type<skyalg::svd_l2_solver_tag>(problem).solve(b, x); telp = timer.elapsed(); check_solution(problem, b, x, r, res, resAtr, resFac); if (rank == 0) std::cout << "Exact (SVD):\t\t\t||r||_2 = " << boost::format("%.2f") % res << "\t\t\t\t\t\t\t||A' * r||_2 = " << boost::format("%.2e") % resAtr << "\t\tTime: " << boost::format("%.2e") % telp << " sec" << std::endl; res_opt = res; // Using LSQR skyalg::krylov_iter_params_t lsqrparams; lsqrparams.am_i_printing = rank == 0; lsqrparams.log_level = 0; timer.restart(); exact_solver_type< skyalg::iterative_l2_solver_tag< skyalg::lsqr_tag > >(problem, lsqrparams) .solve(b, x); telp = timer.elapsed(); check_solution(problem, b, x, r, res, resAtr, resFac); if (rank == 0) std::cout << "Exact (LSQR):\t\t\t||r||_2 = " << boost::format("%.2f") % res << "\t\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr << "\t\tTime: " << boost::format("%.2e") % telp << " sec" << std::endl; // Using sketch-and-solve #if 0 timer.restart(); sketched_solver_type<skysk::JLT_t>(problem, t, context).solve(b, x); telp = timer.elapsed(); check_solution(problem, b, x, r, res, resAtr, resFac); if (rank == 0) std::cout << "Sketch-and-Solve (JLT):\t\t||r||_2 = " << boost::format("%.2f") % res << " (x " << boost::format("%.5f") % (res / res_opt) << ")" << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac << "\t||A' * r||_2 = " << boost::format("%.2e") % 
resAtr << "\t\tTime: " << boost::format("%.2e") % telp << " sec" << std::endl; #endif timer.restart(); sketched_solver_type<skysk::CWT_t>(problem, t, context).solve(b, x); telp = timer.elapsed(); check_solution(problem, b, x, r, res, resAtr, resFac); if (rank == 0) std::cout << "Sketch-and-Solve (CWT):\t\t||r||_2 = " << boost::format("%.2f") % res << " (x " << boost::format("%.5f") % (res / res_opt) << ")" << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr << "\t\tTime: " << boost::format("%.2e") % telp << " sec" << std::endl; timer.restart(); sketched_solver_type<skysk::FJLT_t>(problem, t, context).solve(b, x); telp = timer.elapsed(); check_solution(problem, b, x, r, res, resAtr, resFac); if (rank == 0) std::cout << "Sketch-and-Solve (FJLT):\t||r||_2 = " << boost::format("%.2f") % res << " (x " << boost::format("%.5f") % (res / res_opt) << ")" << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr << "\t\tTime: " << boost::format("%.2e") % telp << " sec" << std::endl; // Accelerate-using-sketching #if 0 timer.restart(); accelerated_exact_solver_type_sb<skysk::JLT_t>(problem, context).solve(b, x); telp = timer.elapsed(); check_solution(problem, b, x, r, res, resAtr, resFac); if (rank == 0) std::cout << "Simplified Blendenpik (JLT):\t||r||_2 = " << boost::format("%.2f") % res << " (x " << boost::format("%.5f") % (res / res_opt) << ")" << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr << "\t\tTime: " << boost::format("%.2e") % telp << " sec" << std::endl; #endif timer.restart(); accelerated_exact_solver_type_sb<skysk::FJLT_t>(problem, context).solve(b, x); telp = timer.elapsed(); check_solution(problem, b, x, r, res, resAtr, resFac); if (rank == 0) std::cout << "Simplified Blendenpik (FJLT):\t||r||_2 = " << boost::format("%.2f") % res << " 
(x " << boost::format("%.5f") % (res / res_opt) << ")" << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr << "\t\tTime: " << boost::format("%.2e") % telp << " sec" << std::endl; timer.restart(); accelerated_exact_solver_type_sb<skysk::CWT_t>(problem, context).solve(b, x); telp = timer.elapsed(); check_solution(problem, b, x, r, res, resAtr, resFac); if (rank == 0) std::cout << "Simplified Blendenpik (CWT):\t||r||_2 = " << boost::format("%.2f") % res << " (x " << boost::format("%.5f") % (res / res_opt) << ")" << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr << "\t\tTime: " << boost::format("%.2e") % telp << " sec" << std::endl; timer.restart(); accelerated_exact_solver_type_blendenpik(problem, context).solve(b, x); telp = timer.elapsed(); check_solution(problem, b, x, r, res, resAtr, resFac); if (rank == 0) std::cout << "Blendenpik:\t\t\t||r||_2 = " << boost::format("%.2f") % res << " (x " << boost::format("%.5f") % (res / res_opt) << ")" << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr << "\t\tTime: " << boost::format("%.2e") % telp << " sec" << std::endl; timer.restart(); accelerated_exact_solver_type_lsrn(problem, context).solve(b, x); telp = timer.elapsed(); check_solution(problem, b, x, r, res, resAtr, resFac); if (rank == 0) std::cout << "LSRN:\t\t\t\t||r||_2 = " << boost::format("%.2f") % res << " (x " << boost::format("%.5f") % (res / res_opt) << ")" << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr << "\t\tTime: " << boost::format("%.2e") % telp << " sec" << std::endl; return 0; }
int main () { int cores = 2; int M = 15; int N = 10; int LDA = 15; int NRHS = 5; int LDB = 15; int info; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; double *A1 = (double *)malloc(LDA*N*sizeof(double)); double *A2 = (double *)malloc(LDA*N*sizeof(double)); double *B1 = (double *)malloc(LDB*NRHS*sizeof(double)); double *B2 = (double *)malloc(LDB*NRHS*sizeof(double)); double *T; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); exit(0); } /* Plasma Initialization */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Allocate T */ PLASMA_Alloc_Workspace_dgeqrf(M, N, &T); /* Initialize A1 and A2 */ LAPACKE_dlarnv_work(IONE, ISEED, LDAxN, A1); for (i = 0; i < M; i++) for (j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i] ; /* Initialize B1 and B2 */ LAPACKE_dlarnv_work(IONE, ISEED, LDBxNRHS, B1); for (i = 0; i < M; i++) for (j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i] ; /* Factorization QR of the matrix A2 */ info = PLASMA_dgeqrf(M, N, A2, LDA, T); /* Solve the problem */ info = PLASMA_dgeqrs(M, N, NRHS, A2, LDA, T, B2, LDB); /* Check the solution */ info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in DGEQRS example ! \n"); else printf("-- Run of DGEQRS example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(T); PLASMA_Finalize(); exit(0); }
int testing_dsposv(int argc, char **argv) { /* Check for number of arguments*/ if (argc != 4){ USAGE("CPOSV", "N LDA NRHS LDB", " - N : the size of the matrix\n" " - LDA : leading dimension of the matrix A\n" " - NRHS : number of RHS\n" " - LDB : leading dimension of the RHS B\n"); return -1; } int N = atoi(argv[0]); int LDA = atoi(argv[1]); int NRHS = atoi(argv[2]); int LDB = atoi(argv[3]); int ITER; double eps; int uplo; int info; int info_solution = 0; /*, info_factorization;*/ int i,j; int NminusOne = N-1; int LDBxNRHS = LDB*NRHS; double *A1 = (double *)malloc(LDA*N *sizeof(double)); double *A2 = (double *)malloc(LDA*N *sizeof(double)); double *B1 = (double *)malloc(LDB*NRHS*sizeof(double)); double *B2 = (double *)malloc(LDB*NRHS*sizeof(double)); double *WORK = (double *)malloc(2*LDA *sizeof(double)); double *D = (double *)malloc(LDA*sizeof(double)); /* Check if unable to allocate memory */ if ( (!A1) || (!A2) || (!B1) || (!B2) ){ printf("Out of Memory \n "); exit(0); } eps = LAPACKE_dlamch_work('e'); /*------------------------------------------------------------- * TESTING DSPOSV */ /* Initialize A1 and A2 for Symmetric Positif Matrix (Hessenberg in the complex case) */ LAPACKE_dlarnv_work(IONE, ISEED, LDA, D); dlagsy(&N, &NminusOne, D, A1, &LDA, ISEED, WORK, &info); free(D); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; for ( i = 0; i < N; i++){ A1[LDA*i+i] = A1[LDA*i+i] + N ; A2[LDA*i+i] = A1[LDA*i+i]; } /* Initialize B1 and B2 */ LAPACKE_dlarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; printf("\n"); printf("------ TESTS FOR PLASMA DSPOSV ROUTINE ------ \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n", eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /* 
PLASMA DSPOSV */ uplo = PlasmaLower; info = PLASMA_dsposv(uplo, N, NRHS, A2, LDA, B1, LDB, B2, LDB, &ITER); if (info != PLASMA_SUCCESS ) { printf("PLASMA_dsposv is not completed: info = %d\n", info); info_solution = 1; } else { printf(" Solution obtained with %d iterations\n", ITER); /* Check the factorization and the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps); } if (info_solution == 0){ printf("***************************************************\n"); printf(" ---- TESTING DSPOSV ..................... PASSED !\n"); printf("***************************************************\n"); } else{ printf("***************************************************\n"); printf(" - TESTING DSPOSV .. FAILED !\n"); printf("***************************************************\n"); } free(A1); free(A2); free(B1); free(B2); free(WORK); return 0; }
int testing_zsymm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 7 ){ USAGE("SYMM", "alpha beta M N K LDA LDB LDC", " - alpha : alpha coefficient \n" " - beta : beta coefficient \n" " - M : number of rows of matrices A and C \n" " - N : number of columns of matrices B and C \n" " - LDA : leading dimension of matrix A \n" " - LDB : leading dimension of matrix B \n" " - LDC : leading dimension of matrix C\n"); return -1; } PLASMA_Complex64_t alpha = (PLASMA_Complex64_t) atol(argv[0]); PLASMA_Complex64_t beta = (PLASMA_Complex64_t) atol(argv[1]); int M = atoi(argv[2]); int N = atoi(argv[3]); int LDA = atoi(argv[4]); int LDB = atoi(argv[5]); int LDC = atoi(argv[6]); int MNmax = max(M, N); double eps; int info_solution; int i, j, s, u; int LDAxM = LDA*MNmax; int LDBxN = LDB*N; int LDCxN = LDC*N; PLASMA_Complex64_t *A = (PLASMA_Complex64_t *)malloc(LDAxM*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B = (PLASMA_Complex64_t *)malloc(LDBxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *C = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Cinit = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Cfinal = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); /* Check if unable to allocate memory */ if ((!A)||(!B)||(!Cinit)||(!Cfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_dlamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA ZSYMM ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING ZSYMM */ /* Initialize A */ PLASMA_zplgsy( (double)0., MNmax, A, LDA, 51 ); /* Initialize B */ LAPACKE_zlarnv_work(IONE, ISEED, 
LDBxN, B); /* Initialize C */ LAPACKE_zlarnv_work(IONE, ISEED, LDCxN, C); for (s=0; s<2; s++) { for (u=0; u<2; u++) { /* Initialize Cinit / Cfinal */ for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cinit[LDC*j+i] = C[LDC*j+i]; for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cfinal[LDC*j+i] = C[LDC*j+i]; /* PLASMA ZSYMM */ PLASMA_zsymm(side[s], uplo[u], M, N, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(side[s], uplo[u], M, N, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING ZSYMM (%5s, %5s) ....... PASSED !\n", sidestr[s], uplostr[u]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING ZSYMM (%s, %s) ... FAILED !\n", sidestr[s], uplostr[u]); printf("************************************************\n"); } } } free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }
/*
 * Hungarian::solve -- runs the Hungarian (assignment) algorithm on
 * m_costmatrix (m_rows x m_cols).
 *
 * Phases, in the order they appear below (the numbered "Begin/End" markers in
 * the body delimit them):
 *   - zero all work vectors and reset m_assignment to HUNGARIAN_NOT_ASSIGNED;
 *   - "12": subtract each column's minimum from that column.  This modifies
 *     m_costmatrix in place;
 *   - "16": build an initial matching row by row; rows that find no free
 *     column are queued in unchosen_row[0..t);
 *   - "18".."22": repeatedly grow a forest from the unmatched rows, updating
 *     row_dec / col_inc / slack until an augmenting path is found
 *     (label `breakthru`), then flip the matching along parent_row ("20") and
 *     reset parent_row / slack for the next stage ("17").
 *
 * Returns true when every row is matched and check_solution() accepts the
 * dual variables, after calling assign_solution(); returns false (and logs to
 * stderr when `verbose` is set) when check_solution() rejects them.  The
 * outer while(1) only exits through those returns.
 *
 * NOTE(review): in the initial-state loop the running minimum `s` is updated
 * and a match is attempted in the same pass over l, and row_dec[k] is only
 * written when l > 0.  A row can therefore be matched (and the loop left via
 * `break`) before its true minimum has been seen -- at l == 0 the test
 * `s == m_costmatrix[k][0]` is always true.  Confirm this is intended.
 * NOTE(review): `cost += s` sits inside the k-loop of the column-minimum
 * search, and `cost` is never read afterwards.
 * NOTE(review): slack_row is sized m but indexed by column l (< n); this
 * presumably relies on a square matrix (m == n) -- verify against the code
 * that fills m_costmatrix.
 * `try_matching` and the two `assign` results are never read.
 */
bool Hungarian::solve() { int i, j, m, n, k, l, s, t, q, unmatched, cost; m = m_rows; n = m_cols; int INF = std::numeric_limits<int>::max(); //vertex alternating paths, vector<int> col_vertex(m), row_vertex(n), unchosen_row(m), parent_row(n), row_dec(m), col_inc(n), slack_row(m), slack(n); cost=0; for (i=0;i<m_rows;i++) { col_vertex[i]=0; unchosen_row[i]=0; row_dec[i]=0; slack_row[i]=0; } for (j=0;j<m_cols;j++) { row_vertex[j]=0; parent_row[j] = 0; col_inc[j]=0; slack[j]=0; } //Double check assignment matrix is 0 m_assignment.assign(m, vector<int>(n, HUNGARIAN_NOT_ASSIGNED)); // Begin subtract column minima in order to start with lots of zeroes 12 if (verbose) { fprintf(stderr, "Using heuristic\n"); } for (l=0;l<n;l++) { s = m_costmatrix[0][l]; for (k=1;k<m;k++) { if (m_costmatrix[k][l] < s) { s=m_costmatrix[k][l]; } cost += s; } if (s!=0) { for (k=0;k<m;k++) { m_costmatrix[k][l]-=s; } } //pre-initialize state 16 row_vertex[l]= -1; parent_row[l]= -1; col_inc[l]=0; slack[l]=INF; } // End subtract column minima in order to start with lots of zeroes 12 // Begin initial state 16 t=0; for (k=0;k<m;k++) { bool row_done = false; s=m_costmatrix[k][0]; for (l=0;l<n;l++) { if(l > 0) { if (m_costmatrix[k][l] < s) { s = m_costmatrix[k][l]; } row_dec[k]=s; } if (s == m_costmatrix[k][l] && row_vertex[l]<0) { col_vertex[k]=l; row_vertex[l]=k; if (verbose) { fprintf(stderr, "matching col %d==row %d\n",l,k); } row_done = true; break; } } if(!row_done) { col_vertex[k]= -1; if (verbose) { fprintf(stderr, "node %d: unmatched row %d\n",t,k); } unchosen_row[t++]=k; } } // End initial state 16 bool checked = false; // Begin Hungarian algorithm 18 //is matching already complete? if (t == 0) { checked = check_solution(row_dec, col_inc, col_vertex); if (checked) { //finish assignment, wrap up and done. bool assign = assign_solution(row_dec, col_inc, col_vertex); return true; } else { if(verbose) { fprintf(stderr, "Could not solve. 
Error.\n"); } return false; } } unmatched=t; while (1) { if (verbose) { fprintf(stderr, "Matched %d rows.\n",m-t); } q=0; bool try_matching; while (1) { while (q<t) { // Begin explore node q of the forest 19 k=unchosen_row[q]; s=row_dec[k]; for (l=0;l<n;l++) { if (slack[l]) { int del; del=m_costmatrix[k][l]-s+col_inc[l]; if (del<slack[l]) { if (del==0) { if (row_vertex[l]<0) { goto breakthru; } slack[l]=0; parent_row[l]=k; if (verbose){ fprintf(stderr, "node %d: row %d==col %d--row %d\n", t,row_vertex[l],l,k);} unchosen_row[t++]=row_vertex[l]; } else { slack[l]=del; slack_row[l]=k; } } } } // End explore node q of the forest 19 q++; } // Begin introduce a new zero into the matrix 21 s=INF; for (l=0;l<n;l++) { if (slack[l] && slack[l]<s) { s=slack[l]; } } for (q=0;q<t;q++) { row_dec[unchosen_row[q]]+=s; } for (l=0;l<n;l++) { //check slack if (slack[l]) { slack[l]-=s; if (slack[l]==0) { // Begin look at a new zero 22 k=slack_row[l]; if (verbose) { fprintf(stderr, "Decreasing uncovered elements by %d produces zero at [%d,%d]\n", s,k,l); } if (row_vertex[l]<0) { for (j=l+1;j<n;j++) if (slack[j]==0) { col_inc[j]+=s; } goto breakthru; } else { parent_row[l]=k; if (verbose) { fprintf(stderr, "node %d: row %d==col %d--row %d\n",t,row_vertex[l],l,k);} unchosen_row[t++]=row_vertex[l]; } // End look at a new zero 22 } } else { col_inc[l]+=s; } } // End introduce a new zero into the matrix 21 } breakthru: // Begin update the matching 20 if (verbose) { fprintf(stderr, "Breakthrough at node %d of %d!\n",q,t); } while (1) { j=col_vertex[k]; col_vertex[k]=l; row_vertex[l]=k; if (verbose) { fprintf(stderr, "rematching col %d==row %d\n",l,k); } if (j<0) { break; } k=parent_row[j]; l=j; } // End update the matching 20 if (--unmatched == 0) { checked = check_solution(row_dec, col_inc, col_vertex); if (checked) { //finish assignment, wrap up and done. bool assign = assign_solution(row_dec, col_inc, col_vertex); return true; } else { if(verbose) { fprintf(stderr, "Could not solve. 
Error.\n"); } return false; } } // Begin get ready for another stage 17 t=0; for (l=0;l<n;l++) { parent_row[l]= -1; slack[l]=INF; } for (k=0;k<m;k++) { if (col_vertex[k]<0) { if (verbose) { fprintf(stderr, "node %d: unmatched row %d\n",t,k);} unchosen_row[t++]=k; } } // End get ready for another stage 17 }// back to while loop }
/*
 * Builds a random dim x dim sparse matrix in column-compressed form.
 *
 * The diagonal gets values in [1,3]; `edges` further entries with values in
 * [1,5] are added at random positions.  When `lower` is non-zero the column
 * index is drawn from [0, row], so the matrix is lower triangular; otherwise
 * it is drawn from [0, dim-1].  Duplicate entries are summed by
 * igraph_sparsemat_dupl().  The caller owns `compressed` and must destroy it.
 *
 * The random draws happen in the order diagonal, then (row, column, value)
 * per extra entry.
 */
static void solvers_test_random_matrix(igraph_sparsemat_t *compressed,
                                       long int dim, long int edges, int lower)
{
    igraph_sparsemat_t triplet;
    long int i;

    igraph_sparsemat_init(&triplet, dim, dim, edges + dim);
    for (i = 0; i < dim; i++) {
        igraph_sparsemat_entry(&triplet, i, i, RNG_INTEGER(1, 3));
    }
    for (i = 0; i < edges; i++) {
        long int r = RNG_INTEGER(0, dim - 1);
        long int c = RNG_INTEGER(0, lower ? r : dim - 1);
        igraph_real_t value = RNG_INTEGER(1, 5);
        igraph_sparsemat_entry(&triplet, r, c, value);
    }
    igraph_sparsemat_compress(&triplet, compressed);
    igraph_sparsemat_destroy(&triplet);
    igraph_sparsemat_dupl(compressed);
}

/*
 * Initializes `b` with `dim` random integers in [1,10] (the right-hand side)
 * and `x` with `dim` zeros (the solution buffer).  The caller destroys both.
 */
static void solvers_test_init_vectors(igraph_vector_t *b, igraph_vector_t *x,
                                      long int dim)
{
    long int i;

    igraph_vector_init(b, dim);
    for (i = 0; i < dim; i++) {
        VECTOR(*b)[i] = RNG_INTEGER(1, 10);
    }
    igraph_vector_init(x, dim);
}

/*
 * Exercises the sparse linear solvers (lsolve, ltsolve, usolve, utsolve,
 * cholsol, lusol) on random 10 x 10 systems and verifies each result with
 * check_solution().
 *
 * Returns 0 when every solver passes, otherwise the 1-based index of the
 * first failing solver in the order listed above.  All matrices and vectors
 * of a stage are released before returning, on failure as well as success.
 */
int main() {

    igraph_sparsemat_t A, B, C;
    igraph_vector_t b, x;
    const long int dim = 10;
    const long int tri_edges = 10 * 10 / 6;  /* extra entries, most stages */
    const long int lu_edges = 10 * 10 / 4;   /* extra entries, lusol stage */
    int ok;

    /* lsolve */

    solvers_test_random_matrix(&B, dim, tri_edges, /*lower=*/ 1);
    solvers_test_init_vectors(&b, &x, dim);

    igraph_sparsemat_lsolve(&B, &b, &x);
    ok = check_solution(&B, &x, &b) ? 1 : 0;

    igraph_vector_destroy(&b);
    igraph_vector_destroy(&x);
    igraph_sparsemat_destroy(&B);
    if (!ok) {
        return 1;
    }

    /* ltsolve */

    solvers_test_random_matrix(&B, dim, tri_edges, /*lower=*/ 1);
    solvers_test_init_vectors(&b, &x, dim);

    igraph_sparsemat_ltsolve(&B, &b, &x);
    /* the system actually solved is B' x = b, so check against B' */
    igraph_sparsemat_transpose(&B, &A, /*values=*/ 1);
    ok = check_solution(&A, &x, &b) ? 1 : 0;

    igraph_vector_destroy(&b);
    igraph_vector_destroy(&x);
    igraph_sparsemat_destroy(&B);
    igraph_sparsemat_destroy(&A);
    if (!ok) {
        return 2;
    }

    /* usolve */

    solvers_test_random_matrix(&B, dim, tri_edges, /*lower=*/ 1);
    /* transpose of a lower triangular matrix is upper triangular */
    igraph_sparsemat_transpose(&B, &A, /*values=*/ 1);
    solvers_test_init_vectors(&b, &x, dim);

    igraph_sparsemat_usolve(&A, &b, &x);
    ok = check_solution(&A, &x, &b) ? 1 : 0;

    igraph_vector_destroy(&b);
    igraph_vector_destroy(&x);
    igraph_sparsemat_destroy(&B);
    igraph_sparsemat_destroy(&A);
    if (!ok) {
        return 3;
    }

    /* utsolve */

    solvers_test_random_matrix(&B, dim, tri_edges, /*lower=*/ 1);
    igraph_sparsemat_transpose(&B, &A, /*values=*/ 1);
    igraph_sparsemat_destroy(&B);
    solvers_test_init_vectors(&b, &x, dim);

    igraph_sparsemat_utsolve(&A, &b, &x);
    /* the system actually solved is A' x = b, so check against A' */
    igraph_sparsemat_transpose(&A, &B, /*values=*/ 1);
    ok = check_solution(&B, &x, &b) ? 1 : 0;

    igraph_vector_destroy(&b);
    igraph_vector_destroy(&x);
    igraph_sparsemat_destroy(&B);
    igraph_sparsemat_destroy(&A);
    if (!ok) {
        return 4;
    }

    /* cholsol */

    /* We need a positive definite matrix, so we create a full-rank
       matrix first and then calculate A'A, which will be positive
       definite. */

    solvers_test_random_matrix(&B, dim, tri_edges, /*lower=*/ 0);
    igraph_sparsemat_transpose(&B, &A, /*values=*/ 1);
    igraph_sparsemat_multiply(&A, &B, &C);
    igraph_sparsemat_destroy(&A);
    igraph_sparsemat_destroy(&B);
    solvers_test_init_vectors(&b, &x, dim);

    igraph_sparsemat_cholsol(&C, &b, &x, /*order=*/ 0);
    ok = check_solution(&C, &x, &b) ? 1 : 0;

    igraph_vector_destroy(&b);
    igraph_vector_destroy(&x);
    igraph_sparsemat_destroy(&C);
    if (!ok) {
        return 5;
    }

    /* lusol */

    solvers_test_random_matrix(&B, dim, lu_edges, /*lower=*/ 0);
    solvers_test_init_vectors(&b, &x, dim);

    igraph_sparsemat_lusol(&B, &b, &x, /*order=*/ 0, /*tol=*/ 1e-10);
    ok = check_solution(&B, &x, &b) ? 1 : 0;

    igraph_vector_destroy(&b);
    igraph_vector_destroy(&x);
    igraph_sparsemat_destroy(&B);
    if (!ok) {
        return 6;
    }

    return 0;
}
int testing_dtrmm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 5 ) { USAGE("TRMM", "alpha M N LDA LDB", " - alpha : alpha coefficient\n" " - M : number of rows of matrices B\n" " - N : number of columns of matrices B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n"); return -1; } double alpha = (double) atol(argv[0]); int M = atoi(argv[1]); int N = atoi(argv[2]); int LDA = atoi(argv[3]); int LDB = atoi(argv[4]); double eps; int info_solution; int s, u, t, d, i; int LDAxM = LDA*max(M,N); int LDBxN = LDB*max(M,N); double *A = (double *)malloc(LDAxM*sizeof(double)); double *B = (double *)malloc(LDBxN*sizeof(double)); double *Binit = (double *)malloc(LDBxN*sizeof(double)); double *Bfinal = (double *)malloc(LDBxN*sizeof(double)); /* Check if unable to allocate memory */ if ( (!A) || (!B) || (!Binit) || (!Bfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_dlamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA DTRMM ROUTINE ------- \n"); printf(" Size of the Matrix B : %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING DTRMM */ /* Initialize A, B, C */ LAPACKE_dlarnv_work(IONE, ISEED, LDAxM, A); LAPACKE_dlarnv_work(IONE, ISEED, LDBxN, B); for(i=0;i<max(M,N);i++) A[LDA*i+i] = A[LDA*i+i] + 2.0; for (s=0; s<2; s++) { for (u=0; u<2; u++) { #ifdef COMPLEX for (t=0; t<3; t++) { #else for (t=0; t<2; t++) { #endif for (d=0; d<2; d++) { memcpy(Binit, B, LDBxN*sizeof(double)); memcpy(Bfinal, B, LDBxN*sizeof(double)); /* PLASMA DTRMM */ PLASMA_dtrmm(side[s], uplo[u], trans[t], diag[d], M, N, alpha, A, LDA, Bfinal, LDB); /* Check the solution */ info_solution = check_solution(side[s], uplo[u], trans[t], 
diag[d], M, N, alpha, A, LDA, Binit, Bfinal, LDB); printf("***************************************************\n"); if (info_solution == 0) { printf(" ---- TESTING DTRMM (%s, %s, %s, %s) ...... PASSED !\n", sidestr[s], uplostr[u], transstr[t], diagstr[d]); } else { printf(" ---- TESTING DTRMM (%s, %s, %s, %s) ... FAILED !\n", sidestr[s], uplostr[u], transstr[t], diagstr[d]); } printf("***************************************************\n"); } } } } free(A); free(B); free(Binit); free(Bfinal); return 0; } /*-------------------------------------------------------------- * Check the solution */ static int check_solution(PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum trans, PLASMA_enum diag, int M, int N, double alpha, double *A, int LDA, double *Bref, double *Bplasma, int LDB) { int info_solution; double Anorm, Binitnorm, Bplasmanorm, Blapacknorm, Rnorm, result; double eps; double mzone = (double)-1.0; double *work = (double *)malloc(max(M, N)* sizeof(double)); int Am, An; if (side == PlasmaLeft) { Am = M; An = M; } else { Am = N; An = N; } Anorm = LAPACKE_dlantr_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), lapack_const(uplo), lapack_const(diag), Am, An, A, LDA, work); Binitnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); Bplasmanorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bplasma, LDB, work); cblas_dtrmm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag, M, N, (alpha), A, LDA, Bref, LDB); Blapacknorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); cblas_daxpy(LDB * N, (mzone), Bplasma, 1, Bref, 1); Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Binitnorm %e, Bplasmanorm %e, Blapacknorm %e\n", Rnorm, Anorm, Binitnorm, Bplasmanorm, Blapacknorm); result = Rnorm / ((Anorm + 
Blapacknorm) * max(M,N) * eps); printf("============\n"); printf("Checking the norm of the difference against reference DTRMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo).N.eps) = %e \n", result); if ( isinf(Blapacknorm) || isinf(Bplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
int testing_zgesv_incpiv(int argc, char **argv) { /* Check for valid arguments*/ if (argc != 4){ USAGE("GESV_INCPIV", "N LDA NRHS LDB", " - N : the size of the matrix\n" " - LDA : leading dimension of the matrix A\n" " - NRHS : number of RHS\n" " - LDB : leading dimension of the matrix B\n"); return -1; } int N = atoi(argv[0]); int LDA = atoi(argv[1]); int NRHS = atoi(argv[2]); int LDB = atoi(argv[3]); double eps; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A1)); PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A2)); PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B1)); PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B2)); PLASMA_Complex64_t *L; int *IPIV; /* Check if unable to allocate memory */ if ( (!A1) || (!A2)|| (!B1) || (!B2) ) { printf("Out of Memory \n "); return -2; } eps = BLAS_dfpinfo(blas_eps); /*---------------------------------------------------------- * TESTING ZGESV */ /* Initialize A1 and A2 Matrix */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; /* Initialize B1 and B2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* PLASMA ZGESV */ PLASMA_Alloc_Workspace_zgesv_incpiv(N, &L, &IPIV); PLASMA_zgesv_incpiv(N, NRHS, A2, LDA, L, IPIV, B2, LDB); printf("\n"); printf("------ TESTS FOR PLASMA INCPIV ZGESV ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n", eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /* Check the factorization and the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps); if 
((info_solution == 0)){ printf("***************************************************\n"); printf(" ---- TESTING INCPIV ZGESV ............... PASSED !\n"); printf("***************************************************\n"); } else{ printf("************************************************\n"); printf(" - TESTING INCPIV ZGESV ... FAILED !\n"); printf("************************************************\n"); } /*------------------------------------------------------------- * TESTING ZGETRF + ZGETRS */ /* Initialize A1 and A2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; /* Initialize B1 and B2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* Plasma routines */ PLASMA_zgetrf_incpiv(N, N, A2, LDA, L, IPIV); PLASMA_zgetrs_incpiv(PlasmaNoTrans, N, NRHS, A2, LDA, L, IPIV, B2, LDB); printf("\n"); printf("------ TESTS FOR PLASMA ZGETRF + ZGETRS ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n", eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /* Check the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps); if ((info_solution == 0)){ printf("***************************************************\n"); printf(" ---- TESTING INCPIV ZGETRF + ZGETRS ..... PASSED !\n"); printf("***************************************************\n"); } else{ printf("***************************************************\n"); printf(" - TESTING INCPIV ZGETRF + ZGETRS ... 
FAILED !\n"); printf("***************************************************\n"); } /*------------------------------------------------------------- * TESTING ZGETRF + ZTRSMPL + ZTRSM */ /* Initialize A1 and A2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; /* Initialize B1 and B2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* PLASMA routines */ PLASMA_zgetrf_incpiv(N, N, A2, LDA, L, IPIV); PLASMA_ztrsmpl(N, NRHS, A2, LDA, L, IPIV, B2, LDB); PLASMA_ztrsm(PlasmaLeft, PlasmaUpper, PlasmaNoTrans, PlasmaNonUnit, N, NRHS, 1.0, A2, LDA, B2, LDB); printf("\n"); printf("------ TESTS FOR PLASMA INCPIV ZGETRF + ZTRSMPL + ZTRSM ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n", eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /* Check the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps); if ((info_solution == 0)){ printf("***************************************************\n"); printf(" ---- TESTING INCPIV ZGETRF + ZTRSMPL + ZTRSM ... PASSED !\n"); printf("***************************************************\n"); } else{ printf("**************************************************\n"); printf(" - TESTING INCPIV ZGETRF + ZTRSMPL + ZTRSM ... FAILED !\n"); printf("**************************************************\n"); } free(A1); free(A2); free(B1); free(B2); free(IPIV); free(L); return 0; }
int main(int argc, char* argv[]) { int seed,i; //cout<<"c This is NuMVC, a local search solver for the Minimum Vertex Cover (and also Maximum Independent Set) problem."<<endl; if(build_instance(argv[1])!=1){ cout<<"can't open instance file"<<endl; return -1; } optimal_size=0; i=2; //sscanf(argv[i++],"%d",&cand_count);//if you want to stop the algorithm only cutoff time is reached, set optimal_size to 0. //sscanf(argv[i++],"%d",&edge_cand); sscanf(argv[i++],"%d",&seed); sscanf(argv[i++],"%d",&cutoff_time); srand(seed); //cout<<seed<<' '; //cout<<argv[1]<<' '; times(&start); start_time = start.tms_utime + start.tms_stime; init_sol(); #ifdef individual_analysis_on_init_sls_mode times(&finish); init_time = double(finish.tms_utime - start.tms_utime + finish.tms_stime - start.tms_stime) / sysconf(_SC_CLK_TCK); init_time = round(init_time * 100)/100.0; #endif //if(c_size + uncov_stack_fill_pointer > optimal_size ) //{ //cout<<"c Start local search..."<<endl; cover_LS(); //} #ifdef individual_analysis_on_init_sls_mode times(&finish); sls_time = double(finish.tms_utime - start.tms_utime + finish.tms_stime - start.tms_stime) / sysconf(_SC_CLK_TCK) - init_time; sls_step_speed_per_ms = double(step) * 0.001 / sls_time; #endif //check solution if(check_solution()==1) { cout << "o " << best_c_size << endl; //cout << best_c_size << ' '; //print_mvc_solution(); cout << "c searchSteps " << best_step << endl; //printf("%ld ", best_step); cout << "c solveTime " << best_comp_time << endl; //cout << "c stepVelocity(/0.001ms) " << (long double)(best_step) / (best_comp_time * 1000000) << endl; /*cout<<"c Best found vertex cover size = "<<best_c_size<<endl; print_solution(); cout<<"c searchSteps = "<<best_step<<endl; cout<<"c solveTime = "<<best_comp_time<<endl;*/ //cout<<best_c_size<<' '<<best_comp_time<<' '<<best_step<<endl; #ifdef individual_analysis_on_init_sls_mode //cout<<"c initTime " << init_time << endl; //cout<<"c slsTime " << sls_time << endl; cout<<"c stepSpeed(/ms) "<< 
sls_step_speed_per_ms << endl; #endif } else { cout<<"the solution is wrong."<<endl; //print_solution(); } free_memory(); return 0; }
int main () { int cores = 2; int N = 10 ; int LDA = 10 ; int NRHS = 5 ; int LDB = 10 ; int info; int info_solution; int i,j; int NminusOne = N-1; int LDBxNRHS = LDB*NRHS; PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *B1 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *B2 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *WORK = (PLASMA_Complex32_t *)malloc(2*LDA*sizeof(PLASMA_Complex32_t)); float *D = (float *)malloc(LDA*sizeof(float)); /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)) { printf("Out of Memory \n "); exit(0); } /* Plasma Initialize */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Initialize A1 and A2 for Symmetric Positive Matrix */ LAPACKE_slarnv_work(IONE, ISEED, LDA, D); claghe(&N, &NminusOne, D, A1, &LDA, ISEED, WORK, &info); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; for ( i = 0; i < N; i++) { A1[LDA*i+i] = A1[LDA*i+i]+ (PLASMA_Complex32_t)N ; A2[LDA*i+i] = A1[LDA*i+i]; } /* Initialize B1 and B2 */ LAPACKE_clarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* PLASMA routines */ info = PLASMA_cpotrf(PlasmaLower, N, A2, LDA); info = PLASMA_ctrsm(PlasmaLeft, PlasmaLower, PlasmaNoTrans, PlasmaNonUnit, N, NRHS, (PLASMA_Complex32_t)1.0, A2, LDA, B2, LDB); info = PLASMA_ctrsm(PlasmaLeft, PlasmaLower, PlasmaConjTrans, PlasmaNonUnit, N, NRHS, (PLASMA_Complex32_t)1.0, A2, LDA, B2, LDB); /* Check the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in CTRSM example ! \n"); else printf("-- Run of CTRSM example successful ! 
\n"); free(A1); free(A2); free(B1); free(B2); free(WORK); free(D); PLASMA_Finalize(); exit(0); }
/* //////////////////////////////////////////////////////////////////////////// -- Testing dsygvdx */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gpu_time; double *h_A, *h_R, *h_work; #if defined(PRECISION_z) || defined(PRECISION_c) double *rwork; magma_int_t lrwork; #endif /* Matrix size */ double *w1, *w2; magma_int_t *iwork; magma_int_t N, n2, info, lwork, liwork; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1};; magma_int_t info_ortho = 0; magma_int_t info_solution = 0; magma_int_t info_reduction = 0; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); magma_range_t range = MagmaRangeAll; if (opts.fraction != 1) range = MagmaRangeI; if ( opts.check && opts.jobz == MagmaNoVec ) { fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" ); opts.jobz = MagmaVec; } printf("using: itype = %d, jobz = %s, range = %s, uplo = %s, check = %d, fraction = %6.4f\n", (int) opts.itype, lapack_vec_const(opts.jobz), lapack_range_const(range), lapack_uplo_const(opts.uplo), (int) opts.check, opts.fraction); printf(" N M GPU Time (sec) ||I-Q'Q||/. ||A-QDQ'||/. 
||D-D_magma||/.\n"); printf("=======================================================================\n"); magma_int_t threads = magma_get_parallel_numthreads(); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; n2 = N*N; #if defined(PRECISION_z) || defined(PRECISION_c) lwork = magma_dbulge_get_lq2(N, threads) + 2*N + N*N; lrwork = 1 + 5*N +2*N*N; #else lwork = magma_dbulge_get_lq2(N, threads) + 1 + 6*N + 2*N*N; #endif liwork = 3 + 5*N; /* Allocate host memory for the matrix */ TESTING_MALLOC_CPU( h_A, double, n2 ); TESTING_MALLOC_CPU( w1, double, N ); TESTING_MALLOC_CPU( w2, double, N ); TESTING_MALLOC_CPU( iwork, magma_int_t, liwork ); TESTING_MALLOC_PIN( h_R, double, n2 ); TESTING_MALLOC_PIN( h_work, double, lwork ); #if defined(PRECISION_z) || defined(PRECISION_c) TESTING_MALLOC_PIN( rwork, double, lrwork ); #endif /* Initialize the matrix */ lapackf77_dlarnv( &ione, ISEED, &n2, h_A ); magma_dmake_symmetric( N, h_A, N ); magma_int_t m1 = 0; double vl = 0; double vu = 0; magma_int_t il = 0; magma_int_t iu = 0; if (range == MagmaRangeI) { il = 1; iu = (int) (opts.fraction*N); } if (opts.warmup) { // ================================================================== // Warmup using MAGMA // ================================================================== lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); if (opts.ngpu == 1) { //printf("calling dsyevdx_2stage 1 GPU\n"); magma_dsyevdx_2stage(opts.jobz, range, opts.uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } else { //printf("calling dsyevdx_2stage_m %d GPU\n", (int) opts.ngpu); magma_dsyevdx_2stage_m(opts.ngpu, opts.jobz, range, opts.uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } } // 
=================================================================== // Performs operation using MAGMA // =================================================================== lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); gpu_time = magma_wtime(); if (opts.ngpu == 1) { //printf("calling dsyevdx_2stage 1 GPU\n"); magma_dsyevdx_2stage(opts.jobz, range, opts.uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } else { //printf("calling dsyevdx_2stage_m %d GPU\n", (int) opts.ngpu); magma_dsyevdx_2stage_m(opts.ngpu, opts.jobz, range, opts.uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } gpu_time = magma_wtime() - gpu_time; printf("%5d %5d %7.2f ", (int) N, (int) m1, gpu_time ); if ( opts.check ) { double eps = lapackf77_dlamch("E"); //printf("\n"); //printf("------ TESTS FOR MAGMA DSYEVD ROUTINE ------- \n"); //printf(" Size of the Matrix %d by %d\n", (int) N, (int) N); //printf("\n"); //printf(" The matrix A is randomly generated for each test.\n"); //printf("============\n"); //printf(" The relative machine precision (eps) is %8.2e\n",eps); //printf(" Computational tests pass if scaled residuals are less than 60.\n"); /* Check the orthogonality, reduction and the eigen solutions */ if (opts.jobz == MagmaVec) { info_ortho = check_orthogonality(N, N, h_R, N, eps); info_reduction = check_reduction(opts.uplo, N, 1, h_A, w1, N, h_R, eps); } //printf("------ CALLING LAPACK DSYEVD TO COMPUTE only eigenvalue and verify elementswise ------- \n"); lapackf77_dsyevd("N", "L", &N, h_A, &N, w2, h_work, &lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, &lrwork, #endif iwork, &liwork, &info); info_solution = check_solution(N, w2, w1, eps); if ( (info_solution == 0) && (info_ortho == 0) && (info_reduction == 0) ) { printf(" ok\n"); 
//printf("***************************************************\n"); //printf(" ---- TESTING DSYEVD ...................... PASSED !\n"); //printf("***************************************************\n"); } else { printf(" failed\n"); status += 1; //printf("************************************************\n"); //printf(" - TESTING DSYEVD ... FAILED !\n"); //printf("************************************************\n"); } } TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( w1 ); TESTING_FREE_CPU( w2 ); TESTING_FREE_CPU( iwork ); TESTING_FREE_PIN( h_R ); TESTING_FREE_PIN( h_work ); #if defined(PRECISION_z) || defined(PRECISION_c) TESTING_FREE_PIN( rwork ); #endif fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } /* Shutdown */ TESTING_FINALIZE(); return status; }