示例#1
0
/*
 * Driver for the multisort benchmark.
 *
 * Usage: <prog> <vector size in K> <seq sort size in K> <seq merge size in K>
 *
 * Reads the three sizes (each multiplied by 1024) into the globals N,
 * MIN_SORT_SIZE and MIN_MERGE_SIZE, allocates the data and scratch vectors,
 * times initialization and sorting, verifies the result and prints a report.
 *
 * Returns 1 on a usage error or allocation failure, 0 otherwise (including
 * when the sort check fails, which is only reported on stdout).
 */
int main(int argc, char **argv) {
	int success;

	if (argc != 4) {
		fprintf(stderr, "Usage: %s <vector size in K> <seq sort size in K> <seq merge size in K>\n", argv[0]);
		return 1;
	}

	N = atol(argv[1]) * 1024L;
	MIN_SORT_SIZE = atol(argv[2]) * 1024L;
	MIN_MERGE_SIZE = atol(argv[3]) * 1024L;

	T *data = malloc(N*sizeof(T));
	T *tmp = malloc(N*sizeof(T));

	/* Fail cleanly instead of dereferencing NULL inside initialize()/touch(). */
	if (data == NULL || tmp == NULL) {
		fprintf(stderr, "Error: unable to allocate two vectors of %ld elements\n", (long)N);
		free(data);
		free(tmp);
		return 1;
	}

	double init_time = omp_get_wtime();
	initialize(N, data);
	touch(N, tmp);
	init_time = omp_get_wtime() - init_time;

	double sort_time = omp_get_wtime();
	multisort(N, data, tmp);
	sort_time = omp_get_wtime() - sort_time;

	success = check_solution(N, data);
	if (!success) printf ("SORTING FAILURE\n");
	else printf ("SORTING SUCCESS\n");

	/*
	 * omp_get_num_threads() returns 1 when called from sequential code, so
	 * report the size of the team a parallel region would get instead.
	 */
	fprintf(stdout, "Multisort program (using %d threads)\n", omp_get_max_threads() );
	fprintf(stdout, "   Initialization time in seconds = %g\n", init_time);
	fprintf(stdout, "   Multisort time in seconds = %g\n", sort_time);
	fprintf(stdout, "\n");

	free(tmp);
	free(data);

	return 0;
}
示例#2
0
/*
** Entry point of the sudoku solver.
**
** Validates the command line with error_check(), builds the grid with
** set_grid_parameters(), runs sudoku() once, and - unless grid[9] was set
** to ERROR - marks grid[9] as UNIQUE and runs sudoku() again, printing the
** grid only if that second run and check_solution() both succeed.
** Every failure path prints the error message; the exit status is always 0.
*/
int		main(int argc, char **argv)
{
	char **grid;

	if (error_check(argc, argv))
	{
		print_error();
		return (0);
	}
	grid = set_grid_parameters(argv);
	if (grid == NULL)
	{
		/* Stop here: the code below dereferences grid. */
		print_error();
		return (0);
	}
	sudoku(grid);
	if (grid[9] == ERROR)
		print_error();
	else
	{
		grid[9] = UNIQUE;
		if (sudoku(grid) && check_solution(grid))
			print_grid(grid);
		else
			print_error();
	}
	free(grid);
	return (0);
}
示例#3
0
/*
 * Solve `board` (of dimension `side`) by applying the solving strategies in
 * order: alone, singleton, pairs, bad-possible elimination. Whenever a
 * strategy reports progress (non-zero return) the sequence restarts from the
 * first strategy. After each strategy that made no progress the board is
 * tested with check_solution(); a result of 0 ends the search. If no strategy
 * makes progress and the board is still not accepted, recursive_solution() is
 * run from cell (0, 0) as a last resort.
 *
 * Always returns 0.
 */
int solve_sudoku(sudoku_board board, int side){
	int eliminated;

	for (;;) {
		/* Strategy 1: alone */
		if (alone(board, side))
			continue;
		if (check_solution(board, side) == 0)
			return 0;

		/* Strategy 2: singleton over the whole board */
		if (singleton(board, side, 0, 0, side, side))
			continue;
		if (check_solution(board, side) == 0)
			return 0;

		/* Strategy 3: pairs over the whole board */
		if (pairs(board, side, 0, 0, side, side))
			continue;
		if (check_solution(board, side) == 0)
			return 0;

		/* Strategy 4: trial elimination of candidates */
		eliminated = bad_possible_elimination(board, side);
		if (eliminated)
			continue;
		if (check_solution(board, side) == 0)
			return 0;

		/* Nothing else helped: fall back to the recursive search. */
		recursive_solution(board, side, 0, 0);
		return 0;
	}
}
示例#4
0
/*
 * Recursive search starting at cell (row, column).
 *
 * Returns 0 as soon as check_solution() accepts the board. Otherwise:
 *  - if the cell already holds a value, the search moves on via next();
 *  - if not, every value 1..side that possible_value() allows is tried in
 *    increasing order with try_value(), and next() continues the search;
 *    the first value for which next() returns 0 ends the search with 0.
 * When no value works the cell is reset with try_value(..., 0) and 1 is
 * returned so the caller can try its next candidate.
 */
int recursive_solution(sudoku_board board, int side, int row, int column){
	int candidate;

	/* Board already accepted: nothing left to do. */
	if (check_solution(board, side) == 0)
		return 0;

	/* Cell already filled: just advance to the following cell. */
	if (get_value(&board[row][column]))
		return next(board, side, row, column);

	for (candidate = 1; candidate <= side; ++candidate) {
		if (!possible_value(board, side, row, column, candidate))
			continue;

		try_value(&board[row][column], candidate);
		if (next(board, side, row, column) == 0)
			return 0;
	}

	/* Every candidate failed: clear the cell and report failure. */
	try_value(&board[row][column], 0);
	return 1;
}
示例#5
0
/*
 * Advance the recursive search from cell (row, column) to the following cell
 * in row-major order. At the last cell of the board there is nothing to
 * advance to, so the result of check_solution() is returned instead.
 */
int next(sudoku_board board, int side, int row, int column){
	const int last = side - 1;

	/* Not at the end of the row: step one column to the right. */
	if (column != last)
		return recursive_solution(board, side, row, column + 1);

	/* End of the last row: the whole board has been visited. */
	if (row == last)
		return check_solution(board, side);

	/* End of a row: wrap to the start of the next one. */
	return recursive_solution(board, side, row + 1, 0);
}
示例#6
0
/*
 * Test driver for PLASMA_cgetmi.
 *
 * argv[0..3] are M, N, MB and NB. A random M*N matrix A is generated, and
 * for each of the 6 entries of format[] the matrix is copied to B,
 * converted in place with PLASMA_cgetmi, and compared against B with
 * check_solution() using the matching formatmap[] entry.
 *
 * Returns -1 on a wrong argument count, -2 if memory cannot be allocated,
 * 0 otherwise (individual conversion failures are only printed).
 */
int testing_cgetmi(int argc, char **argv){

    PLASMA_Complex32_t *A, *B;
    int m, n, mb, nb;
    int i, ret;
    size_t size;

    /* Check for number of arguments*/
    if (argc != 4){
        USAGE("GETMI", "M N MB NB ntdbypb with \n",
              "   - M       : the number of rows of the matrix    \n"
              "   - N       : the number of columns of the matrix \n"
              "   - MB      : the number of rows of each block    \n"
              "   - NB      : the number of columns of each block \n");
        return -1;
    }

    m      = atoi(argv[0]);
    n      = atoi(argv[1]);
    mb     = atoi(argv[2]);
    nb     = atoi(argv[3]);

    /* Compute the byte count in size_t so it cannot overflow an int. */
    size = (size_t)m * (size_t)n * sizeof(PLASMA_Complex32_t);
    A = (PLASMA_Complex32_t *)malloc(size);
    B = (PLASMA_Complex32_t *)malloc(size);

    /* Check if unable to allocate memory */
    if ( (!A) || (!B) ){
        printf("Out of Memory \n ");
        free( A ); free( B );
        return -2;
    }

    LAPACKE_clarnv_work(1, ISEED, m*n, A);

    for(i=0; i<6; i++) {
        /* Keep a copy of the matrix as it was before this conversion. */
        memcpy(B, A, size);

        printf(" - TESTING CGETMI (%4s) ...", formatstr[i]);
        ret = PLASMA_cgetmi( m, n, A, format[i], mb, nb );

        if (ret != PLASMA_SUCCESS) {
            printf("Failed\n");
            continue;
        }

        if ( check_solution(m, n, mb, nb, B, A,
                            (int (*)(int, int, int, int, int, int))formatmap[i]) == 0 )
            printf("............ PASSED !\n");
        else
            printf("... FAILED !\n");
    }

    free( A ); free( B );

    return 0;
}
示例#7
0
/*
** Intersection test between the ray stored in `st` and cylinder number `i`
** (presumably a shadow test, judging by the name - confirm with callers).
**
** Fills st->a, st->b, st->c with the coefficients of a quadratic built from
** the x/z components of the ray and the cylinder position and radius, and
** st->d with its discriminant. Returns 1 only when the discriminant is
** strictly positive, check_solution() returns non-zero and st->t < st->l;
** returns 0 in every other case.
*/
int	        ombre_check_cylindre(t_ray *st, int i)
{
  /* Quadratic coefficients */
  st->a = pow(st->l_x, 2) + pow(st->l_z, 2);
  st->b = 2 * (st->l_x * (st->p_x - st->info.cylindre[i].x)
	       + st->l_z * (st->p_z - st->info.cylindre[i].z));
  st->c = pow(st->p_x - st->info.cylindre[i].x, 2)
    + pow(st->p_z - st->info.cylindre[i].z, 2)
    - pow(st->info.cylindre[i].rayon, 2);

  /* Discriminant */
  st->d = pow(st->b, 2) - (4 * st->a * st->c);

  /* check_solution() is only consulted when the discriminant is positive. */
  if (st->d <= 0 || check_solution(st) == 0)
    return (0);
  return (st->t < st->l);
}
示例#8
0
/*
 * Generic backtracking step.
 *
 * If check_solution() accepts the partial vector a[] at depth k, the
 * solution is handed to process_solution(). Otherwise the depth is
 * increased by one, construct_candidates() fills c[] with the candidates
 * for position k, and each of them is placed in a[k] in turn before
 * recursing.
 */
void backtrack(int a[], int k, int n)
{
	int c[N];
	int candidates;
	int idx;

	if (check_solution(a, k, n)) {
		process_solution(a, k);
		return;
	}

	k += 1;
	construct_candidates(a, k, c, &candidates);

	idx = 0;
	while (idx < candidates) {
		a[k] = c[idx];
		backtrack(a, k, n);
		idx++;
	}
}
示例#9
0
/*
 * Solves the problem by enumerating combinations.
 *
 * Starting from the all-zero `combination` vector, next_combination() is
 * called once per iteration for iterations 1 .. combs-1, where combs comes
 * from number_of_combinations(). Combinations rejected by check_solution()
 * are skipped. For the others the ratio total_reward / total_occurrences is
 * printed, and whenever it exceeds the global `max` it becomes the new `max`
 * and the combination is copied into `solution`.
 *
 * The iteration index and each ratio are printed to stdout as the search
 * progresses. Always returns 0.
 */
int solve(Satellite *sats, int *combination, int *solution) {
  /* The counter must be as wide as combs; an int would overflow first. */
  long long int i, combs;
  int k, n, valid;
  float r, num;

  combs = number_of_combinations(sats);
  get_golden_index_max(sats);

  /* Reset every slot; the original author describes this starting point
     as "the combination 1 1 .. 1". */
  for (k = 0; k < nsats; k++)
    combination[k] = 0;

  for (i = 1; i < combs; i++) {
    printf("%lld ", i);
    next_combination(sats, combination);
    valid = check_solution(sats, combination);
    if (!valid)
      continue;
    n = total_occurrences(sats, combination);
    r = total_reward(sats, combination);
    /* NOTE(review): n == 0 would make this a floating-point division by
       zero; presumably a valid combination always has occurrences. */
    num = r / n;
    printf("%.2f\n", num);
    if (num > max) {
      max = num;
      copy_solution(combination, solution);
    }
  }
  return 0;
}
示例#10
0
/*                                 main()
 *
 * Program entry point. Allocates a 9x9 two-dimensional array of ints to hold
 * the candidate solution, then reads the puzzle either from the file named
 * by the single optional argument or, when no argument is given, from stdin.
 * More than one argument, or a file that cannot be opened, is a checked
 * runtime error (assertion failure). After reading, the input is closed,
 * check_solution() examines the puzzle, the array is released and the
 * program calls exit(0).
 */
int main(int argc, char *argv[])
{
    UArray2_T solution = UArray2_new(9, 9, sizeof(int));
    FILE *srcfile;

    assert(argc <= 2);

    /* No argument: read from standard input; otherwise open the named file. */
    srcfile = (argc == 1) ? stdin : fopen (argv[1], "rb");
    assert(srcfile != NULL);

    read_in_solution(solution, srcfile);
    fclose(srcfile);

    check_solution(solution);
    UArray2_free(&solution);
    exit(0);
}
示例#11
0
/*
 * Example program for PLASMA_cgesv_incpiv.
 *
 * Generates a random N x N system with NRHS right-hand sides, keeps a copy
 * of both matrices, solves the system with PLASMA and verifies the result
 * with check_solution(). Prints whether the example succeeded; the process
 * exit status is always 0.
 */
int main ()
{

    int cores = 2;
    int N     = 10;
    int LDA   = 10;
    int NRHS  = 5;
    int LDB   = 10;
    int info;
    int info_solution = 0;
    int i,j;
    int LDAxN = LDA*N;
    int LDBxNRHS = LDB*NRHS;

    PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*(sizeof*A1));
    PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDA*N*(sizeof*A2));
    PLASMA_Complex32_t *B1 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*(sizeof*B1));
    PLASMA_Complex32_t *B2 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*(sizeof*B2));
    /* Initialised so the final free() calls are safe if the workspace
       allocation fails. */
    PLASMA_Complex32_t *L = NULL;
    int *IPIV = NULL;

    /* Check if unable to allocate memory */
    if ((!A1)||(!A2)||(!B1)||(!B2)){
        printf("Out of Memory \n ");
        exit(0);
    }

    /*Plasma Initialize*/
    PLASMA_Init(cores);
    printf("-- PLASMA is initialized to run on %d cores. \n",cores);

    /* Initialize A1 and A2 Matrix */
    LAPACKE_clarnv_work(IONE, ISEED, LDAxN, A1);
    for ( i = 0; i < N; i++)
        for (  j = 0; j < N; j++)
            A2[LDA*j+i] = A1[LDA*j+i];

    /* Initialize B1 and B2 */
    LAPACKE_clarnv_work(IONE, ISEED, LDBxNRHS, B1);
    for ( i = 0; i < N; i++)
        for ( j = 0; j < NRHS; j++)
            B2[LDB*j+i] = B1[LDB*j+i];

    /* PLASMA CGESV: only solve and verify when the workspace was obtained,
       otherwise L and IPIV would be used uninitialised. */
    info = PLASMA_Alloc_Workspace_cgesv_incpiv(N, &L, &IPIV);
    if (info == 0) {
        info = PLASMA_cgesv_incpiv(N, NRHS, A2, LDA, L, IPIV, B2, LDB);

        /* Check the factorization and the solution */
        info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB);
    }

    if ((info_solution != 0) || (info != 0))
       printf("-- Error in CGESV example ! \n");
    else
       printf("-- Run of CGESV example successful ! \n");

    free(A1); free(A2); free(B1); free(B2); free(IPIV); free(L);

    PLASMA_Finalize();

    exit(0);
}
示例#12
0
/*
 * Entry point of the mining client.
 *
 * Parses the command line, creates the search context and the server
 * context, fetches the initial work list, opens the OpenCL device(s) and
 * then loops forever: pick the most valuable public-key batch, (re)start the
 * search threads on it when it changes, wait up to `interval` seconds for a
 * solution to be signalled through soln_cond, and hand the result to
 * check_solution().
 *
 * Options handled below:
 *   -u <url>      server URL (required)
 *   -a <addr>     reward address (required, validated with
 *                 vg_b58_decode_check)
 *   -v / -q       verbose level 2 / 0 (default 1)
 *   -i <n>        wait interval; values below 10 are rejected
 *   -p, -d        platform and device index
 *   -w, -t        work size and thread count; 0 is rejected
 *   -g <C>x<R>    grid size; both parts must be non-zero
 *   -b <n>        modular inverse size; must be a non-zero power of 2
 *   -V, -S        verify mode, safe mode
 *   -D <dev>      device string, at most MAX_DEVS of them
 *
 * Returns 1 on any setup error. The main loop has no exit, so the final
 * return is never reached.
 */
int
main(int argc, char **argv)
{
	const char *url = NULL;
	const char *credit_addr = NULL;
	int opt;
	int platformidx = -1, deviceidx = -1;
	char *pend;
	int verbose = 1;
	int interval = 90;
	int nthreads = 0;
	int worksize = 0;
	int nrows = 0, ncols = 0;
	int invsize = 0;
	int verify_mode = 0;
	int safe_mode = 0;

	char *devstrs[MAX_DEVS];
	int ndevstrs = 0;

	vg_context_t *vcp = NULL;
	vg_ocl_context_t *vocp = NULL;

	int res;
	int thread_started = 0;
	pubkeybatch_t *active_pkb = NULL;
	float active_pkb_value = 0;

	server_context_t *scp = NULL;
	pubkeybatch_t *pkb;
	int was_sleeping = 0;

	struct timeval tv;
	struct timespec sleepy;

	pthread_mutex_init(&soln_lock, NULL);
	pthread_cond_init(&soln_cond, NULL);

	if (argc == 1) {
		usage(argv[0]);
		return 1;
	}

	while ((opt = getopt(argc, argv,
			     "u:a:vqp:d:w:t:g:b:VD:Sh?i:")) != -1) {
		switch (opt) {
		case 'u':
			url = optarg;
			break;
		case 'a':
			credit_addr = optarg;
			break;
		case 'v':
			verbose = 2;
			break;
		case 'q':
			verbose = 0;
			break;
		case 'i':
			interval = atoi(optarg);
			if (interval < 10) {
				fprintf(stderr,
					"Invalid interval '%s'\n", optarg);
				return 1;
			}
			break;
		case 'p':
			platformidx = atoi(optarg);
			break;
		case 'd':
			deviceidx = atoi(optarg);
			break;
		case 'w':
			worksize = atoi(optarg);
			if (worksize == 0) {
				fprintf(stderr,
					"Invalid work size '%s'\n", optarg);
				return 1;
			}
			break;
		case 't':
			nthreads = atoi(optarg);
			if (nthreads == 0) {
				fprintf(stderr,
					"Invalid thread count '%s'\n", optarg);
				return 1;
			}
			break;
		case 'g':
			/* Expected form is <ncols>x<nrows>; a missing 'x'
			   leaves nrows at 0 and is rejected below. */
			nrows = 0;
			ncols = strtol(optarg, &pend, 0);
			if (pend && *pend == 'x') {
				nrows = strtol(pend+1, NULL, 0);
			}
			if (!nrows || !ncols) {
				fprintf(stderr,
					"Invalid grid size '%s'\n", optarg);
				return 1;
			}
			break;
		case 'b':
			invsize = atoi(optarg);
			if (!invsize) {
				fprintf(stderr,
					"Invalid modular inverse size '%s'\n",
					optarg);
				return 1;
			}
			/* x & (x - 1) is non-zero unless x has a single
			   bit set. */
			if (invsize & (invsize - 1)) {
				fprintf(stderr,
					"Modular inverse size must be "
					"a power of 2\n");
				return 1;
			}
			break;
		case 'V':
			verify_mode = 1;
			break;
		case 'S':
			safe_mode = 1;
			break;
		case 'D':
			if (ndevstrs >= MAX_DEVS) {
				fprintf(stderr,
					"Too many OpenCL devices (limit %d)\n",
					MAX_DEVS);
				return 1;
			}
			devstrs[ndevstrs++] = optarg;
			break;
		default:
			usage(argv[0]);
			return 1;
		}
	}

#if OPENSSL_VERSION_NUMBER < 0x10000000L
	/* Complain about older versions of OpenSSL */
	if (verbose > 0) {
		fprintf(stderr,
			"WARNING: Built with " OPENSSL_VERSION_TEXT "\n"
			"WARNING: Use OpenSSL 1.0.0d+ for best performance\n");
	}
#endif
	/* NOTE(review): the handle returned by curl_easy_init() is discarded
	   here; presumably the call is only meant to initialise libcurl -
	   confirm whether curl_global_init() was intended. */
	curl_easy_init();

	/* NOTE(review): vcp is dereferenced below without a NULL check. */
	vcp = vg_prefix_context_new(0, 128, 0);

	vcp->vc_verbose = verbose;

	vcp->vc_output_match = output_match_work_complete;
	vcp->vc_output_timing = vg_output_timing_console;


	if (!url) {
		fprintf(stderr, "ERROR: No server URL specified\n");
		return 1;
	}
	if (!credit_addr) {
		fprintf(stderr, "ERROR: No reward address specified\n");
		return 1;
	}
	if (!vg_b58_decode_check(credit_addr, NULL, 0)) {
		fprintf(stderr, "ERROR: Invalid reward address specified\n");
		return 1;
	}

	/* NOTE(review): scp is dereferenced without a NULL check. */
	scp = server_context_new(url, credit_addr);
	scp->verbose = verbose;

	/* Get the initial bounty list, abort on failure */
	if (server_context_getwork(scp))
		return 1;

	/* Set up OpenCL */
	/* res counts the devices that were opened successfully. */
	res = 0;
	if (ndevstrs) {
		for (opt = 0; opt < ndevstrs; opt++) {
			vocp = vg_ocl_context_new_from_devstr(vcp, devstrs[opt],
							      safe_mode,
							      verify_mode);
			if (!vocp) {
				fprintf(stderr,
				"Could not open device '%s', ignoring\n",
					devstrs[opt]);
			} else {
				res++;
			}
		}
	} else {
		vocp = vg_ocl_context_new(vcp, platformidx, deviceidx,
					  safe_mode, verify_mode,
					  worksize, nthreads,
					  nrows, ncols, invsize);
		if (vocp)
			res++;
	}
	if (!res) {
		/* No usable device: list what is available and give up. */
		vg_ocl_enumerate_devices();
		return 1;
	}

	if (verbose > 1)
		dump_work(&scp->items);

	while (1) {
		/* Refill the work list when it has run empty. */
		if (avl_root_empty(&scp->items))
			server_context_getwork(scp);

		pkb = most_valuable_pkb(scp);

		/* If the work item is the same as the one we're executing,
		   keep it */
		if (pkb && active_pkb &&
		    server_pubkeybatch_equal(scp, active_pkb, pkb))
			pkb = active_pkb;

		if (thread_started && (!active_pkb || (pkb != active_pkb))) {
			/* If a thread is running, stop it */
			vg_context_stop_threads(vcp);
			thread_started = 0;
			if (active_pkb) {
				check_solution(scp, active_pkb);
				active_pkb = NULL;
			}
			vg_context_clear_all_patterns(vcp);

			if (verbose > 1)
				dump_work(&scp->items);
		}

		if (!pkb) {
			/* Print the "sleeping" notice only once per idle
			   period. */
			if (!was_sleeping) {
				fprintf(stderr,
					"No work available, sleeping\n");
				was_sleeping = 1;
			}

		} else if (!active_pkb) {
			/* Nothing is running: load every pattern of the
			   chosen batch and start the search threads. */
			workitem_t *wip;
			was_sleeping = 0;
			active_pkb_value = 0;
			vcp->vc_pubkey_base = pkb->pubkey;
			for (wip = workitem_avl_first(&pkb->items);
			     wip != NULL;
			     wip = workitem_avl_next(wip)) {
				fprintf(stderr,
					"Searching for pattern: \"%s\" "
					"Reward: %f Value: %f BTC/Gkey\n",
					wip->pattern,
					wip->reward,
					wip->value);
				vcp->vc_addrtype = wip->addrtype;
				if (!vg_context_add_patterns(vcp,
							     &wip->pattern,
							     1)) {
					fprintf(stderr,
					   "WARNING: could not add pattern\n");
				}
				else {
					active_pkb_value += wip->value;
				}
				
				assert(vcp->vc_npatterns);
			}

			fprintf(stderr, 
				"\nTotal value for current work: %f BTC/Gkey\n", 
				active_pkb_value);
			res = vg_context_start_threads(vcp);
			if (res)
				return 1;
			thread_started = 1;
			active_pkb = pkb;
		}

		/* Wait for something to happen */
		/* Absolute deadline: now + interval seconds. */
		gettimeofday(&tv, NULL);
		sleepy.tv_sec = tv.tv_sec;
		sleepy.tv_nsec = tv.tv_usec * 1000;
		sleepy.tv_sec += interval;

		pthread_mutex_lock(&soln_lock);
		res = 0;
		/* Only wait when no solution has been posted yet. */
		if (!soln_private_key)
			res = pthread_cond_timedwait(&soln_cond,
						     &soln_lock, &sleepy);
		pthread_mutex_unlock(&soln_lock);

		if (res == 0) {
			/* NOTE(review): active_pkb can be NULL here (no work
			   available); presumably check_solution() tolerates
			   that - confirm. */
			if (check_solution(scp, active_pkb))
				active_pkb = NULL;
		}
		else if (res == ETIMEDOUT) {
			free_pkb_tree(&scp->items, active_pkb);
		}
	}

	return 0;
}
示例#13
0
/*
 * Trial-and-error elimination of candidates.
 *
 * For every cell of `board` whose get_value() is 0 and for every non-zero
 * entry of its possible[] array, a scratch board is created, all known
 * values of `board` are replayed into it with found_value(), and the
 * candidate is placed in the cell. Further deductions are applied and the
 * scratch board is then tested with check_solution():
 *   - result 0: the candidate is committed to `board` and -1 is returned;
 *   - result 1: the candidate is removed from the cell with
 *     delete_possible() and counted;
 *   - anything else: the candidate is left untouched.
 *
 * Returns -1 when a candidate was committed, otherwise the number of
 * candidates that were deleted (0 means no progress).
 */
int bad_possible_elimination(sudoku_board board, int side){
	int i, j, k, n, m,aa=0, abc, alone_ok=0;
	sudoku_board test_board;
	//	print_board(board, side);
	
	for (i=0; i<side; ++i) {//ROW
		for (j=0; j<side; ++j) {//COLUMN
			//printf("row=%d  column=%d\n", i, j);
			//printf("\tvalue = %d\n", get_value(&board[i][j]));
			if (get_value(&board[i][j])==0) {
				for (k=0; k<side; ++k) {//POSSIBLE
					//printf("k=%d   possible=%d\n", k, board[i][j].possible[k]);
					if (board[i][j].possible[k]){
						// Build a scratch copy holding the known values plus the candidate.
						test_board = create_board(side);
						for (n=0; n<side; ++n) 
							for (m=0; m<side; ++m){
								if (get_value(&board[n][m])) 
									found_value(test_board, side, n, m, get_value(&board[n][m]));
							}
						found_value(test_board, side, i, j, board[i][j].possible[k]);
						
						// Cells in row i and column j that have a single candidate on
						// `board` are filled in on the scratch board.
						// NOTE(review): the candidate counts are read from `board`, not
						// from `test_board` - confirm this is intended.
						alone_ok=0;
						for (n=0; n<side; ++n) {
							if ((board[i][n].possibles)==1) {
								for (m=0; m<side; ++m) {
									if (board[i][n].possible[m]) {
										alone_ok = 1;
										found_value(test_board, side, i, n, board[i][n].possible[m]);
									}
								}
							}
							if ((board[n][j].possibles)==1) {
								for (m=0; m<side; ++m) {
									if (board[n][j].possible[m]) {
										alone_ok = 1;
										found_value(test_board, side, n, j, board[n][j].possible[m]);
									}
								}
							}
						}
						// Run singleton/pairs on the scratch board, restricted to cell (i, j).
						if (singleton(test_board, side, i, j, i+1, j+1))
							alone_ok = 1;
						if (pairs(test_board, side, i, j, i+1, j+1))
							alone_ok = 1;
						
						// NOTE(review): this loop runs the strategies on `board`, the real
						// board, rather than on `test_board` - looks like it may have been
						// meant for the scratch board; confirm before changing.
						if (alone_ok) {
							while (1){
								if (alone(board, side))
									continue;

								if (singleton(board, side, 0, 0, side, side))
									continue;
								
								if (pairs(board, side, 0, 0, side, side))
									continue;
								break;
							}
						}
						
						
						
						abc = check_solution(test_board, side);
						//printf("%d\n", aa);
						switch (abc) {
							case 0:
								// Scratch board accepted: commit the candidate and stop.
								found_value(board, side, i, j, board[i][j].possible[k]);
								delete_board(test_board, side);
								return -1;
							case 1:
								// Candidate rejected: remove it and count the elimination.
								delete_possible(&board[i][j], board[i][j].possible[k], side);
								++aa;
								break;
							default:
								break;
						}
						delete_board(test_board, side);
					}
				}
			}
		}
	}
	
	
	return aa;
}
示例#14
0
/*
 * Pivoting loop for the transport problem.
 *
 * Builds an initial basis with init_basis() (its non-zero status is
 * returned immediately), then repeats while find_entering_arc() reports an
 * entering arc (i_enter >= 0): count the pivot, call find_cycle() to get the
 * root of the affected subtree and the orientation, and update the duals.
 * When the subtree holds at most half of the nodes it is updated directly;
 * otherwise the subtree is temporarily unlinked from its predecessor, the
 * duals are updated from node 0 with the opposite sign, and the subtree is
 * linked back.
 *
 * Returns the result of check_solution() on the final basis.
 */
int optimize_transport(BASIS *basis)
{
	int enter_i, enter_j, enter_arc;
	int subroot, parent, orient;
	int half, status;
	double redcost, delta;
	int *family, *pred, *brother, *son;

	/* construct initial basis */
	status = init_basis(basis);
	if (status)
		return status;

	/* Shorthands for the tree arrays stored in the basis */
	family  = basis->family;
	pred    = basis->pred;
	brother = basis->brother;
	son     = basis->son;

	half = basis->no_nodes / 2;

	for (redcost = find_entering_arc(basis, &enter_i, &enter_j, &enter_arc);
	     enter_i >= 0;		/* there is an entering arc */
	     redcost = find_entering_arc(basis, &enter_i, &enter_j, &enter_arc)) {

		basis->pivot++;

#ifdef PRINT
		printf("+-----------------------------------+\n");
		printf("|            pivot %3d              |\n",basis->pivot);
		printf("+-----------------------------------+\n");
#endif

		subroot = find_cycle(basis, enter_i, enter_j, enter_arc, &orient);

#ifdef PRINT
		printf("\nentering arc : %d  --> %d \n",enter_i,enter_j);
		printf("    arcindex : %d\n",enter_arc);
		printf("     redcost : %8.4lf",redcost);
#endif

		/* Signed change applied to the subtree below subroot */
		delta = (orient > 0) ? redcost : -redcost;

		if (family[subroot] <= half) {
			/* update duals in the smaller subtree */
			update_dual(basis, subroot, delta);
		} else {
			/* separate trees, update duals, then reconnect */
			basis->rerooted++;
			parent = pred[subroot];

			/* remove root from successors of parent */
			son[parent] = brother[subroot];

			update_dual(basis, 0, -delta);

			/* insert root to successors of parent */
			brother[subroot] = son[parent];
			son[parent] = subroot;
		}

#ifdef PRINT
		print_basis(basis);
#endif
	}

	return check_solution(basis);
}
示例#15
0
// Rebuild every derived field of the solution 's' from the 0/1 vector 'x'.
// 'x' may be NULL, which is handled like a vector with no open location.
// On return s->x, s->heap_size, the per-column heaps, besti1/besti2,
// s->total_cost and s->flip_gain are all consistent with 'x', and
// check_solution() has been run on the result.
void solution_reset (instance_t *inst, solution_t *s, int *x)
{
	int loc, cust, rank, filled;

	// Clear the heaps and the best/second-best markers
	for (cust = 0; cust < inst->m; cust++) {
		s->besti2[cust] = -1;
		s->besti1[cust] = -1;
		for (loc = 0; loc < inst->n; loc++) {
			s->heap_inv[cust][loc] = -1;
			s->heap[cust][loc] = -1;
		}
	}

	// Copy 'x', count the open locations and sum their opening costs
	s->heap_size = 0;
	s->total_cost = 0.0;
	if (x != NULL) {
		for (loc = 0; loc < inst->n; loc++) {
			s->x[loc] = x[loc];
			if (x[loc] != 1)
				continue;
			++s->heap_size;
			s->total_cost += inst->f[loc];
		}
	}

	if (x == NULL || s->heap_size == 0) {
		// Special case: every location is closed. The cost is the upper bound,
		// and the gain of a flip is measured against it.
		s->total_cost = inst->ub;
		for (loc = 0; loc < inst->n; loc++) {
			s->x[loc] = 0;
			s->flip_gain[loc] = s->total_cost - inst->f[loc];
			for (cust = 0; cust < inst->m; cust++)
				s->flip_gain[loc] -= inst->c[loc][cust];
		}
	} else {
		// General case.
		// Walk inst->inc[cust] (locations by increasing service cost) and append
		// the open ones; inserting in that order satisfies the heap property.
		for (cust = 0; cust < inst->m; cust++) {
			filled = 0;
			for (rank = 0; rank < inst->n && filled < s->heap_size; rank++) {
				loc = inst->inc[cust][rank];
				if (x[loc] != 1)
					continue;
				s->heap[cust][filled] = loc;
				s->heap_inv[cust][loc] = filled;
				if (filled == 0) {
					// first open location: it also contributes to the cost
					s->besti1[cust] = loc;
					s->total_cost += inst->c[loc][cust];
				} else if (filled == 1) {
					s->besti2[cust] = loc;
				}
				++filled;
			}
			assert(filled == s->heap_size);
		}

		// Gain of flipping each location
		for (loc = 0; loc < inst->n; loc++) {
			if (!s->x[loc]) {
				// Opening a closed location
				assert(s->x[loc] == 0 && s->heap_size >= 1);
				s->flip_gain[loc] = -inst->f[loc];
				for (cust = 0; cust < inst->m; cust++)
					if (inst->c[s->besti1[cust]][cust] > inst->c[loc][cust])
						s->flip_gain[loc] += inst->c[s->besti1[cust]][cust] - inst->c[loc][cust];
			} else if (s->heap_size == 1) {
				// Closing the only open location
				s->flip_gain[loc] = - inst->ub + inst->f[loc];
				for (cust = 0; cust < inst->m; cust++)
					s->flip_gain[loc] += inst->c[loc][cust];
			} else {
				// Closing one of several open locations
				assert(s->heap_size >= 2);
				s->flip_gain[loc] = inst->f[loc];
				for (cust = 0; cust < inst->m; cust++)
					if (s->besti1[cust] == loc)
						s->flip_gain[loc] -= inst->c[s->besti2[cust]][cust] - inst->c[loc][cust];
			}
		}
	}

	check_solution(inst, s);
}
示例#16
0
// Performs one iteration of the primal process.
//
// In order, the function:
//  1. flips every location fixed by primal->improving_partial_x that is not
//     yet fixed (tabu value set to INFINITY marks a fixed location), tracking
//     the best solution found so far;
//  2. if no improvement happened for more than inst->request_period moves,
//     makes tabu every free location that agrees with primal->guiding_x;
//  3. scans all free locations to find the best flip gain and to count the
//     free, non-tabu and best candidates;
//  4. picks one location 'w' at random among the appropriate candidates,
//     updates its tabu tenure, flips it and updates the best known solution.
// Returns early without a move when there is no improving move and no free
// location.
void primal_run (primal_t *primal)
{
	instance_t *inst = primal->inst;
	solution_t *s = primal->sol;
	int i, w, n, aspiration, n_best, n_free, n_diver;
	double best_gain;

	check_solution(inst, s);

	//% Algorithm 1 Step 4 (cont'd)
	// Perform moves to conform with the improving partial solution
	for (i = 0; i < inst->n; i++) 
		if (primal->improving_partial_x[i] != -1 && primal->tabu[i] != INFINITY) {
			// tabu == INFINITY marks the location as fixed from now on
			primal->tabu[i] = INFINITY;
			if (s->x[i] != primal->improving_partial_x[i]) {
				solution_flip(inst, s, i);
				check_solution(inst, s);
				++primal->n_moves;
				if (s->total_cost < primal->best_z) {
					primal->n_moves_at_last_improvement = primal->n_moves;
					primal->best_z = s->total_cost;
					memcpy(primal->best_x, s->x, inst->n * sizeof(int));
				}
			}
		}
	// Perturb tabu state using the guiding solution
	if (primal->n_moves_at_last_improvement + inst->request_period < primal->n_moves) {
		primal->n_moves_at_last_improvement = primal->n_moves;
		for (i = 0; i < inst->n; i++) 
			if (primal->tabu[i] != INFINITY && s->x[i] == primal->guiding_x[i])
				primal->tabu[i] = ((double) primal->n_moves) + primal->tabu_length; 
	}

	// Analyze search state
	//% Algorithm 1 step 1
	// NOTE(review): 'aspiration' is only reset in the loop initializer, so once
	// one location satisfies the criterion it stays set for every later i -
	// confirm this is intended.
	for (aspiration = 0, best_gain = -INFINITY, n_best = n_free = n_diver = 0, i = 0; i < inst->n; i++) 
		if (primal->tabu[i] == INFINITY) {
			assert(s->x[i] == primal->improving_partial_x[i]);
		} else {
			++n_free; // count the number of unfixed locations
			if ((primal->n_moves == 0 || (s->total_cost - s->flip_gain[i] <= primal->best_z - bound_eps)) && s->flip_gain[i] != INFINITY) 
				aspiration = 1; // aspiration criterion is satisfied
			if ((double) primal->n_moves > primal->tabu[i] || aspiration) {
				++n_diver; // count the number of non-tabu locations
				if (s->flip_gain[i] > best_gain + bound_eps && best_gain != INFINITY)
					n_best = 1, best_gain = s->flip_gain[i]; // store the best location to perform a move on 
				else if (fabs(s->flip_gain[i] - best_gain) < 2.0 * bound_eps || (s->flip_gain[i] == INFINITY && best_gain == INFINITY))
					++n_best; // count the number of best locations
			}
		}

	// Select the 1OPT move to perform
	// In each branch below 'n' is a random rank; the n-th location matching
	// the branch's filter (counting from 0) becomes 'w'. The 'n-- <= 0' test
	// is only evaluated for locations that pass the filter.
	w = -1; 
	if (best_gain <= bound_eps) { // if there exist no improving moves
		if (n_free == 0) { // if there exist no moves
			//% Algorithm 1 step 1(d)
			return;
		}
		if (n_diver == 0) { // if there exist no non-tabu moves
			// select one location at random
			//% Algorithm 1 step 1(c)
			n = RngStream_RandInt(primal->rng, 0, n_free - 1);
			for (i = 0; i < inst->n && w == -1; i++)
				if (primal->tabu[i] != INFINITY && n-- <= 0) 
					w = i;
		} else { // there exist non-tabu moves
			// select one location at random
			//% Algorithm 1 step 1(b)
			n = RngStream_RandInt(primal->rng, 0, n_diver - 1);
			for (i = 0; i < inst->n && w == -1; i++)
				if (((double) primal->n_moves > primal->tabu[i] || aspiration) && n-- <= 0) 
					w = i;
		}
	} else { // there exists at least one improving move
		// select one of the best at random
		//% Algorithm 1 step 1(a)
		assert(n_best > 0);
		n = RngStream_RandInt(primal->rng, 0, n_best - 1);
		for (w = -1, i = 0; i < inst->n && w == -1; i++) 
			if (((double) primal->n_moves > primal->tabu[i] || aspiration) 
					&& (fabs(s->flip_gain[i] - best_gain) < 2.0 * bound_eps || (s->flip_gain[i] == INFINITY && best_gain == INFINITY))
					&& n-- <= 0) 
				w = i;
	}

	// check that in all cases we have found something
	assert(w >= 0); 
	assert(w < inst->n);
	assert(primal->tabu[w] != INFINITY);

	// Update search state
	//% Algorithm 1 step 2
	// The third argument tells tabu_tenure() whether the chosen move is improving
	tabu_tenure(primal, w, (s->flip_gain[w] > bound_eps)); 
	//% Algorithm 1 step 3(a,b,c)
	solution_flip(inst, s, w);
	check_solution(inst, s);
	//% Algorithm 1 step 3(d)
	if (s->total_cost < primal->best_z) {
		// Update best known solution
		primal->n_moves_at_last_improvement = primal->n_moves;
		primal->best_z = s->total_cost;
		memcpy(primal->best_x, s->x, inst->n * sizeof(int));
	}
}
示例#17
0
/*
 * Convert a CGNS file to a Tecplot data file.
 *
 * Command line: [options] CGNSfile Tecplotfile
 *   -a        write ASCII instead of binary
 *   -b <n>    base index to use
 *   -B <name> base name to use
 *   -w        use weighting when averaging cell data to vertices
 *   -S <n>    solution index to write
 *
 * The program opens the CGNS file, selects the base (which must have cell
 * and physical dimension 3), reads the zones, checks that the sizes fit in
 * a 32-bit integer, and then writes the file header, the variable list, the
 * zone headers (binary only) and, zone by zone, the coordinates, the
 * solution fields and the element connectivity.
 *
 * Returns 0 on success; errors are reported through FATAL()/print_usage().
 */
int main (int argc, char *argv[])
{
    int i, n, ib, nb, nz, nv, nf, celldim, phydim;
    int nn, type, *elems = 0, idata[5];
    cgsize_t ne;
    char *p, basename[33], title[65];
    float value, *var;
    SOLUTION *sol;
    FILE *fp;

    if (argc < 2)
        print_usage (usgmsg, NULL);

    ib = 0;
    basename[0] = 0;
    while ((n = getargs (argc, argv, options)) > 0) {
        switch (n) {
            case 'a':
                ascii = 1;
                break;
            case 'b':
                ib = atoi (argarg);
                break;
            case 'B':
                strncpy (basename, argarg, 32);
                basename[32] = 0;
                break;
            case 'w':
                weighting = 1;
                break;
            case 'S':
                usesol = atoi (argarg);
                break;
        }
    }

    if (argind > argc - 2)
        print_usage (usgmsg, "CGNSfile and/or Tecplotfile not given");
    if (!file_exists (argv[argind]))
        FATAL (NULL, "CGNSfile does not exist or is not a file");

    /* open CGNS file */

    printf ("reading CGNS file from %s\n", argv[argind]);
    nb = open_cgns (argv[argind], 1);
    if (!nb)
        FATAL (NULL, "no bases found in CGNS file");
    if (*basename && 0 == (ib = find_base (basename)))
        FATAL (NULL, "specified base not found");
    if (ib > nb) FATAL (NULL, "base index out of range");
    cgnsbase = ib ? ib : 1;
    if (cg_base_read (cgnsfn, cgnsbase, basename, &celldim, &phydim))
        FATAL (NULL, NULL);
    if (celldim != 3 || phydim != 3)
        FATAL (NULL, "cell and physical dimension must be 3");
    printf ("  using base %d - %s\n", cgnsbase, basename);

    /* the title is the input file name without directory and extension */

    if (NULL == (p = strrchr (argv[argind], '/')) &&
        NULL == (p = strrchr (argv[argind], '\\')))
        strncpy (title, argv[argind], sizeof(title));
    else
        strncpy (title, ++p, sizeof(title));
    title[sizeof(title)-1] = 0;
    if ((p = strrchr (title, '.')) != NULL)
        *p = 0;

    read_zones ();
    if (!nZones)
        FATAL (NULL, "no zones in the CGNS file");

    /* verify dimensions fit in an integer */

    for (nz = 0; nz < nZones; nz++) {
        if (Zones[nz].nverts > CG_MAX_INT32)
            FATAL(NULL, "zone size too large to write with integers");
        if (Zones[nz].type == CGNS_ENUMV(Unstructured)) {
            count_elements (nz, &ne, &type);
            if (ne > CG_MAX_INT32)
                FATAL(NULL, "too many elements to write with integers");
        }
    }

    /* total number of variables: X, Y, Z plus whatever check_solution
       reports; this value must stay intact for every zone header below */

    nv = 3 + check_solution ();

    /* open Tecplot file */

    printf ("writing %s Tecplot data to <%s>\n",
        ascii ? "ASCII" : "binary", argv[++argind]);
    if (NULL == (fp = fopen (argv[argind], ascii ? "w+" : "w+b")))
        FATAL (NULL, "couldn't open Tecplot output file");

    /* write file header */

    if (ascii)
        fprintf (fp, "TITLE = \"%s\"\n", title);
    else {
        fwrite ("#!TDV75 ", 1, 8, fp);
        i = 1;
        write_ints (fp, 1, &i);
        write_string (fp, title);
    }

    /* write variables */

    if (ascii) {
        fprintf (fp, "VARIABLES = \"X\", \"Y\", \"Z\"");
        if (usesol) {
            sol = Zones->sols;
            for (n = 0; n < sol->nflds; n++)
                fprintf (fp, ",\n\"%s\"", sol->flds[n].name);
        }
    }
    else {
        write_ints (fp, 1, &nv);
        write_string (fp, "X");
        write_string (fp, "Y");
        write_string (fp, "Z");
        if (usesol) {
            sol = Zones->sols;
            for (n = 0; n < sol->nflds; n++)
                write_string (fp, sol->flds[n].name);
        }
    }

    /* write zones */

    if (!ascii) {
        for (nz = 0; nz < nZones; nz++) {
            if (Zones[nz].type == CGNS_ENUMV(Structured)) {
                idata[0] = 0;          /* BLOCK */
                idata[1] = -1;         /* color not specified */
                idata[2] = (int)Zones[nz].dim[0];
                idata[3] = (int)Zones[nz].dim[1];
                idata[4] = (int)Zones[nz].dim[2];
            }
            else {
                count_elements (nz, &ne, &type);
                idata[0] = 2;          /* FEBLOCK */
                idata[1] = -1;         /* color not specified */
                idata[2] = (int)Zones[nz].dim[0];
                idata[3] = (int)ne;
                idata[4] = type;
            }
            value = 299.0;
            write_floats (fp, 1, &value);
            write_string (fp, Zones[nz].name);
            write_ints (fp, 5, idata);
        }
        value = 357.0;
        write_floats (fp, 1, &value);
    }

    for (nz = 0; nz < nZones; nz++) {
        printf ("  zone %d...", nz+1);
        fflush (stdout);
        read_zone_grid (nz+1);
        ne = 0;
        type = 2;
        nn = (int)Zones[nz].nverts;
        var = (float *) malloc (nn * sizeof(float));
        if (NULL == var)
            FATAL (NULL, "malloc failed for temp float array");
        if (Zones[nz].type == CGNS_ENUMV(Unstructured))
            elems = volume_elements (nz, &ne, &type);

        if (ascii) {
            if (Zones[nz].type == CGNS_ENUMV(Structured))
                fprintf (fp, "\nZONE T=\"%s\", I=%d, J=%d, K=%d, F=BLOCK\n",
                    Zones[nz].name, (int)Zones[nz].dim[0],
                    (int)Zones[nz].dim[1], (int)Zones[nz].dim[2]);
            else
                fprintf (fp, "\nZONE T=\"%s\", N=%d, E=%d, F=FEBLOCK, ET=%s\n",
                    Zones[nz].name, nn, (int)ne, type == 2 ? "TETRAHEDRON" : "BRICK");
        }
        else {
            value = 299.0;
            write_floats (fp, 1, &value);
            i = 0;
            write_ints (fp, 1, &i);
            /* one entry per variable, so nv must still be the full count */
            i = 1;
            for (n = 0; n < nv; n++)
                write_ints (fp, 1, &i);
        }

        /* coordinates, one variable at a time */

        for (n = 0; n < nn; n++)
            var[n] = (float)Zones[nz].verts[n].x;
        write_floats (fp, nn, var);
        for (n = 0; n < nn; n++)
            var[n] = (float)Zones[nz].verts[n].y;
        write_floats (fp, nn, var);
        for (n = 0; n < nn; n++)
            var[n] = (float)Zones[nz].verts[n].z;
        write_floats (fp, nn, var);

        if (usesol) {
            read_solution_field (nz+1, usesol, 0);
            sol = &Zones[nz].sols[usesol-1];
            if (sol->location != CGNS_ENUMV(Vertex))
                cell_vertex_solution (nz+1, usesol, weighting);
            /* use a dedicated index: looping with nv here would clobber
               the variable count needed by the next zone's header */
            for (nf = 0; nf < sol->nflds; nf++) {
                for (n = 0; n < nn; n++)
                    var[n] = (float)sol->flds[nf].data[n];
                write_floats (fp, nn, var);
            }
        }

        free (var);

        if (Zones[nz].type == CGNS_ENUMV(Unstructured)) {
            if (!ascii) {
                i = 0;
                write_ints (fp, 1, &i);
            }
            /* nodes per element: 1 << 2 = 4 or 1 << 3 = 8 */
            nn = 1 << type;
            for (i = 0, n = 0; n < ne; n++, i += nn)
                write_ints (fp, nn, &elems[i]);
            free (elems);
        }
        puts ("done");
    }

    fclose (fp);
    cg_close (cgnsfn);
    return 0;
}
示例#18
0
/*
 * Test driver for PLASMA_dsyr2k.
 *
 * argv holds, in order: alpha beta N K LDA LDB LDC (argc must be 7).
 * A and B are filled with random values, C is generated by dlagsy, and
 * every uplo[] / trans[] combination is run through PLASMA_dsyr2k and then
 * verified with check_solution().
 *
 * Returns 0 once all combinations have been run (PASSED/FAILED is only
 * printed), -1 on a wrong argument count, -2 if an allocation fails.
 */
int testing_dsyr2k(int argc, char **argv)
{
    /* Check for number of arguments*/
    if ( argc != 7 ){
        /* The synopsis must name the same arguments as the list below. */
        USAGE("SYR2K", "alpha beta N K LDA LDB LDC",
              "   - alpha : alpha coefficient\n"
              "   - beta : beta coefficient\n"
              "   - N : number of columns and rows of matrix C and number of row of matrix A and B\n"
              "   - K : number of columns of matrix A and B\n"
              "   - LDA : leading dimension of matrix A\n"
              "   - LDB : leading dimension of matrix B\n"
              "   - LDC : leading dimension of matrix C\n");
        return -1;
    }

    double alpha = (double) atol(argv[0]);
    double beta  = (double) atol(argv[1]);
    int N     = atoi(argv[2]);
    int K     = atoi(argv[3]);
    int LDA   = atoi(argv[4]);
    int LDB   = atoi(argv[5]);
    int LDC   = atoi(argv[6]);
    int NKmax = max(N, K);
    int NminusOne = N - 1;

    double eps;
    int info_solution;
    int info, u, t;
    /* Widen before multiplying so the element counts cannot overflow int. */
    size_t LDAxK = (size_t)LDA*(size_t)NKmax;
    size_t LDBxK = (size_t)LDB*(size_t)NKmax;
    size_t LDCxN = (size_t)LDC*(size_t)N;

    double *A      = (double *)malloc(LDAxK*sizeof(double));
    double *B      = (double *)malloc(LDBxK*sizeof(double));
    double *C      = (double *)malloc(LDCxN*sizeof(double));
    double *Cinit  = (double *)malloc(LDCxN*sizeof(double));
    double *Cfinal = (double *)malloc(LDCxN*sizeof(double));
    double *WORK   = (double *)malloc(2*(size_t)LDC*sizeof(double));
    double *D      = (double *)malloc((size_t)LDC*sizeof(double));

    /* Check if unable to allocate memory: every buffer is written below,
     * so every one of them has to be checked (C and WORK included). */
    if ( (!A) || (!B) || (!C) || (!Cinit) || (!Cfinal) || (!WORK) || (!D) ){
        printf("Out of Memory \n ");
        /* free(NULL) is a no-op, so release whatever was obtained. */
        free(A); free(B); free(C);
        free(Cinit); free(Cfinal);
        free(WORK); free(D);
        return -2;
    }

    eps = LAPACKE_dlamch_work('e');

    printf("\n");
    printf("------ TESTS FOR PLASMA DSYR2K ROUTINE -------  \n");
    printf("            Size of the Matrix C %d by %d\n", N, K);
    printf("\n");
    printf(" The matrix A is randomly generated for each test.\n");
    printf("============\n");
    printf(" The relative machine precision (eps) is to be %e \n",eps);
    printf(" Computational tests pass if scaled residuals are less than 10.\n");

    /*----------------------------------------------------------
    *  TESTING DSYR2K
    */

    /* Initialize A,B */
    LAPACKE_dlarnv_work(IONE, ISEED, LDAxK, A);
    LAPACKE_dlarnv_work(IONE, ISEED, LDBxK, B);

    /* Initialize C: D is a random vector handed to dlagsy, WORK is its
     * scratch space; neither is needed afterwards. */
    LAPACKE_dlarnv_work(IONE, ISEED, LDC, D);
    dlagsy(&N, &NminusOne, D, C, &LDC, ISEED, WORK, &info);
    free(D); free(WORK);

    for (u=0; u<2; u++) {
        for (t=0; t<2; t++) {

            /* Each combination starts from the same pristine C. */
            memcpy(Cinit,  C, LDCxN*sizeof(double));
            memcpy(Cfinal, C, LDCxN*sizeof(double));

            /* PLASMA DSYR2K */
            PLASMA_dsyr2k(uplo[u], trans[t], N, K, alpha, A, LDA, B, LDB, beta, Cfinal, LDC);

            /* Check the solution */
            info_solution = check_solution(uplo[u], trans[t], N, K,
                                           alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC);

            if (info_solution == 0) {
                printf("***************************************************\n");
                printf(" ---- TESTING DSYR2K (%5s, %s) ........... PASSED !\n", uplostr[u], transstr[t]);
                printf("***************************************************\n");
            }
            else {
                printf("************************************************\n");
                printf(" - TESTING DSYR2K (%5s, %s) ... FAILED !\n", uplostr[u], transstr[t]);
                printf("************************************************\n");
            }
        }
    }

    free(A); free(B); free(C);
    free(Cinit); free(Cfinal);

    return 0;
}
示例#19
0
/*
 * Example: solve A * X = B for a random N x N complex system using PLASMA's
 * incremental-pivoting LU (zgetrf_incpiv) followed by the two triangular
 * solves (ztrsmpl, ztrsm), then verify the result with check_solution().
 *
 * The pipeline stops at the first PLASMA call that reports a non-zero
 * status, so `info` always holds the first failure instead of being
 * overwritten by a later, possibly successful, call.
 *
 * Always returns EXIT_SUCCESS (as before); the outcome is only printed.
 */
int main ()
{

    int cores = 2;
    int N     = 10;
    int LDA   = 10;
    int NRHS  = 5;
    int LDB   = 10;
    int info;
    int info_solution = 0;
    int i,j;
    int LDAxN = LDA*N;
    int LDBxNRHS = LDB*NRHS;

    PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A1));
    PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A2));
    PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B1));
    PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B2));
    /* NULL-initialised so the final free() calls are safe even when the
     * workspace allocation fails. */
    PLASMA_desc *L = NULL;
    int *IPIV = NULL;

    /* Check if unable to allocate memory */
    if ((!A1)||(!A2)||(!B1)||(!B2)){
        printf("Out of Memory \n ");
        free(A1); free(A2); free(B1); free(B2);
        return EXIT_SUCCESS;
    }

    /*Plasma Initialize*/
    PLASMA_Init(cores);
    printf("-- PLASMA is initialized to run on %d cores. \n",cores);

    /* Initialize A1 and A2 Matrix (A2 is overwritten by the factorization,
     * A1 is kept for the residual check) */
    LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1);
    for ( i = 0; i < N; i++)
        for (  j = 0; j < N; j++)
            A2[LDA*j+i] = A1[LDA*j+i];

    /* Initialize B1 and B2 (B2 is overwritten by the solution) */
    LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1);
    for ( i = 0; i < N; i++)
        for ( j = 0; j < NRHS; j++)
            B2[LDB*j+i] = B1[LDB*j+i];


    /* Allocate L and IPIV */
    info = PLASMA_Alloc_Workspace_zgetrf_incpiv(N, N, &L, &IPIV);

    /* LU factorization of the matrix A */
    if (info == 0)
        info = PLASMA_zgetrf_incpiv(N, N, A2, LDA, L, IPIV);

    /* Solve the problem */
    if (info == 0)
        info = PLASMA_ztrsmpl(N, NRHS, A2, LDA, L, IPIV, B2, LDB);
    if (info == 0)
        info = PLASMA_ztrsm(PlasmaLeft, PlasmaUpper, PlasmaNoTrans, PlasmaNonUnit,
                            N, NRHS, (PLASMA_Complex64_t)1.0, A2, LDA, B2, LDB);

    /* Check the solution (meaningless if any step above failed) */
    if (info == 0)
        info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB);

    if ((info_solution != 0)||(info != 0))
       printf("-- Error in ZGETRS example ! \n");
    else
       printf("-- Run of ZGETRS example successful ! \n");

    free(A1); free(A2); free(B1); free(B2); free(IPIV); free(L);

    PLASMA_Finalize();

    return EXIT_SUCCESS;
}
示例#20
0
// Regression-solver comparison driver.
//
// Generates a matrix A (m x n) and a right-hand side b (m x 1) with the
// uniform generator, then solves the same problem with a series of solvers
// (QR, SNE, SVD, LSQR, sketch-and-solve, sketching-accelerated). After each
// solve it calls check_solution() and, on rank 0 only, prints the residual
// figures and the elapsed time.
//
// m, n and t are not declared in this function; they come from elsewhere in
// the file. Always returns 0.
// NOTE(review): El::Initialize is called but no matching finalize call is
// visible in this function -- confirm whether one is required.
int main(int argc, char** argv) {
    double res, resAtr, resFac;

    El::Initialize(argc, argv);

    bmpi::communicator world;
    int rank = world.rank();

    skybase::context_t context(23234);

    // Setup problem and righthand side
    // Using Skylark's uniform generator (as opposed to Elemental's)
    // will insure the same A and b are generated regardless of the number
    // of processors.
    matrix_type A =
        skyutil::uniform_matrix_t<matrix_type>::generate(m,
            n, El::DefaultGrid(), context);
    matrix_type b =
        skyutil::uniform_matrix_t<matrix_type>::generate(m,
            1, El::DefaultGrid(), context);

    regression_problem_type problem(m, n, A);

    boost::mpi::timer timer;
    double telp;

    sol_type x(n,1);

    // r starts out as a copy of b; it is updated once after the QR solve.
    rhs_type r(b);

    // Using QR
    timer.restart();
    exact_solver_type<skyalg::qr_l2_solver_tag> exact_solver(problem);
    exact_solver.solve(b, x);
    telp = timer.elapsed();
    check_solution(problem, b, x, r, res, resAtr, resFac);
    if (rank == 0)
        std::cout << "Exact (QR):\t\t\t||r||_2 =  "
                  << boost::format("%.2f") % res
                  << "\t\t\t\t\t\t\t||A' * r||_2 = " << boost::format("%.2e") % resAtr
                  << "\t\tTime: " << boost::format("%.2e") % telp << " sec"
                  << std::endl;
    // res_opt is reassigned after each of the SNE and SVD solves below, so
    // the "(x ...)" ratios printed later are relative to the SVD residual.
    double res_opt = res;

    // Presumably r <- r - A*x, i.e. the residual of the QR solution, which
    // the later check_solution() calls receive as the reference residual.
    // TODO confirm against skybase::Gemv's argument convention.
    skybase::Gemv(El::NORMAL, -1.0, problem.input_matrix, x, 1.0, r);

    // Using SNE (semi-normal equations)
    timer.restart();
    exact_solver_type<skyalg::sne_l2_solver_tag>(problem).solve(b, x);
    telp = timer.elapsed();
    check_solution(problem, b, x, r, res, resAtr, resFac);
    if (rank == 0)
        std::cout << "Exact (SNE):\t\t\t||r||_2 =  "
                  << boost::format("%.2f") % res
                  << "\t\t\t\t\t\t\t||A' * r||_2 = " << boost::format("%.2e") % resAtr
                  << "\t\tTime: " << boost::format("%.2e") % telp << " sec"
                  << std::endl;
    res_opt = res;

    // Again, using SNE, only with the computed interface (example; to be removed.)
    cmatrix CA(A);
    regression_problem_type1 problem1(m, n, CA);
    timer.restart();
    exact_solver_type1<skyalg::sne_l2_solver_tag>(problem1).solve(b, x);
    telp = timer.elapsed();
    // NOTE(review): the check is run against `problem`, not `problem1`.
    check_solution(problem, b, x, r, res, resAtr, resFac);
    if (rank == 0)
        std::cout << "Exact (SNE) (COMPUTED):\t\t\t||r||_2 =  "
                  << boost::format("%.2f") % res
                  << "\t\t\t\t\t\t\t||A' * r||_2 = " << boost::format("%.2e") % resAtr
                  << "\t\tTime: " << boost::format("%.2e") % telp << " sec"
                  << std::endl;
    res_opt = res;

    // Using SVD
    timer.restart();
    exact_solver_type<skyalg::svd_l2_solver_tag>(problem).solve(b, x);
    telp = timer.elapsed();
    check_solution(problem, b, x, r, res, resAtr, resFac);
    if (rank == 0)
        std::cout << "Exact (SVD):\t\t\t||r||_2 =  "
                  << boost::format("%.2f") % res
                  << "\t\t\t\t\t\t\t||A' * r||_2 = " << boost::format("%.2e") % resAtr
                  << "\t\tTime: " << boost::format("%.2e") % telp << " sec"
                  << std::endl;
    res_opt = res;

    // Using LSQR
    // Only rank 0 is allowed to print from inside the iterative solver.
    skyalg::krylov_iter_params_t lsqrparams;
    lsqrparams.am_i_printing = rank == 0;
    lsqrparams.log_level = 0;
    timer.restart();
    exact_solver_type<
        skyalg::iterative_l2_solver_tag<
            skyalg::lsqr_tag > >(problem, lsqrparams)
        .solve(b, x);
    telp = timer.elapsed();
    check_solution(problem, b, x, r, res, resAtr, resFac);
    // From here on the report also includes resFac; res_opt is no longer
    // updated.
    if (rank == 0)
        std::cout << "Exact (LSQR):\t\t\t||r||_2 =  "
                  << boost::format("%.2f") % res
                  << "\t\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac
                  << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr
                  << "\t\tTime: " << boost::format("%.2e") % telp << " sec"
                  << std::endl;

    // Using sketch-and-solve

    // The JLT variant is compiled out.
#if 0 
    timer.restart();
    sketched_solver_type<skysk::JLT_t>(problem, t, context).solve(b, x);
    telp = timer.elapsed();
    check_solution(problem, b, x, r, res, resAtr, resFac);
    if (rank == 0)
        std::cout << "Sketch-and-Solve (JLT):\t\t||r||_2 =  "
                  << boost::format("%.2f") % res
                  << " (x " << boost::format("%.5f") % (res / res_opt) << ")"
                  << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac
                  << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr
                  << "\t\tTime: " << boost::format("%.2e") % telp << " sec"
                  << std::endl;
#endif

    timer.restart();
    sketched_solver_type<skysk::CWT_t>(problem, t, context).solve(b, x);
    telp = timer.elapsed();
    check_solution(problem, b, x, r, res, resAtr, resFac);
    if (rank == 0)
        std::cout << "Sketch-and-Solve (CWT):\t\t||r||_2 =  "
                  << boost::format("%.2f") % res
                  << " (x " << boost::format("%.5f") % (res / res_opt) << ")"
                  << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac
                  << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr
                  << "\t\tTime: " << boost::format("%.2e") % telp << " sec"
                  << std::endl;

    timer.restart();
    sketched_solver_type<skysk::FJLT_t>(problem, t, context).solve(b, x);
    telp = timer.elapsed();
    check_solution(problem, b, x, r, res, resAtr, resFac);
    if (rank == 0)
        std::cout << "Sketch-and-Solve (FJLT):\t||r||_2 =  "
                  << boost::format("%.2f") % res
                  << " (x " << boost::format("%.5f") % (res / res_opt) << ")"
                  << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac
                  << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr
                  << "\t\tTime: " << boost::format("%.2e") % telp << " sec"
                  << std::endl;

    // Accelerate-using-sketching
    // The JLT variant is compiled out here as well.
#if 0
    timer.restart();
    accelerated_exact_solver_type_sb<skysk::JLT_t>(problem, context).solve(b, x);
    telp = timer.elapsed();
    check_solution(problem, b, x, r, res, resAtr, resFac);
    if (rank == 0)
        std::cout << "Simplified Blendenpik (JLT):\t||r||_2 =  "
                  << boost::format("%.2f") % res
                  << " (x " << boost::format("%.5f") % (res / res_opt) << ")"
                  << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac
                  << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr
                  << "\t\tTime: " << boost::format("%.2e") % telp << " sec"
                  << std::endl;
#endif

    timer.restart();
    accelerated_exact_solver_type_sb<skysk::FJLT_t>(problem, context).solve(b, x);
    telp = timer.elapsed();
    check_solution(problem, b, x, r, res, resAtr, resFac);
    if (rank == 0)
        std::cout << "Simplified Blendenpik (FJLT):\t||r||_2 =  "
                  << boost::format("%.2f") % res
                  << " (x " << boost::format("%.5f") % (res / res_opt) << ")"
                  << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac
                  << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr
                  << "\t\tTime: " << boost::format("%.2e") % telp << " sec"
                  << std::endl;

    timer.restart();
    accelerated_exact_solver_type_sb<skysk::CWT_t>(problem, context).solve(b, x);
    telp = timer.elapsed();
    check_solution(problem, b, x, r, res, resAtr, resFac);
    if (rank == 0)
        std::cout << "Simplified Blendenpik (CWT):\t||r||_2 =  "
                  << boost::format("%.2f") % res
                  << " (x " << boost::format("%.5f") % (res / res_opt) << ")"
                  << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac
                  << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr
                  << "\t\tTime: " << boost::format("%.2e") % telp << " sec"
                  << std::endl;

    timer.restart();
    accelerated_exact_solver_type_blendenpik(problem, context).solve(b, x);
    telp = timer.elapsed();
    check_solution(problem, b, x, r, res, resAtr, resFac);
    if (rank == 0)
        std::cout << "Blendenpik:\t\t\t||r||_2 =  "
                  << boost::format("%.2f") % res
                  << " (x " << boost::format("%.5f") % (res / res_opt) << ")"
                  << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac
                  << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr
                  << "\t\tTime: " << boost::format("%.2e") % telp << " sec"
                  << std::endl;

    timer.restart();
    accelerated_exact_solver_type_lsrn(problem, context).solve(b, x);
    telp = timer.elapsed();
    check_solution(problem, b, x, r, res, resAtr, resFac);
    if (rank == 0)
        std::cout << "LSRN:\t\t\t\t||r||_2 =  "
                  << boost::format("%.2f") % res
                  << " (x " << boost::format("%.5f") % (res / res_opt) << ")"
                  << "\t||r - r*||_2 / ||b - r*||_2 = " << boost::format("%.2e") % resFac
                  << "\t||A' * r||_2 = " << boost::format("%.2e") % resAtr
                  << "\t\tTime: " << boost::format("%.2e") % telp << " sec"
                  << std::endl;

    return 0;
}
示例#21
0
/*
 * Example: solve the M x N problem A * X = B with PLASMA's QR factorization
 * (dgeqrf) followed by dgeqrs, then verify the result with check_solution().
 *
 * The pipeline stops at the first PLASMA call that reports a non-zero
 * status, so a failed workspace allocation or factorization is no longer
 * hidden by the status of the following call.
 *
 * Terminates through exit(0) in every case (as before); the outcome is only
 * printed.
 */
int main ()
{

    int cores = 2;
    int M     = 15;
    int N     = 10;
    int LDA   = 15;
    int NRHS  = 5;
    int LDB   = 15;

    int info;
    int info_solution = 0;
    int i,j;
    int LDAxN = LDA*N;
    int LDBxNRHS = LDB*NRHS;

    double *A1 = (double *)malloc(LDA*N*sizeof(double));
    double *A2 = (double *)malloc(LDA*N*sizeof(double));
    double *B1 = (double *)malloc(LDB*NRHS*sizeof(double));
    double *B2 = (double *)malloc(LDB*NRHS*sizeof(double));
    /* NULL-initialised so free(T) is safe if the workspace allocation fails. */
    double *T = NULL;

    /* Check if unable to allocate memory */
    if ((!A1)||(!A2)||(!B1)||(!B2)){
        printf("Out of Memory \n ");
        free(A1); free(A2); free(B1); free(B2);
        exit(0);
    }

    /* Plasma Initialization */
    PLASMA_Init(cores);
    printf("-- PLASMA is initialized to run on %d cores. \n",cores);

    /* Allocate T */
    info = PLASMA_Alloc_Workspace_dgeqrf(M, N, &T);

    /* Initialize A1 and A2 (A2 is overwritten by the factorization,
     * A1 is kept for the residual check) */
    LAPACKE_dlarnv_work(IONE, ISEED, LDAxN, A1);
    for (i = 0; i < M; i++)
        for (j = 0; j < N; j++)
            A2[LDA*j+i] = A1[LDA*j+i] ;

    /* Initialize B1 and B2 (B2 is overwritten by the solution) */
    LAPACKE_dlarnv_work(IONE, ISEED, LDBxNRHS, B1);
    for (i = 0; i < M; i++)
        for (j = 0; j < NRHS; j++)
             B2[LDB*j+i] = B1[LDB*j+i] ;

    /* Factorization QR of the matrix A2 */
    if (info == 0)
        info = PLASMA_dgeqrf(M, N, A2, LDA, T);

    /* Solve the problem */
    if (info == 0)
        info = PLASMA_dgeqrs(M, N, NRHS, A2, LDA, T, B2, LDB);

    /* Check the solution (meaningless if any step above failed) */
    if (info == 0)
        info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB);

    if ((info_solution != 0)||(info != 0))
       printf("-- Error in DGEQRS example ! \n");
    else
       printf("-- Run of DGEQRS example successful ! \n");

    free(A1); free(A2); free(B1); free(B2); free(T);

    PLASMA_Finalize();

    exit(0);
}
示例#22
0
/*
 * Test driver for PLASMA_dsposv.
 *
 * argv holds, in order: N LDA NRHS LDB (argc must be 4). A is generated by
 * dlagsy and then has N added to its diagonal, B is random, and the result
 * of PLASMA_dsposv is verified with check_solution().
 *
 * Returns 0 after running the test (PASSED/FAILED is only printed), -1 on a
 * wrong argument count, -2 if an allocation fails.
 */
int testing_dsposv(int argc, char **argv)
{
    /* Check for number of arguments*/
    if (argc != 4){
        USAGE("DSPOSV", "N LDA NRHS LDB",
              "   - N    : the size of the matrix\n"
              "   - LDA  : leading dimension of the matrix A\n"
              "   - NRHS : number of RHS\n"
              "   - LDB  : leading dimension of the RHS B\n");
        return -1;
    }

    int N    = atoi(argv[0]);
    int LDA  = atoi(argv[1]);
    int NRHS = atoi(argv[2]);
    int LDB  = atoi(argv[3]);
    int ITER;
    double eps;
    int uplo;
    int info;
    int info_solution = 0; /*, info_factorization;*/
    int i,j;
    int NminusOne = N-1;
    int LDBxNRHS = LDB*NRHS;

    double *A1   = (double *)malloc(LDA*N   *sizeof(double));
    double *A2   = (double *)malloc(LDA*N   *sizeof(double));
    double *B1   = (double *)malloc(LDB*NRHS*sizeof(double));
    double *B2   = (double *)malloc(LDB*NRHS*sizeof(double));
    double *WORK = (double *)malloc(2*LDA   *sizeof(double));
    double *D    = (double *)malloc(LDA*sizeof(double));

    /* Check if unable to allocate memory. WORK and D are written by
     * dlarnv/dlagsy below, so they must be checked too. Report the failure
     * to the caller instead of exiting the process with a success status. */
    if ( (!A1) || (!A2) || (!B1) || (!B2) || (!WORK) || (!D) ){
        printf("Out of Memory \n ");
        /* free(NULL) is a no-op, so release whatever was obtained. */
        free(A1); free(A2); free(B1); free(B2); free(WORK); free(D);
        return -2;
    }

    eps = LAPACKE_dlamch_work('e');

    /*-------------------------------------------------------------
    *  TESTING DSPOSV
    */

    /* Initialize A1 and A2 for Symmetric Positif Matrix (Hessenberg in the complex case) */
    LAPACKE_dlarnv_work(IONE, ISEED, LDA, D);
    dlagsy(&N, &NminusOne, D, A1, &LDA, ISEED, WORK, &info);
    free(D);

    for ( i = 0; i < N; i++)
        for (  j = 0; j < N; j++)
            A2[LDA*j+i] = A1[LDA*j+i];

    /* Add N to the diagonal of both copies. */
    for ( i = 0; i < N; i++){
        A1[LDA*i+i] = A1[LDA*i+i] + N ;
        A2[LDA*i+i] = A1[LDA*i+i];
    }

    /* Initialize B1 and B2 */
    LAPACKE_dlarnv_work(IONE, ISEED, LDBxNRHS, B1);
    for ( i = 0; i < N; i++)
        for ( j = 0; j < NRHS; j++)
            B2[LDB*j+i] = B1[LDB*j+i];

    printf("\n");
    printf("------ TESTS FOR PLASMA DSPOSV ROUTINE ------  \n");
    printf("            Size of the Matrix %d by %d\n", N, N);
    printf("\n");
    printf(" The matrix A is randomly generated for each test.\n");
    printf("============\n");
    printf(" The relative machine precision (eps) is to be %e \n", eps);
    printf(" Computational tests pass if scaled residuals are less than 60.\n");

    /* PLASMA DSPOSV */
    uplo = PlasmaLower;
    info = PLASMA_dsposv(uplo, N, NRHS, A2, LDA, B1, LDB, B2, LDB, &ITER);

    if (info != PLASMA_SUCCESS ) {
        printf("PLASMA_dsposv is not completed: info = %d\n", info);
        info_solution = 1;
    } else {
        printf(" Solution obtained with %d iterations\n", ITER);

        /* Check the factorization and the solution */
        info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps);
    }

    if (info_solution == 0){
        printf("***************************************************\n");
        printf(" ---- TESTING DSPOSV ..................... PASSED !\n");
        printf("***************************************************\n");
    }
    else{
        printf("***************************************************\n");
        printf(" - TESTING DSPOSV .. FAILED !\n");
        printf("***************************************************\n");
    }

    free(A1); free(A2); free(B1); free(B2); free(WORK);

    return 0;
}
示例#23
0
/*
 * Test driver for PLASMA_zsymm.
 *
 * argv holds, in order: alpha beta M N LDA LDB LDC (argc must be 7).
 * A is generated by PLASMA_zplgsy, B and C are random, and every
 * side[] / uplo[] combination is run through PLASMA_zsymm and then verified
 * with check_solution().
 *
 * Returns 0 once all combinations have been run (PASSED/FAILED is only
 * printed), -1 on a wrong argument count, -2 if an allocation fails.
 */
int testing_zsymm(int argc, char **argv)
{
    /* Check for number of arguments*/
    if ( argc != 7 ){
        /* Seven arguments are read below; there is no K. */
        USAGE("SYMM", "alpha beta M N LDA LDB LDC",
              "   - alpha : alpha coefficient \n"
              "   - beta : beta coefficient \n"
              "   - M : number of rows of matrices A and C \n"
              "   - N : number of columns of matrices B and C \n"
              "   - LDA : leading dimension of matrix A \n"
              "   - LDB : leading dimension of matrix B \n"
              "   - LDC : leading dimension of matrix C\n");
        return -1;
    }

    PLASMA_Complex64_t alpha = (PLASMA_Complex64_t) atol(argv[0]);
    PLASMA_Complex64_t beta  = (PLASMA_Complex64_t) atol(argv[1]);
    int M     = atoi(argv[2]);
    int N     = atoi(argv[3]);
    int LDA   = atoi(argv[4]);
    int LDB   = atoi(argv[5]);
    int LDC   = atoi(argv[6]);
    int MNmax = max(M, N);

    double eps;
    int info_solution;
    int i, j, s, u;
    int LDAxM = LDA*MNmax;
    int LDBxN = LDB*N;
    int LDCxN = LDC*N;

    PLASMA_Complex64_t *A      = (PLASMA_Complex64_t *)malloc(LDAxM*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *B      = (PLASMA_Complex64_t *)malloc(LDBxN*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *C      = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *Cinit  = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *Cfinal = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t));

    /* Check if unable to allocate memory (C is filled by zlarnv below, so
     * it has to be part of the check) */
    if ((!A)||(!B)||(!C)||(!Cinit)||(!Cfinal)){
        printf("Out of Memory \n ");
        /* free(NULL) is a no-op, so release whatever was obtained. */
        free(A); free(B); free(C);
        free(Cinit); free(Cfinal);
        return -2;
    }

    eps = LAPACKE_dlamch_work('e');

    printf("\n");
    printf("------ TESTS FOR PLASMA ZSYMM ROUTINE -------  \n");
    printf("            Size of the Matrix %d by %d\n", M, N);
    printf("\n");
    printf(" The matrix A is randomly generated for each test.\n");
    printf("============\n");
    printf(" The relative machine precision (eps) is to be %e \n",eps);
    printf(" Computational tests pass if scaled residuals are less than 10.\n");

    /*----------------------------------------------------------
    *  TESTING ZSYMM
    */

    /* Initialize A */
    PLASMA_zplgsy( (double)0., MNmax, A, LDA, 51 );

    /* Initialize B */
    LAPACKE_zlarnv_work(IONE, ISEED, LDBxN, B);

    /* Initialize C */
    LAPACKE_zlarnv_work(IONE, ISEED, LDCxN, C);

    for (s=0; s<2; s++) {
        for (u=0; u<2; u++) {

            /* Initialize  Cinit / Cfinal: each combination starts from the
             * same pristine M x N part of C */
            for ( i = 0; i < M; i++)
                for (  j = 0; j < N; j++)
                    Cinit[LDC*j+i] = C[LDC*j+i];
            for ( i = 0; i < M; i++)
                for (  j = 0; j < N; j++)
                    Cfinal[LDC*j+i] = C[LDC*j+i];

            /* PLASMA ZSYMM */
            PLASMA_zsymm(side[s], uplo[u], M, N, alpha, A, LDA, B, LDB, beta, Cfinal, LDC);

            /* Check the solution */
            info_solution = check_solution(side[s], uplo[u], M, N, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC);

            if (info_solution == 0) {
                printf("***************************************************\n");
                printf(" ---- TESTING ZSYMM (%5s, %5s) ....... PASSED !\n", sidestr[s], uplostr[u]);
                printf("***************************************************\n");
            }
            else {
                printf("************************************************\n");
                printf(" - TESTING ZSYMM (%s, %s) ... FAILED !\n", sidestr[s], uplostr[u]);
                printf("************************************************\n");
            }
        }
    }

    free(A); free(B); free(C);
    free(Cinit); free(Cfinal);

    return 0;
}
示例#24
0
// Solves the assignment problem stored in m_costmatrix (m_rows x m_cols).
//
// m_costmatrix is modified in place (column minima are subtracted) and
// m_assignment is reset before the search starts. Once every row is matched
// the result is validated with check_solution(); if that succeeds,
// assign_solution() is called and true is returned, otherwise false.
//
// Working arrays:
//   col_vertex[k]  column matched to row k, or -1
//   row_vertex[l]  row matched to column l, or -1
//   unchosen_row   rows currently in the alternating forest (first t entries)
//   parent_row[l]  row through which column l was reached
//   row_dec[k]     amount subtracted from row k
//   col_inc[l]     amount added to column l
//   slack[l]       smallest reduced cost seen for column l (0 = in forest)
//   slack_row[l]   row that achieved slack[l]
// NOTE(review): slack_row is sized by rows but indexed by column, and row 0
// and column 0 of the matrix are read unconditionally, so this presumably
// expects a non-empty, square matrix -- confirm.
bool Hungarian::solve()
{
	int j, m, n, k, l, s, t, q, unmatched;

	m = m_rows;
	n = m_cols;

	int INF = std::numeric_limits<int>::max();

	//vertex alternating paths,
	// (the vectors are value-initialised, i.e. start out as all zeroes)
	vector<int> col_vertex(m), row_vertex(n), unchosen_row(m), parent_row(n),
				row_dec(m),  col_inc(n),  slack_row(m),    slack(n);

	//Double check assignment matrix is 0
	m_assignment.assign(m, vector<int>(n, HUNGARIAN_NOT_ASSIGNED));

  // Begin subtract column minima in order to start with lots of zeroes 12
	if (verbose)
	{
		fprintf(stderr, "Using heuristic\n");
	}

	for (l=0;l<n;l++)
	{
		s = m_costmatrix[0][l];

		for (k=1;k<m;k++) 
		{
			if (m_costmatrix[k][l] < s)
			{
				s=m_costmatrix[k][l];
			}
		}

		if (s!=0)
		{
			for (k=0;k<m;k++)
			{
				m_costmatrix[k][l]-=s;
			}
		}

		//pre-initialize state 16
		row_vertex[l]= -1;
		parent_row[l]= -1;
		col_inc[l]=0;
		slack[l]=INF;
	}
  // End subtract column minima in order to start with lots of zeroes 12

  // Begin initial state 16
	t=0;

	for (k=0;k<m;k++)
	{
		// The minimum must be taken over the WHOLE row before looking for
		// a free column: matching while the minimum is still being scanned
		// would pair the row with a prefix minimum and leave row_dec[k]
		// unset or too large.
		s=m_costmatrix[k][0];

		for (l=1;l<n;l++)
		{
			if (m_costmatrix[k][l] < s)
			{
				s = m_costmatrix[k][l];
			}
		}
		row_dec[k]=s;

		// Greedily match row k to the first free column holding that minimum.
		bool row_done = false;

		for (l=0;l<n;l++)
		{
			if (s == m_costmatrix[k][l] && row_vertex[l]<0)
			{
				col_vertex[k]=l;
				row_vertex[l]=k;

				if (verbose)
				{
					fprintf(stderr, "matching col %d==row %d\n",l,k);
				}
				row_done = true;
				break;
			}
		}

		if(!row_done)
		{
			col_vertex[k]= -1;

			if (verbose)
			{
				fprintf(stderr, "node %d: unmatched row %d\n",t,k);
			}
		
			unchosen_row[t++]=k;
		}

	}
  // End initial state 16

	bool checked = false;

  // Begin Hungarian algorithm 18

	//is matching already complete?
	if (t == 0)
	{
		checked = check_solution(row_dec, col_inc, col_vertex);
		if (checked)
		{
			//finish assignment, wrap up and done.
			assign_solution(row_dec, col_inc, col_vertex);
			return true;
		}
		else
		{
			if(verbose)
			{
				fprintf(stderr, "Could not solve. Error.\n");
			}
			return false;
		}
	}

	unmatched=t;


	// Each pass of this loop grows the forest until an augmenting path is
	// found (breakthru) and then enlarges the matching by one row.
	while (1)
	{
		if (verbose)
		{
			fprintf(stderr, "Matched %d rows.\n",m-t);
		}
		q=0;
		while (1)
		{
			while (q<t)
			{
			// Begin explore node q of the forest 19
				
				k=unchosen_row[q];
				s=row_dec[k];
				for (l=0;l<n;l++)
				{
					if (slack[l])
					{
						int del;
						del=m_costmatrix[k][l]-s+col_inc[l];
						if (del<slack[l])
						{
							if (del==0)
							{
								// Zero reduced cost: a free column ends
								// the search, a matched one extends the
								// forest with its partner row.
								if (row_vertex[l]<0)
								{
									goto breakthru;
								}
								slack[l]=0;
								parent_row[l]=k;
								if (verbose){
									fprintf(stderr, "node %d: row %d==col %d--row %d\n",
										t,row_vertex[l],l,k);}
								unchosen_row[t++]=row_vertex[l];
							}
							else
							{
								slack[l]=del;
								slack_row[l]=k;
							}
						}
					}
				}
			// End explore node q of the forest 19
				q++;	
			}
 
	  // Begin introduce a new zero into the matrix 21
		// s becomes the smallest positive slack; forest rows are decreased
		// by it and columns already in the forest are increased by it.
		s=INF;
		for (l=0;l<n;l++)
		{
			if (slack[l] && slack[l]<s)
			{
				s=slack[l];
			}
		}
		for (q=0;q<t;q++)
		{
			row_dec[unchosen_row[q]]+=s;
		}
		for (l=0;l<n;l++)
		{
			//check slack
			if (slack[l])
			{
				slack[l]-=s;
				if (slack[l]==0)
				{
					// Begin look at a new zero 22
					k=slack_row[l];
					if (verbose)
					{
						fprintf(stderr, 
						"Decreasing uncovered elements by %d produces zero at [%d,%d]\n",
						s,k,l);
					}
					if (row_vertex[l]<0)
					{
						// Finish the column update for the columns this
						// loop has not reached yet before leaving it.
						for (j=l+1;j<n;j++)
							if (slack[j]==0)
							{
								col_inc[j]+=s;
							}

						goto breakthru;
					}
					else
					{
						parent_row[l]=k;
						if (verbose)
							{ fprintf(stderr, "node %d: row %d==col %d--row %d\n",t,row_vertex[l],l,k);}
						unchosen_row[t++]=row_vertex[l];
						
					}
		// End look at a new zero 22
				}
			}
			else
			{
				col_inc[l]+=s;
			}
		}
	// End introduce a new zero into the matrix 21
	}

    breakthru:
      // Begin update the matching 20
		if (verbose)
		{
			fprintf(stderr, "Breakthrough at node %d of %d!\n",q,t);
		}
		// Walk back along parent_row, flipping matched/unmatched edges,
		// starting from the (row k, free column l) pair found above.
		while (1)
		{
			j=col_vertex[k];
			col_vertex[k]=l;
			row_vertex[l]=k;
			if (verbose)
			{
				fprintf(stderr, "rematching col %d==row %d\n",l,k);
			}
			if (j<0)
			{
				break;
			}
			k=parent_row[j];
			l=j;
		}
		// End update the matching 20
		if (--unmatched == 0)
		{
			checked = check_solution(row_dec, col_inc, col_vertex);
			if (checked)
			{
				//finish assignment, wrap up and done.
				assign_solution(row_dec, col_inc, col_vertex);
				return true;
			}
			else
			{
				if(verbose)
				{
					fprintf(stderr, "Could not solve. Error.\n");
				}
				return false;
			}
		}
		
		// Begin get ready for another stage 17
			t=0;
			for (l=0;l<n;l++)
			{
				parent_row[l]= -1;
				slack[l]=INF;
			}
			for (k=0;k<m;k++)
			{
				if (col_vertex[k]<0)
				{
					if (verbose)
					{ fprintf(stderr, "node %d: unmatched row %d\n",t,k);}
					unchosen_row[t++]=k;
				}
			}
		// End get ready for another stage 17
	}// back to while loop


}
示例#25
0
int main() {

  igraph_sparsemat_t A, B, C;
  igraph_vector_t b, x;
  long int i;

  /* lsolve */

#define DIM 10
#define EDGES (DIM*DIM/6)
  igraph_sparsemat_init(&A, DIM, DIM, EDGES+DIM);
  for (i=0; i<DIM; i++) {
    igraph_sparsemat_entry(&A, i, i, RNG_INTEGER(1,3));
  }
  for (i=0; i<EDGES; i++) {
    long int r=RNG_INTEGER(0, DIM-1);
    long int c=RNG_INTEGER(0, r);
    igraph_real_t value=RNG_INTEGER(1,5);
    igraph_sparsemat_entry(&A, r, c, value);
  }
  igraph_sparsemat_compress(&A, &B);
  igraph_sparsemat_destroy(&A);
  igraph_sparsemat_dupl(&B);
  
  igraph_vector_init(&b, DIM);
  for (i=0; i<DIM; i++) {    
    VECTOR(b)[i] = RNG_INTEGER(1,10);
  }

  igraph_vector_init(&x, DIM);
  igraph_sparsemat_lsolve(&B, &b, &x);

  if (! check_solution(&B, &x, &b)) { return 1; }

  igraph_vector_destroy(&b);
  igraph_vector_destroy(&x);
  igraph_sparsemat_destroy(&B);

#undef DIM
#undef EDGES

  /* ltsolve */

#define DIM 10
#define EDGES (DIM*DIM/6)
  igraph_sparsemat_init(&A, DIM, DIM, EDGES+DIM);
  for (i=0; i<DIM; i++) {
    igraph_sparsemat_entry(&A, i, i, RNG_INTEGER(1,3));
  }
  for (i=0; i<EDGES; i++) {
    long int r=RNG_INTEGER(0, DIM-1);
    long int c=RNG_INTEGER(0, r);
    igraph_real_t value=RNG_INTEGER(1,5);
    igraph_sparsemat_entry(&A, r, c, value);
  }
  igraph_sparsemat_compress(&A, &B);
  igraph_sparsemat_destroy(&A);
  igraph_sparsemat_dupl(&B);
  
  igraph_vector_init(&b, DIM);
  for (i=0; i<DIM; i++) {    
    VECTOR(b)[i] = RNG_INTEGER(1,10);
  }

  igraph_vector_init(&x, DIM);
  igraph_sparsemat_ltsolve(&B, &b, &x);

  igraph_sparsemat_transpose(&B, &A, /*values=*/ 1);
  if (! check_solution(&A, &x, &b)) { return 2; }

  igraph_vector_destroy(&b);
  igraph_vector_destroy(&x);
  igraph_sparsemat_destroy(&B);
  igraph_sparsemat_destroy(&A);

#undef DIM
#undef EDGES

  /* usolve */

#define DIM 10
#define EDGES (DIM*DIM/6)
  igraph_sparsemat_init(&A, DIM, DIM, EDGES+DIM);
  for (i=0; i<DIM; i++) {
    igraph_sparsemat_entry(&A, i, i, RNG_INTEGER(1,3));
  }
  for (i=0; i<EDGES; i++) {
    long int r=RNG_INTEGER(0, DIM-1);
    long int c=RNG_INTEGER(0, r);
    igraph_real_t value=RNG_INTEGER(1,5);
    igraph_sparsemat_entry(&A, r, c, value);
  }
  igraph_sparsemat_compress(&A, &B);
  igraph_sparsemat_destroy(&A);
  igraph_sparsemat_dupl(&B);
  igraph_sparsemat_transpose(&B, &A, /*values=*/ 1);
  
  igraph_vector_init(&b, DIM);
  for (i=0; i<DIM; i++) {    
    VECTOR(b)[i] = RNG_INTEGER(1,10);
  }

  igraph_vector_init(&x, DIM);
  igraph_sparsemat_usolve(&A, &b, &x);

  if (! check_solution(&A, &x, &b)) { return 3; }

  igraph_vector_destroy(&b);
  igraph_vector_destroy(&x);
  igraph_sparsemat_destroy(&B);
  igraph_sparsemat_destroy(&A);

#undef DIM
#undef EDGES  

  /* utsolve */

#define DIM 10
#define EDGES (DIM*DIM/6)
  igraph_sparsemat_init(&A, DIM, DIM, EDGES+DIM);
  for (i=0; i<DIM; i++) {
    igraph_sparsemat_entry(&A, i, i, RNG_INTEGER(1,3));
  }
  for (i=0; i<EDGES; i++) {
    long int r=RNG_INTEGER(0, DIM-1);
    long int c=RNG_INTEGER(0, r);
    igraph_real_t value=RNG_INTEGER(1,5);
    igraph_sparsemat_entry(&A, r, c, value);
  }
  igraph_sparsemat_compress(&A, &B);
  igraph_sparsemat_destroy(&A);
  igraph_sparsemat_dupl(&B);
  igraph_sparsemat_transpose(&B, &A, /*values=*/ 1);
  igraph_sparsemat_destroy(&B);
  
  igraph_vector_init(&b, DIM);
  for (i=0; i<DIM; i++) {    
    VECTOR(b)[i] = RNG_INTEGER(1,10);
  }

  igraph_vector_init(&x, DIM);
  igraph_sparsemat_utsolve(&A, &b, &x);

  igraph_sparsemat_transpose(&A, &B, /*values=*/ 1);
  if (! check_solution(&B, &x, &b)) { return 4; }

  igraph_vector_destroy(&b);
  igraph_vector_destroy(&x);
  igraph_sparsemat_destroy(&B);
  igraph_sparsemat_destroy(&A);

#undef DIM
#undef EDGES  

  /* cholsol */
  /* We need a positive definite matrix, so we create a full-rank
     matrix first and then calculate A'A, which will be positive
     definite. */

#define DIM 10
#define EDGES (DIM*DIM/6)
  igraph_sparsemat_init(&A, DIM, DIM, EDGES+DIM);
  for (i=0; i<DIM; i++) {
    igraph_sparsemat_entry(&A, i, i, RNG_INTEGER(1,3));
  }
  for (i=0; i<EDGES; i++) {
    long int from=RNG_INTEGER(0, DIM-1);
    long int to=RNG_INTEGER(0, DIM-1);
    igraph_real_t value=RNG_INTEGER(1, 5);
    igraph_sparsemat_entry(&A, from, to, value);
  }
  igraph_sparsemat_compress(&A, &B);
  igraph_sparsemat_destroy(&A);
  igraph_sparsemat_dupl(&B);
  igraph_sparsemat_transpose(&B, &A, /*values=*/ 1);
  igraph_sparsemat_multiply(&A, &B, &C);
  igraph_sparsemat_destroy(&A);
  igraph_sparsemat_destroy(&B);
  
  igraph_vector_init(&b, DIM);
  for (i=0; i<DIM; i++) {
    VECTOR(b)[i] = RNG_INTEGER(1,10);
  }

  igraph_vector_init(&x, DIM);
  igraph_sparsemat_cholsol(&C, &b, &x, /*order=*/ 0);
  
  if (! check_solution(&C, &x, &b)) { return 5; }

  igraph_vector_destroy(&b);
  igraph_vector_destroy(&x);
  igraph_sparsemat_destroy(&C);

#undef DIM
#undef EDGES

  /* lusol */

#define DIM 10
#define EDGES (DIM*DIM/4)
  igraph_sparsemat_init(&A, DIM, DIM, EDGES+DIM);
  for (i=0; i<DIM; i++) {
    igraph_sparsemat_entry(&A, i, i, RNG_INTEGER(1,3));
  }
  for (i=0; i<EDGES; i++) {
    long int from=RNG_INTEGER(0, DIM-1);
    long int to=RNG_INTEGER(0, DIM-1);
    igraph_real_t value=RNG_INTEGER(1, 5);
    igraph_sparsemat_entry(&A, from, to, value);
  }
  igraph_sparsemat_compress(&A, &B);
  igraph_sparsemat_destroy(&A);
  igraph_sparsemat_dupl(&B);

  igraph_vector_init(&b, DIM);
  for (i=0; i<DIM; i++) {
    VECTOR(b)[i] = RNG_INTEGER(1,10);
  }

  igraph_vector_init(&x, DIM);
  igraph_sparsemat_lusol(&B, &b, &x, /*order=*/ 0, /*tol=*/ 1e-10);
  
  if (! check_solution(&B, &x, &b)) { return 6; }

  igraph_vector_destroy(&b);
  igraph_vector_destroy(&x);
  igraph_sparsemat_destroy(&B);

#undef DIM
#undef EDGES

  return 0;
}
示例#26
0
int testing_dtrmm(int argc, char **argv)
{
    /* Check for number of arguments*/
    if ( argc != 5 ) {
        USAGE("TRMM", "alpha M N LDA LDB",
              "   - alpha  : alpha coefficient\n"
              "   - M      : number of rows of matrices B\n"
              "   - N      : number of columns of matrices B\n"
              "   - LDA    : leading dimension of matrix A\n"
              "   - LDB    : leading dimension of matrix B\n");
        return -1;
    }

    double alpha = (double) atol(argv[0]);
    int M     = atoi(argv[1]);
    int N     = atoi(argv[2]);
    int LDA   = atoi(argv[3]);
    int LDB   = atoi(argv[4]);

    double eps;
    int info_solution;
    int s, u, t, d, i;
    int LDAxM = LDA*max(M,N);
    int LDBxN = LDB*max(M,N);

    double *A      = (double *)malloc(LDAxM*sizeof(double));
    double *B      = (double *)malloc(LDBxN*sizeof(double));
    double *Binit  = (double *)malloc(LDBxN*sizeof(double));
    double *Bfinal = (double *)malloc(LDBxN*sizeof(double));

    /* Check if unable to allocate memory */
    if ( (!A) || (!B) || (!Binit) || (!Bfinal)){
        printf("Out of Memory \n ");
        return -2;
    }

    eps = LAPACKE_dlamch_work('e');

    printf("\n");
    printf("------ TESTS FOR PLASMA DTRMM ROUTINE -------  \n");
    printf("            Size of the Matrix B : %d by %d\n", M, N);
    printf("\n");
    printf(" The matrix A is randomly generated for each test.\n");
    printf("============\n");
    printf(" The relative machine precision (eps) is to be %e \n",eps);
    printf(" Computational tests pass if scaled residuals are less than 10.\n");

    /*----------------------------------------------------------
     *  TESTING DTRMM
     */

    /* Initialize A, B, C */
    LAPACKE_dlarnv_work(IONE, ISEED, LDAxM, A);
    LAPACKE_dlarnv_work(IONE, ISEED, LDBxN, B);
    for(i=0;i<max(M,N);i++)
      A[LDA*i+i] = A[LDA*i+i] + 2.0;

    for (s=0; s<2; s++) {
        for (u=0; u<2; u++) {
#ifdef COMPLEX
            for (t=0; t<3; t++) {
#else
            for (t=0; t<2; t++) {
#endif
                for (d=0; d<2; d++) {

                    memcpy(Binit,  B, LDBxN*sizeof(double));
                    memcpy(Bfinal, B, LDBxN*sizeof(double));

                    /* PLASMA DTRMM */
                    PLASMA_dtrmm(side[s], uplo[u], trans[t], diag[d],
                                 M, N, alpha, A, LDA, Bfinal, LDB);

                    /* Check the solution */
                    info_solution = check_solution(side[s], uplo[u], trans[t], diag[d],
                                                   M, N, alpha, A, LDA, Binit, Bfinal, LDB);

                    printf("***************************************************\n");
                    if (info_solution == 0) {
                        printf(" ---- TESTING DTRMM (%s, %s, %s, %s) ...... PASSED !\n",
                               sidestr[s], uplostr[u], transstr[t], diagstr[d]);
                    }
                    else {
                        printf(" ---- TESTING DTRMM (%s, %s, %s, %s) ... FAILED !\n",
                               sidestr[s], uplostr[u], transstr[t], diagstr[d]);
                    }
                    printf("***************************************************\n");
                }
            }
        }
    }

    free(A); free(B);
    free(Binit); free(Bfinal);

    return 0;
}

/*--------------------------------------------------------------
 * Check the solution of a DTRMM run against the CBLAS reference.
 *
 * side/uplo/trans/diag, M, N, alpha, A, LDA : the operation that was run.
 * Bref    : copy of the original B; it is overwritten here (first with the
 *           cblas_dtrmm result, then with the difference to Bplasma).
 * Bplasma : result produced by PLASMA, read only.
 * LDB     : leading dimension of Bref and Bplasma.
 *
 * Prints the norms and the scaled residual
 *     Rnorm / ((Anorm + Blapacknorm) * max(M,N) * eps)
 * and returns 0 when that residual is finite and at most 10, 1 otherwise
 * (including when the workspace cannot be allocated).
 */
static int check_solution(PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum trans, PLASMA_enum diag,
                          int M, int N, double alpha,
                          double *A, int LDA,
                          double *Bref, double *Bplasma, int LDB)
{
    int info_solution;
    double Anorm, Binitnorm, Bplasmanorm, Blapacknorm, Rnorm, result;
    double eps;
    double mzone = (double)-1.0;
    /* A is square: M x M when applied from the left, N x N from the right. */
    int Aorder = (side == PlasmaLeft) ? M : N;

    /* Workspace passed to the norm routines; it must not be NULL. */
    double *work = (double *)malloc(max(M, N)* sizeof(double));
    if (work == NULL) {
        printf("Out of Memory \n ");
        return 1;
    }

    Anorm       = LAPACKE_dlantr_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), lapack_const(uplo), lapack_const(diag),
                                Aorder, Aorder, A, LDA, work);
    Binitnorm   = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref,    LDB, work);
    Bplasmanorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bplasma, LDB, work);

    /* Reference result, computed in place in Bref. */
    cblas_dtrmm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
                (CBLAS_DIAG)diag, M, N, (alpha), A, LDA, Bref, LDB);

    Blapacknorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work);

    /* Bref <- Bref - Bplasma over the whole LDB x N storage. */
    cblas_daxpy(LDB * N, (mzone), Bplasma, 1, Bref, 1);

    Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work);

    eps = LAPACKE_dlamch_work('e');

    printf("Rnorm %e, Anorm %e, Binitnorm %e, Bplasmanorm %e, Blapacknorm %e\n",
           Rnorm, Anorm, Binitnorm, Bplasmanorm, Blapacknorm);

    result = Rnorm / ((Anorm + Blapacknorm) * max(M,N) * eps);

    printf("============\n");
    printf("Checking the norm of the difference against reference DTRMM \n");
    printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo).N.eps) = %e \n", result);

    if ( isinf(Blapacknorm) || isinf(Bplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
        printf("-- The solution is suspicious ! \n");
        info_solution = 1;
    }
    else {
        printf("-- The solution is CORRECT ! \n");
        info_solution= 0 ;
    }
    free(work);

    return info_solution;
}
示例#27
0
/* Fill A1 and B1 with random values and copy their N x N and N x NRHS
 * leading parts into A2 and B2. */
static void zgesv_incpiv_generate(int N, int NRHS,
                                  PLASMA_Complex64_t *A1, PLASMA_Complex64_t *A2, int LDA,
                                  PLASMA_Complex64_t *B1, PLASMA_Complex64_t *B2, int LDB)
{
    int i, j;

    LAPACKE_zlarnv_work(IONE, ISEED, LDA*N, A1);
    for (j = 0; j < N; j++)
        for (i = 0; i < N; i++)
            A2[LDA*j+i] = A1[LDA*j+i];

    LAPACKE_zlarnv_work(IONE, ISEED, LDB*NRHS, B1);
    for (j = 0; j < NRHS; j++)
        for (i = 0; i < N; i++)
            B2[LDB*j+i] = B1[LDB*j+i];
}

/* Print the header shown before each check; `title` is the full title line. */
static void zgesv_incpiv_banner(const char *title, int N, double eps)
{
    printf("\n");
    printf("%s\n", title);
    printf("            Size of the Matrix %d by %d\n", N, N);
    printf("\n");
    printf(" The matrix A is randomly generated for each test.\n");
    printf("============\n");
    printf(" The relative machine precision (eps) is to be %e \n", eps);
    printf(" Computational tests pass if scaled residuals are less than 60.\n");
}

/*
 * Test driver for the PLASMA incremental-pivoting linear solvers.
 *
 * Arguments (argc must be 4):
 *   argv[0] = N, argv[1] = LDA, argv[2] = NRHS, argv[3] = LDB.
 *
 * Three call sequences are run on freshly generated random data, and each
 * result is passed to check_solution():
 *   1. PLASMA_zgesv_incpiv
 *   2. PLASMA_zgetrf_incpiv + PLASMA_zgetrs_incpiv
 *   3. PLASMA_zgetrf_incpiv + PLASMA_ztrsmpl + PLASMA_ztrsm
 *
 * Returns -1 on a wrong argument count, -2 if memory or the PLASMA
 * workspace cannot be allocated, and 0 otherwise (test failures are only
 * reported on stdout).
 */
int testing_zgesv_incpiv(int argc, char **argv)
{
    /* Check for valid arguments*/
    if (argc != 4){
        USAGE("GESV_INCPIV", "N LDA NRHS LDB",
              "   - N    : the size of the matrix\n"
              "   - LDA  : leading dimension of the matrix A\n"
              "   - NRHS : number of RHS\n"
              "   - LDB  : leading dimension of the matrix B\n");
        return -1;
    }

    int N     = atoi(argv[0]);
    int LDA   = atoi(argv[1]);
    int NRHS  = atoi(argv[2]);
    int LDB   = atoi(argv[3]);
    double eps;
    int info_solution;

    PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A1));
    PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A2));
    PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B1));
    PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B2));
    PLASMA_Complex64_t *L = NULL;
    int *IPIV = NULL;

    /* Check if unable to allocate memory; free whatever was obtained so a
     * partial failure does not leak (free(NULL) is a no-op). */
    if ( (!A1) || (!A2)|| (!B1) || (!B2) ) {
        printf("Out of Memory \n ");
        free(A1); free(A2); free(B1); free(B2);
        return -2;
    }

    eps = BLAS_dfpinfo(blas_eps);

    /*----------------------------------------------------------
    *  TESTING ZGESV
    */

    zgesv_incpiv_generate(N, NRHS, A1, A2, LDA, B1, B2, LDB);

    /* PLASMA ZGESV.  L and IPIV are reused by the two later tests, so a
     * failed workspace allocation must stop the run here. */
    if (PLASMA_Alloc_Workspace_zgesv_incpiv(N, &L, &IPIV) != 0) {
        printf("Out of Memory \n ");
        free(A1); free(A2); free(B1); free(B2);
        return -2;
    }
    PLASMA_zgesv_incpiv(N, NRHS, A2, LDA, L, IPIV, B2, LDB);

    zgesv_incpiv_banner("------ TESTS FOR PLASMA INCPIV ZGESV ROUTINE -------  ", N, eps);

    /* Check the factorization and the solution */
    info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps);

    if ((info_solution == 0)){
        printf("***************************************************\n");
        printf(" ---- TESTING INCPIV ZGESV ............... PASSED !\n");
        printf("***************************************************\n");
    }
    else{
        printf("************************************************\n");
        printf(" - TESTING INCPIV ZGESV ... FAILED !\n");
        printf("************************************************\n");
    }

    /*-------------------------------------------------------------
    *  TESTING ZGETRF + ZGETRS
    */

    zgesv_incpiv_generate(N, NRHS, A1, A2, LDA, B1, B2, LDB);

    /* Plasma routines */
    PLASMA_zgetrf_incpiv(N, N, A2, LDA, L, IPIV);
    PLASMA_zgetrs_incpiv(PlasmaNoTrans, N, NRHS, A2, LDA, L, IPIV, B2, LDB);

    zgesv_incpiv_banner("------ TESTS FOR PLASMA ZGETRF + ZGETRS ROUTINE -------  ", N, eps);

    /* Check the solution */
    info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps);

    if ((info_solution == 0)){
        printf("***************************************************\n");
        printf(" ---- TESTING INCPIV ZGETRF + ZGETRS ..... PASSED !\n");
        printf("***************************************************\n");
    }
    else{
        printf("***************************************************\n");
        printf(" - TESTING INCPIV ZGETRF + ZGETRS ... FAILED !\n");
        printf("***************************************************\n");
    }

    /*-------------------------------------------------------------
    *  TESTING ZGETRF + ZTRSMPL + ZTRSM
    */

    zgesv_incpiv_generate(N, NRHS, A1, A2, LDA, B1, B2, LDB);

    /* PLASMA routines */
    PLASMA_zgetrf_incpiv(N, N, A2, LDA, L, IPIV);
    PLASMA_ztrsmpl(N, NRHS, A2, LDA, L, IPIV, B2, LDB);
    PLASMA_ztrsm(PlasmaLeft, PlasmaUpper, PlasmaNoTrans, PlasmaNonUnit,
                 N, NRHS, 1.0, A2, LDA, B2, LDB);

    zgesv_incpiv_banner("------ TESTS FOR PLASMA INCPIV ZGETRF + ZTRSMPL + ZTRSM  ROUTINE -------  ", N, eps);

    /* Check the solution */
    info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps);

    if ((info_solution == 0)){
        printf("***************************************************\n");
        printf(" ---- TESTING INCPIV ZGETRF + ZTRSMPL + ZTRSM ... PASSED !\n");
        printf("***************************************************\n");
    }
    else{
        printf("**************************************************\n");
        printf(" - TESTING INCPIV ZGETRF + ZTRSMPL + ZTRSM ... FAILED !\n");
        printf("**************************************************\n");
    }

    free(A1); free(A2); free(B1); free(B2); free(IPIV); free(L);

    return 0;
}
示例#28
0
/*
 * Entry point of the NuMVC local-search solver.
 *
 * Command line: <instance file> <seed> <cutoff time>
 *
 * Builds the instance, seeds the random generator, constructs an initial
 * solution with init_sol(), runs cover_LS() and, if check_solution()
 * accepts the result, prints the best cover size, step count and time.
 *
 * Returns -1 when the arguments are missing or malformed or the instance
 * cannot be opened, 0 otherwise.
 */
int main(int argc, char* argv[])
{
	int seed;

	// argv[1..3] are all read below, so refuse to run without them.
	if(argc<4){
		cout<<"usage: <instance file> <seed> <cutoff time>"<<endl;
		return -1;
	}

	if(build_instance(argv[1])!=1){
		cout<<"can't open instance file"<<endl;
		return -1;
	}

	// optimal_size is left at 0 so that the search only stops when the
	// cutoff time is reached.
	optimal_size=0;

	// A failed conversion would leave seed / cutoff_time unset.
	if(sscanf(argv[2],"%d",&seed)!=1 || sscanf(argv[3],"%d",&cutoff_time)!=1){
		cout<<"seed and cutoff time must be integers"<<endl;
		free_memory();
		return -1;
	}

	srand(seed);

	times(&start);
	start_time = start.tms_utime + start.tms_stime;

	init_sol();

#ifdef individual_analysis_on_init_sls_mode
	// CPU time spent building the initial solution, rounded to 1/100 s.
	times(&finish);
	init_time = double(finish.tms_utime - start.tms_utime + finish.tms_stime - start.tms_stime) / sysconf(_SC_CLK_TCK);
	init_time = round(init_time * 100)/100.0;
#endif

	cover_LS();

#ifdef individual_analysis_on_init_sls_mode
	// CPU time of the local search alone and the resulting step rate.
	times(&finish);
	sls_time = double(finish.tms_utime - start.tms_utime + finish.tms_stime - start.tms_stime) / sysconf(_SC_CLK_TCK) - init_time;
	sls_step_speed_per_ms = double(step) * 0.001 / sls_time;
#endif

	//check solution
	if(check_solution()==1)
	{
		cout << "o " << best_c_size << endl;
		cout << "c searchSteps " << best_step << endl;
		cout << "c solveTime " << best_comp_time << endl;
#ifdef 	individual_analysis_on_init_sls_mode
		cout<<"c stepSpeed(/ms) "<< sls_step_speed_per_ms << endl;
#endif
	}
	else
	{
		cout<<"the solution is wrong."<<endl;
	}

	free_memory();

	return 0;
}
示例#29
0
/*
 * PLASMA CTRSM example.
 *
 * Builds a 10 x 10 matrix A with claghe(), adds N to its diagonal, and a
 * random right-hand side B with 5 columns.  A is factored with
 * PLASMA_cpotrf (lower), the system is solved with two PLASMA_ctrsm calls
 * (NoTrans, then ConjTrans), and the result is passed to check_solution().
 *
 * The outcome is reported on stdout; the process always exits with 0.
 */
int main ()
{

    int cores = 2;
    int N     = 10 ;
    int LDA   = 10 ;
    int NRHS  = 5 ;
    int LDB   = 10 ;
    int info;
    int failed = 0;   /* set as soon as any routine returns a non-zero status */
    int info_solution;
    int i,j;
    int NminusOne = N-1;
    int LDBxNRHS = LDB*NRHS;

    PLASMA_Complex32_t *A1   = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t));
    PLASMA_Complex32_t *A2   = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t));
    PLASMA_Complex32_t *B1   = (PLASMA_Complex32_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex32_t));
    PLASMA_Complex32_t *B2   = (PLASMA_Complex32_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex32_t));
    PLASMA_Complex32_t *WORK = (PLASMA_Complex32_t *)malloc(2*LDA*sizeof(PLASMA_Complex32_t));
    float *D                = (float *)malloc(LDA*sizeof(float));

    /* Check if unable to allocate memory (WORK and D are used by the
     * matrix generation below, so they are checked as well). */
    if ((!A1)||(!A2)||(!B1)||(!B2)||(!WORK)||(!D)) {
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Plasma Initialize */
    PLASMA_Init(cores);
    printf("-- PLASMA is initialized to run on %d cores. \n",cores);

    /* Initialize A1 with claghe() from random diagonal values D, then add N
     * to each diagonal entry. */
    LAPACKE_slarnv_work(IONE, ISEED, LDA, D);
    claghe(&N, &NminusOne, D, A1, &LDA, ISEED, WORK, &info);
    if (info != 0)
        failed = 1;

    for ( i = 0; i < N; i++)
        A1[LDA*i+i] = A1[LDA*i+i]+ (PLASMA_Complex32_t)N ;

    /* A2 is the working copy that PLASMA overwrites. */
    for ( i = 0; i < N; i++)
        for (  j = 0; j < N; j++)
            A2[LDA*j+i] = A1[LDA*j+i];

    /* Initialize B1 and B2 */
    LAPACKE_clarnv_work(IONE, ISEED, LDBxNRHS, B1);
    for ( i = 0; i < N; i++)
        for ( j = 0; j < NRHS; j++)
            B2[LDB*j+i] = B1[LDB*j+i];

    /* PLASMA routines.  Each status is recorded separately: reusing one
     * variable would let a later success hide an earlier failure. */
    info = PLASMA_cpotrf(PlasmaLower, N, A2, LDA);
    if (info != 0)
        failed = 1;

    info = PLASMA_ctrsm(PlasmaLeft, PlasmaLower, PlasmaNoTrans, PlasmaNonUnit,
                        N, NRHS, (PLASMA_Complex32_t)1.0, A2, LDA, B2, LDB);
    if (info != 0)
        failed = 1;

    info = PLASMA_ctrsm(PlasmaLeft, PlasmaLower, PlasmaConjTrans, PlasmaNonUnit,
                        N, NRHS, (PLASMA_Complex32_t)1.0, A2, LDA, B2, LDB);
    if (info != 0)
        failed = 1;

    /* Check the solution */
    info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB);

    if ((info_solution != 0) || failed)
        printf("-- Error in CTRSM example ! \n");
    else
        printf("-- Run of CTRSM example successful ! \n");

    free(A1);
    free(A2);
    free(B1);
    free(B2);
    free(WORK);
    free(D);

    PLASMA_Finalize();

    exit(0);
}
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing dsyevdx_2stage

   Test driver: for every matrix size in opts.nsize and every iteration it
   generates a random symmetric N x N matrix, runs magma_dsyevdx_2stage
   (or magma_dsyevdx_2stage_m when opts.ngpu != 1), prints N, the value
   returned in m1 and the elapsed time, and, when opts.check is set,
   verifies the result.  Returns the number of failed checks.
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t gpu_time;

    double *h_A, *h_R, *h_work;

    #if defined(PRECISION_z) || defined(PRECISION_c)
    double *rwork;
    magma_int_t lrwork;
    #endif

    /* Matrix size */
    double *w1, *w2;
    magma_int_t *iwork;
    magma_int_t N, n2, info, lwork, liwork;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};;
    magma_int_t info_ortho     = 0;
    magma_int_t info_solution  = 0;
    magma_int_t info_reduction = 0;
    /* Incremented once per failed check; used as the process exit code. */
    magma_int_t status = 0;

    magma_opts opts;
    parse_opts( argc, argv, &opts );

    /* Any fraction other than 1 switches to index-range selection. */
    magma_range_t range = MagmaRangeAll;
    if (opts.fraction != 1)
        range = MagmaRangeI;

    /* The checks below are only run with jobz == MagmaVec, so force it. */
    if ( opts.check && opts.jobz == MagmaNoVec ) {
        fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" );
        opts.jobz = MagmaVec;
    }

    printf("using: itype = %d, jobz = %s, range = %s, uplo = %s, check = %d, fraction = %6.4f\n",
           (int) opts.itype, lapack_vec_const(opts.jobz), lapack_range_const(range), lapack_uplo_const(opts.uplo),
           (int) opts.check, opts.fraction);

    printf("    N     M  GPU Time (sec)  ||I-Q'Q||/.  ||A-QDQ'||/.  ||D-D_magma||/.\n");
    printf("=======================================================================\n");
    magma_int_t threads = magma_get_parallel_numthreads();
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            n2     = N*N;
            /* Workspace sizes passed to both the MAGMA and the LAPACK call. */
            #if defined(PRECISION_z) || defined(PRECISION_c)
            lwork  = magma_dbulge_get_lq2(N, threads) + 2*N + N*N;
            lrwork = 1 + 5*N +2*N*N;
            #else
            lwork  = magma_dbulge_get_lq2(N, threads) + 1 + 6*N + 2*N*N;
            #endif
            liwork = 3 + 5*N;

            /* Allocate host memory for the matrix */
            TESTING_MALLOC_CPU( h_A,   double, n2 );
            TESTING_MALLOC_CPU( w1,    double, N );
            TESTING_MALLOC_CPU( w2,    double, N );
            TESTING_MALLOC_CPU( iwork, magma_int_t, liwork );
            
            TESTING_MALLOC_PIN( h_R,    double, n2    );
            TESTING_MALLOC_PIN( h_work, double, lwork );
            #if defined(PRECISION_z) || defined(PRECISION_c)
            TESTING_MALLOC_PIN( rwork, double, lrwork );
            #endif

            /* Initialize the matrix */
            lapackf77_dlarnv( &ione, ISEED, &n2, h_A );
            magma_dmake_symmetric( N, h_A, N );

            /* Selection parameters: vl/vu stay 0; il/iu are only set for
               MagmaRangeI, where indices 1 .. fraction*N are requested. */
            magma_int_t m1 = 0;
            double vl = 0;
            double vu = 0;
            magma_int_t il = 0;
            magma_int_t iu = 0;
            if (range == MagmaRangeI) {
                il = 1;
                iu = (int) (opts.fraction*N);
            }

            if (opts.warmup) {
                // ==================================================================
                // Warmup using MAGMA
                // (same call as the timed run below, on a fresh copy of h_A)
                // ==================================================================
                lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
                if (opts.ngpu == 1) {
                    //printf("calling dsyevdx_2stage 1 GPU\n");
                    magma_dsyevdx_2stage(opts.jobz, range, opts.uplo, N, 
                                    h_R, N, 
                                    vl, vu, il, iu, 
                                    &m1, w1, 
                                    h_work, lwork, 
                                    #if defined(PRECISION_z) || defined(PRECISION_c)
                                    rwork, lrwork, 
                                    #endif
                                    iwork, liwork, 
                                    &info);
                } else {
                    //printf("calling dsyevdx_2stage_m %d GPU\n", (int) opts.ngpu);
                    magma_dsyevdx_2stage_m(opts.ngpu, opts.jobz, range, opts.uplo, N, 
                                    h_R, N, 
                                    vl, vu, il, iu, 
                                    &m1, w1, 
                                    h_work, lwork, 
                                    #if defined(PRECISION_z) || defined(PRECISION_c)
                                    rwork, lrwork, 
                                    #endif
                                    iwork, liwork, 
                                    &info);
                }
            }


            // ===================================================================
            // Performs operation using MAGMA
            // (h_A is kept intact for the checks; the solver works on h_R)
            // ===================================================================
            lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
            gpu_time = magma_wtime();
            if (opts.ngpu == 1) {
                //printf("calling dsyevdx_2stage 1 GPU\n");
                magma_dsyevdx_2stage(opts.jobz, range, opts.uplo, N, 
                                h_R, N, 
                                vl, vu, il, iu, 
                                &m1, w1, 
                                h_work, lwork, 
                                #if defined(PRECISION_z) || defined(PRECISION_c)
                                rwork, lrwork, 
                                #endif
                                iwork, liwork, 
                                &info);
           
            } else {
                //printf("calling dsyevdx_2stage_m %d GPU\n", (int) opts.ngpu);
                magma_dsyevdx_2stage_m(opts.ngpu, opts.jobz, range, opts.uplo, N, 
                                h_R, N, 
                                vl, vu, il, iu, 
                                &m1, w1, 
                                h_work, lwork, 
                                #if defined(PRECISION_z) || defined(PRECISION_c)
                                rwork, lrwork, 
                                #endif
                                iwork, liwork, 
                                &info);
            }
            gpu_time = magma_wtime() - gpu_time;
            // NOTE(review): info is written by the solver calls but never
            // inspected in this function, so a solver error is not reported.
            
            printf("%5d %5d  %7.2f      ",
                   (int) N, (int) m1, gpu_time );

            if ( opts.check ) {
                double eps   = lapackf77_dlamch("E");
                //printf("\n");
                //printf("------ TESTS FOR MAGMA DSYEVD ROUTINE -------  \n");
                //printf("        Size of the Matrix %d by %d\n", (int) N, (int) N);
                //printf("\n");
                //printf(" The matrix A is randomly generated for each test.\n");
                //printf("============\n");
                //printf(" The relative machine precision (eps) is %8.2e\n",eps);
                //printf(" Computational tests pass if scaled residuals are less than 60.\n");
              
                /* Check the orthogonality, reduction and the eigen solutions */
                if (opts.jobz == MagmaVec) {
                    info_ortho = check_orthogonality(N, N, h_R, N, eps);
                    info_reduction = check_reduction(opts.uplo, N, 1, h_A, w1, N, h_R, eps);
                }
                //printf("------ CALLING LAPACK DSYEVD TO COMPUTE only eigenvalue and verify elementswise -------  \n");
                // LAPACK reference run on h_A (jobz "N"); results go to w2.
                // NOTE(review): this overwrites h_A and reuses h_work/iwork.
                lapackf77_dsyevd("N", "L", &N, 
                                h_A, &N, w2, 
                                h_work, &lwork, 
                                #if defined(PRECISION_z) || defined(PRECISION_c)
                                rwork, &lrwork, 
                                #endif
                                iwork, &liwork, 
                                &info);
                // NOTE(review): all N entries of w1 are passed here even when
                // only a sub-range was requested (range == MagmaRangeI) —
                // confirm that check_solution handles this as intended.
                info_solution = check_solution(N, w2, w1, eps);
              
                if ( (info_solution == 0) && (info_ortho == 0) && (info_reduction == 0) ) {
                    printf("  ok\n");
                    //printf("***************************************************\n");
                    //printf(" ---- TESTING DSYEVD ...................... PASSED !\n");
                    //printf("***************************************************\n");
                }
                else {
                    printf("  failed\n");
                    status += 1;
                    //printf("************************************************\n");
                    //printf(" - TESTING DSYEVD ... FAILED !\n");
                    //printf("************************************************\n");
                }
            }

            /* Buffers are allocated per iteration, so release them here. */
            TESTING_FREE_CPU( h_A   );
            TESTING_FREE_CPU( w1    );
            TESTING_FREE_CPU( w2    );
            TESTING_FREE_CPU( iwork );
            
            TESTING_FREE_PIN( h_R    );
            TESTING_FREE_PIN( h_work );
            #if defined(PRECISION_z) || defined(PRECISION_c)
            TESTING_FREE_PIN( rwork  );
            #endif
            fflush( stdout );
        }
        /* Blank line between sizes when each size was run several times. */
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    /* Shutdown */
    TESTING_FINALIZE();
    return status;
}