Пример #1
0
int least_squares(int m, int n, double **matrix)
{
	if(m < 1 || n < 1 || n > m) return LS_DIMENSION_ERROR;

	int i, j;

	int info, lwork = m*m;
	double *work; if(!allocate_double_vector(&work, lwork)) { return LS_MEMORY_ERROR; }
	char transa = 'N';

	double **a_matrix; if(!allocate_double_matrix(&a_matrix, n, m)) { return LS_MEMORY_ERROR; }
	for(i = 0; i < m; i ++) for(j = 0; j < n; j ++) a_matrix[j][i] = matrix[j][i];

	double **b_matrix; if(!allocate_double_matrix(&b_matrix, m, m)) { return LS_MEMORY_ERROR; }
	for(i = 0; i < m; i ++) for(j = 0; j < m; j ++) b_matrix[i][j] = (i == j);

	dgels_(&transa, &m, &n, &m, a_matrix[0], &m, b_matrix[0], &m, work, &lwork, &info);

	for(i = 0; i < n; i ++) for(j = 0; j < m; j ++) matrix[i][j] = b_matrix[j][i];

	free_matrix((void**)a_matrix);
	free_matrix((void**)b_matrix);
	free_vector(work);
	return LS_SUCCESS;
}
Пример #2
0
double frobenius_check(double *known, double *computed, int m, int n, int id, int np) {
	int l_num_elements = m*n / np;

    if (computed == NULL) {
        return euclidean_norm(known, m*n, id, np);
    }

	double *difference = allocate_double_vector(l_num_elements);

	for (int i = 0; i < l_num_elements; i++) {
		difference[i] = computed[i] - known[i];
	}
    double e_norm = euclidean_norm(difference, m*n, id, np);
    free(difference);

    return e_norm;
}
Пример #3
0
void parallel_blas3_product(double *A, double *B, double *C, int m, int k, int n, int id, int np) {
    if (k % np != 0) {
        if (id == 0)
            fprintf(stderr, "k is not divisible by np.\n");

        MPI_Abort(MPI_COMM_WORLD, 1);
        
    }
    MPI_Status status;
    int l_k = k / np;
    double *l_A = allocate_double_vector(l_k * m);
    double *l_B = allocate_double_vector(l_k * k);
    MPI_Datatype block_col_t;
    MPI_Datatype block_row_t;
    
    // for blocks in B = k x n
    MPI_Type_vector(
        n,          // count = number of blocks, i.e. length of column * l_k(num rows)
        l_k,              // blocklen = number of things in each block
        k,              // stride = difference between start of blocks
        MPI_DOUBLE,     // old datatype
        &block_row_t    // new datatype
        );
    MPI_Type_commit(&block_row_t);

    // for column of A= m x k
    MPI_Type_contiguous(
        m * l_k,        // count = number of items
        MPI_DOUBLE,     // old_type = type of items
        &block_col_t    // new_mpi_type = the new datatype
        );
    MPI_Type_commit(&block_col_t);

    if (id == 0) {
        // copy correct elements from A to l_A
        memcpy(l_A, A, sizeof(double) * l_k * m);
        for (int i = 1; i < np; ++i) {
            MPI_Send(&(A[0 + m*(i*l_k)]), 1, block_col_t, i, 0, MPI_COMM_WORLD);
        }
    }
    else {
        MPI_Recv(l_A, (m*l_k), MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
    }


    if (id == 0) {
        // copy numbers from B to l_B
        
        for (int col = 0; col < n; ++col) {
            for (int row = 0; row < l_k; ++row) {
                l_B[row + l_k*col] = B[row + k*col];
            }
        }

        for (int i = 1; i < np; ++i) {
            MPI_Send(&(B[i*l_k]), 1, block_row_t, i, 0, MPI_COMM_WORLD);
        }
    }
    else {
        MPI_Recv(l_B, (l_k*n), MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
    }

    /*
    //debugging only
    for (int i = 0; i < l_k*n; ++i) {
        printf("[%i]: Row l_B[%i]=%f\n", id, i, l_B[i]);
    }

    for (int i = 0; i < l_k*m; ++i) {
        printf("[%i]: Col l_A[%i]=%f\n", id, i, l_A[i]);
    }
    */




    // C only matters on process 0 and should be allocated outside this function
    double *local_C = allocate_double_vector(m*n);
    cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, l_k, 1, l_A, m, l_B, l_k, 0, local_C, m);
    MPI_Reduce(local_C, C, m*n, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    free(local_C);
    free(l_A);
    free(l_B);
}
Пример #4
0
int main(int argc, char **argv) {
    int i, j, id, np, processor_name_len;
    int maxit, idleft, idright, flag, iter;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int dimensions, N, l_N, n, l_n;
    double *l_r, *l_u, *l_p, *l_q;
    double *x, *y, *gl, *gr;
    double h, tol, relres;

    MPI_Init(&argc, &argv);

    /* Check processes: */
    MPI_Comm_size(MPI_COMM_WORLD, &np);
    MPI_Comm_rank(MPI_COMM_WORLD, &id);
    MPI_Get_processor_name(processor_name, &processor_name_len);

    /* process command-line inputs: */
    if (argc != 3)
    {
        if (id == 0)
        {
            printf("Usage: \"./poisson N dim\" \n");
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    N = atoi(argv[1]);
    dimensions = atoi(argv[2]);

    if (dimensions == 2) {
        n = N*N;
    }
    else if (dimensions == 3) {
        n = N*N*N;
    }
    else {
        printf("Error: dimensions must equal 2 or 3");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* number of processes np must divide n: */
    if ((n % np) != 0)
    {
        if (id == 0)
        {
            printf("Error: np must divide n!\n");
            printf("  n = %d, np = %d, n%%np = %d\n", n, np, (n%np));
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* calculate size of local blocks: */
    l_N = N / np;
    l_n = n / np;

    if (id == 0)
    {
        printf("n = %d, np = %d, l_n = %d\n", n, np, l_n);
        printf("\n");
        fflush(stdout);
    }
    /*
    vectors l_u, l_r, l_p, l_q should be of
    length l_n and gl and gr should be of length l_n / l_N

    */
    x = allocate_double_vector(N);          /* x is a N length vector*/
    y = allocate_double_vector(N);          /* y is a N length vector*/
    gl = allocate_double_vector(l_n / l_N); 
    gr = allocate_double_vector(l_n / l_N);
    
    l_u = allocate_double_vector(l_n);                          
    l_r = allocate_double_vector(l_n);
    l_p = allocate_double_vector(l_n);
    l_q = allocate_double_vector(l_n);

    double start_time, end_time;

    /*Beginning of cg method: follows matlab code*/
    h = 1.0 / (N + 1.0);
    for (i = 1; i <= N; i++) {
        x[i - 1] = i*h;
        y[i - 1] = i*h;
    }

    /*Setup B*/
    setupB(l_r, x, y, l_N, N, h, id);

    tol = 1.0e-6;
    maxit = 99999;

    if (id>0) {
        idleft = id - 1;
    }
    else {
        idleft = MPI_PROC_NULL;
    }

    if (id<np - 1) {
        idright = id + 1;
    }
    else {
        idright = MPI_PROC_NULL;
    }

    MPI_Barrier(MPI_COMM_WORLD);
    start_time = MPI_Wtime();
    

    cg(l_u, &flag, &relres, &iter,  /*output*/
        l_r, tol, maxit,            /*input*/
        l_p, l_q, l_n, l_N, N, id, idleft, idright, np, MPI_COMM_WORLD, gl, gr);

    MPI_Barrier(MPI_COMM_WORLD);
    end_time = MPI_Wtime();

    double l_diff_norm = fd_norm(l_u, x, y, l_N, N, h, id);
    double diff_norm;
    MPI_Reduce(&l_diff_norm, &diff_norm, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    
    /*
    double *full;
    if (id == 0) full = allocate_double_vector(n);
    MPI_Gather(l_u, l_n, MPI_DOUBLE, full, l_n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    if (id == 0) {
        for (int qrx = 0; qrx < n; qrx++)
            printf("u[%i]=%f\n", qrx, full[qrx]);
        free_vector(full);
    }
    */

    if (id == 0) {
        printf("%15s %15s %22s %15s %22s\n", "N", "DOF", "relres", "iter", "time");
        printf("%15d %15.f %22.16e %15d %22.16e\n", N, (double)N*N, relres, iter, (end_time - start_time));
        printf("||u-u_h||=%22.16e\n", diff_norm);
        /*
        printf("N:            %d\n", N);
        printf("DOF:          %d\n", N*(double)N);
        printf("relres:       %22.16e\n", relres);
        printf("iter:         %d\n", iter);
        printf("elapsed time: %22.16e\n", (end_time - start_time));
        */
    }


    free_vector(x);
    free_vector(y);
    free_vector(l_r);
    free_vector(gl);
    free_vector(gr);
    free_vector(l_u);
    free_vector(l_p);
    free_vector(l_q);
    MPI_Finalize();

    return 0;
}
Пример #5
0
int constrained_least_squares(int m, int n, double **matrix, int c, int *constrained)
{
	//check problem dimensions
	if(m < 1 || n < 1 || n > m || c > n) return LS_DIMENSION_ERROR;

	//counters
	int i, j;

	//extra problem dimensions
	int f = m - c, u = n - c;

	//lapack and blas inputs
	char transa, transb;
	double alpha, beta;

	//lapack output
	int info;

	//lapack workspace
	int lwork = m*m;
	double *work; if(!allocate_double_vector(&work, lwork)) { return LS_MEMORY_ERROR; }

	//lapack LU pivot indices
	int *ipiv; if(!allocate_integer_vector(&ipiv,c)) { return LS_MEMORY_ERROR; }

	//lapack coefficients of QR elementary reflectors
	double *tau; if(!allocate_double_vector(&tau,c)) { return LS_MEMORY_ERROR; }

	//matrices used
	double **t_matrix; if(!allocate_double_matrix(&t_matrix, m, m)) { return LS_MEMORY_ERROR; }
	double **c_matrix; if(!allocate_double_matrix(&c_matrix, n, n)) { return LS_MEMORY_ERROR; }
	double **r_matrix; if(!allocate_double_matrix(&r_matrix, c, c)) { return LS_MEMORY_ERROR; }
	double **a_matrix; if(!allocate_double_matrix(&a_matrix, n, f)) { return LS_MEMORY_ERROR; }
	double **d_matrix; if(!allocate_double_matrix(&d_matrix, f, f)) { return LS_MEMORY_ERROR; }

	//indices of unconstrained equations
	int *temp, *unconstrained;
	if(!allocate_integer_vector(&temp,m)) { return LS_MEMORY_ERROR; }
	if(!allocate_integer_vector(&unconstrained,f)) { return LS_MEMORY_ERROR; }

	//create vector of unconstrained indices
	for(i = 0; i < m; i ++) temp[i] = 0;
	for(i = 0; i < c; i ++) temp[constrained[i]] = 1;
	j = 0;
	for(i = 0; i < m; i ++) if(!temp[i]) unconstrained[j++] = i;

	//copy unconstrained equations from input matrix -> t_matrix
	for(i = 0; i < f; i ++) for(j = 0; j < n; j ++) t_matrix[i][j] = matrix[j][unconstrained[i]];

	//copy constrained equations from input matrix -> c_matrix
	for(i = 0; i < c; i ++) for(j = 0; j < n; j ++) c_matrix[i][j] = matrix[j][constrained[i]];

	//QR decomposition of the transposed constrained equations -> c_matrix
	dgeqrf_(&n, &c, c_matrix[0], &n, tau, work, &lwork, &info);

	//copy R out of the above QR decomposition -> r_matrix
	for(i = 0; i < c; i ++) for(j = 0; j < c; j ++) r_matrix[i][j] = ((j >= i) ? c_matrix[j][i] : 0);

	//form the square matrix Q from the above QR decomposition -> c_matrix'
	dorgqr_(&n, &n, &c, c_matrix[0], &n, tau, work, &lwork, &info);

	//multiply unconstrained eqations by Q -> a_matrix'
	transa = 'T'; transb = 'N'; alpha = 1.0; beta = 0.0;
	dgemm_(&transa, &transb, &f, &n, &n, &alpha, t_matrix[0], &m, c_matrix[0], &n, &beta, a_matrix[0], &f);

	//invert R' of the above QR decomposition -> r_matrix
	dgetrf_(&c, &c, r_matrix[0], &c, ipiv, &info);
	dgetri_(&c, r_matrix[0], &c, ipiv, work, &lwork, &info);

	//LS inversion of the non-square parts from unconstrained * Q -> d_matrix'
	for(i = 0; i < f; i ++) for(j = 0; j < u; j ++) t_matrix[j][i] = a_matrix[j+c][i];
	for(i = 0; i < f; i ++) for(j = 0; j < f; j ++) d_matrix[i][j] = (i == j);
	transa = 'N';
	dgels_(&transa, &f, &u, &f, t_matrix[0], &m, d_matrix[0], &f, work, &lwork, &info);

	//multiply matrices together to form the CLS solution -> t_matrix'
	transa = transb = 'N'; alpha = 1.0; beta = 0.0;
	dgemm_(&transa, &transb, &n, &f, &u, &alpha, c_matrix[c], &n, d_matrix[0], &f, &beta, t_matrix[0], &m);

	alpha = -1.0; beta = 1.0;
	dgemm_(&transa, &transb, &n, &c, &f, &alpha, t_matrix[0], &m, a_matrix[0], &f, &beta, c_matrix[0], &n);

	alpha = 1.0; beta = 0.0;
	dgemm_(&transa, &transb, &n, &c, &c, &alpha, c_matrix[0], &n, r_matrix[0], &c, &beta, t_matrix[f], &m);

	//copy the result out of the temporary matrix -> matrix
	for(i = 0; i < n; i ++) for(j = 0; j < f; j ++) matrix[i][unconstrained[j]] = t_matrix[j][i];
	for(i = 0; i < n; i ++) for(j = 0; j < c; j ++) matrix[i][constrained[j]] = t_matrix[j+f][i];

	//clean up and return successful
	free_vector(work);
	free_vector(ipiv);
	free_vector(tau);
	free_vector(temp);
	free_vector(unconstrained);
	free_matrix((void **)t_matrix);
	free_matrix((void **)c_matrix);
	free_matrix((void **)r_matrix);
	free_matrix((void **)a_matrix);
	free_matrix((void **)d_matrix);
	return LS_SUCCESS;
}
Пример #6
0
void calculate_cell_reconstruction_matrices(int n_variables, double *weight_exponent, int *maximum_order, struct FACE *face, int n_cells, struct CELL *cell, struct ZONE *zone)
{
	int c, u, i, j, k, l;

	int order, n_powers, n_stencil;

	//find the overall maximum order
	int maximum_maximum_order = 0;
	for(u = 0; u < n_variables; u ++) if(maximum_order[u] > maximum_maximum_order) maximum_maximum_order = maximum_order[u];

	//cell structure allocation
	for(c = 0; c < n_cells; c ++) exit_if_false(cell_matrix_new(n_variables, &cell[c]),"allocating cell matrices");

	//numerics values
	double **matrix, *weight;
	int n_constraints, *constraint;
	exit_if_false(allocate_double_matrix(&matrix,ORDER_TO_POWERS(maximum_maximum_order),MAX_STENCIL),"allocating matrix");
	exit_if_false(allocate_integer_vector(&constraint,MAX_STENCIL),"allocating constraints");
	exit_if_false(allocate_double_vector(&weight,MAX_STENCIL),"allocating weights");

	//stencil element properties
	int s_id, s_index;
	struct ZONE *s_zone;
	char s_location, *s_condition;
	double s_area, *s_centroid, s_weight;

	//integration
	double x[2];
	int differential[2], d;

	//CV polygon
	int n_polygon;
	double ***polygon;
	exit_if_false(allocate_double_pointer_matrix(&polygon,MAX(MAX_CELL_FACES,4),2),"allocating polygon memory");

	for(c = 0; c < n_cells; c ++)
	{
		for(u = 0; u < n_variables; u ++)
		{
			//problem size
			order = cell[c].order[u];
			n_powers = ORDER_TO_POWERS(order);
			n_stencil = cell[c].n_stencil[u];
			n_constraints = 0;

			for(i = 0; i < n_stencil; i ++)
			{
				//stencil element properties
				s_id = cell[c].stencil[u][i];
				s_index = ID_TO_INDEX(s_id);
				s_zone = &zone[ID_TO_ZONE(s_id)];
				s_location = s_zone->location;
				s_condition = s_zone->condition;

				if(s_location == 'f') {
					s_centroid = face[s_index].centroid;
					s_area = face[s_index].area;
				} else if(s_location == 'c') {
					s_centroid = cell[s_index].centroid;
					s_area = cell[s_index].area;
				} else exit_if_false(0,"recognising zone location");

				s_weight  = (s_centroid[0] - cell[c].centroid[0])*(s_centroid[0] - cell[c].centroid[0]);
				s_weight += (s_centroid[1] - cell[c].centroid[1])*(s_centroid[1] - cell[c].centroid[1]);
				s_weight  = 1.0/pow(s_weight,0.5*weight_exponent[u]);
				if(s_location == 'c' && s_index == c) s_weight = 1.0;

				weight[i] = s_weight;

				//unknown and dirichlet conditions have zero differentiation
				differential[0] = differential[1] = 0;
				//other conditions have differential determined from numbers of x and y-s in the condition string
				if(s_condition[0] != 'u' && s_condition[0] != 'd')
				{
					j = 0;
					while(s_condition[j] != '\0')
					{
						differential[0] += (s_condition[j] == 'x');
						differential[1] += (s_condition[j] == 'y');
						j ++;
					}
				}

				//index for the determined differential
				d = differential_index[differential[0]][differential[1]];

				//unknowns
				if(s_condition[0] == 'u')
				{
					/*//fit unknowns to centroid points
					x[0] = s_centroid[0] - cell[c].centroid[0];
					x[1] = s_centroid[1] - cell[c].centroid[1];

					for(j = 0; j < n_powers; j ++)
					{
						matrix[j][i] = polynomial_coefficient[d][j]*
							integer_power(x[0],polynomial_power_x[d][j])*
							integer_power(x[1],polynomial_power_y[d][j])*
							s_weight;
					}*/

					//fit unknowns to CV average
					n_polygon = generate_control_volume_polygon(polygon, s_index, s_location, face, cell);

					for(j = 0; j < n_powers; j ++) matrix[j][i] = 0.0;

					for(j = 0; j <= order; j ++) 
					{
						for(k = 0; k < n_polygon; k ++)
						{
							x[0] =  0.5*polygon[k][0][0]*(1.0 - gauss_x[order][j]) +
								0.5*polygon[k][1][0]*(1.0 + gauss_x[order][j]) -
								cell[c].centroid[0];
							x[1] =  0.5*polygon[k][0][1]*(1.0 - gauss_x[order][j]) +
								0.5*polygon[k][1][1]*(1.0 + gauss_x[order][j]) -
								cell[c].centroid[1];

							for(l = 0; l < n_powers; l ++)
							{
								//[face integral of polynomial integrated wrt x] * [x normal] / [CV area]
								
								matrix[l][i] += polynomial_coefficient[d][l] *
									(1.0 / (polynomial_power_x[d][l] + 1.0)) *
									integer_power(x[0],polynomial_power_x[d][l]+1) *
									integer_power(x[1],polynomial_power_y[d][l]) *
									s_weight * gauss_w[order][j] * 0.5 *
									(polygon[k][1][1] - polygon[k][0][1]) / s_area;
							}
						}
					}
				}

				//knowns
				else
				{
					//known faces fit to face average
					if(s_location == 'f')
					{
						for(j = 0; j < n_powers; j ++) matrix[j][i] = 0.0;

						for(j = 0; j < order; j ++)
						{
							x[0] =  0.5*face[s_index].node[0]->x[0]*(1.0 - gauss_x[order-1][j]) +
								0.5*face[s_index].node[1]->x[0]*(1.0 + gauss_x[order-1][j]) - 
								cell[c].centroid[0];
							x[1] =  0.5*face[s_index].node[0]->x[1]*(1.0 - gauss_x[order-1][j]) +
								0.5*face[s_index].node[1]->x[1]*(1.0 + gauss_x[order-1][j]) -
								cell[c].centroid[1];

							for(k = 0; k < n_powers; k ++)
							{
								matrix[k][i] += polynomial_coefficient[d][k] *
									integer_power(x[0],polynomial_power_x[d][k]) *
									integer_power(x[1],polynomial_power_y[d][k]) *
									s_weight*gauss_w[order-1][j]*0.5;
							}
						}
					}

					//cells need implementing
					//if(s_location == 'c')
					//{
					//}
				}

				//constraints are the centre cell and any dirichlet boundaries
				if((s_location == 'c' && s_index == c) || s_condition[0] == 'd') constraint[n_constraints++] = i;
			}

			//solve
			if(n_constraints > 0)
				exit_if_false(constrained_least_squares(n_stencil,n_powers,matrix,n_constraints,constraint) == LS_SUCCESS, "doing CLS calculation");
			else
				exit_if_false(least_squares(n_stencil,n_powers,matrix) == LS_SUCCESS,"doing LS calculation");

			//multiply by the weights
			for(i = 0; i < n_powers; i ++) for(j = 0; j < n_stencil; j ++) matrix[i][j] *= weight[j];

			//store in the cell structure
			for(i = 0; i < n_powers; i ++) for(j = 0; j < n_stencil; j ++) cell[c].matrix[u][i][j] = matrix[i][j];
		}
	}

	//clean up
	free_matrix((void**)matrix);
	free_vector(constraint);
	free_vector(weight);
	free_matrix((void**)polygon);
}