// Builds a block-distributed MPIAIJ linear system, prints it, and constructs
// a SchurComplement over the locally owned index range.
TEST(la, inversion_matrix) {
    srand(time(NULL));

    int submat_blocks = 6;
    IS set;

    // Split the blocks among the processes in "part" form, i.e. by the
    // index of the first row owned by each process.
    int np, rank;
    MPI_Comm_size(PETSC_COMM_WORLD, &np);
    MPI_Comm_rank(PETSC_COMM_WORLD, &rank);

    double block_size = (double)submat_blocks / (double)np;
    int min_idx = (int) ( round(block_size * rank) );
    int max_idx = (int) ( round(block_size * (rank + 1)) );

    // Rows that belong to the blocks owned by lower ranks.
    int rows_before = 0;
    for (int blk = 0; blk < min_idx; ++blk) {
        rows_before += rows[blk];
    }

    // Rows that belong to the blocks owned by this rank.
    int local_rows = 0;
    for (int blk = min_idx; blk < max_idx; ++blk) {
        local_rows += rows[blk];
    }

    // Construct with the local size = number of rows on the local process.
    LinSys * lin_sys = new LinSys_MPIAIJ(local_rows + max_idx - min_idx);
    lin_sys->set_symmetric();

    // First pass preallocates the matrix, second pass fills in the values.
    lin_sys->start_allocation();
    fill_matrix( lin_sys, min_idx, max_idx );
    lin_sys->start_add_assembly();
    fill_matrix( lin_sys, min_idx, max_idx );
    lin_sys->finalize();

    MatView(lin_sys->get_matrix(), PETSC_VIEWER_STDOUT_WORLD);

    // Every process contributes its local part of the index set
    // (see schur.cc line 386).
    ISCreateStride(PETSC_COMM_WORLD, local_rows, rows_before + min_idx, 1, &set);
    ISView(set, PETSC_VIEWER_STDOUT_WORLD);

    SchurComplement schurComplement(lin_sys, set, 6);
}
void fill_matrix(string type, // select the operation to perform RBF rbf, Pol &pol, // defined the configuration of RBF + Pol T c, Vec &x, Vec &y, Vec& z, // three input vectors int ini, int fin, // from row=ini to row<fin in A Mat &A // output ) { Vec myc(x.GetSize()); myc = c; fill_matrix(rbf,pol,myc,x,y,z,ini,fin,A); }
/*
 * Reads an upper bound from standard input, allocates a table for that
 * range, fills it, runs sieve() over it and prints the result.
 *
 * Returns 1 when the bound cannot be parsed and 0 otherwise (including the
 * case where the table could not be allocated, as before).
 */
int main (void)
{
    long int *matrix;
    long int range;

    printf("Poszukiwanie liczb pierwszych w przedziale 0 - ");

    /* `range` is a long, so the conversion must be %ld; "%d" would make
       scanf store through an int* into a long object. */
    if (scanf("%ld", &range) != 1)
    {
        return 1;
    }

    matrix = create_matrix(range);  /* allocate memory */
    if (matrix == NULL)
    {
        return 0;
    }

    fill_matrix(matrix, range);     /* fill the table with consecutive numbers */
    sieve(matrix, range);
    write_out(matrix, range);

    free(matrix);
    return 0;
}
// Constructs the matrix wrapper for the given command queue.
//
// The initializer list obtains the cuSPARSE handle for `queue` and creates
// the matrix description and matrix objects, each paired with
// detail::deleter. The body then marks the description as a general matrix
// with zero-based indexing and hands the (n, m, row/col/val) input to
// fill_matrix().
//
// NOTE(review): row_begin/col_begin/val_begin look like CSR-style arrays for
// an n-by-m matrix - confirm against fill_matrix().
spmat_hyb(
        const command_queue &queue,
        int n, int m,
        const row_t *row_begin,
        const col_t *col_begin,
        const val_t *val_begin
        )
    : handle( cusparse_handle(queue) ),
      desc  ( create_description(), detail::deleter() ),
      mat   ( create_matrix(), detail::deleter() )
{
    // Both calls are checked: cuda_check() receives the cuSPARSE status.
    cuda_check( cusparseSetMatType(desc.get(), CUSPARSE_MATRIX_TYPE_GENERAL) );
    cuda_check( cusparseSetMatIndexBase(desc.get(), CUSPARSE_INDEX_BASE_ZERO) );

    fill_matrix(queue, n, m, row_begin, col_begin, val_begin);
}
// Prints a 5x5 matrix of generated integers, first in the regular layout and
// then in spiral order.
TEST(PrintMatrixSpiralTest, generalInput) {
    int dim = 5;

    // dim*dim values produced by gen_rand_int(values, 10, 50, count).
    std::vector<int> values;
    gen_rand_int(values, 10, 50, dim * dim);

    int **grid = alloc_matrix(dim);
    fill_matrix(grid, dim, values);

    std::cout << "Input matrix: " << std::endl;
    print_matrix(grid, dim, cout, 1, 3);

    std::cout << "Print matrix in spiral order: " << std::endl;
    print_matrix_spiral(grid, dim);
    std::cout << std::endl;

    free_matrix(grid, dim);
}
/*
 * Computes the eigenvalues of an n-by-n matrix filled by fill_matrix(A, n,
 * seed) using LAPACK's dgeev_ and writes them, one "real imag" pair per
 * line, to the file "evalues.datc".
 *
 * Command line: <n> <seed>
 *
 * Returns 0 on success and -1 on a usage error, a non-positive n, an
 * allocation failure, a dgeev_ failure (info != 0) or when the output file
 * cannot be opened. All buffers are released on every path after allocation.
 */
int main(int argc, char *argv[])
{
  int n, i, info, lwork;
  double *A = NULL, *wr = NULL, *wi = NULL, *vl = NULL, *vr = NULL, *work = NULL;
  char jobvl, jobvr;
  int seed;
  int status = -1;
  FILE *fp;

  /* Command line arguments: matrix size, RNG seed */
  if (argc != 3)
  {
    fprintf(stderr, "usage: %s n seed\n", argv[0]);
    return -1;
  }
  n = atoi(argv[1]);
  seed = atoi(argv[2]);
  if (n <= 0)
  {
    fprintf(stderr, "%s: matrix size must be a positive integer\n", argv[0]);
    return -1;
  }

  /* Allocate space. Note that matrix A is essentially a 1D array */
  lwork = 10 * n;
  A    = (double *)malloc((size_t)n * n * sizeof(double));
  wr   = (double *)malloc((size_t)n * sizeof(double));
  wi   = (double *)malloc((size_t)n * sizeof(double));
  vl   = (double *)malloc((size_t)n * n * sizeof(double));
  vr   = (double *)malloc((size_t)n * n * sizeof(double));
  work = (double *)malloc((size_t)lwork * sizeof(double));
  if (A == NULL || wr == NULL || wi == NULL ||
      vl == NULL || vr == NULL || work == NULL)
  {
    fprintf(stderr, "%s: out of memory\n", argv[0]);
    goto cleanup;
  }

  /* 'N' for both jobs: eigenvalues only, no eigenvectors requested */
  jobvl = 'N';
  jobvr = 'N';

  /* Fill the matrix with random numbers */
  fill_matrix(A, n, seed);

  /* ---- The eigenvalue calculation proper ---- */
  dgeev_(&jobvl, &jobvr, &n, A, &n, wr, wi, vl, &n, vr, &n,
         work, &lwork, &info);
  if (info != 0)
  {
    /* wr/wi are not fully valid in this case, so do not write them out */
    fprintf(stderr, "%s: dgeev failed (info = %d)\n", argv[0], info);
    goto cleanup;
  }

  /* Print eigenvalues to file "evalues.datc" */
  fp = fopen("evalues.datc", "w");
  if (fp == NULL)
  {
    perror("evalues.datc");
    goto cleanup;
  }
  for (i = 0; i < n; i++)
    fprintf(fp, "%12.8g %12.8g\n", wr[i], wi[i]);
  fclose(fp);

  status = 0;

cleanup:
  /* free(NULL) is a no-op, so partially allocated state is fine here */
  free(work);
  free(vr);
  free(vl);
  free(wi);
  free(wr);
  free(A);
  return status;
}
/*
 * Reads N and d from standard input, rebuilds the matrix with fill_matrix(),
 * runs get_count_slow() and prints the resulting `count`.
 *
 * Returns 1 if the two unsigned values cannot be read, 0 otherwise.
 * The commented-out blocks are earlier table-generation / debugging drivers.
 */
int main()
{
	/*for(N = 101; N <= 200; N++)
	{
		printf("/\* N=%u *\/ {", N);
		for(d = 1; d <= 100; d++)
		{
			count = 0;
			get_count_slow();
			printf("%u", count);
			if(d < 100)
				printf(", ");
		}
		printf("}, \n");
	}*/

	/* Do not run the computation on values that were never read. */
	if(scanf("%u%u", &N, &d) != 2)
		return 1;

	fill_matrix();
	count = 0;
	get_count_slow();
	printf("%u\n", count);

	/*for(N = 0; N <= 500; N++)
	{
		printf("N=%u\t", N);
		for(d = 1; d <= 10; d++)
		{
			count = 0;
			get_count_slow();
			printf("%u\t", count);
		}
		printf("\n");
	}*/

	/*N = 5; d = 1;
	fill_matrix();
	get_count_slow();
	printf("\n");
	N = 5; d = 3;
	fill_matrix();
	get_count_slow();*/

	return 0;
}
int test_matrix_clear(void) { Matrix *matrix = new_matrix(); int res = fill_matrix(matrix, 20, 20); if(res != 0) { LOG_ERR("test matrix clear -> fill matrix error"); return res; } Status status = matrix_clear(matrix); if(status != STAT_SUCCESS) { LOG_ERR("test matrix clear"); return 1; } else { LOG_SUCCESS("test matrix clear"); return 0; } }
// Prints a 4x4 matrix of generated integers before and after rotate_mat_90().
TEST(RotateMatrixTest, generalInput) {
    int dim = 4;

    // dim*dim values produced by gen_rand_int(values, 10, 50, count).
    std::vector<int> values;
    gen_rand_int(values, 10, 50, dim * dim);

    int **grid = alloc_matrix(dim);
    fill_matrix(grid, dim, values);

    std::cout << "Input matrix: " << std::endl;
    print_matrix(grid, dim, cout, 1, 3);

    rotate_mat_90(grid, dim);

    std::cout << "Rotated matrix: " << std::endl;
    print_matrix(grid, dim, cout, 1, 3);
    std::cout << std::endl;

    free_matrix(grid, dim);
}
int test_matrix_find_element(void) { Element *element_by_pos; Element *element_by_val; Matrix *matrix = new_matrix(); int res = fill_matrix(matrix, 20, 20); if(res != 0) { LOG_ERR("test matrix find element -> fill matrix error"); return res; } //find element by pos element_by_pos = matrix_find_by_pos(matrix, 3, 15); if(element_by_pos == NULL) { LOG_ERR("find element from matrix by pos"); return 1; } if(element_by_pos->value == 3*15) { LOG_SUCCESS("find element from matrix by pos"); } else { LOG_ERR("find element from matrix by pos"); return 1; } //find element by value element_by_val = matrix_find_by_val(matrix, 3*15); if(element_by_val == NULL) { LOG_ERR("find element from matrix by value"); return 1; } while(element_by_val != NULL) { if(element_by_val->row == 3 && element_by_val->col == 15) { LOG_SUCCESS("find element from matrix by value"); break; } else if(element_by_val->next == NULL){ LOG_ERR("find element from matrix by value"); return 1; } else { element_by_val = element_by_val->next; } } return 0; }
/* Transfers Minc setup data to the PLACE object, which continues the initialization formerly done in space(). Returns 0 if successful, -1 if space hasn't been called in Minc score. */ int get_setup_params(double Dimensions[], /* array of 5 elements */ double Matrix[12][12], float *abs_factor, float *rvb_time, int *UseMikes, double *MikeAngle, double *MikePatternFactor) { int i, j; if (!space_called) return -1; Dimensions[0] = (double)_front; Dimensions[1] = (double)_right; Dimensions[2] = (double)_back; Dimensions[3] = (double)_left; Dimensions[4] = (double)_ceiling; fill_matrix(); /* copy local matrix into the one passed from caller */ for (i = 0; i < 12; i++) for (j = 0; j < 12; j++) Matrix[j][i] = _Matrix[j][i]; *abs_factor = _abs_factor; *rvb_time = _rvb_time; *UseMikes = _UseMikes; *MikeAngle = _MikeAngle; *MikePatternFactor = _MikePatternFactor; space_called = 1; return 0; }
/* Prints the matrix returned by fill_matrix(tree, selected_nodes) as a
 * tab-separated square table on standard output, one row per selected node.
 * When show_headers is non-zero, a header line of node labels is printed
 * first and each row is prefixed with its node's label. The matrix is freed
 * before returning. */
void print_square_distance_matrix (struct rooted_tree *tree,
		struct llist *selected_nodes, int show_headers)
{
	double **dist = fill_matrix(tree, selected_nodes);
	struct list_elem *col_el, *row_el;
	int row, col;

	if (show_headers) {
		/* Header line: every label is preceded by a tab */
		col_el = selected_nodes->head;
		while (NULL != col_el) {
			printf ("\t%s", ((struct rnode *) col_el->data)->label);
			col_el = col_el->next;
		}
		putchar('\n');
	}

	row = 0;
	for (row_el = selected_nodes->head; NULL != row_el;
			row_el = row_el->next) {
		if (show_headers)
			printf ("%s\t", ((struct rnode *) row_el->data)->label);

		col = 0;
		for (col_el = selected_nodes->head; NULL != col_el;
				col_el = col_el->next) {
			printf("%g", dist[row][col]);
			/* newline after the last column, tab between columns */
			putchar(col_el == selected_nodes->tail ? '\n' : '\t');
			col++;
		}
		row++;
	}

	/* release every row first, then the array of row pointers */
	for (row = 0; row < selected_nodes->count; row++)
		free(dist[row]);
	free(dist);
}
/*
 * Driver for test_epi_mono(): builds two groups with abelian_init(&g, 0, 2),
 * a 2x2 matrix with entries {0, 1, 0, 0}, runs the test and clears the
 * groups. The commented-out block is an earlier test_kernel() scenario.
 */
int main()
{
    /*
    AbelianGroup x;
    AbelianGroup y;
    abelian_init(&x, 2, 0);
    *(x.orders)=1;
    *(x.orders+1)=1;
    abelian_init(&y, 1, 0);
    *(y.orders)=2;
    Matrix *f = matrix_init(1,2);
    int val[2] = {2,2};
    fill_matrix(val, f);
    Matrix *g = matrix_init(2,1);
    int val_g[2] = {1,1};
    fill_matrix(val_g, g);
    test_kernel(2, f, x, y, g);*/

    AbelianGroup x;
    AbelianGroup y;
    abelian_init(&x, 0, 2);
    abelian_init(&y, 0, 2);

    /* Entries handed to fill_matrix() for the 2x2 map f. */
    int entries[4] = { 0, 1, 0, 0 };
    Matrix *f = matrix_init(2, 2);
    fill_matrix(entries, f);

    test_epi_mono(2, f, x, y);

    abelian_clear(&x);
    abelian_clear(&y);

    return 0;
}
/*
 * Reads a size n, fills an n-by-n matrix with fill_matrix() and prints its
 * lower triangle (row i shows columns 0..i), each value left-aligned in a
 * 3-character field.
 *
 * Returns 1 with "Invalid input!" when n cannot be read or is not positive,
 * 0 otherwise.
 */
int main()
{
    int n;

    /* n sizes a variable-length array below, so it must be strictly
       positive: a zero or negative VLA bound is undefined behaviour. */
    if (scanf("%d", &n) != 1 || n <= 0)
    {
        printf("Invalid input!");
        return 1;
    }

    int matrix[n][n];
    fill_matrix(n, n, matrix);

    int i, r;

    printf("\n");
    for (i = 0; i < n; i ++)
    {
        for (r = 0; r <= i; r++)
        {
            printf("%-3d", matrix[i][r]);
        }
        printf("\n");
    }

    return 0;
}
void numerical_jacobian_fill(int ijaf[],      /* fill: vector of integer pointers into
                                               * the sparse matrix afill */
                             double afill[],  /* vector of non-zero entries in the
                                               * coefficient matrix */
                             double xf[],     /* fill solution vector for the current
                                               * processor */
                             double rf[],     /* residual vector for the current
                                               * processor */
                             double delta_t,  /* time step size */
                             double theta,    /* parameter to vary time integration
                                               * from explicit (theta = 1) to
                                               * implicit (theta = 0) */
                             double x[],      /* current big solution vector holding
                                               * everything */
                             double x_old[],  /* old solution vector */
                             double xdot[],   /* xdot predicted for new solution */
                             int Debug_Flag,  /* flag for calculating numerical jacobian
                                               * -3 == calc num jac w/ rescaling
                                               * NOTE(review): the body tests for -2,
                                               * not -3, to enable scaling - confirm
                                               * which value is intended */
                             int node_to_fill[], /* map passed to put_fill_vector() and
                                                  * fill_matrix() */
                             Exo_DB *exo,     /* ptr to whole fe mesh */
                             Dpi *dpi)        /* ptr to parallel info */

/******************************************************************************
  Compares the analytical jacobian entries assembled by fill_matrix() with
  numerical ones and reports, on stderr, every entry whose difference exceeds
  the tolerance epsilon1.

  Procedure:
    1. Assemble residual and analytical jacobian once at the incoming solution.
    2. Save the solution and residual; split the jacobian into its diagonal
       part and its off-diagonal part (sorted by column via piksr2()).
    3. For every unknown j: add epsilon to xf[j], re-assemble the residual
       only, and form nj[i] = (rf[i] - resid_vector_old[i]) / epsilon.
       Compare nj against the saved analytical column j, then subtract
       epsilon from xf[j] again.

  Note: the original header describes this as a central difference; the code
  perturbs in one direction only (a one-sided difference).

  On return afill/rf hold the values of the last perturbed assembly, and
  af->Assemble_* flags are left as set for the residual-only passes.

  Author: K. S. Chen (1511) (based on an earlier version by P. R. Schunk).
  Date:   January 19, 1994
******************************************************************************/
{
  int i, j, k, ii, nn, kount, nnonzero, num_total_nodes;
  extern struct elem_side_bc_struct **First_Elem_Side_BC_Array;
  const double epsilon = 1.e-07;   /* perturbation applied to each unknown */
  double epsilon1=1.e-3;           /* reporting tolerance |aj - nj| */
  dbl *aj_diag, *aj_off_diag, *nj, *resid_vector_old, *x_save, *scale;
  int *irow, *jcolumn, *nelem;

  fprintf(stderr,"\n Starting Numerical Jacobian Checker for Fill equation\n");

  nnonzero = fill_zeros+1;
  nn = ijaf[num_fill_unknowns]-ijaf[0]; /* number of off-diagonal entries that
                                           are extracted and sorted below */
  num_total_nodes = Num_Internal_Nodes + Num_Border_Nodes;

  /* allocate arrays to hold jacobian and vector values */
  irow = (int *) array_alloc(1, nnonzero, sizeof(int));
  jcolumn = (int *) array_alloc(1, nnonzero, sizeof(int));
  /* nelem is sized by nnonzero but indexed by unknown number below */
  nelem = (int *) array_alloc(1, nnonzero, sizeof(int));
  aj_diag = (double *) array_alloc(1, num_fill_unknowns, sizeof(double));
  aj_off_diag = (double *) array_alloc(1, nnonzero, sizeof(double));
  nj = (double *) array_alloc(1, nnonzero, sizeof(double));
  resid_vector_old = (double *) array_alloc(1, num_fill_unknowns, sizeof(double));
  x_save = (double *) array_alloc(1, num_fill_unknowns, sizeof(double));
  scale = (double *) array_alloc(1, num_fill_unknowns, sizeof(double));

  /* only nj and scale are tested; the other allocations are not checked */
  if (nj == NULL || scale == NULL) EH(-1, "No room for numerical jacobian arrays");

  /* tighter tolerance when the scaled comparison is requested */
  if (Debug_Flag == -2) epsilon1 = 1.e-6;

  /* initialization */
  memset(aj_off_diag, 0, nnonzero*sizeof(dbl)); /* off-diagonal analytical jacobian elements */
  memset(nj, 0, nnonzero*sizeof(dbl)); /* numerical jacobian elements */
  memset(aj_diag, 0, num_fill_unknowns*sizeof(dbl)); /* diagonal analytical jacobian elements */

  /* first calculate the residual vector corresponding to the solution vector
     read in the initial guess file; also calculate the analytical jacobian
     entries */
  af->Assemble_Residual = TRUE;
  af->Assemble_Jacobian = TRUE;
  af->Assemble_LSA_Jacobian_Matrix = FALSE;
  af->Assemble_LSA_Mass_Matrix = FALSE;

  (void) fill_matrix(afill, ijaf, rf, xf, x, x_old, xdot, delta_t, theta, ADVECT, node_to_fill, First_Elem_Side_BC_Array, exo, dpi);

  if (Debug_Flag == -2) {
    /* Scale matrix first to get rid of problems with penalty parameter;
       this call also fills `scale`, which is reused inside the column loop */
    row_sum_scale_MSR(num_fill_unknowns, afill, ijaf, rf, scale);
  }

  /* save solution vector and residual vector before numerical jacobian
     calculations */
  dcopy1( num_fill_unknowns, xf, x_save);
  dcopy1( num_fill_unknowns, rf, resid_vector_old);

  /* extract diagonal and off-diagonal elements from the coefficient matrix
     stored in sparse-storage format */
  dcopy1(num_fill_unknowns, afill, aj_diag); /* diagonal elements */
  kount=0; /* off-diagonal elements */
  for (i=0; i<num_fill_unknowns; i++) {
    nelem[i] = ijaf[i+1] - ijaf[i]; /* number of off-diagonal entries in row i */
    for (k=0; k<nelem[i]; k++) {
      irow[kount]=i; /* row # in global jacobian matrix */
      /* off-diagonal storage begins at index num_fill_unknowns + 1 */
      ii = kount + num_fill_unknowns + 1;
      jcolumn[kount]=ijaf[ii]; /* column # in global jacobian matrix */
      aj_off_diag[kount] = afill[ii];
      kount=kount+1;
    }
  }

  piksr2(nn, jcolumn, irow, aj_off_diag); /* arrange coefficient matrix columnwise,*/
                                          /* in ascending column number order */

  /* calculate numerical jacobian entries columnwise and then compare them
     with the analytical jacobian entries */
  for (j=0; j<num_fill_unknowns; j++) /* loop over each column */
  {
    xf[j] = x_save[j] + epsilon; /* perturb one variable at a time */

    /* let big vector know of the change for load_fv */
    put_fill_vector(num_total_nodes, x, xf, node_to_fill);

    for (i=0; i<num_fill_unknowns; i++) rf[i] = 0.0; /* zero residual vector before its calculation */

    /* residual only: the analytical jacobian saved above must not be rebuilt */
    af->Assemble_Residual = TRUE;
    af->Assemble_Jacobian = FALSE;
    af->Assemble_LSA_Jacobian_Matrix = FALSE;
    af->Assemble_LSA_Mass_Matrix = FALSE;

    (void) fill_matrix(afill, ijaf, rf, xf, x, x_old, xdot, delta_t, theta, ADVECT, node_to_fill, First_Elem_Side_BC_Array, exo, dpi);

    if (Debug_Flag == -2) {
      /* Scale matrix first to get rid of problems with penalty parameter */
      row_scaling(num_fill_unknowns, afill, ijaf, rf, scale);
    }

    for (i=0; i<num_fill_unknowns; i++) /* cal numerical jacobian vector for column j */
      nj[i] = (rf[i] - resid_vector_old[i])/epsilon;

    /* COMPARISON: analytical vs. numerical --- the diagonal element for
       column j */
    if(ABS(aj_diag[j] - nj[j]) > epsilon1)
      fprintf(stderr, " aj=%-10.4g nj=%-10.4g resid=%-12.5g at unknown j = %d\n", aj_diag[j], nj[j], rf[j], j);

    /* COMPARISON: analytical vs. numerical --- the off-diagonal elements for
       column j */
    for (k=0; k<(ijaf[num_fill_unknowns]-ijaf[0]); k++) {
      if(jcolumn[k] == j) /* match the column numbers */
      {
        for (i=0; i<num_fill_unknowns; i++) {
          if(i == irow[k]) /* match the row numbers */
          {
            /* analytical entries equal to 1.0e+06 are never reported */
            if(ABS(aj_off_diag[k]-nj[i]) > epsilon1 && aj_off_diag[k] != 1.0e+06)
              fprintf(stderr," aj=%-10.4g nj=%-10.4g at unknown j = %d row i = %d\n", aj_off_diag[k], nj[i], j, i);
          }
        }
      }
    }

    xf[j] = xf[j] - epsilon; /* return solution vector to its original state */

  } /* End of for (j=0; j<num_fill_unknowns; j++) */

  /* free arrays to hold jacobian and vector values */
  safe_free( (void *) irow) ;
  safe_free( (void *) jcolumn) ;
  safe_free( (void *) nelem) ;
  safe_free( (void *) aj_diag) ;
  safe_free( (void *) aj_off_diag) ;
  safe_free( (void *) nj) ;
  safe_free( (void *) resid_vector_old) ;
  safe_free( (void *) x_save) ;
  safe_free( (void *) scale) ;

} /* End of function numerical_jacobian_fill */
/* Searches for the smallest separation between the two orbits described by
   elem1 and elem2 and returns it (the square root of the smallest squared
   distance found, or sqrt(10000.) = 100 if no sample improves on the
   initial bound).

   The two element sets are swapped if needed so that elem1 is the one with
   the smaller eccentricity.  N_STEPS starting values of true anomaly on
   elem2 are then tried; from each, up to five refinement passes adjust both
   anomalies via compute_improvement().

   If barbee_style_delta_v is non-NULL, it receives, for the closest sample,
   the length of the velocity difference between the two orbits scaled by
   AU_IN_KM / seconds_per_day.
   NOTE(review): that scaling presumably turns AU/day into km/s - confirm. */
double find_moid( const ELEMENTS *elem1, const ELEMENTS *elem2,
                                     double *barbee_style_delta_v)
{
   double mat1[3][3], mat2[3][3], xform_matrix[3][3];
   const double identity_matrix[3][3] = {
            { 1., 0., 0.},
            { 0., 1., 0.},
            { 0., 0., 1.} };
   double least_dist_squared = 10000.;
   int i, j;

               /* make elem1 the orbit with the smaller eccentricity */
   if( elem1->ecc > elem2->ecc)
      {
      const ELEMENTS *tptr = elem1;

      elem1 = elem2;
      elem2 = tptr;
      }
   fill_matrix( mat1, elem1);
   fill_matrix( mat2, elem2);
               /* xform_matrix[j][i] = mat1 row j dotted with mat2 row i; it */
               /* is used below to express elem2 positions alongside elem1,  */
               /* for which the identity matrix is used                      */
   for( i = 0; i < 3; i++)
      for( j = 0; j < 3; j++)
         xform_matrix[j][i] = dot_prod( mat1[j], mat2[i]);

   for( i = 0; i < N_STEPS; i++)
      {
      double vect1[3], vect2[3], dist_squared = 0.;
      double deriv1[3], deriv2[3], r1, r2;
               /* evenly spaced starting anomalies around elem2's orbit */
      double true_anomaly2 = 2. * PI * (double)i / (double)N_STEPS;
      double delta_true1, delta_true2;
      int loop_count = 0, solution_found = 0;
               /* true_anomaly1 is always assigned on the first pass of the  */
               /* loop below (loop_count == 0), so the "maybe uninitialized" */
               /* warning is silenced where the compiler supports that       */
#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406
#pragma GCC diagnostic push         /* see comments above */
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
      double true_anomaly1;
#pragma GCC diagnostic pop
#else
      double true_anomaly1 = 0.;
#endif

      do
         {
         r2 = compute_posn_and_derivative( elem2, true_anomaly2, xform_matrix,
                                        vect2, deriv2);
               /* first pass only: start elem1 at the angle of elem2's */
               /* position in the x-y plane                             */
         if( !loop_count)
            true_anomaly1 = atan2( vect2[1], vect2[0]);
         r1 = compute_posn_and_derivative( elem1, true_anomaly1, identity_matrix,
                                        vect1, deriv1);
               /* vect1 becomes the separation vector (point1 - point2) */
         for( j = 0; j < 3; j++)
            vect1[j] -= vect2[j];
         compute_improvement( vect1, deriv1, deriv2,
                                   &delta_true1, &delta_true2);
         true_anomaly1 += delta_true1;
         true_anomaly2 -= delta_true2;
               /* accept the step only when both corrections are small; the */
               /* separation vector is then updated to first order          */
         if( fabs( delta_true1) < 5. * PI / N_STEPS)
            if( fabs( delta_true2) < 5. * PI / N_STEPS)
               {
               for( j = 0; j < 3; j++)
                  vect1[j] += delta_true1 * deriv1[j] + delta_true2 * deriv2[j];
               solution_found = 1;
               }
         loop_count++;
//       debug_printf( "  i = %3d; loop %d; %f\n",
//                    i, loop_count, sqrt( dot_prod( vect1, vect1)));
         }
               /* keep refining only while a step has been accepted, and for */
               /* at most five passes; a first pass that is not accepted     */
               /* ends the loop immediately                                  */
         while( solution_found && loop_count < 5);

      dist_squared = dot_prod( vect1, vect1);
      if( dist_squared < least_dist_squared)
         {
         least_dist_squared = dist_squared;
         if( barbee_style_delta_v)
            {
            double delta_v[3];

            set_true_velocity( deriv1, r1, elem1->q, elem1->major_axis);
            set_true_velocity( deriv2, r2, elem2->q, elem2->major_axis);
            for( j = 0; j < 3; j++)
               delta_v[j] = deriv1[j] - deriv2[j];
            *barbee_style_delta_v = vector3_length( delta_v);
                     /* in AU/day */
            *barbee_style_delta_v *= AU_IN_KM / seconds_per_day;
            }
         }
#ifdef TEST_VERSION
               /* per-sample diagnostics; '*' marks samples with an accepted step */
      printf( "%3d %c%8.6f%8.2f%8.2f%8.2f%8.2f%15f%15f\n", i,
               (solution_found ? '*' : ' '),
               sqrt( dot_prod( vect1, vect1)),
               true_anomaly1 * 180. / PI,
               true_anomaly2 * 180. / PI,
               true_anomaly_to_eccentric( true_anomaly1, elem1->ecc) * 180. / PI,
               true_anomaly_to_eccentric( true_anomaly2, elem2->ecc) * 180. / PI,
               dot_prod( vect1, deriv1),
               dot_prod( vect1, deriv2));
//    printf( "%3d%15f%15f%15f%15f%15f\n", i, x, y,
//                vect[0], vect[1], vect[2]);
#endif
      }
   return( sqrt( least_dist_squared));
}
void main(void) { // Malloc spaces for four matrix double *A = malloc(sizeof(double) * SIZE * SIZE); fill_matrix(A, SIZE); double *B = malloc(sizeof(double) * SIZE * SIZE); fill_matrix(B, SIZE); double *C = malloc(sizeof(double) * SIZE * SIZE); memset(C, 0, sizeof(double) * SIZE * SIZE); double *D = malloc(sizeof(double) * SIZE * SIZE); memset(D, 0, sizeof(double) * SIZE * SIZE); // struct to timing struct timeval begin, end; // test function gettimeofday(&begin, NULL); square_dgemm(SIZE, A, B, C); gettimeofday(&end, NULL); // niave multipily naive_multiply(A, B, D, SIZE); // validate result, if wrong, print four matrix for(int i=0; i<SIZE*SIZE; i++) { if(C[i] != D[i]) { printf("WRONG.\n"); for(int x=0; x<SIZE; x++) { for(int y=0; y<SIZE; y++) { printf("%f ", A[x*SIZE+y]); } printf("\n"); } printf("-----------\n"); for(int x=0; x<SIZE; x++) { for(int y=0; y<SIZE; y++) { printf("%f ", B[x*SIZE+y]); } printf("\n"); } printf("-----------\n"); for(int x=0; x<SIZE; x++) { for(int y=0; y<SIZE; y++) { printf("%f ", C[x*SIZE+y]); } printf("\n"); } printf("-----------\n"); for(int x=0; x<SIZE; x++) { for(int y=0; y<SIZE; y++) { printf("%f ", D[x*SIZE+y]); } printf("\n"); } return; } } printf("CORRECT.^_^\n"); printf("Single Round Time use: %ld usec.\n", (end.tv_sec-begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec)); /* Time a "sufficiently long" sequence of calls to reduce noise */ double Gflops_s, seconds = -1.0; double timeout = 0.1; // "sufficiently long" := at least 1/10 second. for (int n_iterations = 1; seconds < timeout; n_iterations *= 2) { /* Warm-up */ square_dgemm (SIZE, A, B, C); /* Benchmark n_iterations runs of square_dgemm */ seconds = -wall_time(); for (int it = 0; it < n_iterations; ++it) square_dgemm (SIZE, A, B, C); seconds += wall_time(); /* compute Mflop/s rate */ Gflops_s = 2.e-9 * n_iterations * SIZE * SIZE * SIZE / seconds; } printf ("Size: %d\tGflop/s: %.3g\n", SIZE, Gflops_s); }
int main(int argc, char *argv[]) { // One argument is required to specify the number of rows if(argc!=2) { printf("No of rows not specified \n"); return 1; } struct timeval tv,tv1; nrows=atoi(argv[1]); ncols=nrows; //Assuming it is a square matrix mapsize=nrows*ncols*sizeof(float); fill_matrix("./matrixA"); fill_matrix("./matrixB"); fill_matrix("./matrixC"); printf("Matrix filled \n"); mapA = map_matrix ("./matrixA", MAP_RDONLY, nrows, ncols, &fda); //mapping the entire file A to memory mapB = map_matrix ("./matrixB", MAP_RDONLY, nrows, ncols, &fdb); //mapping the entire file B to memory mapC = map_matrix ("./matrixC", MAP_RDWR, nrows, ncols, &fdc); //mapping the entire file C to memory gettimeofday(&tv,NULL); Mat_Mat_Multiplication_Sequential(); gettimeofday(&tv1,NULL); double diff_in_usec; if(tv1.tv_usec>tv.tv_usec) diff_in_usec=tv1.tv_usec-tv.tv_usec; else diff_in_usec=tv.tv_usec-tv1.tv_usec; printf("time taken is %lf seconds %lf microseconds\n",(tv1.tv_sec-tv.tv_sec)+0.0,diff_in_usec); printf("matrix C is \n"); print_matrix(mapC); printf("time taken is %lf seconds %lf microseconds\n",(tv1.tv_sec-tv.tv_sec)+0.0,diff_in_usec); munmap (mapA, mapsize); munmap (mapB, mapsize); munmap (mapC, mapsize); close (fda); close (fdb); close (fdc); return 0; }
//--------------------------------------------------------- int main(int argc, char **argv) { TPS<double> rbf; Polinomio<double> pol; Matrix<double> A,B; Vector<double> x,y,f; Vector<double> lambda,b; Vector<double> xnew,ynew,fnew; double c=0.01; int n,ni,m; //make the data in the square [0,1] x [0,1] make_data(0,1,0,1, 21, 21, x, y, ni, n); //stablish the exponent in: r^beta log(r) rbf.set_beta(4); //configure the associate polynomial // pol.make( data_dimension, degree_pol) pol.make(2 , rbf.get_degree_pol()); //show the rbf and pol info cout<<rbf; cout<<pol; //show the number of nodes cout<<endl; cout<<"total nodes N = "<<n<<endl; cout<<"interior nodes ni = "<<ni<<endl; cout<<"boundary nodes nf = "<<n-ni<<endl; cout<<endl; //get the number of elements in the polynomial base m = pol.get_M(); //resize the matrices to store the partial derivatives A.Resize(n+m,n+m); A = 0.0; B.Resize(n+m,n+m); B = 0.0; //Recall that this problem has the general form // (Uxx+Uyy) (Pxx+Pyy) = f interior nodes 0..ni // U P_b = g boundary nodes ni..n // P^transpose 0 = 0 momentun conditions in P // // P is the polynomial wit size n x m // P_b is the polynomial working in the boundary nodes, size nf x m // Pxx+Pyy has size ni x m //make the matriz derivatives fill_matrix( "dxx" , rbf , pol , c , x , y, 0 , ni , A); fill_matrix( "dyy" , rbf , pol , c , x , y, 0 , ni , B); A = A + B; // A <- Uxx + Uyy //Add the submatriz for the boundary nodes: U , P_b boundary nodes ni..n fill_matrix( "normal" , rbf , pol , c , x , y, ni, n , A); //Add the submatriz P^transpose at the end: P^transpose fill_matrix( "pol_trans" , rbf , pol , c , x , y, n , n+m, A); //resize the vector to store the right_size of the PDE b.Resize(n+m); b = 0.0; //fill with f for(int i=0;i<ni;i++) b(i) = right_side(x(i), y(i)); //fill with the boundary condition for(int i=ni;i<n;i++) b(i) = boundary_condition(x(i),y(i)); //solve the linear system of equations lambda = gauss(A,b); //make the new data grid int ni2,n2; 
make_data(0,1,0,1, 41, 41, xnew, ynew, ni2, n2); //interpolate on this new data grid (xnew,ynew) fnew = interpolate(rbf,pol,c,lambda,x,y,xnew,ynew); //determine the maximum error double e=0.0; for(int i=0;i<ni2;i++) { e = max(e, fabs(fnew(i) - sin(2*M_PI*xnew(i))*cos(2*M_PI*ynew(i))) ); } //show the error cout<<endl; cout<<"The maximum error: e_max = "<<e<<endl<<endl; //store the interpolated data save_gnu_data("data",xnew,ynew,fnew); return 0; }
// Resets the structure for `vn` vertices: stores the vertex count, sets the
// edge count to zero and calls fill_matrix() on the vertex_num x vertex_num
// part of `weight` with INF.
//
// Returns 0. The previous version declared an int return type but fell off
// the end of the function, which is undefined behaviour in C++.
int build(int vn) {
    vertex_num = vn;
    edge_num = 0;
    fill_matrix(weight, vertex_num, vertex_num, INF);
    return 0;
}
int main(int argc, char *argv[]) { //Declaration of variables int my_rank, num_procs; int num_rows, num_cols; double **A, **B, **C; int nrA, ncA, nrB, ncB; double **a, **b, **c; int nra, nca, nrb, ncb; int sqrt_p; int *rsA, *csA, *rsB, *csB; int mrA, mcA, mrB, mcB; //Declare MPI-suff MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &num_procs); char *Mat_A, *Mat_B, *Mat_C; Mat_A = argv[1]; Mat_B = argv[2]; Mat_C = argv[3]; sqrt_p = sqrt(num_procs); if(my_rank == 0){ //read_matrix_binaryformat(Mat_A, &A, &nrA, &ncA ); //read_matrix_binaryformat(Mat_B, &B, &nrB, &ncB ); nrA = 100; ncA = 50; nrB = 50; ncB = 100; allocate_matrix(&A, nrA, ncA); allocate_matrix(&B, nrB, ncB); fill_matrix(&A, nrA, ncA); fill_matrix(&B, nrB, ncB); char A_id = 'A', B_id = 'B'; print_matrix(nrA, ncA, A, my_rank, A_id); print_matrix(nrB, ncB, B, my_rank, B_id); } MPI_Bcast(&nrA, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&ncA, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nrB, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&ncB, 1, MPI_INT, 0, MPI_COMM_WORLD); find_size(nrA, ncA, &nra, &nca, my_rank, sqrt_p); find_size(nrB, ncB, &nrb, &ncb, my_rank, sqrt_p); rsA = (int*)malloc(num_procs*sizeof(int)); csA = (int*)malloc(num_procs*sizeof(int)); rsB = (int*)malloc(num_procs*sizeof(int)); csB = (int*)malloc(num_procs*sizeof(int)); for(int i=0; i<num_procs; ++i) { if(i == my_rank) { rsA[i] = nra; csA[i] = nca; rsB[i] = nrb; csB[i] = ncb; } else { MPI_Sendrecv(&nra, 1, MPI_INT, i, 1, &(rsA[i]), 1, MPI_INT, i, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Sendrecv(&nca, 1, MPI_INT, i, 1, &(csA[i]), 1, MPI_INT, i, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Sendrecv(&nrb, 1, MPI_INT, i, 1, &(rsB[i]), 1, MPI_INT, i, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Sendrecv(&ncb, 1, MPI_INT, i, 1, &(csB[i]), 1, MPI_INT, i, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } //printf("nra[%i] = %i, my_rank:%i\n", i, rsA[i], my_rank); } // root finds large enough sizes if(my_rank == 0) { 
find_max(rsA, &mrA, num_procs); find_max(csA, &mcA, num_procs); find_max(rsB, &mrB, num_procs); find_max(csB, &mcB, num_procs); } // send bigbuffs to all MPI_Bcast(&mrA, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&mcA, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&mrB, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&mcB, 1, MPI_INT, 0, MPI_COMM_WORLD); // allocate sub matrices allocate_matrix(&a, mrA, mcA); allocate_matrix(&b, mrB, mcB); allocate_matrix(&c, mrA, mcB); if(my_rank == 0) { // root sets its own matrices for(int i=0; i<nra; ++i) { for(int j=0; j<nca; ++j) { a[i][j] = A[i][j]; } } for(int i=0; i<nrb; ++i) { for(int j=0; j<ncb; ++j) { b[i][j] = B[i][j]; } } MPI_Datatype btA; MPI_Datatype btB; // initialize row and column index variables int riA = 0; int riB = 0; int ciA = csA[0]; int ciB = csB[0]; int tmpk = 1; for(int k=1; k<num_procs; ++k) { //send to slaves // create strided types (blocks/chunks/blarg) MPI_Type_vector(rsA[k], csA[k], ncA, MPI_DOUBLE, &btA); MPI_Type_create_resized(btA, 0, sizeof(double), &btA); MPI_Type_commit(&btA); MPI_Type_vector(rsB[k], csB[k], ncB, MPI_DOUBLE, &btB); MPI_Type_create_resized(btB, 0, sizeof(double), &btB); MPI_Type_commit(&btB); // send to slaves MPI_Send(&((*A)[ncA*riA + ciA]), 1, btA, k, 1, MPI_COMM_WORLD); MPI_Send(&((*B)[ncB*riB + ciB]), 1, btB, k, 2, MPI_COMM_WORLD); // free types for next iteration MPI_Type_free(&btA); MPI_Type_free(&btB); ciA += csA[k]; ciB += csB[k]; tmpk++; if(tmpk == sqrt_p) { // jump down to next chunk row riA += rsA[k-1]; riB += rsB[k-1]; ciA = 0; ciB = 0; tmpk = 0; } } // deallocate A and B //deallocate(&A); //deallocate(&B); } else { // slaves recv from root a chunk/block/somethgin // create data types MPI_Datatype btA; MPI_Datatype btB; // create strided data types MPI_Type_vector(nra, nca, mcA, MPI_DOUBLE, &btA); MPI_Type_create_resized(btA, 0, sizeof(double), &btA); MPI_Type_commit(&btA); MPI_Type_vector(nrb, ncb, mcB, MPI_DOUBLE, &btB); MPI_Type_create_resized(btB, 0, sizeof(double), &btB); 
MPI_Type_commit(&btB); // recv from root a chunk MPI_Recv(&((*a)[0]), 1, btA, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Recv(&((*b)[0]), 1, btB, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); // free types MPI_Type_free(&btA); MPI_Type_free(&btB); //char a_id = 'a', b_id = 'b'; //print_matrix(nra, nca, a, my_rank, a_id); } char a_id = 'a', b_id = 'b'; //print_matrix(nra, nca, a, my_rank, a_id); //print_matrix(nrb, ncb, b, my_rank, b_id); MatrixMatrixMultiply(&a, &b, &c, mrA, mcA, mrB, mcB, rsA, csA, rsB, csB, MPI_COMM_WORLD); if(my_rank == 0) { // allocate result allocate_matrix(&C, nrA, ncB); // root sets its own matrices for(int i=0; i<nra; ++i) { for(int j=0; j<ncb; ++j) { C[i][j] = c[i][j]; } } MPI_Datatype btC; int riC = 0; int ciC = csB[0]; int tmpk = 1; for(int k=1; k<num_procs; ++k) { MPI_Type_vector(rsA[k], csB[k], ncB, MPI_DOUBLE, &btC); MPI_Type_create_resized(btC, 0, sizeof(double), &btC); MPI_Type_commit(&btC); printf("%i\n", ncB*riC+ciC); MPI_Recv(&((*C)[ncB*riC+ciC]), 1, btC, k, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Type_free(&btC); ciC += csB[k]; if(tmpk == sqrt_p) { riC += rsA[k-1]; ciC = 0; tmpk = 0; } } //read_matrix_binaryformat(Mat_A, &A, &nrA, &ncA ); //read_matrix_binaryformat(Mat_B, &B, &nrB, &ncB ); test_result(nrA, ncA, nrB, ncB, A, B, C); //test_result(nrA, ncA, nrB, ncB, A, B, C); deallocate(&A); deallocate(&B); deallocate(&C); } else { MPI_Datatype btC; MPI_Type_vector(nra, ncb, mcB, MPI_DOUBLE, &btC); MPI_Type_create_resized(btC, 0, sizeof(double), &btC); MPI_Type_commit(&btC); MPI_Send(&((*c)[0]), 1, btC, 0, my_rank, MPI_COMM_WORLD); MPI_Type_free(&btC); } // deallocate sub matrices deallocate(&a); deallocate(&b); deallocate(&c); free(rsA); free(csA); free(rsB); free(csB); MPI_Finalize(); return 0; }
/*
 * Solves a random dense linear system by Gaussian elimination with OpenMP
 * and reports the elapsed wall time and the L2 norm of the residual
 * (matrix_copy * answers - answer_vector_copy).
 *
 * Command line: <size_of_matrix> <number_of_threads>
 * Returns 0 on success, 1 on a usage error, non-positive arguments or an
 * allocation failure.
 */
int main(int argc, char *argv[])
{
	int i;
	int j;
	int row;
	int status = 1;
	time_t start;
	time_t finish;
	double **matrix = NULL;
	double **matrix_copy = NULL;
	double *answer_vector = NULL;
	double *answer_vector_copy = NULL;
	double *answers = NULL;

	if (argc != 3) {
		fprintf(stderr, "The arguments should be ./matrix_sum size_of_matrix number_of_threads\n");
		return 1;
	}

	SIZE_OF_MATRIX = atoi(argv[1]);
	NUMBER_OF_THREADS = atoi(argv[2]);
	if (SIZE_OF_MATRIX <= 0 || NUMBER_OF_THREADS <= 0) {
		fprintf(stderr, "size_of_matrix and number_of_threads must be positive integers\n");
		return 1;
	}

	omp_set_num_threads(NUMBER_OF_THREADS);
	printf("Number of procs is %d\n", omp_get_num_procs());
	printf("The number of threads is %d\n", NUMBER_OF_THREADS);
	printf("Max number of threads is %d\n", omp_get_max_threads());

	/* The row tables hold pointers, so size them by the pointer type.
	   calloc keeps unassigned row slots NULL so cleanup can free them
	   unconditionally. */
	matrix = calloc(SIZE_OF_MATRIX, sizeof(*matrix));
	matrix_copy = calloc(SIZE_OF_MATRIX, sizeof(*matrix_copy));
	answer_vector = malloc(sizeof(*answer_vector) * SIZE_OF_MATRIX);
	answer_vector_copy = malloc(sizeof(*answer_vector_copy) * SIZE_OF_MATRIX);
	answers = malloc(sizeof(*answers) * SIZE_OF_MATRIX);
	if (matrix == NULL || matrix_copy == NULL || answer_vector == NULL ||
	    answer_vector_copy == NULL || answers == NULL) {
		fprintf(stderr, "Out of memory\n");
		goto cleanup;
	}

	for (i = 0; i < SIZE_OF_MATRIX; ++i) {
		matrix[i] = malloc(sizeof(double) * SIZE_OF_MATRIX);
		matrix_copy[i] = malloc(sizeof(double) * SIZE_OF_MATRIX);
		if (matrix[i] == NULL || matrix_copy[i] == NULL) {
			fprintf(stderr, "Out of memory\n");
			goto cleanup;
		}
	}

	srand48(time(NULL)); // seed random number

	fill_matrix(matrix, matrix_copy);
	fill_answer(answer_vector, answer_vector_copy);

	// Start Timing
	start = time(NULL);

	// Start elimination
	for (row = 0; row < SIZE_OF_MATRIX; ++row) {
		pivot_on_row(row, matrix, answer_vector);

		/* NOTE(review): the loop index i is not passed to
		   convert_to_upper_triangle(), so every iteration issues the same
		   call, possibly concurrently - confirm this is intended. */
#pragma omp parallel for
		for (i = 0; i < SIZE_OF_MATRIX; ++i) {
			convert_to_upper_triangle(row, matrix, answer_vector);
		}
	}

	back_subsitution(matrix, answer_vector, answers);

	// Finish Timing
	finish = time(NULL);
	double seconds = (double) difftime(finish, start);
	printf("Time Taken: %f\n", seconds);

	/* Residual check against the untouched copies of the system. */
	double l2 = 0;
	double total = 0;
	for (i = 0; i < SIZE_OF_MATRIX; ++i) {
		for (j = 0; j < SIZE_OF_MATRIX; ++j) {
			total = total + matrix_copy[i][j] * answers[j];
		}
		l2 = l2 + pow( (total - answer_vector_copy[i]), 2);
		total = 0;
	}
	l2 = sqrt(l2);
	printf("L2 norm is %g\n", l2);
	status = 0;

cleanup:
	/* Release every row before the row tables themselves. */
	if (matrix != NULL) {
		for (i = 0; i < SIZE_OF_MATRIX; ++i)
			free(matrix[i]);
	}
	if (matrix_copy != NULL) {
		for (i = 0; i < SIZE_OF_MATRIX; ++i)
			free(matrix_copy[i]);
	}
	free(matrix);
	free(matrix_copy);
	free(answer_vector);
	free(answer_vector_copy);
	free(answers);

	return status;
}
/**
 * Entry point of the parallel lab driver.
 *
 * Takes the matrix size from the command line (clamped to the range
 * MIN_MATRIX_SIZE..MAX_MATRIX_SIZE), generates the matrix, runs the
 * decomposition on a copy of it, checks the result against the original
 * and prints CPU / elapsed timings for both phases and their sum.
 */
int main (int argc, char *argv[])
{
  int dim = 0;
  double *original = NULL;
  double *factors = NULL;
  clock_t tick, decompose_ticks, check_ticks;
  struct timeval wall_begin, wall_end;
  double wall_secs, wall_total_secs, cpu_secs;

  /* Bail out if we don't have the correct number of parameters */
  if (argc != 2) {
    printf("This program is used to decompose a (random) matrix A into its components L and U.\n");
    printf("Usage: %s <matrix size>\n", argv[0]);
    return -1;
  }

  /* Read the requested size and clamp it to the supported range */
  dim = atoi(argv[1]);
  if (dim < MIN_MATRIX_SIZE) {
    printf("Setting matrix size to minimum value %d.\n", MIN_MATRIX_SIZE);
    dim = MIN_MATRIX_SIZE;
  } else if (dim > MAX_MATRIX_SIZE) {
    printf("Setting matrix size to maximum value %d.\n", MAX_MATRIX_SIZE);
    dim = MAX_MATRIX_SIZE;
  }

  /* Generate data. */
  printf("LU matrix decomposition, starting warmup...\n");
  printf(" - Generating a %i * %i matrix\n", dim, dim);
  original = (double*)malloc(sizeof(double)*dim*dim);
  factors = (double*)malloc(sizeof(double)*dim*dim);
  if (original == NULL || factors == NULL) {
    printf("Not enough memory!\n");
    return -1;
  }
  fill_matrix(original, dim);
  print_matrix(original, dim);

  /* The decomposition works in place on a copy; the original is kept
     for the check below. */
  memcpy(factors, original, sizeof(double)*dim*dim);

  /* Start LU decomposition. */
  printf("Decomposing the matrix into its components...\n");
  gettimeofday(&wall_begin, NULL);
  tick = clock();
  decompose_matrix(factors, dim);
  decompose_ticks = clock() - tick;
  gettimeofday(&wall_end, NULL);
  wall_secs = (double)timediff_ms(&wall_begin, &wall_end)/1000.0;
  wall_total_secs = wall_secs;
  cpu_secs = (double)(decompose_ticks)/CLOCKS_PER_SEC;

  /* Verify resulting decomposition. */
  printf("Checking result...\n");
  print_matrix(factors, dim);
  gettimeofday(&wall_begin, NULL);
  tick = clock();
  fputs(check_matrix(factors, original, dim)
            ? "The computation seems correct\n"
            : "The computation seems not correct\n",
        stdout);
  check_ticks = clock() - tick;
  gettimeofday(&wall_end, NULL);

  /* Output stats: decomposition first (values computed above) ... */
  printf("\nDecomposition time: %.2fs CPU, %.2fs elapsed, %.1f%% speedup\n",
         cpu_secs, wall_secs, cpu_secs/wall_secs*100.0);

  /* ... then the check phase ... */
  wall_secs = (double)timediff_ms(&wall_begin, &wall_end)/1000.0;
  wall_total_secs += wall_secs;
  cpu_secs = (double)(check_ticks)/CLOCKS_PER_SEC;
  printf("Checking time: %.2fs CPU, %.2fs elapsed, %.1f%% speedup\n",
         cpu_secs, wall_secs, cpu_secs/wall_secs*100.0);

  /* ... and finally both phases together. */
  cpu_secs = (double)(decompose_ticks+check_ticks)/CLOCKS_PER_SEC;
  printf("Overall time: %.2fs CPU, %.2fs elapsed, %.1f%% speedup\n",
         cpu_secs, wall_total_secs, cpu_secs/wall_total_secs*100.0);

  /* Free resources. */
  free(factors);
  free(original);

  return 0;
}
//! Fill all entries of the matrix with the given value. void fill (const Scalar value) { fill_matrix (nrows(), ncols(), get(), lda(), value); }
/**
 * Reset the structure for a graph of `vn` vertices.
 *
 * Stores `vn` in vertex_num, resets edge_num to 0 and calls fill_matrix()
 * on `relate` with dimensions vn x vn and the value `false` (presumably
 * clearing an adjacency matrix -- confirm against fill_matrix()).
 *
 * @param vn number of vertices
 * @return the vertex count just stored. The function is declared `int`
 *         but previously had no return statement, which is undefined
 *         behaviour in C++ when the result is used.
 */
int build(int vn) {
    vertex_num = vn;
    edge_num = 0;
    fill_matrix(relate, vertex_num, vertex_num, false);  // \SourceRef{source:utility}
    return vertex_num;
}
MainWindow::MainWindow(QWidget *parent) : QMainWindow(parent), ui(new Ui::MainWindow) { rnum = 5; cnum = 5; mod = 10; data.assign(rnum * cnum, 0); ui->setupUi(this); update_matrix(); sub_menu = new QMenu(this); ui->tableWidget->setContextMenuPolicy(Qt::CustomContextMenu); connect(ui->tableWidget, SIGNAL(customContextMenuRequested(QPoint)), this, SLOT(show_menu(QPoint))); createAct = new QAction(this); createAct->setIcon(QIcon::fromTheme("window-new")); createAct->setText("Create new matrix..."); createAct->setStatusTip("CTRL + N"); createAct->setShortcut(Qt::CTRL + Qt::Key_N); connect(createAct, SIGNAL(triggered()),this, SLOT(create_mat())); sub_menu->addAction(createAct); ui->menuFiles->addAction(createAct); ui->mainToolBar->addAction(createAct); fillAct = new QAction(this); fillAct->setIcon(QIcon::fromTheme("view-refresh")); fillAct->setText("Generate matrix values"); fillAct->setStatusTip("CTRL + G"); fillAct->setShortcut(Qt::CTRL + Qt::Key_G); connect(fillAct, SIGNAL(triggered()),this, SLOT(fill_matrix())); sub_menu->addAction(fillAct); ui->menuFiles->addAction(fillAct); ui->mainToolBar->addAction(fillAct); searchAct = new QAction(this); searchAct->setIcon(QIcon::fromTheme("system-search")); searchAct->setEnabled(0); searchAct->setText("Find max"); searchAct->setStatusTip("CTRL + F"); searchAct->setShortcut(Qt::CTRL + Qt::Key_F); connect(searchAct, SIGNAL(triggered()),this, SLOT(find_max())); sub_menu->addAction(searchAct); ui->menuFiles->addAction(searchAct); ui->mainToolBar->addAction(searchAct); deleteAct = new QAction(this); deleteAct->setIcon(QIcon::fromTheme("edit-delete")); deleteAct->setEnabled(0); deleteAct->setText("Delete current row"); deleteAct->setStatusTip("CTRL + X"); deleteAct->setShortcut(Qt::CTRL + Qt::Key_X); connect(deleteAct, SIGNAL(triggered()),this, SLOT(delete_row())); sub_menu->addAction(deleteAct); ui->menuFiles->addAction(deleteAct); ui->mainToolBar->addAction(deleteAct); ui->menuFiles->addSeparator(); closeAct = new QAction(this); 
closeAct->setIcon(QIcon::fromTheme("window-close")); closeAct->setText("Close file"); closeAct->setStatusTip("CTRL + Q"); closeAct->setShortcut(Qt::CTRL + Qt::Key_Q); connect(closeAct, SIGNAL(triggered()),this, SLOT(close())); ui->menuFiles->addAction(closeAct); ui->mainToolBar->addAction(closeAct); //QToolBar* pr_bar = this->addToolBar("Main toolbar"); //pr_bar->addAction(closeAct); }
int main(int argc, char **argv) { if(DEBUG_ASM) { long C[] = {2, 1,2,3,4}; long D[] = {2, 5,6,7,8}; long R3[5]; asm_inv_multiply(C, D, R3); printmatrix(R3); } else { struct timeval start, stop; unsigned long long t, asm_tot = 0, asm_inv_tot = 0, c_tot = 0, c_inv_tot = 0, c_2_tot = 0; FILE *f = fopen("testr.txt", "a"); long *A = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); long *B = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); long *R1 = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); long *R2 = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); long *R3 = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); long *R4 = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); long *R5 = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); fill_matrix(MATRIX_SIZE, A); fill_matrix(MATRIX_SIZE, B); int i; for (i = 1; i <= NUM_RUNS; ++i) { fprintf(f,"Run %d, %s\n", i, RUN_INFO); printf("Run %d, %s\n", i, RUN_INFO); // Assembly gettimeofday(&start, NULL); asm_multiply(A, B, R1); gettimeofday(&stop, NULL); t = ((stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec) - start.tv_usec; asm_tot += t; fprintf(f,"Asm-func: %llu Microseconds\n", t); printf("Asm-func: %llu Microseconds\n", t); // Inverted assembly gettimeofday(&start, NULL); asm_inv_multiply(A, B, R2); gettimeofday(&stop, NULL); t = ((stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec) - start.tv_usec; asm_inv_tot += t; fprintf(f,"Asm-inv-func: %llu Microseconds\n", t); printf("Asm-inv-func: %llu Microseconds\n", t); // C gettimeofday(&start, NULL); c_multiply(A, B, R3); gettimeofday(&stop, NULL); t = ((stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec) - start.tv_usec; c_tot += t; fprintf(f,"C-func: %llu Microseconds\n", t); printf("C-func: %llu Microseconds\n", t); // Inverted C gettimeofday(&start, NULL); inv_c_multiply(A, B, R4); gettimeofday(&stop, NULL); t = ((stop.tv_sec - start.tv_sec) * 1000000 + 
stop.tv_usec) - start.tv_usec; c_inv_tot += t; fprintf(f,"inv C-func: %llu Microseconds\n\n", t); printf("inv C-func: %llu Microseconds\n\n", t); // C 2 gettimeofday(&start, NULL); c_multiply_2(A, B, R5); gettimeofday(&stop, NULL); t = ((stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec) - start.tv_usec; c_2_tot += t; fprintf(f,"C2-func: %llu Microseconds\n\n", t); printf("C2-func: %llu Microseconds\n\n", t); } fprintf(f,"Average on %s\n", RUN_INFO); fprintf(f, "Asm-func average: %llu\n", asm_tot / NUM_RUNS); fprintf(f, "Asm-inv-func average: %llu\n", asm_inv_tot / NUM_RUNS); fprintf(f, "C-func average: %llu\n", c_tot / NUM_RUNS); fprintf(f, "C-inv-func average: %llu\n", c_inv_tot / NUM_RUNS); fprintf(f, "C2-func average: %llu\n\n________\n", c_2_tot / NUM_RUNS); fclose(f); validate_matrices(R1, R2, R3, R4); free(A); free(B); free(R1); free(R2); free(R3); free(R4); } return 0; }
/*
 * Handles the "no more moves" situation: calls broesel(), switches to text
 * mode with txt_mode(), prints the centred message "no more moves" via
 * print2x2_centered() (arguments 13, 5, 8) and then calls fill_matrix().
 */
void no_more_moves(void)
{
    broesel();
    txt_mode();
    print2x2_centered("no more moves", 13, 5, 8);
    fill_matrix();
}
int main(int argc ,char **argv){ /* Declare start and end of computation time */ struct timeval start, end; /* Variables declaration */ int i; int j; int rows_of_proc; /* Number of rounds that the parallel part will be computed */ int rounds = 0; /* Variable that shows the non zero elemets of the array */ int non_zero_elements = 0; /* Session start */ //session *s; //join_session(&argc, &argv, &s, "Master.spr"); //role *worker1 = s->get_role(s, "worker1"); //role *worker2 = s->get_role(s, "worker2"); //role *worker3 = s->get_role(s, "worker3"); /* Declaration of array results, which is the vector that will hold the results */ int *results = NULL; /* Declaration of array x, which is the vector that will bw multiplied with the array */ int *x = NULL; //Dynamic memory allocation x = (int *) malloc( ncolumns * sizeof(int) ); if(x == NULL) { fprintf(stderr, "out of memory\n"); exit(-1); } //Fill the vector x printf("Filling and printing vector x \n"); fill_vector(x, ncolumns); /* The array that contains the start of each row within the non-zero elements array*/ int *row_ptr = NULL; /* Dynamic memory allocation of the array row_ptr */ row_ptr = (int *) malloc( (nrows + 1) * sizeof(int) ); /* Check if we have enough memory */ if(row_ptr == NULL) { fprintf(stderr, "out of memory\n"); exit(-1); } //Declaration of array A int **A = NULL; //Dynamic memory allocation of the matrix A = malloc(nrows * sizeof(int *)); /* Check if we have enough memory */ if(A == NULL) { fprintf(stderr, "out of memory\n"); exit(-1); } for(i = 0; i < nrows; i++) { A[i] = malloc(ncolumns * sizeof(int)); /* Check if we have enough memory */ if(A[i] == NULL) { fprintf(stderr, "out of memory\n"); exit(-1); } } /* A[0][0] = 5; A[0][1] = 1; A[0][2] = 0; A[0][3] = 0; A[0][4] = 0; A[0][5] = 0; A[1][0] = 0; A[1][1] = 6; A[1][2] = 0; A[1][3] = 7; A[1][4] = 0; A[1][5] = 8; A[2][0] = 0; A[2][1] = 0; A[2][2] = 1; A[2][3] = 0; A[2][4] = 0; A[2][5] = 0; A[3][0] = 0; A[3][1] = 0; A[3][2] = 2; A[3][3] = 0; A[3][4] 
= 3; A[3][5] = 2; A[4][0] = 9; A[4][1] = 0; A[4][2] = 0; A[4][3] = 1; A[4][4] = 4; A[4][5] = 0; A[5][0] = 1; A[5][1] = 0; A[5][2] = 2; A[5][3] = 3; A[5][4] = 0; A[5][5] = 1; */ /* Fill tha matrix with values */ fill_matrix(A); /* Start counting the time */ gettimeofday(&start, NULL); /* Computation of non zero elements */ non_zero_elements = num_of_non_zero_elements(A); printf("Num of non zeros is %d \n", non_zero_elements); /* Master sends the non zero elements to the workers */ //send_int(worker1, non_zero_elements); //send_int(worker2, non_zero_elements); //send_int(worker3, non_zero_elements); /* Dynamically allocate memory for the array results */ results = (int *) malloc( nrows * sizeof(int) ); fill_with_zeros(results, nrows); /* Declaration af array values and memory allocation */ int *values = (int *) malloc(non_zero_elements * sizeof(int)); /* Check if we have enough memory */ if(values == NULL) { fprintf(stderr, "out of memory\n"); exit(-1); } /* Fill the values array */ fill_values(A, values); /* Declaration of the array values and dynamic memory allocation */ int *col_ind = (int *) malloc( non_zero_elements * sizeof(int) ); /* Check if we have enough memory */ if(col_ind == NULL) { fprintf(stderr, "out of memory\n"); exit(-1); } /* Fill the col_ind array */ fill_col_ind(A, col_ind); /* Fill the row_ptr array with zero*/ fill_with_zeros(row_ptr, nrows + 1); /* Print row_ptr initialized */ ////////////////print_vector(row_ptr, nrows + 1); /* Fill the row_ptr array */ fill_row_ptr(A, row_ptr); /* The results that workers will send to master */ int *buffer_results[participants] = {NULL}; /* Master sends tha arrays to workers */ //send_int_array(worker1, row_ptr, nrows + 1); //send_int_array(worker1, values, non_zero_elements); //send_int_array(worker1, col_ind, non_zero_elements); //send_int_array(worker1, x, ncolumns); //send_int_array(worker2, row_ptr, nrows + 1); //send_int_array(worker2, values, non_zero_elements); //send_int_array(worker2, col_ind, 
non_zero_elements); //send_int_array(worker2, x, ncolumns); //send_int_array(worker3, row_ptr, nrows + 1); //send_int_array(worker3, values, non_zero_elements); //send_int_array(worker3, col_ind, non_zero_elements); //send_int_array(worker3, x, ncolumns); /* print_vector(values, non_zero_elements); printf("\n"); print_vector(row_ptr, nrows + 1); printf("\n"); print_vector(col_ind, non_zero_elements); */ //print_vector(values, non_zero_elements); /* Define the work that master will do */ /*int start_work_i[participants]; int end_work_i[participants];*/ int start_work[participants]; int end_work[participants]; int amount_of_work[participants]; for(i = 0; i < participants; i++){ /* The amount of work that the master will do */ start_work[i] = floor((i * nrows)/participants); end_work[i] = floor(((i + 1) * nrows)/participants); amount_of_work[i] = end_work[i] - start_work[i]; printf("start = %d - end = %d - amount of work = %d\n", start_work[i], end_work[i], amount_of_work[i]); /* Dynamic allocation of buffer results */ buffer_results[i] = (int *) malloc(amount_of_work[i] * sizeof(int)); if(buffer_results[i] == NULL){ fprintf(stderr, "Out of memory, aborting program...\n"); } } /* The amount of work that the master will do */ //start_work[master] = floor((master * nrows)/participants); //end_work[master] = floor(((master + 1) * nrows)/participants); //printf("start = %d - end = %d\n", start_work[0], end_work[0]); /* Run for 40 rounds */ while(rounds++ < 40){ /* Main computation of the result. 
Each processor computes the work that is assigned to it*/ for(i = start_work[master]; i < end_work[master]; i++){ for(j = row_ptr[i]; j < row_ptr[i + 1]; j++) buffer_results[0][i] += values[j] * x[col_ind[j]]; //results[i] += values[j] * x[col_ind[j]]; } } /* Master node receives the results */ //receive_int_array(worker1, buffer_results[1], amount_of_work[1]); //receive_int_array(worker2, buffer_results[2], amount_of_work[2]); //receive_int_array(worker3, buffer_results[3], amount_of_work[3]); /* End counting of time here */ gettimeofday(&end, NULL); /* Computation of time */ long time_elapsed = ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000000; /* Print the time that has elapsed */ printf("Elapsed time is: %ld seconds\n", time_elapsed); /* int counter = 0; while(counter++ < 1000){ for(i = 0; i < nrows; i++){ //printf("rank = %d - i = %d\n", rank, i); //printf("RANK = %d\n", rank); for(j = row_ptr[i]; j < row_ptr[i + 1]; j++) results[i] += values[j] * x[col_ind[j]]; //results[i - start] += values[j] * x[col_ind[j]]; } }*/ /* Define the sub-buffers that we will send to the participants */ /*int *buffer_values[participants] = {NULL}; int *buffer_col_ind[participants] = {NULL}; int *buffer_row_ptr[participants] = {NULL};*/ /* Compute which part of the array each participant will compute */ //for(i = 0; i < participants; i++){ /*start_work_i[i] = floor((i * nrows)/participants); end_work_i[i] = floor(((i + 1) * nrows)/participants); start_work[i] = floor((i * non_zero_elements)/participants); end_work[i] = floor(((i + 1) * non_zero_elements)/participants); amount_of_work[i] = floor(((i + 1) * non_zero_elements)/participants) - floor(( i * non_zero_elements)/participants); //amount_of_work[i] = end_work[i] - start_work[i]; printf("start = %d - end = %d - amount of work = %d\n", start_work[i], end_work[i], amount_of_work[i]); */ /* Dynamically buffer allocation */ /*buffer_values[i] = (int *) malloc( (amount_of_work[i]) * 
sizeof(int)); buffer_col_ind[i] = (int *) malloc( (amount_of_work[i]) * sizeof(int)); */ //buffer_values[i] = (int *) malloc( (non_zero_elements) * sizeof(int)); //buffer_col_ind[i] = (int *) malloc( (non_zero_elements) * sizeof(int)); /* Check if memory is enough */ /*if(buffer_values[i] == NULL){ fprintf(stderr, "Out of memory.Aborting the program...\n"); exit(0); }*/ /* Check if memory is enough */ /*if(buffer_col_ind[i] == NULL){ fprintf(stderr, "Out of memory.Aborting the program...\n"); exit(0); }*/ //} ////////////////////////print_array(A); ////////////////////////print_vector(row_ptr, nrows + 1); //printf("row_ptr[%d] = %d\n", start_work[0], row_ptr[start_work[0]]); //printf("row_ptr[%d] = %d\n", end_work[0], row_ptr[end_work[0]]); //print_vector(buffer_values[0], amount_of_work[0]); //printf("\n\n"); /* Compute the buffer for each participant (amount of work) */ /*for(i = 0; i < participants; i++){ memcpy( buffer_values[i], values + start_work[i], amount_of_work[i] * sizeof(int) ); memcpy( buffer_col_ind[i], col_ind + start_work[i], amount_of_work[i] * sizeof(int)); //printf("buffer[%d] = %s\n", i, buffer[i]); }*/ //print_vector(buffer_values[0], amount_of_work[0]); //print_vector(buffer_col_ind[0], amount_of_work[0]); //print_vector(col_ind, non_zero_elements); //print_vector(values, non_zero_elements); ////////print_vector(col_ind, non_zero_elements); /* Main computation of the result. Master computes the work that is assigned to it*/ /*for(i = start_work[0]; i < end_work[0]; i++){ for(j = row_ptr[i]; j < row_ptr[i + 1]; j++) results[i] += values[j] * x[col_ind[j]]; //results[i - start] += values[j] * x[col_ind[j]]; }*/ /* for(i = start_work_i[1]; i < end_work_i[1]; i++){ for(j = row_ptr[i]; j < row_ptr[i + 1]; j++){ printf("j = %d\n", j); results[i] += buffer_values[1][j] * x[buffer_col_ind[1][j]]; } } */ //printf("buffer_values[%d][%d] = %d\n", 1, 12, buffer_values[1][12]); /* Main computation of the result. 
Each processor computes the work that is assigned to it*/ /*for(i = start; i < end; i++){ //printf("rank = %d - i = %d\n", rank, i); //printf("RANK = %d\n", rank); for(j = row_ptr[i]; j < row_ptr[i + 1]; j++) y[i] += values[j] * x[col_ind[j]]; //results[i - start] += values[j] * x[col_ind[j]]; }*/ /* The arrays that we will send to the workers */ //int *buffer_col_ind = NULL; //int *buffer_values = NULL; /* Free memory */ free(x); free(results); free(values); free(col_ind); free(row_ptr); return EXIT_SUCCESS; }
int solve_cur(struct device *in) { double error; int ittr = 0; if (get_dump_status(dump_print_newtonerror) == TRUE) { printf("Solve cur\n"); } #ifdef only only_update_thermal = FALSE; #endif //in->enable_back=FALSE; int stop = FALSE; int thermalrun = 0; double check[10]; int cpos = 0; int i = 0; //for (i=0;i<in->ymeshpoints;i++) //{ // printf("Rod ------- nt= %d %le\n",i,in->Gn[i]); //} do { fill_matrix(in); //dump_for_plot(in); //plot_now(in,"plot"); //solver_dump_matrix(in->M,in->N,in->Ti,in->Tj, in->Tx,in->b); //getchar(); if (in->stop == TRUE) { break; } solver(in->M, in->N, in->Ti, in->Tj, in->Tx, in->b); int propper = TRUE; update_solver_vars(in, TRUE); //printf("Going to clamp=%d\n",propper); //solver_dump_matrix(in->M,in->N,in->Ti,in->Tj, in->Tx,in->b); //printf("%d\n"); //getchar(); error = get_cur_error(in); //thermalrun++; if (thermalrun == 40) thermalrun = 0; //update(in); //getchar(); if (get_dump_status(dump_print_newtonerror) == TRUE) { printf("%d Cur error = %e %le I=%le", ittr, error, in->Vapplied, get_I(in)); printf("\n"); } in->last_error = error; in->last_ittr = ittr; ittr++; if (get_dump_status(dump_write_converge) == TRUE) { in->converge = fopena(in->outputpath, "converge.dat", "a"); fprintf(in->converge, "%e\n", error); fclose(in->converge); } stop = TRUE; if (ittr < in->max_electrical_itt) { if (error > in->min_cur_error) { stop = FALSE; } } if (ittr < in->newton_min_itt) { stop = FALSE; } if (in->newton_clever_exit == TRUE) { check[cpos] = error; cpos++; if (cpos > 10) { cpos = 0; } if (ittr >= in->newton_min_itt) { if ((check[0] < error) || (check[1] < error)) { stop = TRUE; } } } } while (stop == FALSE); in->newton_last_ittr = ittr; if (error > 1e-3) { printf_log ("warning: The solver has not converged very well.\n"); } //getchar(); if (get_dump_status(dump_newton) == TRUE) { dump_1d_slice(in, in->outputpath); } //plot_now(in,"plot"); //getchar(); in->odes += in->M; //getchar(); return 0; }