void free_genealogy_weights (struct genealogy_weights *gweight)
{
  int i;
  for (i = 0; i < numsplittimes + 1; i++)
  {
    XFREE (gweight->cc[i]);
    XFREE (gweight->fc[i]);
    XFREE (gweight->hcc[i]);
  }
  XFREE (gweight->cc);
  XFREE (gweight->fc);
  XFREE (gweight->hcc);
  if (modeloptions[NOMIGRATION] == 0)
  {
    for (i = 0; i < lastperiodnumber; i++)
    {
      free2D ((void **) gweight->mc[i], npops - i);
      free2D ((void **) gweight->fm[i], npops - i);
    }
    XFREE (gweight->mc);
    XFREE (gweight->fm);
  }
  return;
}
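/* free2D itself is not shown in these snippets, and its signature varies between
   projects (here it takes a (void **) plus the row count; the MPI programs below
   pass both dimensions).  A minimal sketch of the variant used at this call site,
   assuming the usual row-pointer layout -- illustrative only, not the project's
   actual helper: */
static void free2D (void **a, int rows)
{
  int i;
  if (a == NULL)
    return;
  for (i = 0; i < rows; i++)
    free (a[i]);                /* release each row */
  free (a);                     /* then the row-pointer array itself */
}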
// Applies previously trained QDA classifier to new data matrix x
//
// Inputs:
//   i_x        - N x p data matrix of N samples in p dimensions
//   i_means    - K x p matrix of class centroids
//   i_icovmats - K x p x p array of inverse covariance matrices
//   i_bias     - vector of length K containing the bias term for each discriminant function
//   i_N        - number of samples in x
//   i_p        - dimensionality of x
//   i_K        - number of classes
//
// Outputs:
//   o_labels   - labels for all samples in i_x
//
// Memory allocation requirements for the outputs:
//   o_labels must be of length N
void QDAtest( double* i_x, double* i_means, double* i_icovmats, double* i_bias,
              int* i_N, int* i_p, int* i_K, int* o_labels )
{
  // Create local storage
  int t, k, i, j;
  double temp;
  double* x_c = make1D( *i_p );
  double** x = make2D( *i_N, *i_p );
  double** means = make2D( *i_K, *i_p );
  double*** icovmats = make3D( *i_K, *i_p, *i_p );
  double** delta = make2D( *i_N, *i_K );

  // Copy and reshape the inputs to local storage
  cp_vec_mat( *i_N, *i_p, i_x, x );
  cp_vec_mat( *i_K, *i_p, i_means, means );
  cp_vec_3D( *i_K, *i_p, *i_p, i_icovmats, icovmats );

  // Compute the delta function values
  for( t = 0; t < *i_N; t++ )
    for( k = 0; k < *i_K; k++ )
    {
      add_vec_vec( *i_p, 1.0, -1.0, x[t], means[k], x_c );
      delta[t][k] = 0.0;
      for( i = 0; i < *i_p; i++ )
        for( j = 0; j < *i_p; j++ )
          delta[t][k] += icovmats[k][i][j] * x_c[i] * x_c[j];
      delta[t][k] *= -0.5;
      delta[t][k] += i_bias[k];
    }

  // Pick the highest discriminant function value for each point
  for( t = 0; t < *i_N; t++ )
  {
    o_labels[t] = 1;
    temp = delta[t][0];
    for( k = 1; k < *i_K; k++ )
    {
      if( delta[t][k] > temp )
      {
        o_labels[t] = k + 1;
        temp = delta[t][k];
      }
    }
  }

  // Free up memory
  free( x_c );
  free2D( x, *i_N );
  free2D( means, *i_K );
  free3D( icovmats, *i_K, *i_p );
  free2D( delta, *i_N );
}
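/* The allocation helpers used by the QDA routines (make1D/make2D/make3D and the
   matching free2D/free3D, with dimensions passed back in on free) are defined
   elsewhere in that project.  A minimal sketch consistent with the call sites
   above -- illustrative only, not the original implementation: */
double* make1D( int n )
{
  return (double*)malloc( n * sizeof(double) );
}

double** make2D( int rows, int cols )
{
  int i;
  double** a = (double**)malloc( rows * sizeof(double*) );
  for( i = 0; i < rows; i++ )
    a[i] = make1D( cols );
  return a;
}

double*** make3D( int d1, int d2, int d3 )
{
  int i;
  double*** a = (double***)malloc( d1 * sizeof(double**) );
  for( i = 0; i < d1; i++ )
    a[i] = make2D( d2, d3 );
  return a;
}

void free2D( double** a, int rows )
{
  int i;
  for( i = 0; i < rows; i++ )
    free( a[i] );
  free( a );
}

void free3D( double*** a, int d1, int d2 )
{
  int i;
  for( i = 0; i < d1; i++ )
    free2D( a[i], d2 );
  free( a );
}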
int main()
{
  char *temp_arr = (char *)malloc(50 * sizeof(char));
  int r, c, rd, cd;
  int dim = getDim(temp_arr);
  char **square = make2D(dim);
  char **answer = make2D(dim);

  moveTemp(square, temp_arr, dim);
  fillSquare(square, dim);
  /*printSquare(square, dim);*/

  /* note: the buffer allocated above is not freed before this reassignment */
  temp_arr = (char *)malloc(50 * sizeof(char));

  while (fgets(temp_arr, 50, stdin) != NULL)
  {
    if (strlen(temp_arr) > 1)
    {
      temp_arr[strlen(temp_arr) - 1] = '\0';   /* strip the trailing newline */
      /*printf("searching for %s\n",temp_arr);*/

      /* try every starting cell (r,c) and every direction (rd,cd) */
      for (r = 0; r < dim; r++)
        for (c = 0; c < dim; c++)
          for (rd = -1; rd <= 1; rd++)
            for (cd = -1; cd <= 1; cd++)
            {
              if (find(r, c, rd, cd, dim, square, temp_arr))
              {
                add(r, c, rd, cd, dim, square, answer, temp_arr);
                /* word found: break out of all four loops */
                cd = 2;
                rd = 2;
                c = dim + 1;
                r = dim + 1;
              }
            }
    }
  }

  free(temp_arr);
  printSquare(answer, dim);
  free2D(square, dim);
  free2D(answer, dim);
  return 0;
}
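/* find() and add() are defined elsewhere in this program.  A minimal sketch of
   the matching logic find() presumably implements, given the call above
   (non-zero when the word matches starting at square[r][c] and stepping by
   (rd,cd)) -- an illustrative reconstruction, not the original helper: */
int find(int r, int c, int rd, int cd, int dim, char **square, char *word)
{
  int i, len = (int) strlen(word);
  if (rd == 0 && cd == 0)        /* a zero direction never advances */
    return 0;
  for (i = 0; i < len; i++)
  {
    int rr = r + i * rd;
    int cc = c + i * cd;
    if (rr < 0 || rr >= dim || cc < 0 || cc >= dim)   /* ran off the grid */
      return 0;
    if (square[rr][cc] != word[i])                    /* character mismatch */
      return 0;
  }
  return 1;                      /* whole word matched */
}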
int main(int argc, char **argv)
{
    int x = atoi(argv[1]);
    int y = x;
    double **A = malloc2D(x, y);
    init2D(A, x, y);
    print2DFile(A, x, y, argv[2]);
    free2D(A, x, y);
    return 0;
}
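/* malloc2D/free2D for these programs are not shown.  The MPI versions below
   scatter and gather from &A[0][0] with a count of x*y doubles, which only works
   if all rows sit in one contiguous block.  A minimal sketch of an allocator with
   that property, matching the three-argument free2D used at these call sites
   (illustrative only, not the course's actual helper): */
double **malloc2D(int rows, int cols)
{
    int i;
    double **a = malloc(rows * sizeof(double *));
    a[0] = malloc(rows * cols * sizeof(double));   /* one contiguous data block */
    for (i = 1; i < rows; i++)
        a[i] = a[0] + i * cols;                    /* row pointers into the block */
    return a;
}

void free2D(double **a, int rows, int cols)
{
    (void) rows;
    (void) cols;      /* dimensions kept only to match the call sites above */
    free(a[0]);       /* the single data block */
    free(a);          /* the row-pointer array */
}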
static void cartesian_grid_destroy (CartesianGrid * g, gboolean destroy_vertices)
{
  g_return_if_fail (g != NULL);

  if (destroy_vertices) {
    guint i, j;

    gts_allow_floating_vertices = TRUE;
    for (i = 0; i < g->nx; i++)
      for (j = 0; j < g->ny; j++)
        if (g->vertices[i][j])
          gts_object_destroy (GTS_OBJECT (g->vertices[i][j]));
    gts_allow_floating_vertices = FALSE;
  }

  free2D ((void **) g->vertices, g->nx);
  g_free (g);
}
double unbiasedest(int *ndx, int ndsize, int **counts)
{
/**
 ndx is an ndsize array containing small integers coding the pop index of each bracket (pop0 assumed)
 thus ndsize = 4  ndx = (1,1,1,1) codes (p_0-p_1)^4
 thus ndsize = 4  ndx = (1,1,2,3) codes (p_0-p_1)^2 (p_0-p_2) (p_0-p_3)

 counts[][] is an integer array containing
 counts[k][0] is the count for the variant allele for pop k
 counts[k][1] is the count for the reference allele for pop k
*/
  double xtop, xbot, yest, y;
  int popind[20];
  int popmax, j, k, n, nmax, a, t, s;
  int *tcounts;
  double **xmomest, yp;

  ivmaxmin(ndx, ndsize, &popmax, NULL);
  //printf("popmax: %d\n", popmax) ;
  ZALLOC(tcounts, popmax+1, int);
  for (j=0; j<=popmax; ++j) {
    tcounts[j] = counts[j][0] + counts[j][1];
  }

/** unbiased estimate of p_j^k */
  /* indices k and popind[j] run up to ndsize, so ndsize+1 columns are needed */
  xmomest = initarray_2Ddouble(popmax+1, ndsize+1, 0.0);
  for (j=0; j<=popmax; ++j) {
    xmomest[j][0] = 1.0;
    for (k=1; k<=ndsize; ++k) {
      xtop = ifall(counts[j][0], k);
      xbot = ifall(tcounts[j], k);
      if (xbot <= 0.1)
        xmomest[j][k] = -10000.0;
      else
        xmomest[j][k] = (double) xtop / (double) xbot;
      //printf("zz %3d %3d %9.3f\n", j, k, xmomest[j][k]) ;
    }
  }

  nmax = (1 << ndsize) - 1;
  yest = 0.0;
  //printf("nmax: %d\n", nmax) ;
  for (n=0; n<=nmax; ++n) {
    t = n;
    ivzero(popind, popmax+1);
    for (k=0; k<ndsize; ++k) {
      a = 0;
      s = t & 1;
      t = t >> 1;
      if (s==1)
        a = ndx[k];
      ++popind[a];
    }
    yp = 1.0;
    for (j=0; j<=popmax; ++j) {
      t = popind[j];
      s = 0;
      if (j>0)
        s = t % 2;  // flags sign
      y = xmomest[j][t];
      if (y < -1.0) {
        free(tcounts);
        free2D(&xmomest, popmax+1);
        return (-10000.0);
      }
      if (s==1)
        y = -y;
      yp *= y;
    }
    //printf(" %12.6f ", yp) ;
    //printimat(popind, 1, popmax+1) ;
    yest += yp;
  }

  if (fabs(yest) >= 100)
    yest = -10000;
  free(tcounts);
  free2D(&xmomest, popmax+1);
  return (yest);
}
int main (int argc, char * argv[])
{
    int rank,size;
    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&size);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);

    int X,Y,x,y,X_ext,i,j,k;
    double ** A, ** localA, l, *msg;
    X=atoi(argv[1]);
    Y=X;

    //Extend dimension X with ghost cells if X%size!=0
    if (X%size!=0)
        X_ext=X+size-X%size;
    else
        X_ext=X;

    if (rank==0) {
        //Allocate and init matrix A
        A=malloc2D(X_ext,Y);
        init2D(A,X,Y);
    }

    //Local dimensions x,y
    x=X_ext/size;
    y=Y;

    //Allocate local matrix and scatter global matrix
    localA=malloc2D(x,y);
    double * idx = NULL;          /* only meaningful at the root rank */
    if (rank==0)
        idx=&A[0][0];
    MPI_Scatter(idx,x*y,MPI_DOUBLE,&localA[0][0],x*y,MPI_DOUBLE,0,MPI_COMM_WORLD);
    if (rank==0) {
        free2D(A,X_ext,Y);
    }

    //Timers
    struct timeval ts,tf,comps,compf,comms,commf;
    double total_time=0,computation_time=0,communication_time=0;

    MPI_Barrier(MPI_COMM_WORLD);
    gettimeofday(&ts,NULL);

    /******************************************************************************
     The matrix A is distributed in contiguous blocks to the local matrices localA
     You have to use point-to-point communication routines
     Don't forget to set the timers for computation and communication!
    ******************************************************************************/

    msg = malloc(y * sizeof(double));
    int tag = 55, dest, dif, srank;
    MPI_Status status;
    MPI_Request request;

    for (k = 0; k < X - 1; k++) {
        // the owner of pivot line k satisfies x*rank <= k < x*(rank+1)
        if ((x*rank <= k) && (k < x*(rank+1))) {
            // pack the pivot row into the send buffer
            memcpy(msg, localA[k%x], y * sizeof(double));
            // send it to every rank below the owner
            for (dest = 0; dest < size; dest++) {
                if ((dest == rank) || (dest < rank))
                    continue;
                gettimeofday(&comms,NULL);
                MPI_Send(msg,y,MPI_DOUBLE,dest,tag,MPI_COMM_WORLD);
                gettimeofday(&commf,NULL);
                communication_time+=commf.tv_sec-comms.tv_sec+(commf.tv_usec-comms.tv_usec)*0.000001;
            }
        }
        else {
            // receive the pivot row from its owner; ranks above the owner
            // have no rows left to update, so they skip this step entirely
            srank = k / x;
            if ((rank < srank) || (rank == srank))
                continue;
            gettimeofday(&comms,NULL);
            MPI_Recv(msg,y,MPI_DOUBLE,srank,tag,MPI_COMM_WORLD,&status);
            gettimeofday(&commf,NULL);
            communication_time+=commf.tv_sec-comms.tv_sec+(commf.tv_usec-comms.tv_usec)*0.000001;
        }

        // eliminate column k from the local rows that lie below the pivot
        gettimeofday(&comps,NULL);
        if (k < (x*(rank+1) - 1)) {
            dif = (x*(rank+1) - 1) - k;
            if (dif > x)
                dif = x;
            for (i = x - dif; i < x; i++) {
                l = localA[i][k] / msg[k];
                for (j = k; j < y; j++)
                    localA[i][j] -= l * msg[j];
            }
        }
        gettimeofday(&compf,NULL);
        computation_time+=compf.tv_sec-comps.tv_sec+(compf.tv_usec-comps.tv_usec)*0.000001;
    }
    free(msg);

    MPI_Barrier(MPI_COMM_WORLD);
    //********************************************************************************

    gettimeofday(&tf,NULL);
    total_time=tf.tv_sec-ts.tv_sec+(tf.tv_usec-ts.tv_usec)*0.000001;

    //Gather local matrices back to the global matrix
    if (rank==0) {
        A=malloc2D(X_ext,Y);
        idx=&A[0][0];
    }
    MPI_Gather(&localA[0][0],x*y,MPI_DOUBLE,idx,x*y,MPI_DOUBLE,0,MPI_COMM_WORLD);

    double avg_total,avg_comp,avg_comm,max_total,max_comp,max_comm;
    MPI_Reduce(&total_time,&max_total,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&computation_time,&max_comp,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&communication_time,&max_comm,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&total_time,&avg_total,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    MPI_Reduce(&computation_time,&avg_comp,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    MPI_Reduce(&communication_time,&avg_comm,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    avg_total/=size;
    avg_comp/=size;
    avg_comm/=size;

    if (rank==0) {
        printf("LU-Block-p2p\tSize\t%d\tProcesses\t%d\n",X,size);
        printf("Max times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",max_total,max_comp,max_comm);
        printf("Avg times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",avg_total,avg_comp,avg_comm);
    }

    //Print triangular matrix U to file
    if (rank==0) {
        char * filename="output_block_p2p";
        print2DFile(A,X,Y,filename);
    }

    MPI_Finalize();
    return 0;
}
// Trains a QDA classifier
//
// Inputs:
//   i_x            - N x p data matrix of N samples in p dimensions
//   i_y            - N x 1 vector of labels
//   i_N            - number of samples
//   i_p            - data dimensionality
//   i_K            - number of classes
//   i_cov_reg      - covariance regularization weight (shrinkage toward the identity)
//   i_debug_output - level of debug output
//
// Outputs:
//   o_priors   - K x 1 vector of Bayesian priors, computed as the fraction of points from each class
//   o_means    - K x p matrix of means estimated from the data
//   o_covmats  - K x p x p array of covariance matrices estimated from the data
//   o_icovmats - K x p x p array of inverse covariance matrices
//   o_bias     - K x 1 vector of bias terms for discriminant function computations
//
// Memory allocation requirements for outputs:
//   o_priors must be of length K
//   o_means must be of length K*p
//   o_covmats must be of length K*p*p
//   o_icovmats must be of length K*p*p
//   o_bias must be of length K
//   o_status must be of length 1
//   o_info must be of length 1
//
// Meaning of o_status:
//   0 - Everything is OK
//   1 - The covariance matrix for class *o_info is singular
void QDAtrain( double* i_x, int* i_y, int* i_N, int* i_p, int* i_K, double* i_cov_reg,
               int* i_debug_output, double* o_priors, double* o_means, double* o_covmats,
               double* o_icovmats, double* o_bias, int* o_status, int* o_info )
{
  if( *i_debug_output > 0 )
    printf( "Currently running C version of the QDA.train code\n" );

  // Create local storage
  int i, j, t, k;
  int* Nk = (int*)malloc( *i_K * sizeof(int) );
  double** x = make2D( *i_N, *i_p );
  double** means = make2D( *i_K, *i_p );
  double*** covmats = make3D( *i_K, *i_p, *i_p );
  double*** icovmats = make3D( *i_K, *i_p, *i_p );

  // Copy and reshape the inputs
  cp_vec_mat( *i_N, *i_p, i_x, x );

  // Init the counts and means to 0
  for( k = 0; k < *i_K; k++ )
  {
    Nk[k] = 0;
    for( i = 0; i < *i_p; i++ )
      means[k][i] = 0.0;
  }

  // Init the covariance matrices to 0
  for( k = 0; k < *i_K; k++ )
    for( i = 0; i < *i_p; i++ )
      for( j = 0; j < *i_p; j++ )
        covmats[k][i][j] = 0.0;

  // Traverse one sample at a time to compute mean contributions
  for( t = 0; t < *i_N; t++ )
  {
    k = i_y[t] - 1;   // account for 0-based index

    // Count the sample
    Nk[k]++;

    // Add contribution to the mean
    add_vec_vec( *i_p, 1.0, 1.0, means[k], x[t], means[k] );
  }

  // Compute the means and the priors
  for( k = 0; k < *i_K; k++ )
  {
    o_priors[k] = ((double)Nk[k]) / ((double)*i_N);
    mult_const_vec( *i_p, 1.0 / ((double)Nk[k]), means[k] );
  }

  // Subtract the means from the samples and compute the covariance matrices
  for( t = 0; t < *i_N; t++ )
  {
    k = i_y[t] - 1;   // account for 0-based index
    add_vec_vec( *i_p, 1.0, -1.0, x[t], means[k], x[t] );
    for( i = 0; i < *i_p; i++ )
      for( j = 0; j < *i_p; j++ )
        covmats[k][i][j] += x[t][i] * x[t][j];
  }
  for( k = 0; k < *i_K; k++ )
    for( i = 0; i < *i_p; i++ )
      for( j = 0; j < *i_p; j++ )
        covmats[k][i][j] /= (Nk[k] - 1);

  // Regularize the covariance matrices
  if( *i_cov_reg > 0 )
  {
    for( k = 0; k < *i_K; k++ )
      for( i = 0; i < *i_p; i++ )
        for( j = 0; j < *i_p; j++ )
        {
          covmats[k][i][j] *= (1 - *i_cov_reg);
          if( i == j )
            covmats[k][i][j] += *i_cov_reg;
        }
  }

  // Compute the bias terms
  for( k = 0; k < *i_K; k++ )
    o_bias[k] = log( o_priors[k] ) - 0.5 * covmat_logdet( *i_p, covmats[k] );

  // Compute the covariance matrix inverses
  for( k = 0; k < *i_K; k++ )
  {
    *o_status = covmat_inverse( *i_p, covmats[k], icovmats[k] );
    if( *o_status != 0 )
    {
      // release local storage before the early return
      free( Nk );
      free2D( x, *i_N );
      free2D( means, *i_K );
      free3D( covmats, *i_K, *i_p );
      free3D( icovmats, *i_K, *i_p );
      *o_info = k;
      return;
    }
  }

  // Copy the results from local storage to outputs
  cp_mat_vec( *i_K, *i_p, means, o_means );
  cp_3D_vec( *i_K, *i_p, *i_p, covmats, o_covmats );
  cp_3D_vec( *i_K, *i_p, *i_p, icovmats, o_icovmats );

  // Free memory
  free( Nk );
  free2D( x, *i_N );
  free2D( means, *i_K );
  free3D( covmats, *i_K, *i_p );
  free3D( icovmats, *i_K, *i_p );
}
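/* A minimal driver sketch showing how QDAtrain and QDAtest fit together.  The
   pointer-per-scalar calling convention suggests an R .C() interface; the toy
   numbers below are made up, and the row-major layout assumed for i_x depends on
   how cp_vec_mat (not shown) reshapes the input.  Illustrative only: */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
  int N = 6, p = 2, K = 2, debug = 0, status = 0, info = 0;
  double cov_reg = 0.1;   /* mild shrinkage toward the identity */
  double x[12] = { 0.0, 0.1,  0.2, 0.0,  0.1, 0.2,     /* class 1, near the origin */
                   2.0, 2.1,  2.2, 2.0,  2.1, 2.2 };   /* class 2, near (2,2) */
  int y[6] = { 1, 1, 1, 2, 2, 2 };

  double priors[2], means[4], covmats[8], icovmats[8], bias[2];
  int labels[6];

  QDAtrain( x, y, &N, &p, &K, &cov_reg, &debug,
            priors, means, covmats, icovmats, bias, &status, &info );
  if( status != 0 )
  {
    fprintf( stderr, "covariance matrix for class %d is singular\n", info + 1 );
    return 1;
  }

  QDAtest( x, means, icovmats, bias, &N, &p, &K, labels );
  for( int t = 0; t < N; t++ )
    printf( "sample %d -> class %d\n", t, labels[t] );
  return 0;
}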
int main (int argc, char * argv[])
{
    int rank,size;
    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&size);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);

    int X,Y,x,y,X_ext,i;
    double **A, **localA;
    X=atoi(argv[1]);
    Y=X;

    //Extend dimension X with ghost cells if X%size!=0
    if (X%size!=0)
        X_ext=X+size-X%size;
    else
        X_ext=X;

    if (rank==0) {
        //Allocate and init matrix A
        A=malloc2D(X_ext,Y);
        init2D(A,X,Y);
    }

    //Local dimensions x,y
    x=X_ext/size;
    y=Y;

    //Allocate local matrix and scatter global matrix
    localA=malloc2D(x,y);
    double * idx = NULL;          /* only meaningful at the root rank */
    for (i=0;i<x;i++) {
        if (rank==0)
            idx=&A[i*size][0];
        MPI_Scatter(idx,Y,MPI_DOUBLE,&localA[i][0],y,MPI_DOUBLE,0,MPI_COMM_WORLD);
    }
    if (rank==0)
        free2D(A,X_ext,Y);

    //Timers
    struct timeval ts,tf,comps,compf,comms,commf;
    double total_time=0,computation_time=0,communication_time=0;

    MPI_Barrier(MPI_COMM_WORLD);
    gettimeofday(&ts,NULL);

    /******************************************************************************
     The matrix A is distributed in a round-robin fashion to the local matrices localA
     You have to use point-to-point communication routines.
     Don't forget the timers for computation and communication!
    ******************************************************************************/

    int line_index, line_owner;
    int k, start;
    double *k_row, *temp;
    MPI_Status status;

    temp = malloc(y * sizeof(*temp));
    // k_row = malloc(y * sizeof(*k_row));

    /* as in the other cyclic version, locate the owner of pivot line k */
    for (k=0; k<y-1; k++){
        line_owner = k % size;
        line_index = k / size;

        /* first local row that lies strictly below the pivot */
        if (rank <= line_owner)
            start = k / size + 1;
        else
            start = k / size;

        if (rank == line_owner)
            k_row = localA[line_index];
        else
            k_row = temp;

        /* set communication timer */
        gettimeofday(&comms, NULL);

        /* COMM */
        /* ring-forwarding alternative, left commented out: */
        // if (rank != line_owner){
        //     if (rank == 0)
        //         MPI_Recv( k_row, y, MPI_DOUBLE, size-1, MPI_ANY_SOURCE, MPI_COMM_WORLD, &status);
        //     else
        //         MPI_Recv( k_row, y, MPI_DOUBLE, rank-1, MPI_ANY_SOURCE, MPI_COMM_WORLD, &status);
        // }
        //
        // /* the rank just before the line_owner must not send */
        // if (rank != line_owner -1){
        //     /* the last rank sends to the first one, unless the first one is the line_owner */
        //     if (rank == size-1) {
        //         if (line_owner != 0)
        //             MPI_Send( k_row, y, MPI_DOUBLE, 0, rank, MPI_COMM_WORLD);
        //     }
        //     else
        //         MPI_Send(k_row, y, MPI_DOUBLE, rank+1, rank, MPI_COMM_WORLD);
        // }

        /* the line_owner sends the pivot row to everyone else, and everyone else receives it */
        if (rank == line_owner){
            for (i=0; i<size; i++)
                if (i != line_owner)
                    MPI_Send( k_row, y, MPI_DOUBLE, i, line_owner, MPI_COMM_WORLD);
        }
        else
            MPI_Recv(k_row, y, MPI_DOUBLE, line_owner, line_owner, MPI_COMM_WORLD, &status);

        /* stop communication timer */
        gettimeofday(&commf, NULL);
        communication_time += commf.tv_sec - comms.tv_sec + (commf.tv_usec - comms.tv_usec)*0.000001;

        /* set computation timer */
        gettimeofday(&comps, NULL);

        /* Compute */
        go_to_work( localA, k_row, x, y, rank, start, k );

        /* stop computation timer */
        gettimeofday(&compf, NULL);
        computation_time += compf.tv_sec - comps.tv_sec + (compf.tv_usec - comps.tv_usec)*0.000001;
    }

    gettimeofday(&tf,NULL);
    total_time=tf.tv_sec-ts.tv_sec+(tf.tv_usec-ts.tv_usec)*0.000001;
    free(temp);

    //Gather local matrices back to the global matrix
    if (rank==0)
        A=malloc2D(X_ext,Y);
    for (i=0;i<x;i++) {
        if (rank==0)
            idx=&A[i*size][0];
        MPI_Gather(&localA[i][0],y,MPI_DOUBLE,idx,Y,MPI_DOUBLE,0,MPI_COMM_WORLD);
    }

    double avg_total,avg_comp,avg_comm,max_total,max_comp,max_comm;
    MPI_Reduce(&total_time,&max_total,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&computation_time,&max_comp,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&communication_time,&max_comm,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&total_time,&avg_total,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    MPI_Reduce(&computation_time,&avg_comp,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    MPI_Reduce(&communication_time,&avg_comm,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    avg_total/=size;
    avg_comp/=size;
    avg_comm/=size;

    if (rank==0) {
        printf("LU-Cyclic-p2p\tSize\t%d\tProcesses\t%d\n",X,size);
        printf("Max times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",max_total,max_comp,max_comm);
        printf("Avg times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",avg_total,avg_comp,avg_comm);
    }

    //Print triangular matrix U to file
    if (rank==0) {
        char * filename="output_cyclic_p2p";
        print2DFile(A,X,Y,filename);
    }

    MPI_Finalize();
    return 0;
}
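/* go_to_work() is defined elsewhere.  The block-distributed program earlier in
   this section performs the same elimination inline (l = localA[i][k]/msg[k],
   then localA[i][j] -= l*msg[j]), so a sketch consistent with the cyclic call
   go_to_work(localA, k_row, x, y, rank, start, k) could look like this --
   an illustrative reconstruction, not the original helper: */
void go_to_work (double **localA, double *k_row, int x, int y,
                 int rank, int start, int k)
{
    int i, j;
    double l;
    (void) rank;                       /* not needed once start is known */
    for (i = start; i < x; i++) {      /* local rows strictly below the pivot */
        l = localA[i][k] / k_row[k];   /* elimination multiplier */
        for (j = k; j < y; j++)
            localA[i][j] -= l * k_row[j];
    }
}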
void printautoctable (FILE * outto, int numautoc, struct autoc **ac,
                      char **ac_str, const char *printacstring, int pstep)
{
  int i, j;
  double **acvals;
  int numacprint;
  char numstr[20];

  /* 4/9/09  change output for stdout so it only does tables for L[P] and split
     times, not tmrcas.  The total number of autoc tables for stdout will then
     be npops */
  //numacprint = (outto == stdout) ? IMIN (numautoc, IFSTDOUTMAXSHOW) : numautoc;
  if (strcmp (printacstring, "Population Assignment Autocorrelations and Effective Sample Size Estimates"))
  {
    /* not the population-assignment table */
    numacprint = (outto == stdout) ? IMIN (npops, IFSTDOUTMAXSHOW) : numautoc;
  }
  else
  {
    numacprint = (outto == stdout) ? IMIN (numautoc, IFSTDOUTMAXSHOW) : numautoc;
  }
  acvals = alloc2Ddouble (numacprint, AUTOCTERMS);
  fprintf (outto, "\n%s\n", printacstring);
  for (i = 0; i < (int) strlen (printacstring); i++)
    fprintf (outto, "-");
  fprintf (outto, "\n");
  fprintf (outto, " # Steps Between Values and Autocorrelation Estimates \n");
  fprintf (outto, "\tSteps ");
  for (j = 0; j < numacprint; j++)
    fprintf (outto, "\t %s", ac_str[j]);
  fprintf (outto, "\n");
  for (i = 0; i < AUTOCTERMS; i++)
  {
    if (ac[0][i].cov.n > (AUTOCCUTOFF / AUTOCSTEPSCALAR))
    {
      if (((float) autoc_checkstep[i] * AUTOCSTEPSCALAR) > 1e4)
      {
        sprintf (&numstr[0], "%.1e", (float) autoc_checkstep[i] * AUTOCSTEPSCALAR);
        fprintf (outto, "\t%s", shorten_e_num (&numstr[0]));
        //fprintf (outto, "\t%.1e", (float) autoc_checkstep[i]*AUTOCSTEPSCALAR);
      }
      else
      {
        fprintf (outto, "\t%d", autoc_checkstep[i] * AUTOCSTEPSCALAR);
      }
      for (j = 0; j < numacprint; j++)
      {
        acvals[j][i] = printautocvalue (outto, &ac[j][i]);
      }
      fprintf (outto, "\n");
    }
  }
  fprintf (outto, "\tESS");
  /* integrate over autocorrelation values > 0.03 */
  for (j = 0; j < numacprint; j++)
    integrate_autoc (outto, ac[j], acvals[j], pstep);
  fprintf (outto, "\n");
  free2D ((void **) acvals, numacprint);
}                               //printautoctable
int main (int argc, char * argv[])
{
    int rank,size;
    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&size);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);

    int X,Y,x,y,X_ext,i;
    double ** A, ** localA;
    X=atoi(argv[1]);
    Y=X;

    //Extend dimension X with ghost cells if X%size!=0
    if (X%size!=0)
        X_ext=X+size-X%size;
    else
        X_ext=X;

    if (rank==0) {
        //Allocate and init matrix A
        A=malloc2D(X_ext,Y);
        init2D(A,X,Y);
    }

    //Local dimensions x,y
    x=X_ext/size;
    y=Y;

    //Allocate local matrix and scatter global matrix
    localA=malloc2D(x,y);
    double * idx = NULL;          /* only meaningful at the root rank */
    if (rank==0)
        idx=&A[0][0];
    MPI_Scatter(idx,x*y,MPI_DOUBLE,&localA[0][0],x*y,MPI_DOUBLE,0,MPI_COMM_WORLD);
    if (rank==0) {
        free2D(A,X_ext,Y);
    }

    //Timers
    struct timeval ts,tf,comps,compf,comms,commf;
    double total_time=0,computation_time=0,communication_time=0;

    MPI_Barrier(MPI_COMM_WORLD);
    gettimeofday(&ts,NULL);

    /******************************************************************************
     The matrix A is distributed in contiguous blocks to the local matrices localA
     You have to use point-to-point communication routines
     Don't forget to set the timers for computation and communication!
    ******************************************************************************/

    int line_index, line_owner;
    int k, start;
    MPI_Status status;
    double *k_row, *temp;

    temp = malloc(y * sizeof(*temp));

    for (k=0; k<y-1; k++){
        start = 0;
        line_owner = k / x;
        line_index = k % x;

        if (rank == line_owner){
            start = line_index+1;      /* the owner only updates rows below the pivot */
            k_row = localA[line_index];
        }
        else
            k_row = temp;

        /* set communication timer */
        gettimeofday(&comms, NULL);

        /* the line_owner sends the pivot row k_row to everyone else,
           and all the other ranks receive it */
        if (rank == line_owner){
            for (i=0; i<size; i++)
                if (i != line_owner)
                    MPI_Send( k_row, y, MPI_DOUBLE, i, line_owner, MPI_COMM_WORLD);
        }
        else
            MPI_Recv(k_row, y, MPI_DOUBLE, line_owner, line_owner, MPI_COMM_WORLD, &status);

        /* stop communication timer */
        gettimeofday(&commf, NULL);
        communication_time += commf.tv_sec - comms.tv_sec + (commf.tv_usec - comms.tv_usec)*0.000001;

        /* set computation timer */
        gettimeofday(&comps, NULL);

        /* Compute */
        go_to_work( localA, k_row, x, y, rank, line_owner, start, k );

        /* stop computation timer */
        gettimeofday(&compf, NULL);
        computation_time += compf.tv_sec - comps.tv_sec + (compf.tv_usec - comps.tv_usec)*0.000001;
    }

    gettimeofday(&tf,NULL);
    total_time=tf.tv_sec-ts.tv_sec+(tf.tv_usec-ts.tv_usec)*0.000001;
    free(temp);

    //Gather local matrices back to the global matrix
    if (rank==0) {
        A=malloc2D(X_ext,Y);
        idx=&A[0][0];
    }
    MPI_Gather(&localA[0][0],x*y,MPI_DOUBLE,idx,x*y,MPI_DOUBLE,0,MPI_COMM_WORLD);

    double avg_total,avg_comp,avg_comm,max_total,max_comp,max_comm;
    MPI_Reduce(&total_time,&max_total,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&computation_time,&max_comp,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&communication_time,&max_comm,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&total_time,&avg_total,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    MPI_Reduce(&computation_time,&avg_comp,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    MPI_Reduce(&communication_time,&avg_comm,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    avg_total/=size;
    avg_comp/=size;
    avg_comm/=size;

    if (rank==0) {
        printf("LU-Block-p2p\tSize\t%d\tProcesses\t%d\n",X,size);
        printf("Max times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",max_total,max_comp,max_comm);
        printf("Avg times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",avg_total,avg_comp,avg_comm);
    }

    //Print triangular matrix U to file
    if (rank==0) {
        char * filename="output_block_p2p";
        print2DFile(A,X,Y,filename);
    }

    MPI_Finalize();
    return 0;
}