int main(int argc, char *argv[])
{
    unsigned char str[154];
    unsigned int arr[] = {9,2,5,8,4,2,4,1,6,9,1,8,9,9,6,1,5,7,0,7,7,4,3,7,6,3,9,5,4,2,3,0,4,4,1,5,3,3,7,2,3,3,7,0,9,4,5,2,8,4,6,
                          2,1,3,4,1,4,2,6,0,8,5,1,7,3,1,4,4,7,0,5,3,4,4,8,9,1,1,9,8,3,5,1,8,3,4,4,8,3,2,8,1,2,8,7,4,1,8,1,8,0,4,
                          8,4,2,4,4,5,4,9,1,8,3,4,9,5,6,3,3,1,4,6,4,1,0,2,0,2,5,1,4,8,5,9,9,6,9,4,0,3,6,5,5,9,5,4,2,2,3,7,8,5,9,7};
    long i;
    double t1, t2, Itime;
    int provided;

    /* Allocation */
    v1 = (vector *) malloc (VLEN * sizeof (vector));
    v2 = (vector *) malloc (VLEN * sizeof (vector));
    v3 = (vector *) malloc (VLEN * sizeof (vector));
    fin_sum = (mp_limb_t *) malloc ((2*LIMBS+1) * sizeof (mp_limb_t));
    result  = (mp_limb_t *) malloc ((2*LIMBS+1) * sizeof (mp_limb_t));
    q = (mp_limb_t *) malloc (LIMBS * sizeof (mp_limb_t));

    MPI_Init_thread (&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    MPI_Comm_rank (MPI_COMM_WORLD, &id);
    MPI_Comm_size (MPI_COMM_WORLD, &p);

    /* Derived datatypes: full products (2*LIMBS+1 limbs) and operands (LIMBS limbs) */
    MPI_Type_contiguous (2*LIMBS+1, MPI_UNSIGNED_LONG_LONG, &mpntype0);
    MPI_Type_commit (&mpntype0);
    MPI_Type_contiguous (LIMBS, MPI_UNSIGNED_LONG_LONG, &mpntype1);
    MPI_Type_commit (&mpntype1);
    MPI_Op_create ((MPI_User_function *) addmpn, 1, &mpn_sum);

    /* Build the modulus from its decimal digits */
    for (i=0; i<154; ++i)
        str[i] = (unsigned char) arr[i];
    mpn_set_str (q, str, 154, 10);
    //if (!id) gmp_printf ("Modulus: %Nd\n", q, LIMBS);
    MPI_Barrier (MPI_COMM_WORLD);

    /* Setting limits for 2 MPI nodes */
    VOffset = BLOCK_LOW(id,p,VLEN);
    VChunk  = BLOCK_SIZE(id,p,VLEN);

    /* Setting limits for NCORES-1 threads */
    for (i=0; i<NCORES-1; ++i) {
        VStart[i] = VOffset + BLOCK_LOW(i,NCORES-1,VChunk);
        VEnd[i]   = VOffset + BLOCK_HIGH(i,NCORES-1,VChunk);
    }

    for (i=0; i<VLEN; ++i)
        mpn_random (v1[i], LIMBS);
    for (i=0; i<VLEN; ++i)
        mpn_random (v2[i], LIMBS);
    for (i=BLOCK_LOW(id,p,VLEN); i<=BLOCK_HIGH(id,p,VLEN); ++i)
        mpn_random (v3[i], LIMBS);
    MPI_Barrier (MPI_COMM_WORLD);

    t1 = MPI_Wtime ();
    for (i=0; i<NCORES; ++i)
        pthread_create (&threads[i], &attr, VectMul, (void *) i);
    for (i=0; i<NCORES; ++i)
        pthread_join (threads[i], NULL);
    t2 = MPI_Wtime ();
    Itime = t2 - t1;
    if (!id) printf ("Total time taken: %lf\n", Itime);

    if (!id) gmp_printf ("Result: %Nd\n", cnum, LIMBS);
    MPI_Op_free (&mpn_sum);
    MPI_Request_free (&Rrqst);
    MPI_Request_free (&Srqst);
    MPI_Finalize ();
    return 0;
}
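The program above registers a user-defined reduction `addmpn` over the (2*LIMBS+1)-limb datatype, but its body is not shown. A minimal sketch of what such a function could look like, assuming GMP's `mpn_add_n` and assuming the extra top limb absorbs carries; the `LIMBS` value below is illustrative, not from the source:

#include <gmp.h>
#include <mpi.h>

#define LIMBS 8  /* illustrative; the real value comes from the program's headers */

/* Sketch of a user-defined MPI reduction that sums multi-limb GMP integers.
   Each element of the derived datatype is 2*LIMBS+1 limbs; the final carry is
   dropped, which is safe only while the top limb has headroom. */
void addmpn (void *invec, void *inoutvec, int *len, MPI_Datatype *dtype)
{
    mp_limb_t *in    = (mp_limb_t *) invec;
    mp_limb_t *inout = (mp_limb_t *) inoutvec;
    int i;
    (void) dtype;
    for (i = 0; i < *len; ++i) {
        /* inout[i] += in[i], limb-wise with carry propagation */
        mpn_add_n (inout + (size_t) i * (2*LIMBS+1),
                   inout + (size_t) i * (2*LIMBS+1),
                   in    + (size_t) i * (2*LIMBS+1),
                   2*LIMBS+1);
    }
}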
int main (int argc, char *argv[])
{
    // define variables
    int n;
    double elapsed_time;
    int p;
    int id;
    int low_value;
    int high_value;
    int size;
    int proc0_size;
    char *marked;
    int index;
    int prime;
    int first;
    int count;
    int global_count;
    int i;

    // Initialize MPI
    MPI_Init (&argc, &argv);
    MPI_Barrier (MPI_COMM_WORLD);
    elapsed_time = -MPI_Wtime();
    MPI_Comm_rank (MPI_COMM_WORLD, &id);
    MPI_Comm_size (MPI_COMM_WORLD, &p);

    // Check for proper command line parameters, must include N
    if (argc != 2) {
        if (!id) printf ("Command line: %s <m>\n", argv[0]);
        MPI_Finalize();
        exit (1);
    }

    // Convert parameter string to integer.
    // N represents the number up to which we need to calculate primes.
    n = atoi (argv[1]);

    // Low and high values for each processor
    low_value = 2 + BLOCK_LOW(id,p,n-1);
    high_value = 2 + BLOCK_HIGH(id,p,n-1);
    size = BLOCK_SIZE(id,p,n-1);

    // The largest prime needed for sieving is sqrt(n), so the first processor
    // holds all the sieving primes as long as p is small enough. Check that we
    // don't have more processors than that allows.
    proc0_size = (n-1)/p;
    if ((2 + proc0_size) < (int) sqrt ((double) n)) {
        if (!id) printf ("Too many processes\n");
        MPI_Finalize();
        exit (1);
    }

    // Allocate memory for the block; error out if unable to
    marked = (char *) malloc (size);
    if (marked == NULL) {
        printf ("Cannot allocate enough memory\n");
        MPI_Finalize();
        exit (1);
    }

    /* Begin Sieve of Eratosthenes Algorithm */

    // First fill marked[] with zero/false for all items in the block
    for (i = 0; i < size; i++)
        marked[i] = 0;
    if (!id) index = 0;
    // first prime is 2
    prime = 2;
    do {
        if (prime * prime > low_value)
            first = prime * prime - low_value;
        else {
            if (!(low_value % prime)) first = 0;
            else first = prime - (low_value % prime);
        }
        // step by prime, marking the non-primes with 1, or true
        for (i = first; i < size; i += prime)
            marked[i] = 1;
        if (!id) {
            while (marked[++index]);
            prime = index + 2;
        }
        MPI_Bcast (&prime, 1, MPI_INT, 0, MPI_COMM_WORLD);
    } while (prime * prime <= n);

    /* End Sieve of Eratosthenes Algorithm */

    /* Begin count of primes */
    count = 0;
    // for all elements in the block, count the unmarked (prime) ones
    for (i = 0; i < size; i++)
        if (!marked[i]) count++;

    // Sum the count of primes from each process
    MPI_Reduce (&count, &global_count, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    elapsed_time += MPI_Wtime();

    // print results on the main processor
    if (!id) {
        printf ("%d primes are less than or equal to %d\n", global_count, n);
        printf ("Total elapsed time: %10.6f\n", elapsed_time);
    }
    MPI_Finalize();
    return 0;
}
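The heart of the block decomposition above is computing `first`, the offset of the first multiple of `prime` inside the local block. The same arithmetic, pulled out into a standalone helper for clarity (the function name is mine, not from the program):

/* Offset, within a block starting at low_value, of the first multiple of
   prime that is >= prime*prime (smaller multiples were already struck out
   by smaller primes). Mirrors the in-loop computation above. */
int first_multiple_offset (int prime, int low_value)
{
    if ((long) prime * prime > low_value)
        return prime * prime - low_value;
    if (low_value % prime == 0)
        return 0;
    return prime - (low_value % prime);
}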
int BLOCK_SIZE(int id, int p, int n)
{
    return BLOCK_HIGH(id,p,n) - BLOCK_LOW(id,p,n) + 1;
}
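Every snippet in this collection leans on the same block-decomposition helpers, but only BLOCK_SIZE is shown. The classic Quinn-style macro forms consistent with that function, stated here as an assumption about the missing header:

/* Assumed block-decomposition macros: n items over p processes, with process
   id owning a contiguous index range; block sizes differ by at most 1. */
#define BLOCK_LOW(id, p, n)  ((id) * (n) / (p))
#define BLOCK_HIGH(id, p, n) (BLOCK_LOW((id) + 1, (p), (n)) - 1)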
/* the optimization loop */
int main(int argc, char **argv)
{
    cmaes_t evo;                 /* a CMA-ES type struct or "object" */
    double *arFunvals, *xfinal, *const *pop;
    int i, j;
    int numberDipoles;
    int id;                      /* rank */
    int p;                       /* number of processors */
    double elapsed_time;         /* time from beginning */
    double bestValue;
    int lambda;
    int maxLambda;
    int *sendCnts;               /* for MPI_Alltoallv for arFunVals */
    int *sdispls;                /* for MPI_Alltoallv for arFunVals */
    int *recvCnts;               /* for MPI_Alltoallv for arFunVals */
    int *rdispls;                /* for MPI_Alltoallv for arFunVals */
    int *sendCntsPop;            /* for MPI_Alltoallv for pop */
    int *sdisplsPop;             /* for MPI_Alltoallv for pop */
    int *recvCntsPop;            /* for MPI_Alltoallv for pop */
    int *rdisplsPop;             /* for MPI_Alltoallv for pop */
    int canTerminate;
    int canTerminateBuffer;

    /* Start MPI */
    MPI_Init(&argc, &argv);
    MPI_Barrier(MPI_COMM_WORLD);
    elapsed_time = -MPI_Wtime();        /* set initial time */
    MPI_Comm_rank(MPI_COMM_WORLD, &id); /* set id */
    MPI_Comm_size(MPI_COMM_WORLD, &p);  /* set p */

    for (i = 0; i < 32; i++) {
        observations[i] /= 1000.0;
    }

    /* Set number of dipoles, either first argument or default value of 2. */
    numberDipoles = 2;
    if (argc >= 2) {
        numberDipoles = atoi(argv[1]);
    }
    /* Set lambda based on entry, default of 40. */
    maxLambda = 40;
    if (argc >= 3) {
        maxLambda = atoi(argv[2]);
    }
    if (id == 0) {
        printf("Dipoles:%d MaxLambda:%d\n", numberDipoles, maxLambda);
    }

    /* Allocate lambda pieces to each processor, based on the size of maxLambda
       and the number of processors. */
    lambda = BLOCK_SIZE(id, p, maxLambda);
    printf("Id:%d Lambda:%d\n", id, lambda);

    /* Set up send and receive buffers for function evaluations and the
       populations that produced those evaluations. */
    sendCnts = malloc(p * sizeof(int));
    sdispls = malloc(p * sizeof(int));
    recvCnts = malloc(p * sizeof(int));
    rdispls = malloc(p * sizeof(int));
    sendCntsPop = malloc(p * sizeof(int));
    sdisplsPop = malloc(p * sizeof(int));
    recvCntsPop = malloc(p * sizeof(int));
    rdisplsPop = malloc(p * sizeof(int));
    for (i = 0; i < p; i++) {
        sendCnts[i] = lambda;                      /* same for all others */
        sdispls[i]  = BLOCK_LOW(id, p, maxLambda); /* same for all others */
        recvCnts[i] = BLOCK_SIZE(i, p, maxLambda); /* depends on which we receive from */
        rdispls[i]  = BLOCK_LOW(i, p, maxLambda);
        sendCntsPop[i] = lambda * (numberDipoles*6 + 2);                     /* same for all others */
        sdisplsPop[i]  = BLOCK_LOW(id, p, maxLambda) * (numberDipoles*6 + 2); /* same for all others */
        recvCntsPop[i] = BLOCK_SIZE(i, p, maxLambda) * (numberDipoles*6 + 2); /* depends on which we receive from */
        rdisplsPop[i]  = BLOCK_LOW(i, p, maxLambda) * (numberDipoles*6 + 2);
    }
    for (i = 0; i < p; i++) {
        printf("Id: %d recvCnts[%d]=%d\n", id, i, recvCnts[i]);
        printf("Id: %d rdispls[%d]=%d\n", id, i, rdispls[i]);
        printf("Id: %d recvCntsPop[%d]=%d\n", id, i, recvCntsPop[i]);
        printf("Id: %d rdisplsPop[%d]=%d\n", id, i, rdisplsPop[i]);
    }

    /* Initialize everything into the struct evo, 0 means default */
    //arFunvals = cmaes_init(&evo, 0, NULL, NULL, 0, 0, "initials.par");
    /* maxLambda is passed so every process has enough space to store the results */
    arFunvals = reinit(&evo, maxLambda, numberDipoles);
    //outputCMAES_t(evo,1);
    resetSignals(&evo, numberDipoles); /* write header and initial values */

    /* Reset the seed value per processor (so they don't all come out the same!) */
    evo.sp.seed = evo.sp.seed * (id + 1) / p;
    printf("proc: %d seed: %d\n", id, evo.sp.seed);
    //outputCMAES_t(evo,0);
    //printf("%s\n", cmaes_SayHello(&evo));

    evo.sp.lambda = lambda;
    canTerminate = 0;

    /* Iterate until a stop criterion holds */
    while (!canTerminate) {
        /* generate lambda new search points, sample population */
        pop = cmaes_SamplePopulation(&evo); /* do not change content of pop */

        /* Here you may resample each solution point pop[i] until it becomes
           feasible, e.g. for box constraints (variable boundaries). The
           function is_feasible(...) needs to be user-defined. Assumptions:
           the feasible domain is convex, the optimum is not on (or very close
           to) the domain boundary, initialX is feasible and
           initialStandardDeviations are sufficiently small to prevent
           quasi-infinite looping. */
        for (i = 0; i < lambda; ++i) {
            while (!is_feasible(evo.rgrgx[i], (int) cmaes_Get(&evo, "dim"))) {
                cmaes_ReSampleSingle(&evo, i);
            }
        }

        /* Copy the locally sampled points into this process's slot of the
           global population. */
        for (i = 0; i < lambda; i++) {
            for (j = 0; j < (6*numberDipoles) + 2; j++) {
                evo.rgrgx[BLOCK_LOW(id, p, maxLambda) + i][j] = evo.rgrgx[i][j];
            }
        }

        /* evaluate the new search points using fitfun from above */
        for (i = BLOCK_LOW(id, p, maxLambda); i <= BLOCK_HIGH(id, p, maxLambda); ++i) {
            arFunvals[i] = fitfun(evo.rgrgx[i], (int) cmaes_Get(&evo, "dim"));
        }

        /* Now communicate the arFunvals around.
           (Note: the send and receive buffers alias here; MPI formally
           requires distinct buffers or MPI_IN_PLACE.) */
        MPI_Alltoallv(arFunvals, sendCnts, sdispls, MPI_DOUBLE,
                      arFunvals, recvCnts, rdispls, MPI_DOUBLE, MPI_COMM_WORLD);
        /* Now communicate the populations being looked at around */
        MPI_Alltoallv(&evo.rgrgx[0][0], sendCntsPop, sdisplsPop, MPI_DOUBLE,
                      &evo.rgrgx[0][0], recvCntsPop, rdisplsPop, MPI_DOUBLE, MPI_COMM_WORLD);

        /* update the search distribution used for cmaes_SampleDistribution() */
        cmaes_UpdateDistribution(&evo, arFunvals);

        /* Test whether this process can terminate. */
        canTerminate = (cmaes_TestForTermination(&evo) != NULL);
        if (canTerminate) {
            printf("id:%d can terminate for reason:%s\n", id, cmaes_TestForTermination(&evo));
        }
        /* Get the max; if any are >0, then someone has terminated. */
        MPI_Allreduce(&canTerminate, &canTerminateBuffer, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
        canTerminate = canTerminateBuffer; /* reset so the loop will exit */

        /* read instructions for printing output or changing termination conditions */
        //cmaes_ReadSignals(&evo, "signals.par");
        //fflush(stdout); /* useful in MinGW */
    }
    //printf("Stop:\n%s\n", cmaes_TestForTermination(&evo)); /* print termination reason */
    //cmaes_WriteToFile(&evo, "all", "allcmaes.dat");        /* write final results */

    elapsed_time += MPI_Wtime();

    /* get best estimator for the optimum, xmean */
    xfinal = cmaes_GetNew(&evo, "xmean"); /* "xbestever" might be used as well */
    bestValue = fitfun(xfinal, (int) cmaes_Get(&evo, "dim"));
    printf("Processor:%d has last mean of:%lf elapsedTime:%lf\n", id, bestValue, elapsed_time);
    for (i = 0; i < 6*numberDipoles; i++) {
        printf("(%d:%d:%lf)\n", id, i, xfinal[i]);
    }
    //cmaes_exit(&evo); /* release memory */

    /* do something with final solution and finally release memory */
    free(xfinal);
    free(sendCnts);
    free(sdispls);
    free(recvCnts);
    free(rdispls);
    free(sendCntsPop);
    free(sdisplsPop);
    free(recvCntsPop);
    free(rdisplsPop);
    MPI_Finalize();
    return 0;
}
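The resampling loop above assumes a user-supplied `is_feasible`, which the snippet does not define. A minimal box-constraint sketch; the bounds are illustrative placeholders, not values from the source:

/* Hypothetical box-constraint feasibility check for CMA-ES resampling:
   accept x only if every coordinate lies in [LO, HI]. LO/HI are illustrative
   placeholders, not values from the source. */
#define LO -10.0
#define HI  10.0

int is_feasible(double *x, int dim)
{
    int k;
    for (k = 0; k < dim; ++k) {
        if (x[k] < LO || x[k] > HI)
            return 0;   /* infeasible: the caller resamples this point */
    }
    return 1;
}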
int main (int argc, char *argv[])
{
    int count;           /// local prime count
    double elapsed_time; /// execution time
    int first;           /// index of the first multiple of prime in this block
    int global_count;    /// global prime count
    int high_value;      /// largest value in this block
    int i;               /// loop counter
    int id;              /// process ID
    int index;           /// array index used by process 0
    int low_value;       /// smallest value in this block
    char *marked;        /// pointer to the local array
    int n;               /// upper bound of the prime search
    int p;               /// number of processes
    int proc0_size;      /// size of process 0's array
    int prime;           /// next prime whose multiples are struck out
    int size;            /// number of elements in the local array

    MPI_Init (&argc, &argv);             /// initialize MPI
    MPI_Barrier (MPI_COMM_WORLD);        /// all processes start timing together
    elapsed_time = -MPI_Wtime();         /// record the negative of the current time
    MPI_Comm_rank (MPI_COMM_WORLD, &id); /// get this process's rank
    MPI_Comm_size (MPI_COMM_WORLD, &p);  /// get the communicator size

    /// check the number of command-line arguments; abort if it is wrong
    if (argc != 2) {
        if (!id) printf ("Command line: %s <m>\n", argv[0]);
        MPI_Finalize();
        exit (1);
    }

    n = atoi (argv[1]); /// upper bound read from the command line

    /// only odd numbers are stored: element i of this block represents
    /// the value low_value + 2*i
    low_value = 3 + 2*(BLOCK_LOW(id,p,(n-1)/2));   /// smallest value held by this process
    high_value = 3 + 2*(BLOCK_HIGH(id,p,(n-1)/2)); /// largest value held by this process
    size = BLOCK_SIZE(id,p,(n-1)/2);               /// number of elements held by this process
    proc0_size = ((n-1)/2)/p;                      /// number of elements held by process 0

    /// if the largest number controlled by process 0 is below sqrt(n), exit
    if ((3 + 2*proc0_size) < (int) sqrt((double) n)) {
        if (!id) printf ("Too many processes\n");
        MPI_Finalize();
        exit (1);
    }

    marked = (char *) malloc (size); /// allocate the local array
    /// exit if the allocation failed
    if (marked == NULL) {
        printf ("Cannot allocate enough memory\n");
        MPI_Finalize();
        exit (1);
    }

    for (i = 0; i < size; i++)
        marked[i] = 0;   /// initialize the array; 0 means unmarked
    if (!id) index = 0;  /// initialize index; only process 0 uses it
    prime = 3;           /// the first prime is 3 (even numbers are already excluded)

    /************************* core algorithm *****************************/
    do {
        if (prime * prime > low_value)
            first = (prime * prime - low_value)/2;
        else {
            if (!(low_value % prime))
                first = 0;
            else
                /// offset of the first odd multiple of prime at or above low_value
                first = (prime - low_value % prime + 1)/2
                      + ((prime-1)/2) * ((prime - low_value % prime) % 2);
        }
        for (i = first; i < size; i += prime) {
            marked[i] = 1;
        }
        if (!id) {
            while (marked[++index]);
            prime = 2*index + 3;
        }
        MPI_Bcast (&prime, 1, MPI_INT, 0, MPI_COMM_WORLD);
    } while (prime * prime <= n);
    /*************************************************************/

    /// count the local primes
    count = 0;
    for (i = 0; i < size; i++)
        if (!marked[i]) {
            count++;
        }

    /// sum all local prime counts onto process 0
    MPI_Reduce (&count, &global_count, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    elapsed_time += MPI_Wtime(); /// compute the elapsed time

    /// process 0 prints the results; +1 accounts for the prime 2
    if (!id) {
        printf ("%d primes are less than or equal to %d\n", global_count+1, n);
        printf ("Total elapsed time: %10.6f\n", elapsed_time);
    }
    MPI_Finalize (); /// shut down MPI
    return 0;
}
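The closed-form `first` expression in the odd-only sieve above is hard to verify by eye. A slower but more transparent equivalent, useful for cross-checking (the function name is mine, not from the program):

/* Transparent way to compute 'first' for an odd-only sieve block: find the
   smallest odd multiple of prime that is >= max(low_value, prime*prime),
   then convert it to an index (the array stores only odd numbers). */
int first_odd_multiple_offset(int prime, int low_value)
{
    long m = (long) prime * prime;        /* smaller multiples handled by smaller primes */
    if (m < low_value) {
        m = ((low_value + prime - 1) / prime) * (long) prime; /* round up to a multiple */
        if (m % 2 == 0)
            m += prime;                   /* prime is odd, so this makes m odd */
    }
    return (int)((m - low_value) / 2);
}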
grid *grid_create(double startx, double endx, int nx,
                  double starty, double endy, int ny,
                  double startz, double endz, int nz)
{
    MPI_Comm cart_comm;
    int periodics[3];
    int i, j, k;
    int ind;
    int np, rank;

    grid *grd = (grid*) malloc(sizeof(grid));

    MPI_Comm_size(MPI_COMM_WORLD, &np);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    grd->id = 0;
    if (nz > 0)
        grd->nd = 3;
    else
        grd->nd = 2;

    grd->num_global[0] = nx;
    grd->num_global[1] = ny;
    grd->num_global[2] = nz;
    grid_decomp(grd, np);

    periodics[0] = periodics[1] = periodics[2] = 0;
    MPI_Cart_create(MPI_COMM_WORLD, grd->nd, grd->num_procs, periodics, 1, &cart_comm);
    MPI_Comm_rank(cart_comm, &rank);
    MPI_Cart_coords(cart_comm, rank, grd->nd, grd->cart_coord);

    /* Local extents and (1-based) global index ranges in x and y */
    grd->nx = BLOCK_SIZE(grd->cart_coord[0], grd->num_procs[0], nx);
    grd->ny = BLOCK_SIZE(grd->cart_coord[1], grd->num_procs[1], ny);
    grd->num_local[0] = grd->nx;
    grd->num_local[1] = grd->ny;
    grd->num_pts = grd->nx * grd->ny;
    grd->is[0] = BLOCK_LOW (grd->cart_coord[0], grd->num_procs[0], nx) + 1;
    grd->ie[0] = BLOCK_HIGH(grd->cart_coord[0], grd->num_procs[0], nx) + 1;
    grd->is[1] = BLOCK_LOW (grd->cart_coord[1], grd->num_procs[1], ny) + 1;
    grd->ie[1] = BLOCK_HIGH(grd->cart_coord[1], grd->num_procs[1], ny) + 1;
    if (grd->nd == 3) {
        grd->nz = BLOCK_SIZE(grd->cart_coord[2], grd->num_procs[2], nz);
        grd->num_local[2] = grd->nz;
        grd->num_pts = grd->num_pts * grd->nz;
        grd->is[2] = BLOCK_LOW (grd->cart_coord[2], grd->num_procs[2], nz) + 1;
        grd->ie[2] = BLOCK_HIGH(grd->cart_coord[2], grd->num_procs[2], nz) + 1;
    }

    grd->comm = MPI_COMM_WORLD;

    grd->xyz = (double*) malloc(grd->num_pts*3*sizeof(double));
    grd->x = &grd->xyz[0];
    grd->y = &grd->xyz[grd->num_pts];
    grd->z = &grd->xyz[grd->num_pts*2];

    /* Uniform spacing from the global extents; shift each local origin to
       this process's first grid point. hz and startz are only meaningful
       in 3-D (with nz == 0 the divisor would be -1 and is[2] would be
       uninitialized), so they are guarded. */
    grd->hx = (endx - startx) / (grd->num_global[0] - 1);
    grd->hy = (endy - starty) / (grd->num_global[1] - 1);
    startx = startx + (grd->is[0]-1)*grd->hx;
    starty = starty + (grd->is[1]-1)*grd->hy;
    if (grd->nd == 3) {
        grd->hz = (endz - startz) / (grd->num_global[2] - 1);
        startz = startz + (grd->is[2]-1)*grd->hz;
    }

    if (grd->nd == 3) {
        for (k = 0; k < grd->nz; k++) {
            for (j = 0; j < grd->ny; j++) {
                for (i = 0; i < grd->nx; i++) {
                    ind = k*grd->nx*grd->ny + j*grd->nx + i;
                    grd->x[ind] = startx + i*grd->hx;
                    grd->y[ind] = starty + j*grd->hy;
                    grd->z[ind] = startz + k*grd->hz;
                }
            }
        }
    } else {
        for (j = 0; j < grd->ny; j++) {
            for (i = 0; i < grd->nx; i++) {
                ind = j*grd->nx + i;
                grd->x[ind] = startx + i*grd->hx;
                grd->y[ind] = starty + j*grd->hy;
            }
        }
    }

    for (i = 0; i < grd->nd; i++) {
        grd->periodic[i] = 0;
    }

    return grd;
}
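For context, a plausible call building a 64x64x64 grid over the unit cube; the values are illustrative only:

/* Illustrative usage: a 3-D grid over [0,1]^3, decomposed across whatever
   communicator size MPI was launched with. nz > 0 selects the 3-D path. */
grid *g = grid_create(0.0, 1.0, 64,   /* x: [0,1], 64 points */
                      0.0, 1.0, 64,   /* y: [0,1], 64 points */
                      0.0, 1.0, 64);  /* z: [0,1], 64 points */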
/* Function to be executed by the workers */
void slave(Params *p)
{
    srand(2);

    /* Timer */
    double start, end;
    int i;
    int *array;
    array = (int *) malloc(sizeof(int) * p->array_size);
    MPI_Status status;
    int count = 0;
    int done = 0;
    int temp = 0;
    /* Range of values this stage keeps (rank 0 is the generator, so worker
       ranks are shifted down by 1). */
    int low  = BLOCK_LOW (p->rank-1, p->size, p->max_num+1);
    int high = BLOCK_HIGH(p->rank-1, p->size, p->max_num+1);

    start = MPI_Wtime();

    // Repeat until done
    while (!done) {
        // Master work
        if (p->rank == 0) {
            // Increase count until array_size
            if (count++ < p->array_size) {
                // Add random number to list, send it up the pipeline to rank 1
                temp = rand() % p->max_num;
                MPI_Send(&temp, 1, MPI_INT, 1, NUM_TAG, MPI_COMM_WORLD);
            } else {
                // Signal end of the list
                MPI_Send(&temp, 1, MPI_INT, 1, TERM_TAG, MPI_COMM_WORLD);
                done = 1;
            }
        }
        // Slave work
        else {
            // receive a number from the previous stage
            MPI_Recv(&temp, 1, MPI_INT, p->rank-1, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            // check for termination
            if (status.MPI_TAG == TERM_TAG) {
                // make sure I'm sending to a valid rank and forward termination
                done = 1;
                if (p->rank != p->size) {
                    MPI_Send(&temp, 1, MPI_INT, p->rank+1, TERM_TAG, MPI_COMM_WORLD);
                }
            }
            // got a number
            else if (status.MPI_TAG == NUM_TAG) {
                // check to see if I keep it
                if (temp >= low && temp <= high)
                    array[count++] = temp;
                else // pass it on
                    MPI_Send(&temp, 1, MPI_INT, p->rank+1, NUM_TAG, MPI_COMM_WORLD);
            }
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    // Gather how many values each stage kept, then compute displacements
    int *sizes = (int *) calloc(p->size+1, sizeof(int));
    MPI_Gather(&count, 1, MPI_INT, sizes, 1, MPI_INT, 0, MPI_COMM_WORLD);
    int *disp = (int *) calloc(p->size+1, sizeof(int));
    if (p->rank == 0) {
        sizes[0] = 0;
        disp[0] = 0;
        for (i = 1; i < p->size+1; ++i) {
            disp[i] = disp[i-1] + sizes[i-1];
        }
    }

    // sort the array, except for the master
    if (p->rank != 0)
        qsort(array, count, sizeof(int), compare);
    if (p->rank == 0)
        count = 0;

    // gather results
    MPI_Gatherv(array, count, MPI_INT, &array[0], sizes, disp, MPI_INT, 0, MPI_COMM_WORLD);

    // stop the clock, print results
    end = MPI_Wtime();
    if (p->rank == 0)
        fprintf(stderr, "[%d] Elapsed time: %f\n", p->rank, end - start);
    return;
}
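The qsort call above relies on a comparator named `compare` that the snippet does not show. A conventional ascending-int version, stated as an assumption rather than the author's code:

/* Assumed implementation of the 'compare' comparator referenced above:
   ascending order over ints. */
int compare(const void *a, const void *b)
{
    int x = *(const int *) a;
    int y = *(const int *) b;
    return (x > y) - (x < y);  /* avoids the overflow risk of x - y */
}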
bool processOptions (int argc, char *argv[], INFO *info)
{
    int c = 0;
    char *base_fn = NULL;
    char *co_fn = NULL;
    unsigned int num_clusters = 0;
    unsigned int seed = UINT_MAX;
    unsigned int maxiter = 0;
    unsigned int snapshot = UINT_MAX;
    bool verbose = false;
    bool debug = false;
    bool textio = false;
    bool rounding = false;
    bool no_output = false;

    /* Usage information if no arguments */
    if (argc == 1) {
        usage (argv[0]);
    }

    while (1) {
        int option_index = 0;
        static struct option long_options[] = {
            {"base", 1, 0, 0},
            {"cooccur", 1, 0, 0},
            {"clusters", 1, 0, 0},
            {"seed", 1, 0, 0},
            {"maxiter", 1, 0, 0},
            {"snapshot", 1, 0, 0},
            {"openmp", 1, 0, 0},
            {"verbose", 0, 0, 0},
            {"debug", 0, 0, 0},
            {"text", 0, 0, 0},
            {"rounding", 0, 0, 0},
            {"nooutput", 0, 0, 0},
            {0, 0, 0, 0}
        };

        c = getopt_long (argc, argv, "", long_options, &option_index);
        if (c == -1) {
            break;
        }

        switch (c) {
        case 0:
            if (strcmp (long_options[option_index].name, "cooccur") == 0) {
                co_fn = wmalloc (strlen (optarg) + 1);
                co_fn = strcpy (co_fn, optarg);
            }
            else if (strcmp (long_options[option_index].name, "clusters") == 0) {
                num_clusters = atoi (optarg);
            }
            else if (strcmp (long_options[option_index].name, "seed") == 0) {
                seed = atoi (optarg);
            }
            else if (strcmp (long_options[option_index].name, "base") == 0) {
                base_fn = wmalloc (strlen (optarg) + 1);
                base_fn = strcpy (base_fn, optarg);
            }
            else if (strcmp (long_options[option_index].name, "maxiter") == 0) {
                maxiter = atoi (optarg);
            }
            else if (strcmp (long_options[option_index].name, "snapshot") == 0) {
                snapshot = atoi (optarg);
            }
            else if (strcmp (long_options[option_index].name, "openmp") == 0) {
#if HAVE_OPENMP
                /* Check the previously set value, which is the maximum for the system */
                if (atoi (optarg) > info -> threads) {
                    fprintf (stderr, "==\tError: The number of threads requested exceeds the number available in the system (%u).\n", info -> threads);
                    exit (-1);
                }
                info -> threads = atoi (optarg);
                omp_set_num_threads (info -> threads);
#else
                fprintf (stderr, "==\tError: OpenMP is not enabled; --openmp meaningless.\n");
                exit (-1);
#endif
            }
            else if (strcmp (long_options[option_index].name, "verbose") == 0) {
                verbose = true;
            }
            else if (strcmp (long_options[option_index].name, "debug") == 0) {
                debug = true;
            }
            else if (strcmp (long_options[option_index].name, "text") == 0) {
                textio = true;
            }
            else if (strcmp (long_options[option_index].name, "rounding") == 0) {
                rounding = true;
            }
            else if (strcmp (long_options[option_index].name, "nooutput") == 0) {
                no_output = true;
            }
            break;
        default:
            printf ("?? getopt returned character code 0%o ??\n", c);
            exit (EXIT_FAILURE);
        }
    }

    info -> base_fn = base_fn;
    info -> co_fn = co_fn;
    info -> num_clusters = num_clusters;
    info -> seed = seed;
    info -> maxiter = maxiter;
    info -> snapshot = snapshot;
    info -> verbose = verbose;
    info -> debug = debug;
    info -> textio = textio;
    info -> rounding = rounding;
    info -> no_output = no_output;

    /* Set the range of clusters this process will handle */
    info -> block_start = BLOCK_LOW (info -> world_id, info -> world_size, info -> num_clusters);
    info -> block_end = BLOCK_HIGH (info -> world_id, info -> world_size, info -> num_clusters);
    info -> block_size = BLOCK_SIZE (info -> world_id, info -> world_size, info -> num_clusters);

    return true;
}
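The `wmalloc` calls above look like a checked wrapper around malloc; its body is not shown, so here is a plausible stand-in, offered as an assumption:

#include <stdio.h>
#include <stdlib.h>

/* Assumed definition of the wmalloc wrapper used above: a malloc that aborts
   on failure instead of returning NULL, so callers need not check. */
void *wmalloc (size_t size)
{
    void *ptr = malloc (size);
    if (ptr == NULL) {
        fprintf (stderr, "==\tError: out of memory (%zu bytes requested).\n", size);
        exit (EXIT_FAILURE);
    }
    return ptr;
}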
int main(int argc, char * argv[])
{
    /* Constant Declarations */
    //long const SET_SIZE = 7920;

    /* Variable Declarations */
    int count = 0;                          // local count
    double elapsed_time = 0.00;             // time elapsed
    int first;                              // index of first multiple
    int global_count = 0;                   // global count
    int high_value;                         // highest value on processor
    char hostname[MPI_MAX_PROCESSOR_NAME];  // host process is running on
    int i;                                  // counter variable
    int id;                                 // process id number
    int index;
    int init_status;                        // initialization error status flag
    int initialized = 0;                    // mpi initialized flag (int, as MPI_Initialized expects)
    int len;                                // hostname length
    int low_value;                          // lowest value on the processor
    char *marked;                           // portion of 2 to n that is marked
    int n;                                  // number of elements to sieve
    int n_sqrt;                             // square root of n
    int p;                                  // number of processes
    int prime;
    int proc0_size;                         // size of process 0's subarray
    int size;                               // elements in marked
    int *sqrt_primes;                       // primes up to the square root
    int sqrt_primes_index;                  // index in the square root primes array
    char *sqrt_primes_marked;               // numbers up to sqrt marked prime or not
    int sqrt_primes_size;                   // size of square root primes array

    /* Initialization */
    MPI_Initialized (&initialized);             // set initialized flag
    if (!initialized)                           // if MPI is not initialized
        init_status = MPI_Init (&argc, &argv);  // initialize MPI
    else
        init_status = MPI_SUCCESS;              // otherwise set init_status to success
    if (init_status != MPI_SUCCESS) {           // if not successfully initialized
        printf ("Error starting MPI program. Terminating.\n");
        fflush (stdout);
        MPI_Abort (MPI_COMM_WORLD, init_status);
    }
    MPI_Get_processor_name (hostname, &len);  // set hostname
    MPI_Comm_rank (MPI_COMM_WORLD, &id);      // set process rank
    MPI_Comm_size (MPI_COMM_WORLD, &p);       // set size of comm group
    //printf("Process rank %d started on %s.\n", id, hostname);

    /* Start Timer */
    MPI_Barrier (MPI_COMM_WORLD);  // synchronize
    elapsed_time = -MPI_Wtime();   // start time

    /* Check that a set size was passed into the program */
    if (argc != 2) {
        if (id == 0) {
            printf ("Command line: %s <m>\n", argv[0]);
            fflush (stdout);
        }
        MPI_Finalize();
        exit (1);
    }

    n = atoi (argv[1]);
    n_sqrt = ceil (sqrt ((double) n));

    /* Sieve the numbers up to sqrt(n) locally on every process */
    sqrt_primes_marked = (char *) malloc (n_sqrt + 1);
    sqrt_primes_marked[0] = 1;
    sqrt_primes_marked[1] = 1;
    for (i = 2; i <= n_sqrt; ++i) {
        sqrt_primes_marked[i] = 0;
    }
    prime = 2;
    do {
        for (i = prime * prime; i <= n_sqrt; i += prime) {  // <= so n_sqrt itself gets marked
            sqrt_primes_marked[i] = 1;
        }
        while (sqrt_primes_marked[++prime]);
    } while (prime * prime <= n_sqrt);

    /* Collect the odd small primes into sqrt_primes */
    sqrt_primes = (int *) malloc ((n_sqrt + 1) * sizeof(int));  // sized in elements, not bytes
    sqrt_primes_index = 0;
    for (i = 3; i <= n_sqrt; ++i) {
        if (!sqrt_primes_marked[i]) {
            sqrt_primes[sqrt_primes_index] = i;
            sqrt_primes_index++;
        }
    }
    sqrt_primes_size = sqrt_primes_index;

    /* Set process's array share and first and last elements */
    low_value = 2 + BLOCK_LOW(id,p,n-1);
    high_value = 2 + BLOCK_HIGH(id,p,n-1);
    size = BLOCK_SIZE(id,p,n-1);

    /* Restrict the block to odd values only; element i then represents
       the value low_value + 2*i */
    if (low_value % 2 == 0) {
        if (high_value % 2 == 0) {
            size = (int) floor ((double) size / 2.0);
            high_value--;
        } else {
            size = size / 2;
        }
        low_value++;
    } else {
        if (high_value % 2 == 0) {
            size = size / 2;
            high_value--;
        } else {
            size = (int) ceil ((double) size / 2.0);
        }
    }
    //printf("Process %i block low: %i\n", id, low_value);
    //printf("Process %i block high: %i\n", id, high_value);
    //printf("Block size: %i\n", size);

    /* Allocate share of array */
    marked = (char *) malloc (size);
    if (marked == NULL) {
        printf ("Cannot allocate enough memory\n");
        fflush (stdout);
        MPI_Finalize();
        exit (1);
    }

    /* Run Sieve */
    for (i = 0; i < size; i++)
        marked[i] = 0;
    if (id == 0)
        first = 0;
    sqrt_primes_index = 0;
    prime = sqrt_primes[sqrt_primes_index];
    do {
        if (prime >= low_value)
            first = ((prime - low_value) / 2) + prime;  // skip the prime itself; start marking at 3*prime
        else if (prime * prime > low_value)
            first = (prime * prime - low_value) / 2;
        else {
            if (low_value % prime == 0)
                first = 0;
            else {
                first = 1;
                while ((low_value + (2 * first)) % prime != 0)
                    ++first;
            }
        }
        /* stepping the index by prime steps the value by 2*prime, so only
           odd multiples are marked */
        for (i = first; i < size; i += prime)
            marked[i] = 1;
        prime = sqrt_primes[++sqrt_primes_index];
    } while (prime * prime <= n && sqrt_primes_index < sqrt_primes_size);

    count = 0;
    for (i = 0; i < size; i++) {
        if (!marked[i])
            count++;
    }

    /* Reduce Sum: reduce the primes count, root: process 0 */
    MPI_Reduce (&count, &global_count, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    /* Stop Timer */
    elapsed_time += MPI_Wtime();  // end time

    if (id == 0) {  // rank 0 prints the global count
        global_count++;  // account for the prime 2, which the odd-only blocks never store
        printf ("There are %d primes in the first %i integers.\nExecution took %10.6f.\n",
                global_count, n, elapsed_time);
        fflush (stdout);
        /* leftover debug output: print the small primes used for sieving */
        //for (i = 0; i < sqrt_primes_size; i++) {
        //    printf ("%i,", sqrt_primes[i]);
        //    fflush (stdout);
        //}
    }

    MPI_Barrier (MPI_COMM_WORLD);
    //printf("rank: %i\nlow value: %i\nhigh value: %i\ncount: %i\n", id, low_value, high_value, count);
    MPI_Finalize();  // finalize
    return 0;
}
int main (int argc, char *argv[])
{
    int count;           /* local prime count */
    double elapsed_time; /* execution time */
    int first;           /* index of the first sieve */
    int global_count;    /* global count of prime numbers */
    int high_value;      /* highest value assigned to this process */
    int i;               /* loop counter */
    int id;              /* this process id */
    int index;           /* index of the current sieve */
    int low_value;       /* lowest value assigned to this process */
    int *marked;         /* array elements to be marked */
    int n;               /* value of the largest number */
    int p;               /* number of processes */
    int proc0_size;      /* number of elements assigned to process zero;
                            used to find if process zero has all primes */
    int prime;           /* current prime or sieve */
    int size;            /* elements in marked array */
    int seed_size;
    char cpu_name[MPI_MAX_PROCESSOR_NAME];
    int namelen;

    MPI_Init (&argc, &argv);

    /* start timer */
    MPI_Barrier (MPI_COMM_WORLD);
    elapsed_time = -MPI_Wtime();

    MPI_Comm_rank (MPI_COMM_WORLD, &id);
    MPI_Comm_size (MPI_COMM_WORLD, &p);
    MPI_Get_processor_name (cpu_name, &namelen);

    if (argc != 2) {
        if (!id) printf ("Command line: %s <m>\n", argv[0]);
        MPI_Finalize();
        exit (1);
    }

    n = atoi (argv[1]);

    /* find how many elements are assigned to this process */
    low_value = BLOCK_LOW(id,p,n);
    high_value = BLOCK_HIGH(id,p,n);
    size = BLOCK_SIZE(id,p,n);
    seed_size = SEED_SIZE(n);
    proc0_size = (n-1)/(2*p);

    /* - the main loop works only while prime * prime <= n, i.e. for
     *   prime <= sqrt(n)
     * - in the original setup the program would exit if proc0 didn't hold all
     *   starting primes (it cannot pick a starting prime from another
     *   process):
     *
     *   if ((OFFSET + proc0_size) < (int) sqrt((double) n)) {
     *       if (!id) printf ("Too many processes\n");
     *       MPI_Finalize();
     *       exit (1);
     *   }
     */

    /* There are too many processes when we cannot split up what is left of
       the numbers after taking out the SEED section (which encloses sqrt(n)) */
    if (BLOCK_SXN_SIZE(n) < p) {
        if (!id) printf ("Too many processes\n");
        MPI_Finalize();
        exit (1);
    }

    marked = (int *) malloc ((seed_size + size) * sizeof(int));
    if (marked == NULL) {
        printf ("Cannot allocate enough memory\n");
        MPI_Finalize();
        exit (1);
    }
    for (i = 0; i < (seed_size + size); i++)
        marked[i] = 0;

    index = 0;
    prime = OFFSET;

    if (!id)
        printf ("[%d-%s] SEED: low[ 0]= 3, high[%6d]=%6d (%d)\n",
                id, cpu_name, (SEED_SIZE(n)-1), SEED_HIGH(n), seed_size);
    MPI_Barrier (MPI_COMM_WORLD);
    printf ("[%d-%s] ARRAY: low[%6d]=%6d, high[%6d]=%6d (%d)\n",
            id, cpu_name, seed_size, low_value, seed_size + size-1, high_value, size);

    do {
        /* SEED marking - mark the multiples of the seed within the seed
           block. Start marking from prime * prime, which sits at position
           index + prime. */
        for (i = index + prime; i < seed_size; i += prime) {
            marked[i] = 1;
        }

        /* Now continue into the block section and keep marking, after first
           finding where this process starts. The first number to mark is
           prime * prime. */
        if (prime * prime > low_value) {
            /* If prime * prime is above this process's low bound, the first
               index is (prime * prime - low_value)/2. E.g.:
                 prime = 7
                 low_value = 41
                 first = [(7*7) - 41]/2 = (49 - 41)/2 = 8/2 = 4
               so marked[4] is marked first. */
            first = (prime * prime - low_value)/2;
        } else {
            /* This section is for "run-on" arrays, e.g. with prime = 3:
                 p0 [ 3| 5]  3*3 > 3
                 p1 [ 7| 9]  3*3 !> 7   "run-on", need to mark 9
                 p2 [11|13]  3*3 !> 11  "run-on" */
            if (!(low_value % prime)) {
                /* low_value is itself a multiple, so the first element of
                   the array is the starting place */
                first = 0;
            } else {
                /* e.g. for low_value = 11, prime = 3: (3 - (11 % 3))/2 = (3 - 2)/2 */
                int tmp = prime - (low_value % prime);
                first = tmp % 2 == 0 ? tmp/2 : (tmp + prime)/2;
            }
        }

        /* 0            sqrt(n)
           [SEED       ][        BLOCK_SXN        ]
           SEED_SIZE -------> ---> first
           ----------------------> first + SEED_SIZE
           Now mark all multiples of the prime; 'first' is a multiple of the
           prime, so += prime stays on multiples. */
        for (i = first; i < size; i += prime) {
            marked[i + seed_size] = 1;
        }

        /* Advance 'index' from the last smallest prime to the next one.
           Each seed 'index' represents the number 2*index + OFFSET. */
        while (marked[++index]);
        prime = (2 * index) + OFFSET;
    } while (prime * prime <= n);

    /* Total up the items that are not marked, i.e. the local primes.
       Only process 0 counts the seed block. */
    count = 0;
    MPI_Barrier (MPI_COMM_WORLD);
    if (!id) {
        for (i = 0; i < seed_size; i++) {
            if (!marked[i])
                count++;
        }
    }
    for (i = 0; i < size; i++) {
        if (!marked[i + seed_size])
            count++;
    }

    /* process 0 will receive the sum of the number of local primes */
    MPI_Reduce (&count, &global_count, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    elapsed_time += MPI_Wtime();

    if (!id) {
        global_count++; /* to account for 2 */
        printf ("%d primes are less than or equal to %d\n", global_count, n);
        printf ("Total elapsed time: %10.6f\n", elapsed_time);
    }
    MPI_Finalize ();
    return 0;
}