Ejemplo n.º 1
0
int main(int argc, char *argv[]) {
    if (argc != 2) {
        cout << "Usage: setup outfilename" << endl;
        return(-1);
    }
    string filename = argv[1];

    vector<Cparticle> ps;
    Cparticle p;
    cout << "Creating box with sides: rmax = ["<<BMAX[0]<<" "<<BMAX[1]<<"] rmin = ["<<BMIN[0]<<" "<<BMIN[1]<<"]"<<endl;
    cout << "Reynolds Number = "<<REYNOLDS_NUMBER<<endl;
    cout << "Density = "<<DENS<<endl;
    cout << "speed of sound = "<<SPSOUND<<endl;
    cout << "prb = "<<PRB<<endl;
    cout << "number of particles on side = "<<NX<<endl;
    cout << "max num partilces = "<<MAX_NUM_PARTICLES_PER_CPU<<endl;
    cout << "this run will need "<<MAX_NUM_PARTICLES_PER_CPU*sizeof(Cparticle)<<" bytes"<<endl;
    cout <<"inlet vel is "<<INFLOW_VEL<<endl;

    cout << "PSEP = "<<PSEP<<endl;
    cout << "alpha = "<<ALPHA<<endl;
    cout << "viscosity = "<<VISCOSITY<<endl;
    cout << "maxtime = "<<MAXTIME<<endl;
    cout << "term vel (3d) is "<<(2.0/9.0)*(DEM_DENS-DENS)*9.81*pow(DEM_RADIUS,2)/(VISCOSITY*DENS)<<endl;
    cout << "term vel (2d) is "<<(1.0/3.0)*(DEM_DENS-DENS)*9.81*pow(DEM_RADIUS,2)/(VISCOSITY*DENS)<<endl;
    //cout << "term vel is "<<(1.0/6.0)*(DEM_DENS-DENS)*9.81*pow(DEM_RADIUS,1)/(VISCOSITY*DENS)<<endl;

    cout << "gamma must be less than" <<2.0*sqrt(DEM_K*DEM_MIN_REDUCED_MASS)<<endl;
#ifdef LUBRICATION
    cout << "k must be greater than" <<pow(0.5*LUB_GAMMA,2)/DEM_MIN_REDUCED_MASS<<endl;
#else
    cout << "k must be greater than" <<pow(0.5*DEM_GAMMA,2)/DEM_MIN_REDUCED_MASS<<endl;
#endif
    cout << "Konmr ="<<Konmr<<endl;
    cout << "gammaOnmr = "<<gammaOnmr<<endl;
    cout << "K = "<<DEM_K<<endl;
#ifdef LUBRICATION
    cout << "GAMMA  = "<<LUB_GAMMA<<endl;
#else
    cout << "GAMMA  = "<<DEM_GAMMA<<endl;
#endif


    cout <<"THETS ="<<THETS<<endl;
    cout <<"DEM_TIMESTEP="<<DEM_TIMESTEP<<endl;
    cout <<"CR = "<<CR<<endl;


    double tsc = Nsph::courantCondition(H,2*SPSOUND);
    double tsv = Nsph::viscDiffusionCondition(H,VISCOSITY);
    double tdem = Nsph::demCondition();
#ifdef LUBRICATION
    cout <<"coeff of restitution = "<<exp(-tdem*0.5*LUB_GAMMA/DEM_MIN_REDUCED_MASS)<<endl;
#else
    cout <<"coeff of restitution = "<<exp(-tdem*0.5*DEM_GAMMA/DEM_MIN_REDUCED_MASS)<<endl;
#endif
    double liqtdem = Nsph::liqDemCondition();
    cout <<"dem ts = "<<tdem<<endl;
    cout <<"liq dem ts = "<<liqtdem<<endl;
    cout <<"simulation will take "<<int((MAXTIME/tsc)+1)<<" steps according to Courant condition, "<<int((MAXTIME/tsv)+1)<<" steps according to visc diffusion condition"<<int((MAXTIME/tdem)+1)<<" steps according to the DEM condition"<<int((MAXTIME/liqtdem)+1)<<" steps according to the LIQ DEM condition"<<endl;

    cout <<"reduced mass = "<<DEM_MIN_REDUCED_MASS<<endl;
    cout <<" tdem = "<< (1.0/50.0)*PI*sqrt(DEM_MIN_REDUCED_MASS)/sqrt(DEM_K-pow(0.5*DEM_GAMMA,2)/DEM_MIN_REDUCED_MASS) << endl;
    cout <<"particle reynolds number = "<<2.0*DEM_RADIUS*INFLOW_VEL/VISCOSITY<<endl;
    vect normal = 0.0;
    normal[2] = 1.0;
    Nmisc::boundaryCircle(ps,CYLINDER_ORIGIN,INLET_RADIUS,CYLINDER_RADIUS,normal);
    vect newOrigin = CYLINDER_ORIGIN;
    const double newHeight = CYLINDER_HEIGHT - BFAC*PSEP;
    newOrigin[2] += BFAC*PSEP;
    cout <<"adding cylinder at origin"<<newOrigin<<endl;
    Nmisc::boundaryCylinderNoTopBottom(ps,newOrigin,CYLINDER_RADIUS,newHeight);
    newOrigin[2] = CYLINDER_ORIGIN[2]-INLET_HEIGHT;
    cout <<"adding cylinder at origin"<<newOrigin<<endl;
    Nmisc::boundaryCylinderNoTopBottom(ps,newOrigin,INLET_RADIUS,INLET_HEIGHT);
    newOrigin[2] = CYLINDER_ORIGIN[2]+CYLINDER_HEIGHT;
    normal[2] = -1.0;
    cout <<"adding cylinder at origin"<<newOrigin<<endl;
    Nmisc::boundaryCircle(ps,newOrigin,INLET_RADIUS,CYLINDER_RADIUS,normal);
    newOrigin[2] += PSEP;
    cout <<"adding cylinder at origin"<<newOrigin<<endl;
    Nmisc::boundaryCylinderNoTopBottom(ps,newOrigin,INLET_RADIUS,INLET_HEIGHT);
    //demPorousCylinder(ps,CYLINDER_ORIGIN,CYLINDER_RADIUS,DEM_DISK_HEIGHT,DEM_DISK_POROSITY);
    newOrigin[2] = CYLINDER_ORIGIN[2]+CYLINDER_HEIGHT-DEM_DISK_HEIGHT;
    //demPorousCylinder(ps,newOrigin,CYLINDER_RADIUS,DEM_DISK_HEIGHT,0.5);


    CglobalVars globals;
    globals.procNeighbrs = -1;
    for (int j=0; j<NDIM; j++) {
        globals.procDomain[2*j] = RMIN[j]-0.25*(RMAX[j]-RMIN[j]);
        globals.procDomain[2*j+1] = RMAX[j]+0.25*(RMAX[j]-RMIN[j]);
    }
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &(globals.mpiSize));
    MPI_Comm_rank(MPI_COMM_WORLD, &(globals.mpiRank));
    ps.reserve(MAX_NUM_PARTICLES_PER_CPU);
    cout <<"creating new data structure"<<endl;
    CdataLL *data = new CdataLL(ps,globals,true);
#ifdef LIQ_DEM
    cout <<"adding dem particles"<<endl;

    cout <<"going to add "<<NDEM<<" dem particles at random locations...."<<endl;
    newOrigin[2] = CYLINDER_ORIGIN[2]+DEM_DISK_HEIGHT+2.0*DEM_RADIUS;
    cout <<"adding cylinder at origin"<<newOrigin<<endl;
    Nmisc::addRandomDEMparticlesCyl(data,newOrigin,CYLINDER_RADIUS-2.0*DEM_RADIUS,CYLINDER_HEIGHT-4.0*DEM_RADIUS-2*DEM_DISK_HEIGHT,NDEM);
#endif
    //newOrigin = CYLINDER_ORIGIN;
    //origin[2] += 1.6*PSEP;
    //Nmisc::sphCylinder(ps,origin);
#ifdef WET_START
    const double totalDEMVol = NDEM*(4.0/3.0)*PI*pow(DEM_RADIUS,3);
    //const double newLiqDens = DENS;
    const double liqVol = PI*pow(CYLINDER_RADIUS,2)*CYLINDER_HEIGHT*0.90;
    //const double newLiqDens = DENS*(liqVol-totalDEMVol)/(liqVol);
    const double newLiqDens = DENS;
    cout << "porosity = "<<1.0-totalDEMVol/(liqVol)<<endl;
    cout << "after adding dem particles, new liquid density is "<<newLiqDens<<endl;

    cout <<"adding liquid particles"<<endl;
    for (int i=0; i<2*NX+3; i++) {
        cout << "\rParticle ("<<i<<","<<"0"<<"). Generation "<<((i+2)*(NY+4))/double((NX+4)*(NY+4))*100<<"\% complete"<<flush;
        for (int j=0; j<2*NY+3; j++) {
            for (int k=0; k<=NZ; k++) {
                p.tag = ps.size()+1;
                p.r = (i)*PSEP+BMIN[0],j*PSEP+BMIN[1],(k+1)*PSEP+CYLINDER_ORIGIN[2];
                if ((p.r[2]<CYLINDER_ORIGIN[2])||(p.r[0]*p.r[0]+p.r[1]*p.r[1]>pow(CYLINDER_RADIUS-PSEP,2))) continue;
                p.dens = newLiqDens;
                p.mass = pow(PSEP,NDIM)*newLiqDens;
                p.h = HFAC*pow(p.mass/p.dens,1.0/NDIM);
                p.v = 0.0;
                p.vhat = p.v;
                p.iam = sph;
                ps.push_back(p);
            }
        }
    }

    for (int k=0; k<int(INLET_HEIGHT/PSEP)+1; k++) {
        vect tmp = CYLINDER_ORIGIN;
        tmp[2] -= INLET_HEIGHT-k*PSEP;
        if (k<4) {
            myBoundaryCircle(ps,tmp,0,int((INLET_RADIUS-1.4*PSEP)/PSEP)*PSEP,sphBoundary);
        } else {
            myBoundaryCircle(ps,tmp,0,int((INLET_RADIUS-1.4*PSEP)/PSEP)*PSEP,sph);
        }
    }

#endif
    /*
    p.tag = ps.size()+1;
    p.r = CYLINDER_ORIGIN;
    p.r[2] = CYLINDER_HEIGHT;
    p.dens = DEM_DENS;
    p.h = DEM_RADIUS/2;
    p.mass = (4.0/3.0)*PI*pow(DEM_RADIUS,3)*DEM_DENS;
    p.v = 0.0;
    p.vhat = p.v;
    p.iam = dem;
    //ps.push_back(p);
    */

    cout << "Total number of particles = " << ps.size() << endl;

    //CglobalVars globals;

    vector<vector<double> > vprocDomain(globals.mpiSize);
    vector<Array<int,NDIM> > vprocNeighbrs(globals.mpiSize);
    vector<particleContainer > vps;
    vectInt split;
    split = NCPU_X,NCPU_Y,NCPU_Z;
    particleContainer pps;
    for (int i=0; i<ps.size(); i++) {
        pps.push_back(ps[i]);
    }
    Nmisc::splitDomain(pps,split,vps,vprocDomain,vprocNeighbrs);


    cout << "Opening files for writing..."<<endl;
    Cio_data_vtk ioFile(filename.c_str(),&globals);
    cout << "Calculating Output stuff.."<<endl;
    //sph.calcOutputVars();
    //customOutput.calcOutput(0,&customSim,&ioFile);
    cout << "Writing Restart data to file..."<<endl;
    int nProc = product(split);
    for (int i=0; i<nProc; i++) {
        globals.mpiRank = i;
        for (int j=0; j<NDIM*2; j++)
            globals.procDomain[j] = vprocDomain[i][j];
        globals.procNeighbrs = vprocNeighbrs[i];
        ioFile.setFilename(filename.c_str(),&globals);
        ioFile.writeGlobals(0,&globals);
        ioFile.writeRestart(0,vps[i],&globals);
        ioFile.writeDomain(0,&globals);
        globals.mpiRank = 0;
    }
    //ioFile.writeGlobals(0,&globals);
    //ioFile.writeDomain(0,vprocDomain,vprocNeighbrs);

}
Ejemplo n.º 2
0
/*
 * Entry point of an MPI program that splits the rows of an n x n grid between
 * processes and runs 10 sweeps of a weighted 9-point update.
 *
 * Uses names declared outside this function: p (process count), pid (rank),
 * n, start, end, arr, t, b, start_time, end_time, and the helpers init(),
 * above(), below() and min().
 *
 * Returns 0 on success, 1 when started with fewer than two processes.
 */
int main(int argc,char *argv[])
{
  MPI_Init(&argc,&argv);
  MPI_Comm_size(MPI_COMM_WORLD,&p);
  MPI_Comm_rank(MPI_COMM_WORLD,&pid);

  int i,j,k,i1,j1;

  /* The row partition below divides by (p-1); a single-process run would
     therefore divide by zero, so refuse to run instead of crashing. */
  if(p<2)
    {
      fprintf(stderr,"This program needs at least 2 MPI processes\n");
      MPI_Finalize();
      return 1;
    }

  start_time=MPI_Wtime();

  if(pid==0)
    {
      printf("There are %d processors\n",p);
    }

  /* Rows [start, end] belong to this process. */
  start= n/(p-1)*pid;
  end= min(start+p-2,n-1);
  init();
  printf(" Processor %d takes from %d to %d",pid,start,end);
  /* NOTE(review): above()/below() presumably exchange the boundary rows with
     the neighbouring ranks into t[] and b[] - confirm against their
     definitions. */
  above();
  below();
  for(k=0;k<10;k++)
  {
    printf("iter : %d\n",k);
    above();
    below();
    for(i=start;i<=end;i++)
    {
      for(j=0;j<n;j++)
        {
          /* Cells on the outer border of the global grid are never updated. */
          if((i==0)||(j==0)||(i==(n-1))||(j==(n-1)))
            continue;
          else if(i==start)
            arr[0][i-1][j]=t[j];
          else if(i==end)
            arr[0][i+1][j]=b[j];
          else
            arr[1][i][j]=(4*arr[0][i][j]+2*(arr[0][i+1][j]+arr[0][i-1][j])+2*(arr[0][i][j+1]+arr[0][i][j-1])+arr[0][i+1][j+1]+arr[0][i-1][j-1]);
          /* NOTE(review): this copies the whole local block of arr[1] back
             into arr[0] after every single cell, i.e. in the middle of the
             sweep. It looks like it was meant to run once per iteration
             after the i-loop - left unchanged, confirm the intent before
             moving it. */
          for(i1=start;i1<=end;i1++)
            for(j1=0;j1<n;j1++)
              arr[0][i1][j1]=arr[1][i1][j1];

        }

    }
    MPI_Barrier(MPI_COMM_WORLD);
  }
  printf("\n");
  /*for(i=start;i<=end;i++)
    {
      printf("\n");
      for(j=0;j<n;j++)
	printf("arr0:%lf arr1:%lf pid:%d\t",arr[0][i][j],arr[1][i][j],pid);
	}*/
  printf("\n");
  end_time=MPI_Wtime();
  if(pid==0)
    printf("\nTime taken is %lf",end_time-start_time);
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 3
0
/*
 * Self-test: transforms the same random real input once with a serial FFTW
 * plan and once with the parallel fft_par plan, then compares this rank's
 * slice of both results.
 *
 * Returns EXIT_SUCCESS when the largest absolute difference on this rank is
 * below 1.0e-6 and MPI finalizes cleanly, EXIT_FAILURE otherwise.
 *
 * Resources are released through the chain of die_* labels at the bottom,
 * in reverse order of acquisition.
 */
int main(int argc, char **argv)
{
  // Error handling scheme: this function has failed until proven otherwise.
  int ret = EXIT_FAILURE;

  if(MPI_Init(&argc, &argv) != MPI_SUCCESS) {
    // Theoretically, an error at this point will abort the program, and this
    // code path is never followed. This is here for completeness.
    fprintf(stderr, "unable to initialize MPI\n");
    goto die_immed;
  }

  // Install the MPI error handler that returns error codes, so we can perform
  // the usual process suicide ritual.
  if(MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)
      != MPI_SUCCESS) {
    // Again, theoretically, the previous error handler (MPI_Abort) gets called
    // instead of reaching this fail point.
    fprintf(stderr, "unable to reset MPI error handler\n");
    goto die_finalize_mpi;
  }

  int size, rank;
  if(MPI_Comm_size(MPI_COMM_WORLD, &size) != MPI_SUCCESS ||
      MPI_Comm_rank(MPI_COMM_WORLD, &rank) != MPI_SUCCESS) {
    fprintf(stderr, "unable to determine rank and size\n");
    goto die_finalize_mpi;
  }

  dsfmt_t *prng = malloc(sizeof(dsfmt_t));
  if(prng == NULL) {
    fprintf(stderr, "unable to allocate PRNG\n");
    goto die_finalize_mpi;
  }
  // Every rank seeds with the same SEED, so each rank generates the same
  // master array.
  dsfmt_init_gen_rand(prng, SEED);

  const int master_elems = proc_elems * size;

  double *const master = fftw_malloc(VL*master_elems*sizeof(double));
  if(master == NULL) {
    fprintf(stderr, "unable to allocate master array\n");
    goto die_free_prng;
  }
  for(int i = 0; i < master_elems*VL; ++i) {
    master[i] = 2*dsfmt_genrand_open_close(prng) - 1;
  }

  /* Allocate the array holding the serial result */
  double complex *const serial = fftw_malloc(VL*master_elems*sizeof(*serial));
  if(serial == NULL) {
    fprintf(stderr, "unable to allocate serial array\n");
    goto die_free_master;
  }

  /* Plan the serial transform */
  fftw_plan serial_plan = fftw_plan_many_dft_r2c(1, &master_elems, VL,
      master, NULL, VL, 1, serial, NULL, VL, 1, FFTW_ESTIMATE);
  if(serial_plan == NULL) {
    fprintf(stderr, "unable to construct forward transform plan\n");
    goto die_free_serial;
  }

  /* Perform the serial transform, and complete it */
  fftw_execute(serial_plan);
  fft_r2c_1d_vec_finish(serial, master_elems, VL);

  /* Allocate space to hold the parallel transform result */
  double complex *const parallel = fftw_malloc(
      proc_elems*VL*sizeof(double complex));
  if(parallel == NULL) {
    fprintf(stderr, "unable to allocate space for parallel array\n");
    goto die_destroy_serial_plan;
  }

  /* Create the parallel plan; each rank feeds its own slice of master */
  fft_par_plan par_plan = fft_par_plan_r2c_1d(MPI_COMM_WORLD, proc_elems, VL,
      master + rank*proc_elems*VL, parallel, NULL);
  if(par_plan == NULL) {
    fprintf(stderr, "unable to create parallel transform plan\n");
    goto die_free_parallel;
  }

  /* Execute the parallel transform */
  if(fft_par_execute_fwd(par_plan) != MPI_SUCCESS) {
    fprintf(stderr, "unable to execute parallel transform\n");
    goto die_destroy_par_plan;
  }

  /* Compare values: track the supremum of |serial - parallel|.
   * This must be a double: with an int accumulator every fmax() result was
   * truncated toward zero, so any deviation below 1.0 was recorded as 0 and
   * the tolerance check below could never fail for such errors. */
  double sup = 0.0;
  for(int i = 0; i < proc_elems*VL; ++i) {
    sup = fmax(sup, cabs(serial[rank*proc_elems*VL + i] - parallel[i]));
  }
  if(sup < 1.0e-6) {
    ret = EXIT_SUCCESS;
  }

die_destroy_par_plan:
  fft_par_plan_destroy(par_plan);
die_free_parallel:
  fftw_free(parallel);
die_destroy_serial_plan:
  fftw_destroy_plan(serial_plan);
die_free_serial:
  fftw_free(serial);
die_free_master:
  fftw_free(master);
die_free_prng:
  free(prng);
die_finalize_mpi:
  if(MPI_Finalize() != MPI_SUCCESS) {
    fprintf(stderr, "unable to finalize MPI\n");
    ret = EXIT_FAILURE;
  }
die_immed:
  fftw_cleanup();
  return ret;
}
Ejemplo n.º 4
0
/*
 * Test driver for an MPI datatype leak checker: creates, commits and frees
 * derived datatypes in a fixed sequence. Several types are deliberately
 * never freed so that a checking tool reports them; the inline comments say
 * which steps are expected to produce a report and which are not.
 *
 * The order of the MPI calls is the test itself - do not reorder them.
 *
 * Note: MPI_Type_lb, MPI_Type_hvector, MPI_Type_hindexed and MPI_Type_struct
 * are MPI-1 functions that were removed in MPI-3.0, so this needs an MPI
 * library that still provides them.
 */
int main (int argc, char **argv)
{
    /* NOTE(review): `e` is declared but not used anywhere in this function. */
    int i, e;
    MPI_Aint lb;
    MPI_Datatype type, types[4];
    /* Single-block description (offset 0, two MPI_CHARs) shared by the
       hindexed and struct constructors below. */
    MPI_Aint indices[1];
    int blocklens[1];
    MPI_Datatype oldtypes[1];

    MPI_Init( &argc, &argv );

    indices[0] = 0;
    blocklens[0] = 2;
    oldtypes[0] = MPI_CHAR;

    /* generate four distinct datatypes - should trigger a leak report */
    for( i = 0; i < 4; i++ ) {
        MPI_Type_vector( 1, i+2, i+2, MPI_CHAR, types + i );
        MPI_Type_commit( types + i );
        MPI_Type_lb( types[i], &lb );
    }
    /* now free one and create it again - should not trigger a report */
    /* NOTE(review): `type` keeps a copy of the handle that is freed on the
       next line, and is then passed to MPI_Type_lb. This looks like a
       deliberate use of a stale handle for the checker - confirm. */
    type = types[3];
    MPI_Type_free( types + 3 );
    MPI_Type_lb( type, &lb );
    MPI_Type_vector( 1, 3+2, 3+2, MPI_CHAR, types + 3 );
    MPI_Type_commit( types + 3 );

    /* creating the same type over and over again is expected to create new handles */
    /* (this overwrites types[0] and types[1] without freeing the old handles) */
    for( i = 0; i < 2; i++ ) {
        MPI_Type_vector( 1, 10, 10, MPI_CHAR, types + i );
        MPI_Type_commit( types + i );
        assert( i == 0 || types[i] != types[0] );
    }
    /* each type has to be freed once */
    /* Same pattern as above: query a copy of the handle after freeing it. */
    type = types[1];
    MPI_Type_free( types + 1 );
    MPI_Type_lb( type, &lb );
    MPI_Type_lb( types[0], &lb );
    /* now trigger leak report */
    for( i = 1; i < 4; i++ ) {
        MPI_Type_vector( 1, 10, 10, MPI_CHAR, types + i );
        MPI_Type_commit( types + i );
        assert( i == 0 || types[i] != types[0] );
    }

    /* create the same type (two chars) in four other ways, thus trigger a leak report */
    /* Each constructor overwrites `type`, so the previous handle is lost. */
    MPI_Type_hvector( 1, 2, 2, MPI_CHAR, &type );
    MPI_Type_commit( &type );
    MPI_Type_commit( &type ); /* should have no effect */
    MPI_Type_contiguous( 2, MPI_CHAR, &type );
    /* MPI_Type_commit( &type ); */ /* intentionally missing - should be included in leak report anyway */
    MPI_Type_hindexed( 1, blocklens, indices, MPI_CHAR, &type );
    MPI_Type_commit( &type );
    MPI_Type_struct( 1, blocklens, indices, oldtypes, &type );
    MPI_Type_commit( &type );

    /*
     * Create a custom type, then "duplicate" it by creating
     * a type derived from it which is identical to it
     * (in MPI-2 one could use MPI_Type_dup()). First create
     * and free without commit, then with.
     *
     * None of this should trigger an error. 
     */
    MPI_Type_contiguous( 2, MPI_CHAR, types + 0 );
    MPI_Type_contiguous( 1, types[0], types + 1 );
    MPI_Type_free( types + 0 );
    MPI_Type_free( types + 1 );

    MPI_Type_contiguous( 2, MPI_CHAR, types + 0 );
    MPI_Type_commit( types + 0);
    MPI_Type_contiguous( 1, types[0], types + 1 );
    MPI_Type_commit( types + 1 );
    MPI_Type_free( types + 0 );
    MPI_Type_free( types + 1 );
    

    MPI_Finalize( );

    return 0;
}
/* INIT MPI SYSTEM
 *	Initializes the MPI library, then stores the number of processes in
 *	`num_processors` and this process's rank in `rank` (both declared
 *	outside this function). Nothing is returned.
 *
 *	argc, argv: the program's command-line arguments. They are received
 *	by value, so any changes MPI_Init makes to them are not visible to
 *	the caller.
 */
void init_mpi(int argc, char **argv){
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &num_processors);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	/* Printed by every process, not only rank 0. */
	printf("Initialize MPI complete for node %d\n", rank);
}
Ejemplo n.º 6
0
/*
 * MPI-IO write benchmark. An m x n matrix of doubles is block-distributed
 * over a 2-D Cartesian process grid and written to disk twice:
 *
 *   "level 3": one collective MPI_File_write_all through a subarray file view
 *              (testfile-3.out, opened with the info3 hints);
 *   "level 0": one independent MPI_File_write_at per local column
 *              (testfile-0.out, opened with the info0 hints).
 *
 * For each variant rank 0 prints: process count, matrix size, stripe count,
 * max open time, max write time, max write+close time, and the write rate.
 *
 * Command line: [ m ] [ n ] [ nstripes ] [ stripeunit ] [ headersize ]
 * (values are assigned in that order by the parsing code below).
 *
 * Returns 0; I/O errors are handed to myAbort().
 */
int main(int argc, char *argv[])
{
	int iarrayOfSizes[2], iarrayOfSubsizes[2], iarrayOfStarts[2], ilocal_size;
	int nproc[2], periods[2], icoord[2];
	int m, n, i, j, wsize, wrank, crank, ndims, lrows, lcols, grow, gcol, err;
	MPI_Datatype filetype;
	MPI_File     fh;
	MPI_Comm     cartcomm;
	MPI_Info     info0, info3;
	double       t, topen, twrite, tclose;
	double       wrate = 0.0;  /* reported as 0 when no write time was measured */
	double       *local_array;
	char         nstripesStr[12], stripeUnitStr[12];
	int          nstripes = -1;
	int          stripeUnit = -1;
	MPI_Offset   headerSize = 0;

	MPI_Init(0,0);

	MPI_Comm_rank(MPI_COMM_WORLD, &wrank);

	/* Get global array size */
	m = n = 128;      /* Set default size */

	/* ioda [ n ] [ m ] [ nstripes ] [ stripeunit ] [ headersize ] */
	if (argc > 0) {
		if (argc > 1) m = atoi(argv[1]);
		if (argc > 2) n = atoi(argv[2]);
		if (argc > 3) nstripes = atoi(argv[3]);
		if (argc > 4) stripeUnit = atoi(argv[4]);
		if (argc > 5) headerSize = atoi(argv[5]);
		if (argc > 6) {
			if (wrank == 0)
				fprintf(stderr,"Unrecognized argument %s\n", argv[6]);
			MPI_Abort(MPI_COMM_WORLD,1);
		}
	}
	if (wrank == 0) printf("Matrix is [%d,%d]; file dir = %s\n", m, n, MYSCRATCHDIR );

	/* The default number of stripes = totalsize/1M */
	if (nstripes < 0) {
		nstripes = n * m * sizeof(double) / (1024*1024);
		if (nstripes < 1) nstripes = 1;
	}
	if (wrank == 0) printf("nstripes = %d, stripeUnit = %d, header size = %d\n",
			       nstripes, stripeUnit, (int)headerSize);

	/* Use topology routines to get decomposition and coordinates */
	MPI_Comm_size(MPI_COMM_WORLD, &wsize);
	nproc[0] = 0; nproc[1] = 0;
	ndims = 2;
	MPI_Dims_create(wsize, ndims, nproc);
	periods[0] = 0; periods[1] = 0;
	MPI_Cart_create(MPI_COMM_WORLD, ndims, nproc, periods, 1, &cartcomm);
	MPI_Comm_rank(cartcomm, &crank);
	MPI_Cart_coords(cartcomm, crank, ndims, icoord);

	/* Integer division: when m or n is not a multiple of the process grid
	   the trailing rows/columns are not assigned to any process. */
	iarrayOfSizes[0]    = m;
	iarrayOfSizes[1]    = n;
	iarrayOfSubsizes[0] = m/nproc[0];
	iarrayOfSubsizes[1] = n/nproc[1];
	iarrayOfStarts[0]   = icoord[0] * iarrayOfSubsizes[0];
	iarrayOfStarts[1]   = icoord[1] * iarrayOfSubsizes[1];

	/* Initialize my block of the data */
	ilocal_size = iarrayOfSubsizes[0] * iarrayOfSubsizes[1];
	lrows = iarrayOfSubsizes[0];
	lcols = iarrayOfSubsizes[1];
	local_array = (double *)malloc(lrows*lcols*sizeof(double));
	/* malloc(0) may legitimately return NULL, so only treat NULL as an
	   error when there is something to store. */
	if (local_array == NULL && ilocal_size > 0) {
		fprintf(stderr, "Unable to allocate local array of %d x %d doubles\n",
			lrows, lcols);
		MPI_Abort(MPI_COMM_WORLD, 1);
	}
	gcol  = iarrayOfStarts[1];
	grow = iarrayOfStarts[0];
	/* Column-major local storage; each element holds its global linear index. */
	for (i=0; i<lrows; i++) {
		for (j=0; j<lcols; j++) {
			local_array[j*lrows+i] = (grow+i) + (gcol+j)*m;
		}
	}

	/* Fortran order simply means the data is stored by columns */
	MPI_Type_create_subarray(ndims, iarrayOfSizes, iarrayOfSubsizes,
				 iarrayOfStarts, MPI_ORDER_FORTRAN, MPI_DOUBLE,
				 &filetype);
	MPI_Type_commit(&filetype);

	info0 = MPI_INFO_NULL;
	info3 = MPI_INFO_NULL;
	if (nstripes > 0 || stripeUnit > 0) {
		MPI_Info_create(&info0);
		if (nstripes > 0) {
			snprintf(nstripesStr, sizeof(nstripesStr), "%d", nstripes);
			MPI_Info_set(info0, "striping_factor", nstripesStr);
			MPI_Info_set(info0, "cb_nodes", nstripesStr);
		}
		if (stripeUnit > 0) {
			snprintf(stripeUnitStr, sizeof(stripeUnitStr), "%d", stripeUnit);
			MPI_Info_set(info0, "striping_unit", stripeUnitStr);
		}
		/* info3 = info0 plus the collective-only hint */
		MPI_Info_dup(info0, &info3);
		MPI_Info_set(info3, "romio_no_indep_rw", "true");

		/* Other hints to consider:
		      direct_io=true

		         The default cb_buffer_size is 16777216 , but is overridden by the
			    striping unit, which is smaller by default.
		*/
	}

	/* level - 3 */
	MPI_Barrier(MPI_COMM_WORLD);
	t = MPI_Wtime();
	err = MPI_File_open(cartcomm, MYSCRATCHDIR "testfile-3.out",
			    MPI_MODE_CREATE | MPI_MODE_RDWR, info3, &fh);
	topen = MPI_Wtime() - t;
	if (err != MPI_SUCCESS) myAbort(err, "open testfile-3.out");

	if (headerSize > 0) {
		/* Simulate writing a header */
		if (wrank == 0) {
			char *header;
			header = (char *)calloc(1,(size_t)headerSize);
			MPI_File_write(fh, header, headerSize, MPI_BYTE, MPI_STATUS_IGNORE);
			free(header);
		}
		MPI_Barrier(cartcomm);
	}

	MPI_File_set_view(fh, headerSize, MPI_DOUBLE, filetype, "native", MPI_INFO_NULL);

	MPI_Barrier(MPI_COMM_WORLD);
	t = MPI_Wtime();
	err = MPI_File_write_all(fh, local_array, ilocal_size, MPI_DOUBLE,
				 MPI_STATUS_IGNORE);
	twrite = MPI_Wtime() - t;
	if (err != MPI_SUCCESS) myAbort(err, "collective write");

	err = MPI_File_close(&fh);
	tclose = MPI_Wtime() - t;
	/* tclose is the time for the write(s) + the close, in case the
	   implementation delays (some of) the writes until the close */
	if (err != MPI_SUCCESS) myAbort(err, "close testfile-3.out");

	MPI_Allreduce(MPI_IN_PLACE, &topen, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
	MPI_Allreduce(MPI_IN_PLACE, &twrite, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
	MPI_Allreduce(MPI_IN_PLACE, &tclose, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
	/* Always assign wrate so the printf below never reads an
	   indeterminate value when the timer resolution yields twrite == 0. */
	wrate = (twrite > 0) ? (double)m * (double)n * sizeof(double)/twrite : 0.0;
	if (wrank == 0)
		printf("%d\t[%d,%d]\t%d\t%.2e\t%.2e\t%.2e\t%.2e\n", wsize, m, n, nstripes, topen,
		       twrite, tclose, wrate);

	/* level - 0 */
	MPI_Barrier(MPI_COMM_WORLD);
	t = MPI_Wtime();
	err = MPI_File_open(cartcomm, MYSCRATCHDIR "testfile-0.out",
			    MPI_MODE_CREATE | MPI_MODE_RDWR, info0, &fh);
	topen = MPI_Wtime() - t;
	if (err != MPI_SUCCESS) myAbort(err, "open testfile-0.out");

	if (headerSize > 0) {
		/* Simulate writing a header */
		if (wrank == 0) {
			char *header;
			header = (char *)calloc(1,(size_t)headerSize);
			MPI_File_write(fh, header, headerSize, MPI_BYTE, MPI_STATUS_IGNORE);
			free(header);
		}
		MPI_Barrier(cartcomm);
	}

	MPI_Barrier(MPI_COMM_WORLD);
	t = MPI_Wtime();
	gcol = iarrayOfStarts[1];
	grow = iarrayOfStarts[0];
	/* One explicit-offset write per local column: the column starts at
	   global element (grow, gcol+j) of the column-major m x n matrix. */
	for (j=0; j<lcols; j++) {
		MPI_Offset offset = headerSize +
			((MPI_Offset)(grow) + (MPI_Offset)(gcol+j)*m) * sizeof(double);
		err = MPI_File_write_at(fh, offset, local_array+j*lrows, lrows, MPI_DOUBLE,
					MPI_STATUS_IGNORE);
		if (err != MPI_SUCCESS) myAbort(err, "write at");
	}
	twrite = MPI_Wtime() - t;

	err = MPI_File_close(&fh);
	tclose = MPI_Wtime() - t;
	/* tclose is the time for the write(s) + the close, in case the
	   implementation delays (some of) the writes until the close */
	if (err != MPI_SUCCESS) myAbort(err, "close testfile-0");

	MPI_Allreduce(MPI_IN_PLACE, &topen, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
	MPI_Allreduce(MPI_IN_PLACE, &twrite, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
	MPI_Allreduce(MPI_IN_PLACE, &tclose, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
	wrate = (twrite > 0) ? (double)m * (double)n * sizeof(double)/twrite : 0.0;
	if (wrank == 0)
		printf("%d\t[%d,%d]\t%d\t%.2e\t%.2e\t%.2e\t%.2e\n", wsize, m, n, nstripes, topen,
		       twrite, tclose, wrate);

	/* Release every MPI object created above before finalizing. */
	if (info0 != MPI_INFO_NULL) {
		MPI_Info_free(&info0);
		MPI_Info_free(&info3);
	}
	MPI_Type_free(&filetype);
	MPI_Comm_free(&cartcomm);
	free(local_array);
	MPI_Finalize();
	return 0;
}
Ejemplo n.º 7
0
/*
 * Argument-less MPI initialisation stub: calls MPI_Init with null
 * argc/argv pointers and stores its return code in *ierr.
 */
void PETSC_STDCALL  mpi_init_(int *ierr)
{
  int   *no_argc = (int*)0;
  char ***no_argv = (char***)0;

  *ierr = MPI_Init(no_argc, no_argv);
}
Ejemplo n.º 8
0
static void
MakeCache(const std::string &prefix, int argc, char *argv[], bool _doEmbedded)
{
	MPI_Status status;
	int xi;
 	std::map<std::string, std::list<std::string> >::const_iterator pIter;
	int np, me; 

	cachePath = prefix;

	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &np);
	MPI_Comm_rank(MPI_COMM_WORLD, &me);

	doEmbedded = _doEmbedded;

	LoadEverything();
//	outputFormat = CLONEWISE_OUTPUT_XML;
	printf("# loaded everything\n");
	fflush(stdout);

	if (embeddedOnly) {
		std::map<std::string, std::set<std::string> >::const_iterator eIter;

		if (embeddedList.size() == 0) {
			char s[1024];

		        snprintf(s, sizeof(s), "/var/lib/Clonewise/clones/distros/%s/embedded-code-copies", distroString);
		        LoadEmbeddedCodeCopiesList(s);
		}
		for (	eIter  = embeddedList.begin(), xi = 0;
			eIter != embeddedList.end();
			eIter++)
		{
			vPackages.push_back(eIter->first);
			packageQueue.push_back(xi++);
		}
	} else {
		for (	pIter  = packages.begin(), xi = 0;
			pIter != packages.end();
			pIter++)
		{
			vPackages.push_back(pIter->first);
			packageQueue.push_back(xi++);
		}
	}

	printf("# going to scan %i packages\n", xi);
	fflush(stdout);
	if (me == 0) {
		while (packageQueue.size() != 0) {
			int index, which;

			MPI_Recv(&which, 1, MPI_INT, MPI_ANY_SOURCE, TAG1, MPI_COMM_WORLD, &status); 
			index = packageQueue.front();
			packageQueue.pop_front();
			MPI_Send(&index, 1, MPI_INT, which, TAG1, MPI_COMM_WORLD); 
		}
		for (int i = 1; i < np; i++) {
			int which, neg = -1;

			MPI_Recv(&which, 1, MPI_INT, i, TAG1, MPI_COMM_WORLD, &status); 
			MPI_Send(&neg, 1, MPI_INT, i, TAG1, MPI_COMM_WORLD); 
		}
		for (size_t i = 0; i < vPackages.size(); i++) { 
			int which;
			int r[2], size;
			char *result;
			FILE *f;
			char s[1024];

			MPI_Recv(&which, 1, MPI_INT, MPI_ANY_SOURCE, TAG1, MPI_COMM_WORLD, &status); 
			MPI_Recv(r, 2, MPI_INT, which, TAG1, MPI_COMM_WORLD, &status); 

			size = r[1];
			result = new char[size];

			MPI_Recv(result, size, MPI_CHAR, which, TAG1, MPI_COMM_WORLD, &status); 

			snprintf(s, sizeof(s), "/var/lib/Clonewise/clones/distros/%s/%s/%s", distroString, prefix.c_str(), vPackages[r[0]].c_str());
			f = fopen(s, "w");
			fwrite(result, 1, size, f);
			fclose(f), f = NULL;

			delete [] result;
		}
	} else {
		DoWorkLoop(me);
	}

	MPI_Finalize(); 
	exit(0); 
}
Ejemplo n.º 9
0
Archivo: ncbo.c Proyecto: hdfeos/nco
int 
main(int argc,char **argv)
{
  nco_bool CNV_CCM_CCSM_CF;
  nco_bool EXCLUDE_INPUT_LIST=False; /* Option c */
  nco_bool EXTRACT_ALL_COORDINATES=False; /* Option c */
  nco_bool EXTRACT_ASSOCIATED_COORDINATES=True; /* Option C */
  nco_bool FILE_1_RETRIEVED_FROM_REMOTE_LOCATION=False;
  nco_bool FILE_2_RETRIEVED_FROM_REMOTE_LOCATION=False;
  nco_bool FL_LST_IN_FROM_STDIN=False; /* [flg] fl_lst_in comes from stdin */
  nco_bool FORCE_APPEND=False; /* Option A */
  nco_bool FORCE_OVERWRITE=False; /* Option O */
  nco_bool FORTRAN_IDX_CNV=False; /* Option F */
  nco_bool GRP_VAR_UNN=False; /* [flg] Select union of specified groups and variables */
  nco_bool HISTORY_APPEND=True; /* Option h */
  nco_bool MSA_USR_RDR=False; /* [flg] Multi-Slab Algorithm returns hyperslabs in user-specified order*/
  nco_bool RAM_CREATE=False; /* [flg] Create file in RAM */
  nco_bool RAM_OPEN=False; /* [flg] Open (netCDF3-only) file(s) in RAM */
  nco_bool RM_RMT_FL_PST_PRC=True; /* Option R */
  nco_bool WRT_TMP_FL=True; /* [flg] Write output to temporary file */
  nco_bool flg_cln=True; /* [flg] Clean memory prior to exit */
  nco_bool flg_ddra=False; /* [flg] DDRA diagnostics */

  char **fl_lst_abb=NULL; /* Option a */
  char **fl_lst_in;
  char **gaa_arg=NULL; /* [sng] Global attribute arguments */
  char **var_lst_in=NULL_CEWI;
  char *aux_arg[NC_MAX_DIMS];
  char *cmd_ln;
  char *cnk_arg[NC_MAX_DIMS];
  char *cnk_map_sng=NULL_CEWI; /* [sng] Chunking map */
  char *cnk_plc_sng=NULL_CEWI; /* [sng] Chunking policy */
  char *fl_in_1=NULL; /* fl_in_1 is nco_realloc'd when not NULL */
  char *fl_in_2=NULL; /* fl_in_2 is nco_realloc'd when not NULL */
  char *fl_out=NULL; /* Option o */
  char *fl_out_tmp=NULL;
  char *fl_pth=NULL; /* Option p */
  char *fl_pth_lcl=NULL; /* Option l */
  char *lmt_arg[NC_MAX_DIMS];
  char *nco_op_typ_sng=NULL; /* [sng] Operation type */
  char *opt_crr=NULL; /* [sng] String representation of current long-option name */
  char *optarg_lcl=NULL; /* [sng] Local copy of system optarg */
  char *ppc_arg[NC_MAX_VARS]; /* [sng] PPC arguments */
  char *sng_cnv_rcd=NULL_CEWI; /* [sng] strtol()/strtoul() return code */
  char **grp_lst_in=NULL; /* [sng] User-specified list of groups */

  char trv_pth[]="/"; /* [sng] Root path of traversal tree */

  const char * const CVS_Id="$Id$"; 
  const char * const CVS_Revision="$Revision$";
  const char * const opt_sht_lst="3467ACcD:d:FG:g:hL:l:Oo:p:rRt:v:X:xzy:-:";

  cnk_sct cnk; /* [sct] Chunking structure */

#if defined(__cplusplus) || defined(PGI_CC)
  ddra_info_sct ddra_info;
  ddra_info.flg_ddra=False;
#else /* !__cplusplus */
  ddra_info_sct ddra_info={.MRV_flg=False,.flg_ddra=False,.lmn_nbr=0LL,.lmn_nbr_avg=0LL,.lmn_nbr_wgt=0LL,.nco_op_typ=nco_op_nil,.rnk_avg=0,.rnk_var=0,.rnk_wgt=0,.tmr_flg=nco_tmr_srt,.var_idx=0,.wgt_brd_flg=False,.wrd_sz=0};
#endif /* !__cplusplus */

  extern char *optarg;
  extern int optind;

  gpe_sct *gpe=NULL; /* [sng] Group Path Editing (GPE) structure */

  int *in_id_1_arr;
  int *in_id_2_arr;

  int abb_arg_nbr=0;
  int aux_nbr=0; /* [nbr] Number of auxiliary coordinate hyperslabs specified */
  int cnk_map=nco_cnk_map_nil; /* [enm] Chunking map */
  int cnk_nbr=0; /* [nbr] Number of chunk sizes */
  int cnk_plc=nco_cnk_plc_nil; /* [enm] Chunking policy */
  int dfl_lvl=NCO_DFL_LVL_UNDEFINED; /* [enm] Deflate level */
  int fl_idx;
  int fl_nbr=0;
  int fl_in_fmt_1; /* [enm] Input file format */
  int fl_in_fmt_2; /* [enm] Input file format */
  int fl_out_fmt=NCO_FORMAT_UNDEFINED; /* [enm] Output file format */
  int fll_md_old; /* [enm] Old fill mode */
  int gaa_nbr=0; /* [nbr] Number of global attributes to add */
  int idx;
  int in_id_1;  
  int in_id_2;  
  int lmt_nbr=0; /* Option d. NB: lmt_nbr gets incremented */
  int md_open; /* [enm] Mode flag for nc_open() call */

  int nbr_glb_att_1;  /* [nbr] Number of global attributes in file */
  int nbr_glb_att_2;  /* [nbr] Number of global attributes in file */
  int nbr_grp_att_1;  /* [nbr] Number of group attributes in file */
  int nbr_grp_att_2;  /* [nbr] Number of group attributes in file */
  int nbr_att_var_1;  /* [nbr] Number of variable attributes in file */
  int nbr_att_var_2;  /* [nbr] Number of variable attributes in file */
  int nbr_dmn_fl_1;   /* [nbr] Number of dimensions in file */
  int nbr_dmn_fl_2;   /* [nbr] Number of dimensions in file */
  int nbr_rec_fl_1;   /* [nbr] Number of record dimensions in file */
  int nbr_rec_fl_2;   /* [nbr] Number of record dimensions in file */
  int grp_dpt_fl_1;   /* [nbr] Maximum group depth (root = 0) */
  int grp_dpt_fl_2;   /* [nbr] Maximum group depth (root = 0) */ 
  int grp_lst_in_nbr=0; /* [nbr] Number of groups explicitly specified by user */
  int nbr_grp_fl_1;   /* [nbr] Number of groups in file */
  int nbr_grp_fl_2;   /* [nbr] Number of groups in file */
  int var_ntm_fl_1;   /* [nbr] Number of non-atomic variables in file */
  int var_ntm_fl_2;   /* [nbr] Number of non-atomic variables in file */
  int nbr_var_fl_1;   /* [nbr] Number of atomic-type variables in file */
  int nbr_var_fl_2;   /* [nbr] Number of atomic-type variables in file */
  int nbr_gpe_nm;     /* [nbr] Number of GPE entries */ 
  int nbr_cmn_nm=0;     /* [nbr] Number of common entries */
  int nco_op_typ=nco_op_nil; /* [enm] Operation type */
  int opt;
  int out_id;  
  int ppc_nbr=0; /* [nbr] Number of PPC arguments */
  int rcd=NC_NOERR; /* [rcd] Return code */
  int thr_idx; /* [idx] Index of current thread */
  int thr_nbr=int_CEWI; /* [nbr] Thread number Option t */
  int var_lst_in_nbr=0;

  size_t bfr_sz_hnt=NC_SIZEHINT_DEFAULT; /* [B] Buffer size hint */
  size_t cnk_min_byt=NCO_CNK_SZ_MIN_BYT_DFL; /* [B] Minimize size of variable to chunk */
  size_t cnk_sz_byt=0UL; /* [B] Chunk size in bytes */
  size_t cnk_sz_scl=0UL; /* [nbr] Chunk size scalar */
  size_t hdr_pad=0UL; /* [B] Pad at end of header section */

  trv_tbl_sct *trv_tbl_1=NULL; /* [lst] Traversal table input file 1 */
  trv_tbl_sct *trv_tbl_2=NULL; /* [lst] Traversal table input file 2 */

  gpe_nm_sct *gpe_nm=NULL; /* [sct] GPE name duplicate check array */
  
  nco_cmn_t *cmn_lst=NULL; /* [sct] A list of common names */ 
  
  nco_dmn_dne_t *flg_dne1=NULL; /* [lst] Flag to check if input dimension -d "does not exist" */
  nco_dmn_dne_t *flg_dne2=NULL; /* [lst] Flag to check if input dimension -d "does not exist" */
  nco_dmn_dne_t *flg_dne=NULL; /* [lst] Flag to check if input dimension -d "does not exist" */

#ifdef ENABLE_MPI
  /* Declare all MPI-specific variables here */
  MPI_Comm mpi_cmm=MPI_COMM_WORLD; /* [prc] Communicator */
  int prc_rnk; /* [idx] Process rank */
  int prc_nbr=0; /* [nbr] Number of MPI processes */
#endif /* !ENABLE_MPI */
  
  static struct option opt_lng[]={ /* Structure ordered by short option key if possible */
    /* Long options with no argument, no short option counterpart */
    {"cln",no_argument,0,0}, /* [flg] Clean memory prior to exit */
    {"clean",no_argument,0,0}, /* [flg] Clean memory prior to exit */
    {"mmr_cln",no_argument,0,0}, /* [flg] Clean memory prior to exit */
    {"drt",no_argument,0,0}, /* [flg] Allow dirty memory on exit */
    {"dirty",no_argument,0,0}, /* [flg] Allow dirty memory on exit */
    {"mmr_drt",no_argument,0,0}, /* [flg] Allow dirty memory on exit */
    {"ddra",no_argument,0,0}, /* [flg] DDRA diagnostics */
    {"mdl_cmp",no_argument,0,0}, /* [flg] DDRA diagnostics */
    {"hdf4",no_argument,0,0}, /* [flg] Treat file as HDF4 */
    {"hdf_upk",no_argument,0,0}, /* [flg] HDF unpack convention: unpacked=scale_factor*(packed-add_offset) */
    {"hdf_unpack",no_argument,0,0}, /* [flg] HDF unpack convention: unpacked=scale_factor*(packed-add_offset) */
    {"msa_usr_rdr",no_argument,0,0}, /* [flg] Multi-Slab Algorithm returns hyperslabs in user-specified order */	  
    {"msa_user_order",no_argument,0,0}, /* [flg] Multi-Slab Algorithm returns hyperslabs in user-specified order */
    {"ram_all",no_argument,0,0}, /* [flg] Open (netCDF3) and create file(s) in RAM */
    {"create_ram",no_argument,0,0}, /* [flg] Create file in RAM */
    {"open_ram",no_argument,0,0}, /* [flg] Open (netCDF3) file(s) in RAM */
    {"diskless_all",no_argument,0,0}, /* [flg] Open (netCDF3) and create file(s) in RAM */
    {"wrt_tmp_fl",no_argument,0,0}, /* [flg] Write output to temporary file */
    {"write_tmp_fl",no_argument,0,0}, /* [flg] Write output to temporary file */
    {"no_tmp_fl",no_argument,0,0}, /* [flg] Do not write output to temporary file */
    {"intersection",no_argument,0,0}, /* [flg] Select intersection of specified groups and variables */
    {"nsx",no_argument,0,0}, /* [flg] Select intersection of specified groups and variables */
    {"union",no_argument,0,0}, /* [flg] Select union of specified groups and variables */
    {"unn",no_argument,0,0}, /* [flg] Select union of specified groups and variables */
    {"version",no_argument,0,0},
    {"vrs",no_argument,0,0},
    /* Long options with argument, no short option counterpart */
    {"bfr_sz_hnt",required_argument,0,0}, /* [B] Buffer size hint */
    {"buffer_size_hint",required_argument,0,0}, /* [B] Buffer size hint */
    {"cnk_byt",required_argument,0,0}, /* [B] Chunk size in bytes */
    {"chunk_byte",required_argument,0,0}, /* [B] Chunk size in bytes */
    {"cnk_dmn",required_argument,0,0}, /* [nbr] Chunk size */
    {"chunk_dimension",required_argument,0,0}, /* [nbr] Chunk size */
    {"cnk_map",required_argument,0,0}, /* [nbr] Chunking map */
    {"chunk_map",required_argument,0,0}, /* [nbr] Chunking map */
    {"cnk_min",required_argument,0,0}, /* [B] Minimize size of variable to chunk */
    {"chunk_min",required_argument,0,0}, /* [B] Minimize size of variable to chunk */
    {"cnk_plc",required_argument,0,0}, /* [nbr] Chunking policy */
    {"chunk_policy",required_argument,0,0}, /* [nbr] Chunking policy */
    {"cnk_scl",required_argument,0,0}, /* [nbr] Chunk size scalar */
    {"chunk_scalar",required_argument,0,0}, /* [nbr] Chunk size scalar */
    {"fl_fmt",required_argument,0,0},
    {"file_format",required_argument,0,0},
    {"gaa",required_argument,0,0}, /* [sng] Global attribute add */
    {"glb_att_add",required_argument,0,0}, /* [sng] Global attribute add */
    {"hdr_pad",required_argument,0,0},
    {"header_pad",required_argument,0,0},
    {"ppc",required_argument,0,0}, /* [nbr] Precision-preserving compression, i.e., number of total or decimal significant digits */
    {"precision_preserving_compression",required_argument,0,0}, /* [nbr] Precision-preserving compression, i.e., number of total or decimal significant digits */
    {"quantize",required_argument,0,0}, /* [nbr] Precision-preserving compression, i.e., number of total or decimal significant digits */
    /* Long options with short counterparts */
    {"3",no_argument,0,'3'},
    {"4",no_argument,0,'4'},
    {"64bit",no_argument,0,'4'},
    {"netcdf4",no_argument,0,'4'},
    {"7",no_argument,0,'7'},
    {"append",no_argument,0,'A'},
    {"coords",no_argument,0,'c'},
    {"crd",no_argument,0,'c'},
    {"no-coords",no_argument,0,'C'},
    {"no-crd",no_argument,0,'C'},
    {"debug",required_argument,0,'D'},
    {"nco_dbg_lvl",required_argument,0,'D'},
    {"dimension",required_argument,0,'d'},
    {"dmn",required_argument,0,'d'},
    {"fortran",no_argument,0,'F'},
    {"ftn",no_argument,0,'F'},
    {"gpe",required_argument,0,'G'}, /* [sng] Group Path Edit (GPE) */
    {"grp",required_argument,0,'g'},
    {"group",required_argument,0,'g'},
    {"history",no_argument,0,'h'},
    {"hst",no_argument,0,'h'},
    {"dfl_lvl",required_argument,0,'L'}, /* [enm] Deflate level */
    {"deflate",required_argument,0,'L'}, /* [enm] Deflate level */
    {"local",required_argument,0,'l'},
    {"lcl",required_argument,0,'l'},
    {"overwrite",no_argument,0,'O'},
    {"ovr",no_argument,0,'O'},
    {"path",required_argument,0,'p'},
    {"retain",no_argument,0,'R'},
    {"rtn",no_argument,0,'R'},
    {"revision",no_argument,0,'r'},
    {"thr_nbr",required_argument,0,'t'},
    {"threads",required_argument,0,'t'},
    {"omp_num_threads",required_argument,0,'t'},
    {"variable",required_argument,0,'v'},
    {"auxiliary",required_argument,0,'X'},
    {"exclude",no_argument,0,'x'},
    {"xcl",no_argument,0,'x'},
    {"operation",required_argument,0,'y'},
    {"op_typ",required_argument,0,'y'},
    {"help",no_argument,0,'?'},
    {"hlp",no_argument,0,'?'},
    {0,0,0,0}
  }; /* end opt_lng */
  int opt_idx=0; /* Index of current long option into opt_lng array */
  
  nbr_gpe_nm=0;
  
  /* Start timer and save command line */ 
  ddra_info.tmr_flg=nco_tmr_srt;
  rcd+=nco_ddra((char *)NULL,(char *)NULL,&ddra_info);
  ddra_info.tmr_flg=nco_tmr_mtd;
  cmd_ln=nco_cmd_ln_sng(argc,argv);
  
  /* Get program name and set program enum (e.g., nco_prg_id=ncra) */
  nco_prg_nm=nco_prg_prs(argv[0],&nco_prg_id);
  
#ifdef ENABLE_MPI
  /* MPI Initialization */
  if(False) (void)fprintf(stdout,gettext("%s: WARNING Compiled with MPI\n"),nco_prg_nm);
  MPI_Init(&argc,&argv);
  MPI_Comm_size(mpi_cmm,&prc_nbr);
  MPI_Comm_rank(mpi_cmm,&prc_rnk);
#endif /* !ENABLE_MPI */
  
  /* Parse command line arguments */
  while(1){
    /* getopt_long_only() allows one dash to prefix long options */
    opt=getopt_long(argc,argv,opt_sht_lst,opt_lng,&opt_idx);
    /* NB: access to opt_crr is only valid when long_opt is detected */
    if(opt == EOF) break; /* Parse positional arguments once getopt_long() returns EOF */
    opt_crr=(char *)strdup(opt_lng[opt_idx].name);
    
    /* Process long options without short option counterparts */
    if(opt == 0){
      if(!strcmp(opt_crr,"bfr_sz_hnt") || !strcmp(opt_crr,"buffer_size_hint")){
        bfr_sz_hnt=strtoul(optarg,&sng_cnv_rcd,NCO_SNG_CNV_BASE10);
        if(*sng_cnv_rcd) nco_sng_cnv_err(optarg,"strtoul",sng_cnv_rcd);
      } /* endif cnk */
      if(!strcmp(opt_crr,"cnk_byt") || !strcmp(opt_crr,"chunk_byte")){
        cnk_sz_byt=strtoul(optarg,&sng_cnv_rcd,NCO_SNG_CNV_BASE10);
        if(*sng_cnv_rcd) nco_sng_cnv_err(optarg,"strtoul",sng_cnv_rcd);
      } /* endif cnk_byt */
      if(!strcmp(opt_crr,"cnk_min") || !strcmp(opt_crr,"chunk_min")){
        cnk_min_byt=strtoul(optarg,&sng_cnv_rcd,NCO_SNG_CNV_BASE10);
        if(*sng_cnv_rcd) nco_sng_cnv_err(optarg,"strtoul",sng_cnv_rcd);
      } /* endif cnk_min */
      if(!strcmp(opt_crr,"cnk_dmn") || !strcmp(opt_crr,"chunk_dimension")){
        /* Copy limit argument for later processing */
        cnk_arg[cnk_nbr]=(char *)strdup(optarg);
        cnk_nbr++;
      } /* endif cnk_dmn */
      if(!strcmp(opt_crr,"cnk_scl") || !strcmp(opt_crr,"chunk_scalar")){
        cnk_sz_scl=strtoul(optarg,&sng_cnv_rcd,NCO_SNG_CNV_BASE10);
        if(*sng_cnv_rcd) nco_sng_cnv_err(optarg,"strtoul",sng_cnv_rcd);
      } /* endif cnk */
      if(!strcmp(opt_crr,"cnk_map") || !strcmp(opt_crr,"chunk_map")){
        /* Chunking map */
        cnk_map_sng=(char *)strdup(optarg);
        cnk_map=nco_cnk_map_get(cnk_map_sng);
      } /* endif cnk */
      if(!strcmp(opt_crr,"cnk_plc") || !strcmp(opt_crr,"chunk_policy")){
        /* Chunking policy */
        cnk_plc_sng=(char *)strdup(optarg);
        cnk_plc=nco_cnk_plc_get(cnk_plc_sng);
      } /* endif cnk */
      if(!strcmp(opt_crr,"cln") || !strcmp(opt_crr,"mmr_cln") || !strcmp(opt_crr,"clean")) flg_cln=True; /* [flg] Clean memory prior to exit */
      if(!strcmp(opt_crr,"drt") || !strcmp(opt_crr,"mmr_drt") || !strcmp(opt_crr,"dirty")) flg_cln=False; /* [flg] Clean memory prior to exit */
      if(!strcmp(opt_crr,"ddra") || !strcmp(opt_crr,"mdl_cmp")) ddra_info.flg_ddra=flg_ddra=True; /* [flg] DDRA diagnostics */
      if(!strcmp(opt_crr,"fl_fmt") || !strcmp(opt_crr,"file_format")) rcd=nco_create_mode_prs(optarg,&fl_out_fmt);
      if(!strcmp(opt_crr,"gaa") || !strcmp(opt_crr,"glb_att_add")){
        gaa_arg=(char **)nco_realloc(gaa_arg,(gaa_nbr+1)*sizeof(char *));
        gaa_arg[gaa_nbr++]=(char *)strdup(optarg);
      } /* endif gaa */
      if(!strcmp(opt_crr,"hdf4")) nco_fmt_xtn=nco_fmt_xtn_hdf4; /* [enm] Treat file as HDF4 */
      if(!strcmp(opt_crr,"hdf_upk") || !strcmp(opt_crr,"hdf_unpack")) nco_upk_cnv=nco_upk_HDF; /* [flg] HDF unpack convention: unpacked=scale_factor*(packed-add_offset) */
      if(!strcmp(opt_crr,"hdr_pad") || !strcmp(opt_crr,"header_pad")){
        hdr_pad=strtoul(optarg,&sng_cnv_rcd,NCO_SNG_CNV_BASE10);
        if(*sng_cnv_rcd) nco_sng_cnv_err(optarg,"strtoul",sng_cnv_rcd);
      } /* endif "hdr_pad" */
      if(!strcmp(opt_crr,"msa_usr_rdr") || !strcmp(opt_crr,"msa_user_order")) MSA_USR_RDR=True; /* [flg] Multi-Slab Algorithm returns hyperslabs in user-specified order */
      if(!strcmp(opt_crr,"ppc") || !strcmp(opt_crr,"precision_preserving_compression") || !strcmp(opt_crr,"quantize")){
        ppc_arg[ppc_nbr]=(char *)strdup(optarg);
        ppc_nbr++;
      } /* endif "ppc" */
      if(!strcmp(opt_crr,"ram_all") || !strcmp(opt_crr,"create_ram") || !strcmp(opt_crr,"diskless_all")) RAM_CREATE=True; /* [flg] Open (netCDF3) file(s) in RAM */
      if(!strcmp(opt_crr,"ram_all") || !strcmp(opt_crr,"open_ram") || !strcmp(opt_crr,"diskless_all")) RAM_OPEN=True; /* [flg] Create file in RAM */
      if(!strcmp(opt_crr,"unn") || !strcmp(opt_crr,"union")) GRP_VAR_UNN=True;
      if(!strcmp(opt_crr,"nsx") || !strcmp(opt_crr,"intersection")) GRP_VAR_UNN=False;
      if(!strcmp(opt_crr,"vrs") || !strcmp(opt_crr,"version")){
        (void)nco_vrs_prn(CVS_Id,CVS_Revision);
        nco_exit(EXIT_SUCCESS);
      } /* endif "vrs" */
      if(!strcmp(opt_crr,"wrt_tmp_fl") || !strcmp(opt_crr,"write_tmp_fl")) WRT_TMP_FL=True;
      if(!strcmp(opt_crr,"no_tmp_fl")) WRT_TMP_FL=False;
    } /* opt != 0 */
    /* Process short options */
    switch(opt){
    case 0: /* Long options have already been processed, return */
      break;
    case '3': /* Request netCDF3 output storage format */
      fl_out_fmt=NC_FORMAT_CLASSIC;
      break;
    case '4': /* Catch-all to prescribe output storage format */
      if(!strcmp(opt_crr,"64bit")) fl_out_fmt=NC_FORMAT_64BIT; else fl_out_fmt=NC_FORMAT_NETCDF4; 
      break;
    case '6': /* Request netCDF3 64-bit offset output storage format */
      fl_out_fmt=NC_FORMAT_64BIT;
      break;
    case '7': /* Request netCDF4-classic output storage format */
      fl_out_fmt=NC_FORMAT_NETCDF4_CLASSIC;
      break;
    case 'A': /* Toggle FORCE_APPEND */
      FORCE_APPEND=!FORCE_APPEND;
      break;
    case 'C': /* Extract all coordinates associated with extracted variables? */
      EXTRACT_ASSOCIATED_COORDINATES=False;
      break;
    case 'c':
      EXTRACT_ALL_COORDINATES=True;
      break;
    case 'D': /* The debugging level. Default is 0. */
      nco_dbg_lvl=(unsigned short int)strtoul(optarg,&sng_cnv_rcd,NCO_SNG_CNV_BASE10);
      if(*sng_cnv_rcd) nco_sng_cnv_err(optarg,"strtoul",sng_cnv_rcd);
      nc_set_log_level(nco_dbg_lvl);
      break;
    case 'd': /* Copy limit argument for later processing */
      lmt_arg[lmt_nbr]=(char *)strdup(optarg);
      lmt_nbr++;
      break;
    case 'F': /* Toggle index convention. Default is 0-based arrays (C-style). */
      FORTRAN_IDX_CNV=!FORTRAN_IDX_CNV;
      break;
    case 'G': /* Apply Group Path Editing (GPE) to output group */
      gpe=nco_gpe_prs_arg(optarg);
      fl_out_fmt=NC_FORMAT_NETCDF4; 
      break;
    case 'g': /* Copy group argument for later processing */
      /* Replace commas with hashes when within braces (convert back later) */
      optarg_lcl=(char *)strdup(optarg);
      (void)nco_rx_comma2hash(optarg_lcl);
      grp_lst_in=nco_lst_prs_2D(optarg_lcl,",",&grp_lst_in_nbr);
      optarg_lcl=(char *)nco_free(optarg_lcl);
      break;
    case 'h': /* Toggle appending to history global attribute */
      HISTORY_APPEND=!HISTORY_APPEND;
      break;
    case 'L': /* [enm] Deflate level. Default is 0. */
      dfl_lvl=(int)strtol(optarg,&sng_cnv_rcd,NCO_SNG_CNV_BASE10);
      if(*sng_cnv_rcd) nco_sng_cnv_err(optarg,"strtol",sng_cnv_rcd);
      break;
    case 'l': /* Local path prefix for files retrieved from remote file system */
      fl_pth_lcl=(char *)strdup(optarg);
      break;
    case 'O': /* Toggle FORCE_OVERWRITE */
      FORCE_OVERWRITE=!FORCE_OVERWRITE;
      break;
    case 'o': /* Name of output file */
      fl_out=(char *)strdup(optarg);
      break;
    case 'p': /* Common file path */
      fl_pth=(char *)strdup(optarg);
      break;
    case 'R': /* Toggle removal of remotely-retrieved-files. Default is True. */
      RM_RMT_FL_PST_PRC=!RM_RMT_FL_PST_PRC;
      break;
    case 'r': /* Print CVS program information and copyright notice */
      (void)nco_vrs_prn(CVS_Id,CVS_Revision);
      (void)nco_lbr_vrs_prn();
      (void)nco_cpy_prn();
      (void)nco_cnf_prn();
      nco_exit(EXIT_SUCCESS);
      break;
    case 't': /* Thread number */
      thr_nbr=(int)strtol(optarg,&sng_cnv_rcd,NCO_SNG_CNV_BASE10);
      if(*sng_cnv_rcd) nco_sng_cnv_err(optarg,"strtol",sng_cnv_rcd);
      break;
    case 'v': /* Variables to extract/exclude */
      /* Replace commas with hashes when within braces (convert back later) */
      optarg_lcl=(char *)strdup(optarg);
      (void)nco_rx_comma2hash(optarg_lcl);
      var_lst_in=nco_lst_prs_2D(optarg_lcl,",",&var_lst_in_nbr);
      optarg_lcl=(char *)nco_free(optarg_lcl);
      break;
    case 'X': /* Copy auxiliary coordinate argument for later processing */
      aux_arg[aux_nbr]=(char *)strdup(optarg);
      aux_nbr++;
      MSA_USR_RDR=True; /* [flg] Multi-Slab Algorithm returns hyperslabs in user-specified order */      
      break;
    case 'x': /* Exclude rather than extract variables specified with -v */
      EXCLUDE_INPUT_LIST=True;
      break;
    case 'y': /* User-specified operation type overrides invocation default */
      nco_op_typ_sng=(char *)strdup(optarg);
      nco_op_typ=nco_op_typ_get(nco_op_typ_sng);
      break;
    case '?': /* Print proper usage */
      (void)nco_usg_prn();
      nco_exit(EXIT_SUCCESS);
      break;
    case '-': /* Long options are not allowed */
      (void)fprintf(stderr,"%s: ERROR Long options are not available in this build. Use single letter options instead.\n",nco_prg_nm_get());
      nco_exit(EXIT_FAILURE);
      break;
    default: /* Print proper usage */
      (void)fprintf(stdout,"%s ERROR in command-line syntax/options. Please reformulate command accordingly.\n",nco_prg_nm_get());
      (void)nco_usg_prn();
      nco_exit(EXIT_FAILURE);
      break;
    } /* end switch */
    if(opt_crr) opt_crr=(char *)nco_free(opt_crr);
  } /* end while loop */

  /* Initialize traversal tables */
  (void)trv_tbl_init(&trv_tbl_1);
  (void)trv_tbl_init(&trv_tbl_2);

  /* Default operation depends on invocation name */
  if(nco_op_typ_sng == NULL) nco_op_typ=nco_op_typ_get(nco_op_typ_sng);

  /* Process positional arguments and fill in filenames */
  fl_lst_in=nco_fl_lst_mk(argv,argc,optind,&fl_nbr,&fl_out,&FL_LST_IN_FROM_STDIN);

  /* Initialize thread information */
  thr_nbr=nco_openmp_ini(thr_nbr);
  in_id_1_arr=(int *)nco_malloc(thr_nbr*sizeof(int));
  in_id_2_arr=(int *)nco_malloc(thr_nbr*sizeof(int));

  /* Parse filenames */
  fl_idx=0; /* Input file _1 */
  fl_in_1=nco_fl_nm_prs(fl_in_1,fl_idx,&fl_nbr,fl_lst_in,abb_arg_nbr,fl_lst_abb,fl_pth);
  if(nco_dbg_lvl >= nco_dbg_fl) (void)fprintf(stderr,"%s: INFO Input file %d is %s",nco_prg_nm_get(),fl_idx,fl_in_1);
  /* Make sure file is on local system and is readable or die trying */
  fl_in_1=nco_fl_mk_lcl(fl_in_1,fl_pth_lcl,&FILE_1_RETRIEVED_FROM_REMOTE_LOCATION);
  if(nco_dbg_lvl >= nco_dbg_fl && FILE_1_RETRIEVED_FROM_REMOTE_LOCATION) (void)fprintf(stderr,", local file is %s",fl_in_1);
  if(nco_dbg_lvl >= nco_dbg_fl) (void)fprintf(stderr,"\n");
  /* Open file once per thread to improve caching */
  if(RAM_OPEN) md_open=NC_NOWRITE|NC_DISKLESS; else md_open=NC_NOWRITE;
  for(thr_idx=0;thr_idx<thr_nbr;thr_idx++) rcd+=nco_fl_open(fl_in_1,md_open,&bfr_sz_hnt,in_id_1_arr+thr_idx);
  in_id_1=in_id_1_arr[0];

  fl_idx=1; /* Input file _2 */
  fl_in_2=nco_fl_nm_prs(fl_in_2,fl_idx,&fl_nbr,fl_lst_in,abb_arg_nbr,fl_lst_abb,fl_pth);
  if(nco_dbg_lvl >= nco_dbg_fl) (void)fprintf(stderr,"%s: INFO Input file %d is %s",nco_prg_nm_get(),fl_idx,fl_in_2);
  /* Make sure file is on local system and is readable or die trying */
  fl_in_2=nco_fl_mk_lcl(fl_in_2,fl_pth_lcl,&FILE_2_RETRIEVED_FROM_REMOTE_LOCATION);
  if(nco_dbg_lvl >= nco_dbg_fl && FILE_2_RETRIEVED_FROM_REMOTE_LOCATION) (void)fprintf(stderr,", local file is %s",fl_in_2);
  if(nco_dbg_lvl >= nco_dbg_fl) (void)fprintf(stderr,"\n");
  /* Open file once per thread to improve caching */
  if(RAM_OPEN) md_open=NC_NOWRITE|NC_DISKLESS; else md_open=NC_NOWRITE;
  for(thr_idx=0;thr_idx<thr_nbr;thr_idx++) rcd+=nco_fl_open(fl_in_2,md_open,&bfr_sz_hnt,in_id_2_arr+thr_idx);
  in_id_2=in_id_2_arr[0];

  /* Construct GTT, Group Traversal Table (groups,variables,dimensions, limits) */
  (void)nco_bld_trv_tbl(in_id_1,trv_pth,lmt_nbr,lmt_arg,aux_nbr,aux_arg,MSA_USR_RDR,FORTRAN_IDX_CNV,grp_lst_in,grp_lst_in_nbr,var_lst_in,var_lst_in_nbr,EXTRACT_ALL_COORDINATES,GRP_VAR_UNN,False,EXCLUDE_INPUT_LIST,EXTRACT_ASSOCIATED_COORDINATES,nco_pck_plc_nil,&flg_dne1,trv_tbl_1);
  (void)nco_bld_trv_tbl(in_id_2,trv_pth,lmt_nbr,lmt_arg,aux_nbr,aux_arg,MSA_USR_RDR,FORTRAN_IDX_CNV,grp_lst_in,grp_lst_in_nbr,var_lst_in,var_lst_in_nbr,EXTRACT_ALL_COORDINATES,GRP_VAR_UNN,False,EXCLUDE_INPUT_LIST,EXTRACT_ASSOCIATED_COORDINATES,nco_pck_plc_nil,&flg_dne2,trv_tbl_2);

  /* Merge DNE lists before checking */
  flg_dne=(nco_dmn_dne_t *)nco_malloc(2*lmt_nbr*sizeof(nco_dmn_dne_t));

  /* Merge "dne" lists */
  for(int lmt_idx=0;lmt_idx<lmt_nbr;lmt_idx++) flg_dne[lmt_idx]=flg_dne1[lmt_idx]; 
  for(int lmt_idx=0;lmt_idx<lmt_nbr;lmt_idx++) flg_dne[lmt_nbr+lmt_idx]=flg_dne2[lmt_idx];
 
  /* Were all user-specified dimensions found? */ 
  (void)nco_chk_dmn(lmt_nbr,flg_dne);

  /* Get number of variables, dimensions, and global attributes in file, file format */
  (void)trv_tbl_inq(&nbr_glb_att_1,&nbr_grp_att_1,&nbr_att_var_1,&nbr_dmn_fl_1,&nbr_rec_fl_1,&grp_dpt_fl_1,&nbr_grp_fl_1,&var_ntm_fl_1,&nbr_var_fl_1,trv_tbl_1);
  (void)trv_tbl_inq(&nbr_glb_att_2,&nbr_grp_att_2,&nbr_att_var_2,&nbr_dmn_fl_2,&nbr_rec_fl_2,&grp_dpt_fl_2,&nbr_grp_fl_2,&var_ntm_fl_2,&nbr_var_fl_2,trv_tbl_2);

  (void)nco_inq_format(in_id_1,&fl_in_fmt_1);
  (void)nco_inq_format(in_id_2,&fl_in_fmt_2);

  /* We now have final list of variables to extract. Phew. */

  /* Make output and input files consanguinous */
  if(fl_out_fmt == NCO_FORMAT_UNDEFINED) fl_out_fmt=fl_in_fmt_1;

  /* Initialize, decode, and set PPC information */
  if(ppc_nbr > 0) nco_ppc_ini(in_id_1,&dfl_lvl,fl_out_fmt,ppc_arg,ppc_nbr,trv_tbl_1);

  /* Verify output file format supports requested actions */
  (void)nco_fl_fmt_vet(fl_out_fmt,cnk_nbr,dfl_lvl);

  /* Open output file */
  fl_out_tmp=nco_fl_out_open(fl_out,FORCE_APPEND,FORCE_OVERWRITE,fl_out_fmt,&bfr_sz_hnt,RAM_CREATE,RAM_OPEN,WRT_TMP_FL,&out_id);

  /* Initialize chunking from user-specified inputs */
  if(fl_out_fmt == NC_FORMAT_NETCDF4 || fl_out_fmt == NC_FORMAT_NETCDF4_CLASSIC) rcd+=nco_cnk_ini(in_id_1,fl_out,cnk_arg,cnk_nbr,cnk_map,cnk_plc,cnk_min_byt,cnk_sz_byt,cnk_sz_scl,&cnk);

  if(gpe){
    if(nco_dbg_lvl >= nco_dbg_fl) (void)fprintf(stderr,"%s: INFO Group Path Edit (GPE) feature enabled\n",nco_prg_nm_get());
    if(fl_out_fmt != NC_FORMAT_NETCDF4) (void)fprintf(stderr,"%s: WARNING Group Path Edit (GPE) requires netCDF4 output format in most cases (except flattening) but user explicitly requested output format = %s. This command will fail if the output file requires netCDF4 features like groups or netCDF4 atomic types (e.g., NC_STRING, NC_UBYTE...).\n",nco_prg_nm_get(),nco_fmt_sng(fl_out_fmt));
  } /* !gpe */

  /* Is this a CCM/CCSM/CF-format history tape? */
  CNV_CCM_CCSM_CF=nco_cnv_ccm_ccsm_cf_inq(in_id_1);

  /* Group broadcating (DEFINE mode, True as flg_dfn parameter) */
  (void)nco_grp_brd(in_id_1,in_id_2,out_id,&cnk,dfl_lvl,gpe,gpe_nm,nbr_gpe_nm,CNV_CCM_CCSM_CF,nco_op_typ,trv_tbl_1,trv_tbl_2,(nco_bool)True);

  /* Copy global attributes from file 1 */
  (void)nco_att_cpy(in_id_1,out_id,NC_GLOBAL,NC_GLOBAL,(nco_bool)True);
  /* Catenate time-stamped command line to "history" global attribute */
  if(HISTORY_APPEND) (void)nco_hst_att_cat(out_id,cmd_ln);
  if(HISTORY_APPEND && FORCE_APPEND) (void)nco_prv_att_cat(fl_in_1,in_id_1,out_id);
  if(gaa_nbr > 0) (void)nco_glb_att_add(out_id,gaa_arg,gaa_nbr);
  if(HISTORY_APPEND) (void)nco_vrs_att_cat(out_id);

  /* Turn off default filling behavior to enhance efficiency */
  nco_set_fill(out_id,NC_NOFILL,&fll_md_old);

  /* Take output file out of define mode */
  if(hdr_pad == 0UL){
    (void)nco_enddef(out_id);
  }else{
    (void)nco__enddef(out_id,hdr_pad);
    if(nco_dbg_lvl >= nco_dbg_scl) (void)fprintf(stderr,"%s: INFO Padding header with %lu extra bytes\n",nco_prg_nm_get(),(unsigned long)hdr_pad);
  } /* hdr_pad */

  /* Group broadcating (WRITE mode, False as flg_dfn parameter) */
  (void)nco_grp_brd(in_id_1,in_id_2,out_id,&cnk,dfl_lvl,gpe,gpe_nm,nbr_gpe_nm,CNV_CCM_CCSM_CF,nco_op_typ,trv_tbl_1,trv_tbl_2,(nco_bool)False);

  /* Close input netCDF files */
  for(thr_idx=0;thr_idx<thr_nbr;thr_idx++) nco_close(in_id_1_arr[thr_idx]);
  for(thr_idx=0;thr_idx<thr_nbr;thr_idx++) nco_close(in_id_2_arr[thr_idx]);

  /* Close output file and move it from temporary to permanent location */
  if(fl_out_tmp) (void)nco_fl_out_cls(fl_out,fl_out_tmp,out_id);

  /* Remove local copy of file */
  if(FILE_1_RETRIEVED_FROM_REMOTE_LOCATION && RM_RMT_FL_PST_PRC) (void)nco_fl_rm(fl_in_1);
  if(FILE_2_RETRIEVED_FROM_REMOTE_LOCATION && RM_RMT_FL_PST_PRC) (void)nco_fl_rm(fl_in_2);

  /* Clean memory unless dirty memory allowed */
  if(flg_cln){
    /* ncbo-specific memory */
    if(fl_in_1) fl_in_1=(char *)nco_free(fl_in_1);
    if(fl_in_2) fl_in_2=(char *)nco_free(fl_in_2);

    if(cmd_ln) cmd_ln=(char *)nco_free(cmd_ln);
    if(cnk_map_sng) cnk_map_sng=(char *)nco_free(cnk_map_sng);
    if(cnk_plc_sng) cnk_plc_sng=(char *)nco_free(cnk_plc_sng);
    if(fl_out) fl_out=(char *)nco_free(fl_out);
    if(fl_out_tmp) fl_out_tmp=(char *)nco_free(fl_out_tmp);
    if(fl_pth) fl_pth=(char *)nco_free(fl_pth);
    if(fl_pth_lcl) fl_pth_lcl=(char *)nco_free(fl_pth_lcl);
    if(in_id_1_arr) in_id_1_arr=(int *)nco_free(in_id_1_arr);
    if(in_id_2_arr) in_id_2_arr=(int *)nco_free(in_id_2_arr);
    /* Free lists of strings */
    if(fl_lst_in && fl_lst_abb == NULL) fl_lst_in=nco_sng_lst_free(fl_lst_in,fl_nbr); 
    if(fl_lst_in && fl_lst_abb) fl_lst_in=nco_sng_lst_free(fl_lst_in,1);
    if(fl_lst_abb) fl_lst_abb=nco_sng_lst_free(fl_lst_abb,abb_arg_nbr);
    if(var_lst_in_nbr > 0) var_lst_in=nco_sng_lst_free(var_lst_in,var_lst_in_nbr);
    /* Free limits */
    for(idx=0;idx<aux_nbr;idx++) aux_arg[idx]=(char *)nco_free(aux_arg[idx]);
    for(idx=0;idx<lmt_nbr;idx++) lmt_arg[idx]=(char *)nco_free(lmt_arg[idx]);
    for(idx=0;idx<ppc_nbr;idx++) ppc_arg[idx]=(char *)nco_free(ppc_arg[idx]);
    /* Free chunking information */
    for(idx=0;idx<cnk_nbr;idx++) cnk_arg[idx]=(char *)nco_free(cnk_arg[idx]);
    if(cnk_nbr > 0) cnk.cnk_dmn=(cnk_dmn_sct **)nco_cnk_lst_free(cnk.cnk_dmn,cnk_nbr);
    trv_tbl_free(trv_tbl_1);
    trv_tbl_free(trv_tbl_2);
    if(gpe) gpe=(gpe_sct *)nco_gpe_free(gpe);
    /* Memory management for GPE names */
    for(idx=0;idx<nbr_gpe_nm;idx++) gpe_nm[idx].var_nm_fll=(char *)nco_free(gpe_nm[idx].var_nm_fll);
    if(nbr_gpe_nm) gpe_nm=(gpe_nm_sct *)nco_free(gpe_nm);
    /* Memory management for common names list */
    for(idx=0;idx<nbr_cmn_nm;idx++) cmn_lst[idx].nm=(char *)nco_free(cmn_lst[idx].nm);
    if(nbr_cmn_nm > 0) cmn_lst=(nco_cmn_t *)nco_free(cmn_lst);
    for(idx=0;idx<lmt_nbr;idx++) flg_dne1[idx].dim_nm=(char *)nco_free(flg_dne1[idx].dim_nm);
    flg_dne1=(nco_dmn_dne_t *)nco_free(flg_dne1);
    for(idx=0;idx<lmt_nbr;idx++) flg_dne2[idx].dim_nm=(char *)nco_free(flg_dne2[idx].dim_nm);
    flg_dne2=(nco_dmn_dne_t *)nco_free(flg_dne2);
    flg_dne=(nco_dmn_dne_t *)nco_free(flg_dne);

  } /* !flg_cln */

#ifdef ENABLE_MPI
  MPI_Finalize();
#endif /* !ENABLE_MPI */
  
  /* End timer */ 
  ddra_info.tmr_flg=nco_tmr_end; /* [enm] Timer flag */
  rcd+=nco_ddra((char *)NULL,(char *)NULL,&ddra_info);
  if(rcd != NC_NOERR) nco_err_exit(rcd,"main");

  nco_exit_gracefully();
  return EXIT_SUCCESS;
} /* end main() */
Ejemplo n.º 10
0
/*
 * Exercise the MPI_MODE_EXCL semantics of MPI_File_open.
 *
 * All opens are collective on MPI_COMM_WORLD and use the file name taken
 * from my_options.fname:
 *   1. create the file without EXCL (must succeed), then close it;
 *   2. open it again with CREATE|EXCL (must fail: the file now exists);
 *   3. delete the file and open with CREATE|EXCL once more (must succeed),
 *      then close it.
 * Rank 0 prints " No Errors" once the whole sequence has run.
 *
 * Command-line handling is delegated to parse_args(), and every unexpected
 * result is reported through handle_error(); both are defined elsewhere in
 * this file.
 *
 * Returns 0 when the sequence runs to completion.
 */
int main(int argc, char ** argv) {
    int nprocs, mynod, errcode;
    options my_options = {NULL, 0, 0};
    MPI_File fh;
    MPI_Info  info;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);

    parse_args(argc, argv, mynod, &my_options);

    /* Only build a hints object when aggregation was requested; otherwise
     * every call below receives MPI_INFO_NULL. */
    if (my_options.do_aggregation) {
        MPI_Info_create(&info);
        MPI_Info_set(info, "romio_no_indep_rw", "true");
        MPI_Info_set(info, "cb_config_list", "leela.mcs.anl.gov:1");
    } else {
        info = MPI_INFO_NULL;
    }

    /* create the file w/o EXCL: this must not fail */
    errcode = MPI_File_open(MPI_COMM_WORLD, my_options.fname,
                            MPI_MODE_CREATE|MPI_MODE_RDWR, info, &fh);
    if (errcode != MPI_SUCCESS) {
        handle_error(errcode, "MPI_File_open");
    }

    errcode = MPI_File_close(&fh);
    if (errcode != MPI_SUCCESS) {
        handle_error(errcode, "MPI_File_close");
    }

    /* now try to open w/ CREAT|EXCL: this must fail */
    errcode = MPI_File_open(MPI_COMM_WORLD, my_options.fname,
                            MPI_MODE_CREATE|MPI_MODE_EXCL|MPI_MODE_RDWR, info, &fh);
    if (errcode == MPI_SUCCESS) {
        /* Note the inverted check: success is the failure case here */
        handle_error(errcode, "MPI_File_open: expected an error: got");
    }

    /* ignore the error: File_delete is not aggregator-aware */
    MPI_File_delete(my_options.fname, info);

    /* this must succeed: the file no longer exists */
    errcode = MPI_File_open(MPI_COMM_WORLD, my_options.fname,
                            MPI_MODE_CREATE|MPI_MODE_EXCL|MPI_MODE_RDWR, info, &fh);
    if (errcode != MPI_SUCCESS) {
        handle_error(errcode, "MPI_File_open");
    }

    errcode = MPI_File_close(&fh);
    if (errcode != MPI_SUCCESS) {
        handle_error(errcode, "MPI_File_close");
    }

    if (mynod == 0) {
        printf(" No Errors\n");
    }

    /* Release the hints object created for the aggregation case so no MPI
     * handle is left allocated at finalize; MPI_INFO_NULL must not be freed. */
    if (info != MPI_INFO_NULL) {
        MPI_Info_free(&info);
    }

    MPI_Finalize();
    return 0;
}
Ejemplo n.º 11
0
int main(int argc, char *argv[]) {


#ifdef EPETRA_MPI

  // Initialize MPI

  MPI_Init( &argc, &argv );
  //int size, rank; // Number of MPI processes, My process ID

  //MPI_Comm_size(MPI_COMM_WORLD, &size);
  //MPI_Comm_rank(MPI_COMM_WORLD, &rank);

#else

  //int size = 1; // Serial case (not using MPI)
  //int rank = 0;

#endif

  bool verbose = false;

  int nx = 5;
  int ny = 5;

  if( argc > 1 )
  {
    if( argc > 4 )
    {
      cout << "Usage: " << argv[0] << " [-v [nx [ny]]]" << endl;
      exit(1);
    }

    int loc = 1;
    // Check if we should print results to standard out
    if(argv[loc][0]=='-' && argv[loc][1]=='v')
    { verbose = true; ++loc; }

    if (loc < argc) nx = atoi( argv[loc++] );
    if( loc < argc) ny = atoi( argv[loc] );
  }

#ifdef EPETRA_MPI
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  int MyPID = Comm.MyPID();
  int NumProc = Comm.NumProc();

  bool verbose1 = false;
  if(verbose) verbose1 = (MyPID==0);

  if(verbose1)
    cout << EpetraExt::EpetraExt_Version() << endl << endl;

  Comm.Barrier();

  if(verbose) cout << Comm << endl << flush;
  Comm.Barrier();

  int NumGlobalElements = nx * ny;
  if( NumGlobalElements < NumProc )
  {
    cout << "NumGlobalElements = " << NumGlobalElements <<
            " cannot be < number of processors = " << NumProc;
    exit(1);
  } 
	
  int IndexBase = 0;
  Epetra_Map Map( NumGlobalElements, IndexBase, Comm );

  // Extract the global indices of the elements local to this processor
  int NumMyElements = Map.NumMyElements();
  std::vector<int> MyGlobalElements( NumMyElements );
  Map.MyGlobalElements( &MyGlobalElements[0] );
  if( verbose ) cout << Map;

  // Create the number of non-zeros for a tridiagonal (1D problem) or banded
  // (2D problem) matrix
  std::vector<int> NumNz( NumMyElements, 5 );
  int global_i;
  int global_j;
  for (int i = 0; i < NumMyElements; ++i)
  {
    global_j = MyGlobalElements[i] / nx;
    global_i = MyGlobalElements[i] - global_j * nx;
    if (global_i == 0)    NumNz[i] -= 1;  // By having separate statements,
    if (global_i == nx-1) NumNz[i] -= 1;  // this works for 2D as well as 1D
    if (global_j == 0)    NumNz[i] -= 1;  // systems (i.e. nx x 1 or 1 x ny)
    if (global_j == ny-1) NumNz[i] -= 1;  // or even a 1 x 1 system
  }
  if(verbose)
  { 
    cout << endl << "NumNz: ";
    for (int i = 0; i < NumMyElements; i++) cout << NumNz[i] << " ";
    cout << endl;
  } // end if
  
  // Create the Epetra Compressed Row Sparse Graph
  Epetra_CrsGraph A( Copy, Map, &NumNz[0] );
  
  std::vector<int> Indices(5);
  int NumEntries;
  
  for (int i = 0; i < NumMyElements; ++i )
  {
    global_j = MyGlobalElements[i] / nx;
    global_i = MyGlobalElements[i] - global_j * nx;
    NumEntries = 0;
    // (i,j-1) entry
    if (global_j > 0 && ny > 1)
      Indices[NumEntries++] = global_i   + (global_j-1)*nx;
    // (i-1,j) entry
    if (global_i > 0)
      Indices[NumEntries++] = global_i-1 +  global_j   *nx;
    // (i,j) entry
    Indices[NumEntries++] = MyGlobalElements[i];
    // (i+1,j) entry
    if (global_i < nx-1)
      Indices[NumEntries++] = global_i+1 +  global_j   *nx;
    // (i,j+1) entry
    if (global_j < ny-1 && ny > 1)
      Indices[NumEntries++] = global_i   + (global_j+1)*nx;

    // Insert the global indices
    A.InsertGlobalIndices( MyGlobalElements[i], NumEntries, &Indices[0] );
  } // end i loop

  // Finish up graph construction
  A.FillComplete();

  EpetraExt::CrsGraph_MapColoring
    Greedy0MapColoringTransform( EpetraExt::CrsGraph_MapColoring::GREEDY,
		                 0, false, verbose );
  Epetra_MapColoring & Greedy0ColorMap = Greedy0MapColoringTransform( A );
  printColoring(Greedy0ColorMap, &A,verbose);

  EpetraExt::CrsGraph_MapColoring
    Greedy1MapColoringTransform( EpetraExt::CrsGraph_MapColoring::GREEDY,
		                 1, false, verbose );
  Epetra_MapColoring & Greedy1ColorMap = Greedy1MapColoringTransform( A );
  printColoring(Greedy1ColorMap, &A,verbose);

  EpetraExt::CrsGraph_MapColoring
    Greedy2MapColoringTransform( EpetraExt::CrsGraph_MapColoring::GREEDY,
		                 2, false, verbose );
  Epetra_MapColoring & Greedy2ColorMap = Greedy2MapColoringTransform( A );
  printColoring(Greedy2ColorMap, &A,verbose);

  EpetraExt::CrsGraph_MapColoring
    Lubi0MapColoringTransform( EpetraExt::CrsGraph_MapColoring::LUBY,
		               0, false, verbose );
  Epetra_MapColoring & Lubi0ColorMap = Lubi0MapColoringTransform( A );
  printColoring(Lubi0ColorMap, &A,verbose);

  EpetraExt::CrsGraph_MapColoring
    Lubi1MapColoringTransform( EpetraExt::CrsGraph_MapColoring::LUBY,
		               1, false, verbose );
  Epetra_MapColoring & Lubi1ColorMap = Lubi1MapColoringTransform( A );
  printColoring(Lubi1ColorMap, &A,verbose);

  EpetraExt::CrsGraph_MapColoring
    Lubi2MapColoringTransform( EpetraExt::CrsGraph_MapColoring::LUBY,
		               2, false, verbose );
  Epetra_MapColoring & Lubi2ColorMap = Lubi2MapColoringTransform( A );
  printColoring(Lubi2ColorMap, &A,verbose);

#ifdef EPETRA_MPI
  if( verbose ) cout << "Parallel Map Coloring 1!\n";
  EpetraExt::CrsGraph_MapColoring
    Parallel1MapColoringTransform( EpetraExt::CrsGraph_MapColoring::PSEUDO_PARALLEL,
		                   0, false, verbose );
  Epetra_MapColoring & Parallel1ColorMap = Parallel1MapColoringTransform( A );
  printColoring(Parallel1ColorMap, &A,verbose);

  if( verbose ) cout << "Parallel Map Coloring 2!\n";
  EpetraExt::CrsGraph_MapColoring
    Parallel2MapColoringTransform( EpetraExt::CrsGraph_MapColoring::JONES_PLASSMAN,
		                   0, false, verbose );
  Epetra_MapColoring & Parallel2ColorMap = Parallel2MapColoringTransform( A );
  printColoring(Parallel2ColorMap, &A,verbose);
#endif


#ifdef EPETRA_MPI
  MPI_Finalize();
#endif

  return 0;
}
Ejemplo n.º 12
0
/*
 * MPI benchmark: distributed dense matrix-vector product.
 *
 * Usage: prog [... N M]   (the last two arguments are the row count N and
 * the column count M; invalid or missing values fall back to 10000 x 1000).
 *
 * Every rank fills an n x M slab of the matrix and an m-element slice of the
 * vector with 1.0, where n = ceil(N/p) and m = ceil(M/p).  The slabs and
 * slices are assembled on every rank with MPI_Allgather; each rank then
 * accumulates the columns j = rank, rank+p, ... of every row and the partial
 * results are summed onto rank 0 with MPI_Reduce.  Rank 0 prints the elapsed
 * wall-clock time of the communication + compute phase.
 *
 * Returns 0 on success; aborts the whole job if a buffer cannot be allocated.
 */
int main(int argc, char **argv) {
    double *a, *x, *y, *aa, *xx, *yy;
    int N = 10000, M = 1000, n, m, i, j;
    int rank, p;
    struct timeval start, finish;
    double duration;

    if (argc > 2) {
        N = atoi(argv[argc - 2]);
        M = atoi(argv[argc - 1]);

        /* Reject non-positive / unparsable sizes and use the defaults. */
        if (N <= 0 || M <= 0) {
            N = 10000;
            M = 1000;
        }
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &p);

    /* Per-rank share, rounded up so that n*p >= N and m*p >= M. */
    n = (N % p == 0) ? (N / p) : (N / p + 1);
    m = (M % p == 0) ? (M / p) : (M / p + 1);

    /* Sizes are computed in size_t so the products cannot overflow int. */
    a = (double *)malloc((size_t)n * M * sizeof(double));
    x = (double *)malloc((size_t)m * sizeof(double));
    y = (double *)malloc((size_t)N * sizeof(double));
    aa = (double *)malloc((size_t)n * p * M * sizeof(double));
    xx = (double *)malloc((size_t)m * p * sizeof(double));
    yy = (double *)malloc((size_t)N * sizeof(double));

    if (a == NULL || x == NULL || y == NULL ||
        aa == NULL || xx == NULL || yy == NULL) {
        fprintf(stderr, "rank %d: out of memory (N=%d, M=%d, p=%d)\n", rank, N, M, p);
        /* Abort the whole job: the other ranks would otherwise hang in the
         * collectives below waiting for this one. */
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }

    for (i = 0; i < n; i++) {
        for (j = 0; j < M; j++) {
            a[(size_t)i * M + j] = 1.0;
        }
    }

    for (i = 0; i < m; i++) {
        x[i] = 1.0;
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) {
        gettimeofday(&start, NULL);
    }

    /* Assemble the full (padded) matrix and vector on every rank. */
    MPI_Allgather(a, n * M, MPI_DOUBLE, aa, n * M, MPI_DOUBLE, MPI_COMM_WORLD);
    MPI_Allgather(x, m, MPI_DOUBLE, xx, m, MPI_DOUBLE, MPI_COMM_WORLD);

    /* Each rank handles a cyclic subset of the columns of every row. */
    for (i = 0; i < N; i++) {
        y[i] = 0.0;

        for (j = rank; j < M; j += p) {
            y[i] += aa[(size_t)i * M + j] * xx[j];
        }
    }

    /* Sum the per-rank partial dot products into yy on rank 0. */
    MPI_Reduce(y, yy, N, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0) {
        gettimeofday(&finish, NULL);
        duration = ((finish.tv_sec * 1e6 + finish.tv_usec) - (start.tv_sec * 1e6 + start.tv_usec)) / 1e6;
        printf("n: %d, m: %d, p: %d, time: %.6lf\n", N, M, p, duration);
    }

    free(a);
    free(x);
    free(y);
    free(aa);
    free(xx);
    free(yy);

    MPI_Finalize();
    return 0;
}
Ejemplo n.º 13
0
int
main (int argc, char **argv)
{
  int nprocs = -1;
  int rank = -1;
  char processor_name[128];
  int namelen = 128;
  int buf0[buf_size];
  int buf1[buf_size];
  MPI_Status status;
  MPI_Request req;

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  MPI_Barrier (MPI_COMM_WORLD);

  if (nprocs < 3)
    {
      printf ("not enough tasks\n");
    }
  else if (rank == 0)
    {
      MPI_Irecv (buf0, buf_size, MPI_INT, 
		 MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);

      MPI_Recv (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &status);

      MPI_Send (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD);

      MPI_Recv (buf1, buf_size, MPI_INT, 
		MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);

      MPI_Wait (&req, &status);
    }
  else if (rank == 1)
    {
      memset (buf0, 0, buf_size);

      MPI_Send (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD);

      MPI_Recv (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);

      MPI_Send (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }
  else if (rank == 2)
    {
      memset (buf1, 1, buf_size);

      MPI_Send (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }

  MPI_Barrier (MPI_COMM_WORLD);

  MPI_Finalize ();
  printf ("(%d) Finished normally\n", rank);
}
Ejemplo n.º 14
0
int main(int argc, char *argv[])
{
	int m, n, c, iters, i, j;
	int my_m, my_n, my_rank, num_procs, size;
	float kappa;
	image u, u_bar;
	unsigned char *image_chars;
	char *input_jpeg_filename, *output_jpeg_filename;
	int* sendcounts, displs, recvcounts;

	sendcounts = (int*)malloc(num_procs*sizeof(int));
	displs = (int*)malloc(num_procs*sizeof(int));
	recvcounts = (int*)malloc(num_procs*sizeof(int));

	printf("Now in main program\n");

	MPI_Init (&argc, &argv);
	MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size (MPI_COMM_WORLD, &num_procs);

	/* read from kommand line: kappa, iters, input_jpeg filename, output_jpeg_filename */
	kappa = atof(argv[1]);
	iters = atoi(argv[2]);
	input_jpeg_filename = argv[3];
	output_jpeg_filename = argv[4];
	/* Test that parameters are read correctly from command line: 
	printf("kappa: %f\n", kappa);
	printf("iters: %d\n", iters);
	printf("input_jpeg_filename: %s\n", input_jpeg_filename);
	printf("output_jpeg_filename: %s\n", output_jpeg_filename);
	*/

	
	if (my_rank==0)
		import_JPEG_file(input_jpeg_filename, &image_chars, &m, &n, &c);
		printf("Successfully imported JPEG image.\n");
	

	MPI_Bcast (&m, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast (&n, 1, MPI_INT, 0, MPI_COMM_WORLD);


	/* Divide the m x n pixels evenly among the MPI processes */
	my_m = m/num_procs;
	my_n = n;


	/* If the pixels cannot be evenly divided, the last process picks up 	*/
	/* the remainder.  														*/
	/* Each process needs the rows above and below it. 						*/
	/* The first and last process only need 1 additional row. 				*/
	if (my_rank == num_procs - 1){
		my_m += m % num_procs;
		allocate_image(&u, my_m+1, my_n);
		allocate_image(&u_bar, my_m+1, my_n);
	} else if (my_rank == 0){
		allocate_image(&u_bar, my_m+1, my_n);
	} else {
		allocate_image (&u, my_m+2, my_n);
		allocate_image (&u_bar, my_m+2, my_n);
	}

	/* Each process asks process 0 for a partitioned region */
	/* of image_chars and copy the values into u */

	if (my_rank==0){
		size = (my_m + 1)*my_n;
		sendcounts[my_rank] = size;
		displs[my_rank] = my_rank;
		displs[my_rank+1] = my_n*(my_rank*my_m - 1);
	} else if (my_rank==num_procs-1){
		size = (my_m + 1)*my_n;
		sendcounts[my_rank] = size;
	} else {
		size = (my_m + 2)*my_n;
		sendcounts[my_rank] = size;
		displs[my_rank+1] = my_n*(my_rank*my_m - 1);
	}


	
	MPI_Scatterv(&image_chars, &sendcounts, &displs, MPI_UNSIGNED_CHAR, &u.image_data, size, MPI_UNSIGNED_CHAR,
		0, MPI_COMM_WORLD);

	/* Convert data type from unsigned char to float: */
	for (i=0; i<my_m; i++)
	{
		for (j=0; j<my_n; j++)
		{
			u.image_data[i][j] = (float)u.image_data[i][j];
		}
	}

	iso_diffusion_denoising (&u, &u_bar, kappa, iters);

	/* Each process must convert the data type in u back */
	/* to unsigned char. */
	for (i=0; i<my_m; i++)
	{
		for (j=0; j<my_n; j++)
		{
			u.image_data[i][j] = (unsigned char)u.image_data[i][j];
		}
	}

	/* Each process sends its resulting content of u to process 0 */
	/* Process 0 recieves from each process incoming values and */
	/* copy them into the designated region of image_chars */
	/* ... */


	if (my_rank==0){
		displs[my_rank] = 0;
	
	displs[my_rank+1] = my_rank*my_m*my_n;
	size = my_m*my_n




	if (my_rank==0)
		c = 1;
		export_JPEG_file(output_jpeg_filename, image_chars, m, n, c, 75);
		printf("Successfully exported JPEG image! \n");

	deallocate_image(&u);
	deallocate_image(&u_bar);

	MPI_Finalize ();

	printf("Finished the program!\n");

	return 0;
}
Ejemplo n.º 15
0
int main(int argc, char **argv) {
    int           procid, nproc, i, j, my_nelem;
    int           pollint = 0;
    double        time;
    MPI_Win       llist_win;
    llist_ptr_t   head_ptr, tail_ptr;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &procid);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &llist_win);

    /* Process 0 creates the head node */
    if (procid == 0)
        head_ptr.disp = alloc_elem(procid, llist_win);

    /* Broadcast the head pointer to everyone */
    head_ptr.rank = 0;
    MPI_Bcast(&head_ptr.disp, 1, MPI_AINT, 0, MPI_COMM_WORLD);
    tail_ptr = head_ptr;

    /* All processes append NUM_ELEMS elements to the list; rank 0 has already
     * appended an element. */
    if (procid == 0)
        i = 1;
    else
        i = 0;
    my_nelem = NUM_ELEMS/nproc;
    if (procid < NUM_ELEMS % nproc)
        my_nelem++;

    MPI_Barrier(MPI_COMM_WORLD);
    time = MPI_Wtime();

    for ( ; i < my_nelem; i++) {
        llist_ptr_t new_elem_ptr;
        int success = 0;

        /* Create a new list element and register it with the window */
        new_elem_ptr.rank = procid;
        new_elem_ptr.disp = alloc_elem(procid, llist_win);

        /* Append the new node to the list.  This might take multiple attempts if
           others have already appended and our tail pointer is stale. */
        do {
            int flag;

            /* The tail is at my left neighbor, append my element. */
            if (tail_ptr.rank == (procid + nproc-1) % nproc)
            {
                if (verbose)
                    printf("%d: Appending to <%d, %p>\n", procid, tail_ptr.rank, (void*) tail_ptr.disp);

                MPI_Win_lock(MPI_LOCK_EXCLUSIVE, tail_ptr.rank, 0, llist_win);
#if USE_ACC
                MPI_Accumulate(&new_elem_ptr, sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                               (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next), sizeof(llist_ptr_t),
                               MPI_BYTE, MPI_REPLACE, llist_win);
#else
                MPI_Put(&new_elem_ptr, sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                        (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next), sizeof(llist_ptr_t),
                        MPI_BYTE, llist_win);
#endif
                MPI_Win_unlock(tail_ptr.rank, llist_win);

                success = 1;
                tail_ptr = new_elem_ptr;
            }

            /* Otherwise, chase the tail. */
            else
            {
                llist_ptr_t next_tail_ptr;

                MPI_Win_lock(MPI_LOCK_EXCLUSIVE, tail_ptr.rank, 0, llist_win);
#if USE_ACC
                MPI_Get_accumulate( NULL, 0, MPI_DATATYPE_NULL, &next_tail_ptr,
                                    sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                                    (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next),
                                    sizeof(llist_ptr_t), MPI_BYTE, MPI_NO_OP, llist_win);
#else
                MPI_Get(&next_tail_ptr, sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                        (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next),
                        sizeof(llist_ptr_t), MPI_BYTE, llist_win);
#endif
                MPI_Win_unlock(tail_ptr.rank, llist_win);

                if (next_tail_ptr.rank != nil.rank) {
                    if (verbose)
                        printf("%d: Chasing to <%d, %p>\n", procid, next_tail_ptr.rank, (void*) next_tail_ptr.disp);
                    tail_ptr = next_tail_ptr;
                    pollint = MAX(MIN_NPROBE, pollint/2);
                }
                else {
                    for (j = 0; j < pollint; j++)
                        MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE);

                    pollint = MIN(MAX_NPROBE, pollint*2);
                }
            }
        } while (!success);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    time = MPI_Wtime() - time;

    /* Traverse the list and verify that all processes inserted exactly the correct
       number of elements. */
    if (procid == 0) {
        int  errors    = 0;
        int *counts, count = 0;

        counts = (int*) malloc(sizeof(int) * nproc);
        assert(counts != NULL);

        for (i = 0; i < nproc; i++)
            counts[i] = 0;

        tail_ptr = head_ptr;

        MPI_Win_lock_all(0, llist_win);

        /* Walk the list and tally up the number of elements inserted by each rank */
        while (tail_ptr.disp != nil.disp) {
            llist_elem_t elem;

            MPI_Get(&elem, sizeof(llist_elem_t), MPI_BYTE,
                    tail_ptr.rank, tail_ptr.disp, sizeof(llist_elem_t), MPI_BYTE, llist_win);

            MPI_Win_flush(tail_ptr.rank, llist_win);

            tail_ptr = elem.next;

            assert(elem.value >= 0 && elem.value < nproc);
            counts[elem.value]++;
            count++;

            if (verbose) {
                int last_elem = tail_ptr.disp == nil.disp;
                printf("%2d%s", elem.value, last_elem ? "" : " -> ");
                if (count % ELEM_PER_ROW == 0 && !last_elem)
                    printf("\n");
            }
        }

        MPI_Win_unlock_all(llist_win);

        if (verbose)
          printf("\n\n");

        /* Verify the counts we collected */
        for (i = 0; i < nproc; i++) {
            int expected;

            expected = NUM_ELEMS/nproc;
            if (i < NUM_ELEMS % nproc)
                expected++;

            if (counts[i] != expected) {
                printf("Error: Rank %d inserted %d elements, expected %d\n", i, counts[i], expected);
                errors++;
            }
        }

        printf("%s\n", errors == 0 ? " No Errors" : "FAIL");
        free(counts);
    }

    if (print_perf) {
        double max_time;

        MPI_Reduce(&time, &max_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

        if (procid == 0) {
            printf("Total time = %0.2f sec, elem/sec = %0.2f, sec/elem = %0.2f usec\n", max_time, NUM_ELEMS/max_time, max_time/NUM_ELEMS*1.0e6);
        }
    }

    MPI_Win_free(&llist_win);

    /* Free all the elements in the list */
    for ( ; my_elems_count > 0; my_elems_count--)
        MPI_Free_mem(my_elems[my_elems_count-1]);

    MPI_Finalize();
    return 0;
}
Ejemplo n.º 16
0
/* main function */
/*
 * MPI driver that distributes shots over ranks (shot index = is*nodes+rank).
 *
 * For every shot a rank calls reflgen() and lrosfor2(), then lrosback2().
 * Depending on the flags, shot records are either scattered from rank 0
 * (adj && wantrecord: records are read from the input file) or gathered onto
 * rank 0 (otherwise: records are written by rank 0 at the end).  With adj
 * set, each rank accumulates its images into imgsum, which is summed onto
 * rank 0 with MPI_Reduce and written to the image file.
 *
 * The shot count is padded up to a multiple of the number of ranks so that
 * every rank takes part in every MPI_Scatter / MPI_Gather; padded shots are
 * skipped in the compute steps (shtcur >= shtnum0) and never written.
 *
 * Never returns normally: terminates through exit(0) after MPI_Finalize(),
 * or through sf_error() on invalid parameters.
 */
int main(int argc, char* argv[])
{
    clock_t tstart,tend;
    double duration;

    /*flags*/
    bool verb, adj; /* migration(adjoint) flag */
    bool wantwf; /* outputs wavefield snapshots */
    bool wantrecord; /* actually means "need record" */
    bool illum; /* source illumination flag*/
    bool roll; /* survey strategy */

    /*I/O*/
    sf_file Fvel;
    sf_file left, right, leftb, rightb;
    sf_file Fsrc, Frcd/*source and record*/;
    sf_file Ftmpwf;
    sf_file Fimg;

    /*axis*/
    sf_axis at, ax, az, as;

    /*grid index variables*/
    int nx, nz, nt, wfnt;
    int nzx, nx2, nz2, n2, m2, m2b, pad1, nk;
    int ix, iz, it, is;
    int nxb, nzb;
    int snpint;
    float dt, dx, dz, wfdt;
    float ox, oz;

    /*source/geophone location*/
    int   spx, spz;
    int   gpz,gpx,gpl; /*geophone depth/x-crd/length*/

    /*Model*/
    sf_complex **lt, **rt;
    sf_complex **ltb, **rtb;

    /*Data*/
    sf_complex ***wavefld;
    sf_complex ***record, **tmprec, **img, **imgsum;
    float **sill;

    /*source*/
    sf_complex *ww;
    float *rr;
    int rectz,rectx,repeat; /*smoothing parameters*/
    float trunc;
    int sht0,shtbgn,shtend,shtnum,shtnum0,shtint,shtcur;

    /*abc boundary*/
    int top,bot,lft,rht;

    /*tmp*/
    int tmpint;

    /*parameter structs*/
    geopar geop;
    mpipar mpip;

    /*MPI*/
    int rank, nodes;
    sf_complex *sendbuf, *recvbuf;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nodes);

    sf_init(argc, argv);

    if(rank==0) sf_warning("nodes=%d",nodes);

    if (!sf_getbool("verb", &verb)) verb=false; /*verbosity*/
    if (!sf_getbool("adj", &adj)) adj=true; /*migration*/
    if (!sf_getbool("wantwf", &wantwf)) wantwf=false; /*output forward and backward wavefield*/
    if (!sf_getbool("wantrecord", &wantrecord)) wantrecord=true; /*if n, using record data generated by this program */
    if (!sf_getbool("illum", &illum)) illum=false; /*if n, no source illumination applied */
    if (!sf_getbool("roll", &roll)) roll=false; /*if n, receiver is independent of source location and gpl=nx*/
    /* source/receiver info */
    if (!sf_getint("shtbgn", &shtbgn)) sf_error("Need shot starting location on grid!");
    if (!sf_getint("sht0", &sht0)) sht0=shtbgn; /*actual shot origin on grid*/
    if (!sf_getint("shtend", &shtend)) sf_error("Need shot ending location on grid!");
    if (!sf_getint("shtint", &shtint)) sf_error("Need shot interval on grid!");
    shtnum = (int)((shtend-shtbgn)/shtint) + 1;
    shtnum0 = shtnum; /* true shot count, before padding to a multiple of nodes */
    if (!sf_getint("spz", &spz)) sf_error("Need source depth!");
    if (!sf_getint("gpz", &gpz)) sf_error("Need receiver depth!");
    if (roll) if (!sf_getint("gpl", &gpl)) sf_error("Need receiver length");
    if (!sf_getint("snapinter", &snpint)) snpint=1;     /* snap interval */
    /*--- parameters of source ---*/
    if (!sf_getfloat("srctrunc", &trunc)) trunc=0.4;
    if (!sf_getint("rectz", &rectz)) rectz=1;
    if (!sf_getint("rectx", &rectx)) rectx=1;
    if (!sf_getint("repeat", &repeat)) repeat=0;
    /* abc parameters */
    if (!sf_getint("top", &top)) top=40;
    if (!sf_getint("bot", &bot)) bot=40;
    if (!sf_getint("lft", &lft)) lft=40;
    if (!sf_getint("rht", &rht)) rht=40;

    /*Set I/O file*/
    if (adj) { /* migration */
      if (wantrecord) {
        Frcd = sf_input("input"); /*record from elsewhere*/
        Fsrc  = sf_input("src");   /*source wavelet*/
      } else {
        Frcd = sf_output("rec"); /*record produced by forward modeling*/
        Fsrc = sf_input("input");   /*source wavelet*/
      }
      Fimg  = sf_output("output");
    } else { /* modeling */
      Fimg = sf_input("input");
      Frcd = sf_output("output");
      Fsrc  = sf_input("src");   /*source wavelet*/
    }
    left  = sf_input("left");
    right = sf_input("right");
    leftb  = sf_input("leftb");
    rightb = sf_input("rightb");
    Fvel  = sf_input("vel");  /*velocity - just for model dimension*/
    if (wantwf) {
        Ftmpwf  = sf_output("tmpwf");/*wavefield snap*/
    } else {
        Ftmpwf  = NULL;
    }

    /*--- Axes parameters ---*/
    at = sf_iaxa(Fsrc, 1); nt = sf_n(at);  dt = sf_d(at);
    az = sf_iaxa(Fvel, 1); nzb = sf_n(az); dz = sf_d(az); oz = sf_o(az);
    ax = sf_iaxa(Fvel, 2); nxb = sf_n(ax); dx = sf_d(ax); ox = sf_o(ax);
    nzx = nzb*nxb;
    /* interior grid = padded grid minus the absorbing-boundary margins */
    nz = nzb - top - bot;
    nx = nxb - lft - rht;
    if (!roll) gpl = nx; /* global survey setting */
    /* wavefield axis */
    wfnt = (int)(nt-1)/snpint+1;
    wfdt = dt*snpint;

    /* propagator matrices */
    if (!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */
    nz2 = kiss_fft_next_fast_size(nzb*pad1);
    nx2 = kiss_fft_next_fast_size(nxb);
    nk = nz2*nx2; /*wavenumber*/
    if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx);
    if (!sf_histint(left,"n2",&m2))  sf_error("Need n2= in left");
    if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2);
    if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk);

    /* same checks for the second matrix pair; name the right files on error */
    if (!sf_histint(leftb,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in leftb",nzx);
    if (!sf_histint(leftb,"n2",&m2b))  sf_error("Need n2= in leftb");
    if (!sf_histint(rightb,"n1",&n2) || n2 != m2b) sf_error("Need n1=%d in rightb",m2b);
    if (!sf_histint(rightb,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in rightb",nk);

    /*check record data*/
    /* a missing header key is an error too: tmpint would be uninitialized */
    if (adj && wantrecord){
        if (!sf_histint(Frcd,"n1", &tmpint) || tmpint != nt ) sf_error("Error parameter n1 in record!");
        if (!sf_histint(Frcd,"n2", &tmpint) || tmpint != gpl ) sf_error("Error parameter n2 in record!");
        if (!sf_histint(Frcd,"n3", &tmpint) || tmpint != shtnum0 ) sf_error("Error parameter n3 in record!");
    }

    /*allocate memory*/
    ww=sf_complexalloc(nt);
    rr=sf_floatalloc(nzx);
    lt = sf_complexalloc2(nzx,m2);
    rt = sf_complexalloc2(m2,nk);
    ltb = sf_complexalloc2(nzx,m2b);
    rtb = sf_complexalloc2(m2b,nk);
    geop = (geopar) sf_alloc(1, sizeof(*geop));
    mpip = (mpipar) sf_alloc(1, sizeof(*mpip));
    tmprec = sf_complexalloc2(nt, gpl);
    /* pad the shot count so every rank joins every collective call */
    if (shtnum%nodes!=0) {
      shtnum += nodes-shtnum%nodes;
      if (verb) sf_warning("Total shot number is not divisible by total number of nodes! shunum padded to %d.", shtnum);
    }
    /* only rank 0 holds the full record cube */
    if (rank==0) {
      record = sf_complexalloc3(nt, gpl, shtnum);
    } else record = NULL;
    wavefld = sf_complexalloc3(nz, nx, wfnt);
    if (illum) sill = sf_floatalloc2(nz, nx);
    else sill = NULL;
    img = sf_complexalloc2(nz, nx);
    if (adj) {
      imgsum = sf_complexalloc2(nz, nx);
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
      for (ix=0; ix<nx; ix++)
        for (iz=0; iz<nz; iz++)
          imgsum[ix][iz] = sf_cmplx(0.,0.);
    }
    /*read from files*/
    sf_complexread(ww,nt,Fsrc);
    sf_complexread(lt[0],nzx*m2,left);
    sf_complexread(rt[0],m2*nk,right);
    sf_complexread(ltb[0],nzx*m2b,leftb);
    sf_complexread(rtb[0],m2b*nk,rightb);
    if(!adj) sf_complexread(img[0],nx*nz,Fimg);
    if (rank==0) {
      if(adj && wantrecord) {
        sf_complexread(record[0][0], shtnum0*gpl*nt, Frcd);
        /* zero the padded shots that have no data in the file */
        if (shtnum0%nodes!=0) {
#ifdef _OPENMP
#pragma omp parallel for private(is,ix,it)
#endif
          for (is=shtnum0; is<shtnum; is++)
            for (ix=0; ix<gpl; ix++)
              for (it=0; it<nt; it++)
                record[is][ix][it] = sf_cmplx(0.,0.);
        }
      } else {
#ifdef _OPENMP
#pragma omp parallel for private(is,ix,it)
#endif
        for (is=0; is<shtnum; is++)
          for (ix=0; ix<gpl; ix++)
            for (it=0; it<nt; it++)
              record[is][ix][it] = sf_cmplx(0.,0.);
      }
    }

    /*close RSF files*/
    sf_fileclose(Fsrc);
    sf_fileclose(left);
    sf_fileclose(right);
    sf_fileclose(leftb);
    sf_fileclose(rightb);

    /*load constant mpipar elements*/
    mpip->cpuid=rank;
    mpip->numprocs=nodes;
    /*load constant geopar elements*/
    geop->nx  = nx;
    geop->nz  = nz;
    geop->nxb = nxb;
    geop->nzb = nzb;
    geop->dx  = dx;
    geop->dz  = dz;
    geop->ox  = ox;
    geop->oz  = oz;
    geop->snpint = snpint;
    geop->spz = spz;
    geop->gpz = gpz;
    geop->gpl = gpl;
    geop->top = top;
    geop->bot = bot;
    geop->lft = lft;
    geop->rht = rht;
    geop->nt = nt;
    geop->dt = dt;
    geop->trunc = trunc;
    geop->shtnum = shtnum;

    /* output RSF files */

    if (rank==0) {
      sf_setn(ax, gpl);
      sf_setn(az, nz);
      /* shot axis: built from the velocity x axis, then overwritten */
      as = sf_iaxa(Fvel, 2);
      sf_setn(as,shtnum0);
      sf_setd(as,shtint*dx);
      sf_seto(as,shtbgn*dx+ox);

      if (adj) { /* migration */
        if(!wantrecord) {
          sf_oaxa(Frcd, at, 1);
          sf_oaxa(Frcd, ax, 2);
          sf_oaxa(Frcd, as, 3);
          sf_settype(Frcd,SF_COMPLEX);
        }
        sf_setn(ax, nx);
        /*write image*/
        sf_oaxa(Fimg, az, 1);
        sf_oaxa(Fimg, ax, 2);
        sf_settype(Fimg,SF_COMPLEX);
      } else { /* modeling */
        sf_oaxa(Frcd, at, 1);
        sf_oaxa(Frcd, ax, 2);
        sf_oaxa(Frcd, as ,3);
        sf_settype(Frcd,SF_COMPLEX);
      }

      if (wantwf) {
        sf_setn(ax, nx);
        /*write temp wavefield */
        sf_setn(at, wfnt);
        sf_setd(at, wfdt);

        sf_oaxa(Ftmpwf, az, 1);
        sf_oaxa(Ftmpwf, ax, 2);
        sf_oaxa(Ftmpwf, at, 3);
        sf_settype(Ftmpwf,SF_COMPLEX);
      }
    }

    tstart = clock();

    /* one pass handles `nodes` consecutive shots, one per rank */
    for (is=0; is*nodes<shtnum; is++){

      shtcur = is*nodes+rank; // current shot index

      if (shtcur<shtnum0) {
        spx = shtbgn + shtint*(shtcur);
        if (roll)
          gpx = spx - (int)(gpl/2);
        else
          gpx = 0;
        geop->spx = spx;
        geop->gpx = gpx;

        if (verb) {
          sf_warning("============================");
          sf_warning("processing shot #%d", shtcur);
          sf_warning("nx=%d nz=%d nt=%d", geop->nx, geop->nz, geop->nt);
          sf_warning("nxb=%d nzb=%d ", geop->nxb, geop->nzb);
          sf_warning("dx=%f dz=%f dt=%f", geop->dx, geop->dz, geop->dt);
          sf_warning("top=%d bot=%d lft=%d rht=%d", geop->top, geop->bot, geop->lft, geop->rht);
          sf_warning("rectz=%d rectx=%d repeat=%d srctrunc=%f",rectz,rectx,repeat,geop->trunc);
          sf_warning("spz=%d spx=%d gpz=%d gpx=%d gpl=%d", spz, spx, gpz, gpx, gpl);
          sf_warning("snpint=%d wfdt=%f wfnt=%d ", snpint, wfdt, wfnt);
          sf_warning("sht0=%d shtbgn=%d shtend=%d shtnum0=%d shtnum=%d", sht0, shtbgn, shtend, shtnum0, shtnum);
          if (roll) sf_warning("Rolling survey!");
          else sf_warning("Global survey (gpl=nx)!");
          if (illum) sf_warning("Using source illumination!");
          else sf_warning("No source illumination!");
          sf_warning("============================");
        }

        /*generate reflectivity map*/
        reflgen(nzb, nxb, spz+top, spx+lft, rectz, rectx, repeat, rr);

        lrosfor2(wavefld, sill, tmprec, verb, lt, rt, m2, geop, ww, rr, pad1, illum);
      }

      /* All ranks (including those on a padded shot) must call the
       * collective; rank 0 sends record[is*nodes + r] to rank r. */
      if(adj && wantrecord) {
        if (rank==0) sendbuf = record[is*nodes][0];
        else sendbuf = NULL;
        recvbuf = tmprec[0];
        MPI_Scatter(sendbuf, gpl*nt, MPI_COMPLEX, recvbuf, gpl*nt, MPI_COMPLEX, 0, MPI_COMM_WORLD); // tmprec[ix][it] = record[is][ix][it];
      }

      if (shtcur<shtnum0) {
        lrosback2(img, wavefld, sill, tmprec, adj, verb, wantwf, ltb, rtb, m2b, geop, pad1, illum);
        if (adj) { /*local image reduction*/
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
          for (ix=0; ix<nx; ix++) {
            for (iz=0; iz<nz; iz++) {
#ifdef SF_HAS_COMPLEX_H
              imgsum[ix][iz] += img[ix][iz];
#else
              imgsum[ix][iz] = sf_cadd(imgsum[ix][iz],img[ix][iz]);
#endif
            }
          }
        }
      }

      /* Collect this pass's records on rank 0 (collective on all ranks). */
      if (!adj || !wantrecord) {
        //	MPI_Barrier(MPI_COMM_WORLD);
        if (rank==0) recvbuf = record[is*nodes][0];
        else recvbuf = NULL;
        sendbuf = tmprec[0];
        MPI_Gather(sendbuf, gpl*nt, MPI_COMPLEX, recvbuf, gpl*nt, MPI_COMPLEX, 0, MPI_COMM_WORLD); // record[is][ix][it] = tmprec[ix][it];
      }

      /* shot 0 is always handled by rank 0 in the first pass */
      if (wantwf && shtcur==0)
        sf_complexwrite(wavefld[0][0], wfnt*nx*nz, Ftmpwf);
    } /*shot iteration*/

    MPI_Barrier(MPI_COMM_WORLD);
    /*write record/image*/
    if (adj) {
      /* sum the per-rank images onto rank 0, in place where supported */
      if (rank==0) {
#if MPI_VERSION >= 2
        sendbuf = (sf_complex *) MPI_IN_PLACE;
#else /* will fail */
        sendbuf = NULL;
#endif
        recvbuf = imgsum[0];
      } else {
        sendbuf = imgsum[0];
        recvbuf = NULL;
      }
      MPI_Reduce(sendbuf, recvbuf, nx*nz, MPI_COMPLEX, MPI_SUM, 0, MPI_COMM_WORLD);
      if (rank==0)
        sf_complexwrite(imgsum[0], nx*nz, Fimg);
    }

    /* only the shtnum0 real shots are written, not the padding */
    if (!adj || !wantrecord) {
      if (rank==0)
        sf_complexwrite(record[0][0], shtnum0*gpl*nt, Frcd);
    }

    /*free memory*/
    free(ww); free(rr);
    free(*lt); free(lt);
    free(*rt); free(rt);
    free(*ltb);free(ltb);
    free(*rtb);free(rtb);
    free(geop);free(mpip);
    free(*tmprec); free(tmprec);
    if (rank==0) {free(**record); free(*record); free(record);}
    free(**wavefld); free(*wavefld); free(wavefld);
    if (illum) {
      free(*sill); free(sill);
    }
    free(*img); free(img);
    if (adj) {
      free(*imgsum); free(imgsum);
    }

    tend = clock();
    duration=(double)(tend-tstart)/CLOCKS_PER_SEC;
    sf_warning(">> The CPU time of single shot migration is: %f seconds << ", duration);

    MPI_Finalize();
    exit(0);
}