Example 1
int main( int argc, char ** argv) {

  INTS ierr;
  int me, NTasks, required, provided;
  INTS id, m;
  /* CSC Data */
  INTS n, dof;
  INTL nnzeros, edgenbr;
  COEF val;
  /* Local Data */
  INTS localnodenbr;
  INTS * nodelist;
  INTS new_localnodenbr;
  INTS * new_nodelist;
  INTS root;
  INTS base;
  COEF * lrhs;
  COEF * globrhs;
  COEF * globrhs_recv;
  COEF * globx;
  COEF * globprod;
  /* Other data */
  COEF * matElem;
  double prec, xmin, xmax, sum1, sum2;
  INTS i, j, k;
  INTS solver;
  INTS zero=0;
  INTS one=1;
  INTS nb_threads;
  INTS   id_seq;

  root = -1;
  base = 1;

  required=MPI_THREAD_MULTIPLE;
  MPI_Init_thread(&argc, &argv, required, &provided);

  MPI_Comm_size(MPI_COMM_WORLD, &NTasks);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  n = dof = 0;
  if (argc >= 3) {
    n = atoi(argv[1]);
    dof = atoi(argv[2]);
  } else {
    if (me == 0)
      fprintf(stderr, "Usage: %s <size> <DofNumber>\n", argv[0]);
    MPI_Finalize();
    return 1;
  }

  xmin = 0.0;
  xmax = 1.0;

  /* Starting MURGE*/
  ierr = MURGE_Initialize(one);
  if (ierr != MURGE_SUCCESS) {
    fprintf(stderr, "Error %ld in MURGE_Initialize\n", (long)ierr);
    return 1;
  }
  id = 0;

  /* Set Options */
  prec = 1e-7;
  /*
    Call MURGE_Get_Solver(solver)
  */
  solver = MURGE_SOLVER_PASTIX;




  if ( solver == MURGE_SOLVER_PASTIX ) {
    MURGE_SetDefaultOptions(id, zero);
    MURGE_SetOptionINT(id, IPARM_VERBOSE, API_VERBOSE_NO);
    MURGE_SetOptionINT(id, IPARM_MATRIX_VERIFICATION, API_YES);
    nb_threads = 1;
#ifdef _OPENMP
#pragma omp parallel shared(nb_threads)
    {
      nb_threads = omp_get_num_threads();
    }
#endif /* _OPENMP */

    if (me == 0) {
      fprintf(stdout, "Running on %ld threads and %d MPI Tasks\n",
              (long)nb_threads, NTasks);
    }
    MURGE_SetOptionINT(id, IPARM_THREAD_NBR, nb_threads);
  } else if (solver == MURGE_SOLVER_HIPS) {
#ifdef HIPS
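    /* NOTE: method, domsize, itmax, restart and verbose are assumed to be
       declared and set elsewhere when the HIPS backend is compiled in. */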
    if ( method == 1 ) {
      MURGE_SetDefaultOptions(id, HIPS_ITERATIVE);
    } else {
      MURGE_SetDefaultOptions(id, HIPS_HYBRID);
      MURGE_SetOptionINT(id, HIPS_PARTITION_TYPE, zero);
      MURGE_SetOptionINT(id, HIPS_DOMSIZE, domsize);
    }
    MURGE_SetOptionINT(id, HIPS_SYMMETRIC, zero);
    MURGE_SetOptionINT(id, HIPS_LOCALLY, zero);
    MURGE_SetOptionINT(id, HIPS_ITMAX, itmax);
    MURGE_SetOptionINT(id, HIPS_KRYLOV_RESTART, restart);
    MURGE_SetOptionINT(id, HIPS_VERBOSE, verbose);
    MURGE_SetOptionINT(id, HIPS_DOMNBR, NTasks);
    MURGE_SetOptionINT(id, HIPS_CHECK_GRAPH, one);
    MURGE_SetOptionINT(id, HIPS_CHECK_MATRIX, one);
#endif
  }
  MURGE_SetOptionINT(id, MURGE_IPARAM_DOF, dof);
  MURGE_SetOptionINT(id, MURGE_IPARAM_SYM, MURGE_BOOLEAN_FALSE);
  MURGE_SetOptionINT(id, MURGE_IPARAM_BASEVAL, base);

  MURGE_SetOptionREAL(id, MURGE_RPARAM_EPSILON_ERROR, prec);
  /* Set the graph: every processor enters some edges of the
     graph that correspond to non-zero locations in the matrix */

  /****************************************
   ** Enter the matrix non-zero pattern  **
   ** you can use any distribution       **
   ****************************************/

  /* this processor enters the A(myfirstrow:mylastrow, *)
     part of the matrix non-zero pattern */
  if (me == 0) {
    edgenbr = 3*n-4;

    MURGE_GraphBegin(id, n, edgenbr);

    /* Dirichlet boundary condition */
    MURGE_GraphEdge(id, one, one);
    MURGE_GraphEdge(id, n, n);

    /* Interior */
    for (i = 2; i < n; i++) {
      for (j = -1; j <= 1; j++) {
        MURGE_GraphEdge(id, i, i+j);
        /* if (j != 0) {
           MURGE_GraphEdge(id, j+i, i);
           } */
      }
    }
  } else {
    edgenbr = 0;
    MURGE_GraphBegin(id, n, edgenbr);
  }
  MURGE_GraphEnd(id);


  /*  Get Local nodes */
  MURGE_GetLocalNodeNbr(id, &localnodenbr);
  nodelist = (INTS*)malloc(localnodenbr*sizeof(INTS));
  MURGE_GetLocalNodeList(id, nodelist);

  new_localnodenbr = localnodenbr;
  new_nodelist     = nodelist;

  if (NTasks > 1)
    {
      /* move column 1 to next proc */
      int    found = 0;
      int    owner, owner_rcv, index;

      for (m = 0; m < localnodenbr; m++) {
        i = nodelist[m];
        if (i == 1)
          found = 1;
      }
      if (found == 1)
        owner = me;
      else
        owner = 0;
      MPI_Allreduce(&owner, &owner_rcv, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
      if (owner_rcv == me)
        {
          new_localnodenbr--;
          new_nodelist = (INTS*)malloc(new_localnodenbr*sizeof(INTS));
          index = 0;
          for (m = 0; m < localnodenbr; m++)
            {
              i = nodelist[m];
              if (i != 1)
                new_nodelist[index++] = nodelist[m];
            }
        }
      if (owner_rcv == (me+1)%NTasks)
        {
          index = 0;
          new_localnodenbr++;
          new_nodelist = (INTS*)malloc(new_localnodenbr*sizeof(INTS));
          new_nodelist[index++] = 1;
          for (m = 0; m < localnodenbr; m++)
            {
              new_nodelist[index++] = nodelist[m];
            }
        }
    }
  /* Compute the number of non-zeros; */
  nnzeros = 0;
  for (m = 0; m < new_localnodenbr; m++) {
    i = new_nodelist[m];

    if (i == 1 || i == n) {
      /*  Boundaries */
      nnzeros = nnzeros + 1;
    } else {
      /*  Interior */
      for (k = -1; k <= 1; k++) {
        nnzeros = nnzeros + 1;
      }
    }
  }
  /*  We are using dof so a non zero is in fact a block of size dof**2 */
  edgenbr = nnzeros;
  nnzeros = nnzeros * dof*dof;

  /* You can enter only coefficient (i,j) that are in A(nodelist, nodelist)
     on this processor */

  /* We enter the lower and upper triangular part of the matrix so sym = 0 */

  /* matElem is the identity matrix of size 'dof' stored by line */
  {
    INTS * ROWs  = malloc(edgenbr*sizeof(INTS));
    INTS * COLs  = malloc(edgenbr*sizeof(INTS));
    int    index = 0;

    for (m = 0; m < new_localnodenbr; m++) {
      i = new_nodelist[m];
      if ( i == 1 || i == n ) {
        /*  Boundaries */
        ROWs[index] = i;
        COLs[index] = i;
        index++;
      } else {
        for (k = -1; k <= 1; k++) {
          ROWs[index] = i+k;
          COLs[index] = i;
          index++;
        }
      }
    }
    ierr = MURGE_AssemblySetSequence(id, edgenbr, ROWs, COLs,
                                     MURGE_ASSEMBLY_OVW, MURGE_ASSEMBLY_OVW,
                                     MURGE_ASSEMBLY_FOOL, MURGE_BOOLEAN_TRUE,
                                     &id_seq);
    if (ierr != MURGE_SUCCESS) {
      fprintf(stderr, "Error %ld in MURGE_AssemblySetSequence\n", (long)ierr);
      exit(1);
    }
    free(ROWs);
    free(COLs);
  }

  {
    COEF * values = malloc(nnzeros*sizeof(COEF));
    int    index  = 0;
    matElem = (COEF*)malloc(dof*dof*sizeof(COEF));
    for (m = 0; m < new_localnodenbr; m++) {
      i = new_nodelist[m];
      if ( i == 1 || i == n ) {
        /*  Boundaries */
        GetCoef(matElem, i,i,xmin,xmax,n, dof);
        memcpy(&(values[dof*dof*index]),
               matElem, dof*dof*sizeof(COEF));
        index++;
      } else {
        for (k = -1; k <= 1; k++) {
          GetCoef(matElem,i+k,i,xmin,xmax,n, dof);
          memcpy(&(values[dof*dof*index]),
                 matElem, dof*dof*sizeof(COEF));
          index++;
        }
      }
    }

    free(matElem);
    ierr = MURGE_AssemblyUseSequence(id, id_seq, values);
    if (ierr != MURGE_SUCCESS) {
      fprintf(stderr, "Error %ld in MURGE_AssemblySetSequence\n", (long)ierr);
      exit(1);
    }
    free(values);
  }

  /* We build the rhs */
  lrhs = (COEF*)malloc(localnodenbr*dof*sizeof(COEF));
  globrhs = (COEF*)malloc(n*dof*sizeof(COEF));
  for (k = 0; k < n*dof; k++)
    globrhs[k] = 0.0;

  for (m = 0; m < localnodenbr; m++) {
    GetRhs(&val,nodelist[m],xmin,xmax,n);
    for (k = 0; k < dof; k++)
      globrhs[(nodelist[m]-1)*dof+k] = val;
    for (k = 0; k < dof; k++)
      lrhs[m*dof+k] = val;
  }

  globrhs_recv = (COEF*)malloc(n*dof*sizeof(COEF));
  MPI_Allreduce(globrhs, globrhs_recv, n*dof, MURGE_MPI_COEF,
                MPI_SUM, MPI_COMM_WORLD);
  free(globrhs);
  MURGE_SetLocalRHS(id, lrhs, MURGE_ASSEMBLY_OVW, MURGE_ASSEMBLY_OVW);

  /* Get the global solution */
  globx = (COEF*)malloc(n*dof*sizeof(COEF));
  MURGE_GetGlobalSolution(id, globx, root);

  MURGE_SetGlobalRHS(id, globx, -one, MURGE_ASSEMBLY_OVW);
  globprod = (COEF*)malloc(n*dof*sizeof(COEF));
  MURGE_GetGlobalProduct(id, globprod, -one);
  sum1 = 0;
  sum2 = 0;
  for (k = 0; k < n*dof; k++) {
    sum1 += globprod[k]*globprod[k];
    sum2 += (globprod[k] - globrhs_recv[k])*(globprod[k]-globrhs_recv[k]);
  }
  fprintf(stdout, "||AX - B||/||AX||  : %.15g\n", sqrt(sum2/sum1));

  /* Store in a file */
  if (me == 0)
    store(globx,xmin,xmax,n,dof);

  {
    int iter;
    INTS  n_coefs = n/NTasks;
    INTS *coef_idx;
    COEF *coef_vals;

    if (me < n%NTasks)
      n_coefs++;

    fprintf(stdout, "Now using MURGE_SetRHS and MURGE_ASSEMBLY_FOOL\n");
    coef_idx  = malloc(n_coefs*sizeof(INTS));
    coef_vals = malloc(n_coefs*dof*sizeof(COEF));
    /* cyclic distribution of RHS */
    for (iter = 0; iter < n_coefs; iter++)
      {
        coef_idx[iter]  = me + iter*NTasks + 1; /* baseval == 1 */
        for (k = 0; k < dof; k++)
          coef_vals[iter*dof+k] = globrhs_recv[(me + iter*NTasks)*dof + k];

      }

    MURGE_SetRHS(id, n_coefs, coef_idx, coef_vals, MURGE_ASSEMBLY_OVW,
                 MURGE_ASSEMBLY_OVW, MURGE_ASSEMBLY_FOOL);

    free(coef_vals);
    free(coef_idx);
    MURGE_GetGlobalSolution(id, globx, root);

    MURGE_SetGlobalRHS(id, globx, -one, MURGE_ASSEMBLY_OVW);
    MURGE_GetGlobalProduct(id, globprod, -one);
    sum1 = 0;
    sum2 = 0;
    for (k = 0; k < n*dof; k++) {
      sum1 += globprod[k]*globprod[k];
      sum2 += (globprod[k] - globrhs_recv[k])*(globprod[k]-globrhs_recv[k]);
    }
    fprintf(stdout, "||AX - B||/||AX||  : %.15g\n", sqrt(sum2/sum1));
  }

  /* I'm Free  */
  MURGE_Clean(id);
  MURGE_Finalize();
  MPI_Finalize();

  free(nodelist);
  free(lrhs);
  free(globx);
  free(globprod);
  free(globrhs_recv);
  return 0;
}
Example 2
int main(int argc, char **argv) {
	int ret;
	int provided = 0;
	struct mdhim_t *md;
	uint32_t key, key2, **secondary_keys, **secondary_keys2;
	int value, *secondary_key_lens, *secondary_key_lens2;
	struct mdhim_brm_t *brm;
	struct mdhim_bgetrm_t *bgrm;
        mdhim_options_t *db_opts;
	struct index_t *secondary_local_index, *secondary_local_index2;
	struct secondary_info *secondary_info, *secondary_info2;
	MPI_Comm comm;

	ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
	if (ret != MPI_SUCCESS) {
		printf("Error initializing MPI with threads\n");
		exit(1);
	}

	if (provided != MPI_THREAD_MULTIPLE) {
                printf("Not able to enable MPI_THREAD_MULTIPLE mode\n");
                exit(1);
        }
        
        db_opts = mdhim_options_init();
        mdhim_options_set_db_path(db_opts, "./");
        mdhim_options_set_db_name(db_opts, "mdhimTstDB");
        mdhim_options_set_db_type(db_opts, LEVELDB);
        mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); //Key_type = 1 (int)
	mdhim_options_set_debug_level(db_opts, MLOG_CRIT);

	comm = MPI_COMM_WORLD;
	md = mdhimInit(&comm, db_opts);
	if (!md) {
		printf("Error initializing MDHIM\n");
		exit(1);
	}	
	
	//Put the primary keys and values
	key = 100 * (md->mdhim_rank + 1);
	value = 500 * (md->mdhim_rank + 1);
	
	secondary_keys = malloc(sizeof(uint32_t *));		
	secondary_keys[0] = malloc(sizeof(uint32_t));
	*secondary_keys[0] = md->mdhim_rank + 1;
	secondary_key_lens = malloc(sizeof(int));
	secondary_key_lens[0] = sizeof(uint32_t);
	
	secondary_keys2 = malloc(sizeof(uint32_t *));		
	secondary_keys2[0] = malloc(sizeof(uint32_t));
	*secondary_keys2[0] = md->mdhim_rank + 1;
	secondary_key_lens2 = malloc(sizeof(int));
	secondary_key_lens2[0] = sizeof(uint32_t);

	//Create a secondary index on only one range server
	secondary_local_index = create_local_index(md, LEVELDB, 
						   MDHIM_INT_KEY);
	secondary_local_index2 = create_local_index(md, LEVELDB, 
						    MDHIM_INT_KEY);
	secondary_info = mdhimCreateSecondaryInfo(secondary_local_index, 
						  (void **) secondary_keys, 
						  secondary_key_lens, 1, 
						  SECONDARY_LOCAL_INFO);
	secondary_info2 = mdhimCreateSecondaryInfo(secondary_local_index2, 
						   (void **) secondary_keys2, 
						   secondary_key_lens2, 1, 
						   SECONDARY_LOCAL_INFO);
	brm = mdhimPut(md, 
		       &key, sizeof(key), 
		       &value, sizeof(value), 
		       NULL, secondary_info);
	if (!brm || brm->error) {
		printf("Error inserting key/value into MDHIM\n");
	} else {
		printf("Successfully inserted key/value into MDHIM\n");
	}

	//Release the received message
	mdhim_full_release_msg(brm);

	//Insert a new key with the second secondary key
	key2 = 200 * (md->mdhim_rank + 1);
	brm = mdhimPut(md, 
		       &key2, sizeof(key2), 
		       &value, sizeof(value), 
		       NULL, secondary_info2);
	if (!brm || brm->error) {
		printf("Error inserting key/value into MDHIM\n");
	} else {
		printf("Successfully inserted key/value into MDHIM\n");
	}

	//Release the received message
	mdhim_full_release_msg(brm);

	//Commit the database
	ret = mdhimCommit(md, md->primary_index);
	if (ret != MDHIM_SUCCESS) {
		printf("Error committing MDHIM database\n");
	} else {
		printf("Committed MDHIM database\n");
	}

	//Get the stats for the secondary index so the client figures out who to query
	ret = mdhimStatFlush(md, secondary_local_index);
	if (ret != MDHIM_SUCCESS) {
		printf("Error getting stats\n");
	} else {
		printf("Got stats\n");
	}
	//Get the stats for the secondary index so the client figures out who to query
	ret = mdhimStatFlush(md, secondary_local_index2);
	if (ret != MDHIM_SUCCESS) {
		printf("Error getting stats\n");
	} else {
		printf("Got stats\n");
	}

	//Get the primary key values from the secondary local key
	value = 0;
	bgrm = mdhimGet(md, secondary_local_index, 
			secondary_keys[0], 
			secondary_key_lens[0], 
			MDHIM_GET_PRIMARY_EQ);
	if (!bgrm || bgrm->error) {
		printf("Error getting value for key: %d from MDHIM\n", key);
	} else if (bgrm->value_lens[0]) {
		printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0]));
	}

	mdhim_full_release_msg(bgrm);

	//Get the primary key values from the secondary local key
	value = 0;
	bgrm = mdhimGet(md, secondary_local_index2, secondary_keys2[0], 
			secondary_key_lens2[0], 
			MDHIM_GET_PRIMARY_EQ);
	if (!bgrm || bgrm->error) {
		printf("Error getting value for key: %d from MDHIM\n", key);
	} else if (bgrm->value_lens[0]) {
		printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0]));
	}

	mdhim_full_release_msg(bgrm);

	ret = mdhimClose(md);
	free(secondary_keys[0]);
	free(secondary_keys);
	free(secondary_key_lens);
	free(secondary_keys2[0]);
	free(secondary_keys2);
	free(secondary_key_lens2);
	mdhim_options_destroy(db_opts);
	mdhimReleaseSecondaryInfo(secondary_info);
	mdhimReleaseSecondaryInfo(secondary_info2);
	if (ret != MDHIM_SUCCESS) {
		printf("Error closing MDHIM\n");
	}

	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Finalize();

	return 0;
}
Example 3
int main(int argc, char **argv) {
	int ret;
	int provided = 0;
	struct mdhim_t *md;
	int key;
	int value;
	struct mdhim_rm_t *rm;
	struct mdhim_getrm_t *grm;
	char     *db_path = "./";
	char     *db_name = "mdhimTstDB-";
	int      dbug = MLOG_DBG;
	db_options_t *db_opts; // Local variable for db create options to be passed
	int db_type = 2; //UNQLITE=1, LEVELDB=2 (data_store.h) 

	// Create options for DB initialization
	db_opts = db_options_init();
	db_options_set_path(db_opts, db_path);
	db_options_set_name(db_opts, db_name);
	db_options_set_type(db_opts, db_type);
	db_options_set_key_type(db_opts, MDHIM_INT_KEY);
	db_options_set_debug_level(db_opts, dbug);
	ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
	if (ret != MPI_SUCCESS) {
		printf("Error initializing MPI with threads\n");
		exit(1);
	}

	if (provided != MPI_THREAD_MULTIPLE) {
                printf("Not able to enable MPI_THREAD_MULTIPLE mode\n");
                exit(1);
        }

	md = mdhimInit(MPI_COMM_WORLD, db_opts);
	if (!md) {
		printf("Error initializing MDHIM\n");
		exit(1);
	}	

	//Put the keys and values
	key = 20 * (md->mdhim_rank + 1);
	value = 1000 * (md->mdhim_rank + 1);
	rm = mdhimPut(md, &key, sizeof(key), 
		       &value, sizeof(value));
	if (!rm || rm->error) {
		printf("Error inserting key/value into MDHIM\n");
	} else {
		printf("Successfully inserted key/value into MDHIM\n");
	}

	rm = mdhimDelete(md, &key, sizeof(key));
	if (!rm || rm->error) {
		printf("Error deleting key/value from MDHIM\n");
	} else {
		printf("Successfully deleted key/value into MDHIM\n");
	}

	//Get the values
	value = 0;
	grm = mdhimGet(md, &key, sizeof(key), MDHIM_GET_EQ);
	if (!grm || grm->error) {
		printf("Error getting value for key: %d from MDHIM\n", key);
	} else if (grm->value_len) {
		printf("Successfully got value: %d from MDHIM\n", *((int *) grm->value));
	}

	ret = mdhimClose(md);
	if (ret != MDHIM_SUCCESS) {
		printf("Error closing MDHIM\n");
	}

	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Finalize();

	return 0;
}
Example 4
int main(int argc, char* argv[])
{
  bool verb;        
  int it,iz,im,ikz,ikx,iky,ix,iy,i,j,snap;     /* index variables */
  int nt,nz,nx,ny, m2, nk, nzx, nz2, nx2, ny2, nzx2, n2, pad1;
  float dt;
  sf_complex c;

  float  *rr;      /* I/O arrays*/
  sf_complex *cwave, *cwavem, *ww;
  sf_complex **wave, *curr;
  float *rcurr, *rcurr_all;

  sf_file Fw,Fr,Fo;    /* I/O files */
  sf_axis at,az,ax,ay;    /* cube axes */

  sf_complex **lt, **rt;
  sf_file left, right, snaps=NULL;

  /*MPI related*/
  int cpuid,numprocs;
  int provided;
  int n_local, o_local;
  int ozx2;
  float *sendbuf, *recvbuf;
  int *rcounts, *displs;

  MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED,&provided);
  threads_ok = provided >= MPI_THREAD_FUNNELED;

  sf_init(argc,argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &cpuid);
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

  if(!sf_getbool("verb",&verb)) verb=true; /* verbosity */

  /* setup I/O files */
  Fw = sf_input ("--input" );
  Fo = sf_output("--output");
  Fr = sf_input ("ref");

  /* Read/Write axes */
  at = sf_iaxa(Fw,1); nt = sf_n(at); dt = sf_d(at); 
  az = sf_iaxa(Fr,1); nz = sf_n(az); 
  ax = sf_iaxa(Fr,2); nx = sf_n(ax); 
  ay = sf_iaxa(Fr,3); ny = sf_n(ay); 

  if (!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */

  if (!sf_getint("snap",&snap)) snap=0;
  /* interval for snapshots */
    
  if (cpuid==0) {

    sf_oaxa(Fo,az,1); 
    sf_oaxa(Fo,ax,2);
    sf_oaxa(Fo,ay,3);
    
    sf_settype(Fo,SF_FLOAT);

    if (snap > 0) {
      snaps = sf_output("snaps");
      /* (optional) snapshot file */
	
      sf_oaxa(snaps,az,1); 
      sf_oaxa(snaps,ax,2);
      sf_oaxa(snaps,ay,3);
      sf_oaxa(snaps,at,4);
      sf_settype(snaps,SF_FLOAT);
      sf_putint(snaps,"n4",nt/snap);
      sf_putfloat(snaps,"d4",dt*snap);
      sf_putfloat(snaps,"o4",0.);
    } else {
      snaps = NULL;
    }

  }

  //nk = cfft3_init(pad1,nz,nx,ny,&nz2,&nx2,&ny2);
  //n_local = ny2;
  //o_local = 0;
  nk = mcfft3_init(pad1,nz,nx,ny,&nz2,&nx2,&ny2,&n_local,&o_local);
  sf_warning("Cpuid=%d,n2=%d,n1=%d,n0=%d,local_n0=%d,local_0_start=%d",cpuid,nz2,nx2,ny2,n_local,o_local);

  nzx = nz*nx*ny;
  //nzx2 = nz2*nx2*ny2;
  nzx2 = n_local*nz2*nx2;
  ozx2 = o_local*nz2*nx2;

  /* propagator matrices */
  left = sf_input("left");
  right = sf_input("right");

  if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx);
  if (!sf_histint(left,"n2",&m2))  sf_error("Need n2=%d in left",m2);
    
  if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2);
  if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk);
 
  lt = sf_complexalloc2(nzx,m2);
  rt = sf_complexalloc2(m2,nk);

  sf_complexread(lt[0],nzx*m2,left);
  sf_complexread(rt[0],m2*nk,right);

  /* read wavelet & reflectivity */
  ww=sf_complexalloc(nt);  sf_complexread(ww,nt ,Fw);
  rr=sf_floatalloc(nzx); sf_floatread(rr,nzx,Fr);

  curr = sf_complexalloc(nzx2);
  rcurr= sf_floatalloc(nzx2);

  cwave  = sf_complexalloc(nzx2);
  cwavem = sf_complexalloc(nzx2);
  wave = sf_complexalloc2(nzx2,m2);

  //icfft3_allocate(cwavem);

  for (iz=0; iz < nzx2; iz++) {
    curr[iz]=sf_cmplx(0.,0.);
    rcurr[iz]=0.;
  }

  sendbuf = rcurr;
  if (cpuid==0) {
    rcurr_all = sf_floatalloc(nz2*nx2*ny2);
    recvbuf = rcurr_all;
    rcounts = sf_intalloc(numprocs);
    displs  = sf_intalloc(numprocs);
  } else {
    rcurr_all = NULL;
    recvbuf = NULL;
    rcounts = NULL;
    displs = NULL;
  }

  MPI_Gather(&nzx2, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Gather(&ozx2, 1, MPI_INT, displs, 1, MPI_INT, 0, MPI_COMM_WORLD);

  /* MAIN LOOP */
  for (it=0; it<nt; it++) {
    if(verb) sf_warning("it=%d;",it);

    /* matrix multiplication */
    mcfft3(curr,cwave);

    for (im = 0; im < m2; im++) {
      for (iky = 0; iky < n_local; iky++) {
        for (ikx = 0; ikx < nx2; ikx++) {
          for (ikz = 0; ikz < nz2; ikz++) {
            i = ikz + ikx*nz2 + (o_local+iky)*nx2*nz2;
            j = ikz + ikx*nz2 + iky*nx2*nz2;
#ifdef SF_HAS_COMPLEX_H
            cwavem[j] = cwave[j]*rt[i][im];
#else
            cwavem[j] = sf_cmul(cwave[j],rt[i][im]);
#endif
          }
        }
      }
      imcfft3(wave[im],cwavem);
    }

    for (iy = 0; iy < n_local && (iy+o_local)<ny; iy++) {
      for (ix = 0; ix < nx; ix++) {
        for (iz=0; iz < nz; iz++) {
          i = iz + ix*nz + (o_local+iy)*nx*nz;  /* original grid */
          j = iz + ix*nz2+ iy*nx2*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H		
          c = ww[it] * rr[i];
#else
          c = sf_crmul(ww[it],rr[i]);
#endif

          for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
            c += lt[im][i]*wave[im][j];
#else
            c += sf_cmul(lt[im][i],wave[im][j]);
#endif
          }
		    
          curr[j] = c;
          rcurr[j]= crealf(c);
        }
      }
    }

    /* output movie */
    if (snap > 0 && 0 == it%snap) {
      MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);

      if (cpuid==0) {
        for (iy = 0; iy < ny; iy++)
          for (ix = 0; ix < nx; ix++)
            sf_floatwrite(rcurr_all+nz2*(ix+nx2*iy),nz,snaps);
      }
    }

  }
  if(verb) sf_warning(".");    
	    	
  /* write wavefield to output */
  MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);
  if (cpuid==0) {
    for (iy = 0; iy < ny; iy++)
      for (ix = 0; ix < nx; ix++)
        sf_floatwrite(rcurr_all+nz2*(ix+nx2*iy),nz,Fo);
  }
    
  mcfft3_finalize();

  MPI_Finalize();
  exit (0);
}
Example 5
/*
   Initialize MTest, initializing MPI if necessary.

 Environment Variables:
+ MPITEST_DEBUG - If set (to any value), turns on debugging output
. MPITEST_THREADLEVEL_DEFAULT - If set, use as the default "provided"
                                level of thread support.  Applies to
                                MTest_Init but not MTest_Init_thread.
. MPITEST_VERBOSE - If set to a numeric value, turns on that level of
  verbose output.  This is used by the routine 'MTestPrintfMsg'
. MPITEST_RETURN_WITH_CODE - If set to yes/no (or true/false), controls
  whether the test reports success or failure via the return value of main
- MPITEST_RUSAGE - If set, turns on output of rusage data

*/
void MTest_Init_thread(int *argc, char ***argv, int required, int *provided)
{
    int flag;
    char *envval = 0;

    MPI_Initialized(&flag);
    if (!flag) {
        /* Permit an MPI that claims only MPI 1 but includes the
         * MPI_Init_thread routine (e.g., IBM MPI) */
#if MPI_VERSION >= 2 || defined(HAVE_MPI_INIT_THREAD)
        MPI_Init_thread(argc, argv, required, provided);
#else
        MPI_Init(argc, argv);
        *provided = -1;
#endif
    }
    /* Check for debugging control */
    if (getenv("MPITEST_DEBUG")) {
        dbgflag = 1;
        MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
    }

    /* Check for verbose control */
    envval = getenv("MPITEST_VERBOSE");
    if (envval) {
        char *s;
        long val = strtol(envval, &s, 0);
        if (s == envval) {
            /* This is the error case for strtol */
            fprintf(stderr, "Warning: %s not valid for MPITEST_VERBOSE\n", envval);
            fflush(stderr);
        }
        else {
            if (val >= 0) {
                verbose = val;
            }
            else {
                fprintf(stderr, "Warning: %s not valid for MPITEST_VERBOSE\n", envval);
                fflush(stderr);
            }
        }
    }
    /* Check for option to return success/failure in the return value of main */
    envval = getenv("MPITEST_RETURN_WITH_CODE");
    if (envval) {
        if (strcmp(envval, "yes") == 0 ||
            strcmp(envval, "YES") == 0 ||
            strcmp(envval, "true") == 0 || strcmp(envval, "TRUE") == 0) {
            returnWithVal = 1;
        }
        else if (strcmp(envval, "no") == 0 ||
                 strcmp(envval, "NO") == 0 ||
                 strcmp(envval, "false") == 0 || strcmp(envval, "FALSE") == 0) {
            returnWithVal = 0;
        }
        else {
            fprintf(stderr, "Warning: %s not valid for MPITEST_RETURN_WITH_CODE\n", envval);
            fflush(stderr);
        }
    }

    /* Print rusage data if set */
    if (getenv("MPITEST_RUSAGE")) {
        usageOutput = 1;
    }
}
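
For context, here is a minimal, hypothetical sketch of how a test program might call MTest_Init_thread; MTest_Finalize and the errs-counting convention are assumptions about the surrounding test harness, and only MTestPrintfMsg is actually mentioned in the header comment above.

/* Hypothetical usage sketch (not part of the harness code above).  Assumes
 * the harness also provides MTest_Finalize(); MTestPrintfMsg() is the routine
 * referenced in the header comment of MTest_Init_thread. */
#include <mpi.h>
#include "mpitest.h"

int main(int argc, char *argv[])
{
    int provided = 0;
    int rank;
    int errs = 0;

    MTest_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* provided == -1 means the MPI_Init() fallback path was taken */
    if (provided != -1 && provided < MPI_THREAD_FUNNELED)
        errs++;

    MTestPrintfMsg(1, "rank %d initialized, thread level %d\n", rank, provided);

    MTest_Finalize(errs);   /* assumed harness call that reports the result */
    MPI_Finalize();
    return 0;
}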
Example 6
int main(int argc, char *argv[])
{
	int err,errarg;
	int threadsupport,threadsupport_t;
	int rank;
	int opt,erropt;
	int reqthread=MPI_THREAD_MULTIPLE;

	/* Read options */

	verbosity=MPI_T_VERBOSITY_MPIDEV_ALL;
	list_pvar=1;
	list_cvar=1;
	longlist=0;
	runmpi=1;
	errarg=0;

	while ((opt=getopt(argc,argv, "hv:pclim")) != -1 ) {
		switch (opt) {
		case 'h':
			errarg=-1;
			break;
		case 'v':
			switch (atoi(optarg)) {
			case 1: verbosity=MPI_T_VERBOSITY_USER_BASIC; break;
			case 2: verbosity=MPI_T_VERBOSITY_USER_DETAIL; break;
			case 3: verbosity=MPI_T_VERBOSITY_USER_ALL; break;
			case 4: verbosity=MPI_T_VERBOSITY_TUNER_BASIC; break;
			case 5: verbosity=MPI_T_VERBOSITY_TUNER_DETAIL; break;
			case 6: verbosity=MPI_T_VERBOSITY_TUNER_ALL; break;
			case 7: verbosity=MPI_T_VERBOSITY_MPIDEV_BASIC; break;
			case 8: verbosity=MPI_T_VERBOSITY_MPIDEV_DETAIL; break;
			case 9: verbosity=MPI_T_VERBOSITY_MPIDEV_ALL; break;
			}
			break;
		case 'p':
			list_pvar=1;
			list_cvar=0;
			break;
		case 'c':
			list_cvar=1;
			list_pvar=0;
			break;
		case 'l':
			longlist=1;
			break;
		case 'm':
			runmpi=0;
			break;
		default:
			errarg=1;
			erropt=opt;
			break;
		}
	}

	/* Initialize */

	if (runmpi)
	{
		err=MPI_Init_thread(&argc,&argv,reqthread,&threadsupport);
		CHECKERR("Init",err);

		err=MPI_Comm_rank(MPI_COMM_WORLD,&rank);
		CHECKERR("Rank",err);
	}
	else
		rank=0;


	/* ONLY FOR RANK 0 */

	if (rank==0)
	{
		err=MPI_T_init_thread(reqthread, &threadsupport_t);
		CHECKERR("T_Init",err);

		if (errarg)
		{
			if (errarg>0)
				printf("Argument error: %c\n",erropt);
			usage(errarg!=-1);
		}


		/* Header */

		printf("MPI_T Variable List\n");

		if (runmpi)
		{
			/* Print thread support for MPI */

			printf("  MPI Thread support: ");
			switch (threadsupport) {
			case MPI_THREAD_SINGLE:
				printf("MPI_THREAD_SINGLE\n");
				break;
			case MPI_THREAD_FUNNELED:
				printf("MPI_THREAD_FUNNELED\n");
				break;
			case MPI_THREAD_SERIALIZED:
				printf("MPI_THREAD_SERIALIZED\n");
				break;
			case MPI_THREAD_MULTIPLE:
				printf("MPI_THREAD_MULTIPLE\n");
				break;
			default:
				printf("unknown (%i)\n",threadsupport);
			}
		}

		/* Print thread support for MPI_T */

		printf("  MPI_T Thread support: ");
		switch (threadsupport_t) {
		case MPI_THREAD_SINGLE:
			printf("MPI_THREAD_SINGLE\n");
			break;
		case MPI_THREAD_FUNNELED:
			printf("MPI_THREAD_FUNNELED\n");
			break;
		case MPI_THREAD_SERIALIZED:
			printf("MPI_THREAD_SERIALIZED\n");
			break;
		case MPI_THREAD_MULTIPLE:
			printf("MPI_THREAD_MULTIPLE\n");
			break;
		default:
			printf("unknown (%i)\n",threadsupport_t);
		}

		/* Start MPI_T */


		if (list_cvar)
		{
			printf("\n===============================\n");
			printf("Control Variables");
			printf("\n===============================\n\n");
			list_cvars();
			printf("\n");
		}

		if (list_pvar)
		{
			printf("\n===============================\n");
			printf("Performance Variables");
			printf("\n===============================\n\n");
			list_pvars();
			printf("\n");
		}
	}

	/* Clean up */

	if (runmpi)
	{
		err=MPI_Barrier(MPI_COMM_WORLD);
		CHECKERR("Barrier",err);
	}

	if (rank==0)
		MPI_T_finalize();

	if (runmpi)
		MPI_Finalize();

	if (rank==0)
		printf("Done.\n");

	return 0;
}
Example 7
int main(int argc, char *argv[])
{
  int provided, ret, size, rank;
  int nextpe, i;
  static char src[BUF_SIZE];
  char *dest;

  MPI_Status   status[2];
  MPI_Request  req[2];
  
  ret = MPI_Init_thread(&argc, &argv,  MPI_THREAD_MULTIPLE, &provided);
  if (ret != MPI_SUCCESS) {
    printf("Error initializing MPI with threads\n");
    exit(1);
  }

  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  nextpe = (rank + 1) % size;

  //src = rank;

  dest = (char *) shmalloc (BUF_SIZE * sizeof (*dest));
  if (dest == NULL){
    printf("Couldn't shmalloc.\n");
  }

  for ( i = 0; i < BUF_SIZE; i++){
    dest[i] = 'z';
    src[i]  = BUF_SIZE * rank + i;
  }
  shmem_barrier_all ();

  //shmem_int_put (dest, &src, 1, nextpe);
  printf("Sending: %d to %d from %d\n\n", src[0], nextpe, rank);
  MPI_Isend(src, BUF_SIZE, MPI_CHAR, nextpe, 123, MPI_COMM_WORLD, &req[0]);

  // shmem_int_get (dest, &src, 1, nextpe);
  MPI_Irecv(dest, BUF_SIZE, MPI_CHAR, nextpe, 123, MPI_COMM_WORLD, &req[1]);
  /* make sure both transfers have completed before the buffers are checked */
  MPI_Waitall(2, req, status);

  shmem_barrier_all ();

  printf ("Rank: %4d: got[5] %4d, %4d: ", rank, dest[5],src[5]);
  if (dest[0] == rank * BUF_SIZE)
    {
      printf ("CORRECT");
    }
  else
    {
      printf ("WRONG, expected %d", rank);
    }
  printf ("\n");

  shmem_barrier_all ();

  /*
  static int value;
  int old;
  value = rank + 1;
  old = shmem_int_cswap (&value, value, -value, rank);
  printf ("%d: value = %d, old = %d\n", rank, value, old);

  static int race_winner = -4;
  int oldval;

  oldval = shmem_int_cswap (&race_winner, -4, rank, 0);

  if (oldval == -4){
      printf ("pe %d was first\n", rank);
    }
  */
  //shfree (dest);
  MPI_Finalize();

  return 0;

}
Example 8
int main(int argc,char **argv) {
  
  MPI_File file;
  long long mapxsize,mapysize;
  
  long long myxmin,myxmax,myymin,myymax;
  int processes_in_x_dim,processes_in_y_dim;
  int my_proc_id_in_x_dim,my_proc_id_in_y_dim;

  long long boxxsize,boxysize; // sizes of a map fragment handled by each process

  int myrank,proccount;

  MPI_Offset filesize;

  long long x,y; // counters to go through a map fragment

  double max_similarity,my_similarity,my_temp_similarity;

  double cell_val;

  int provided_thread_support;
  MPI_Init_thread(&argc,&argv,MPI_THREAD_MULTIPLE, &provided_thread_support);
  
  // first read the file name from command line
  
  if (argc<7) {
    printf("\nSyntax: 2Dmapsearch-MPIIO <map_filename> mapxsize mapysize processes_in_x_dim processes_in_y_dim pmem_path\n");
    MPI_Finalize();
    exit(-1);
  }
  
  mapxsize=atol(argv[2]);
  mapysize=atol(argv[3]);
  
  processes_in_x_dim=atoi(argv[4]);
  processes_in_y_dim=atoi(argv[5]);

  if (mapxsize*mapysize<=0) {
    printf("\nWrong map size given.\n");
    MPI_Finalize();
    exit(-1);
  }

  // find out my rank and the number of processes 

  MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
  MPI_Comm_size(MPI_COMM_WORLD,&proccount);

  // now check if the number of processes matches the specified processes in dims
  if (proccount!=(processes_in_x_dim*processes_in_y_dim)) {
    printf("\nThe number of processes started does not match processes_in_x_dim*processes_in_y_dim.\n");
    MPI_Finalize();
    exit(-1);
  }

  MPI_Info info;
  MPI_Info_create(&info);
  MPI_Info_set(info,"pmem_path",argv[6]);
  MPI_Info_set(info,"pmem_io_mode","0");
  MPI_File_open(MPI_COMM_WORLD,argv[1],MPI_MODE_RDWR,info,&file) ;
  
  // now check the size of the file vs the given map size
  MPI_File_get_size(file,&filesize);
  if (filesize<mapxsize*mapysize) {
    printf("\nFile too small for the specified map size.\n");
    MPI_File_close(&file);
    MPI_Finalize();
    exit(-1);
  }


  // now each process should determine its bounding box for the map

  // length of each box will be (mapxsize/processes_in_x_dim) and similarly for the y dimension

  boxxsize=(mapxsize/processes_in_x_dim);
  boxysize=(mapysize/processes_in_y_dim);

  my_proc_id_in_x_dim=myrank%processes_in_x_dim;
  my_proc_id_in_y_dim=myrank/processes_in_x_dim;

  myxmin=my_proc_id_in_x_dim*boxxsize;
  myymin=my_proc_id_in_y_dim*boxysize;
  myxmax=myxmin+boxxsize;
  myymax=myymin+boxysize;

  // now each process should scan its fragment

  // if a certain element is detected then the application scans its immediate surroundings for elements of some other types

  my_similarity=0;
  my_temp_similarity=0; /* initialized so the max test below never reads an indeterminate value */

  for(x=myxmin;x<myxmax;x++)
    for(y=myymin;y<myymax;y++) {
      
      cell_val=get_xy_cell(x,y,file,mapxsize,mapysize);
      if ((cell_val>=CELL_VAL_LOW_THRESHOLD) && (cell_val<=CELL_VAL_HIGH_THRESHOLD))
	my_temp_similarity=eval_surrounding(x,y,file,mapxsize,mapysize);
      
      if (my_temp_similarity>=my_similarity)
	my_similarity=my_temp_similarity;
      
    }


  // now all processes should select the highest similarity

  MPI_Reduce(&my_similarity,&max_similarity,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);

  if (!myrank) {
    printf("\nThe final similarity is %f\n",max_similarity);

  }


 MPI_File_close(&file);

 MPI_Finalize();

}
Example 9
int main(int argc, char **argv)
{
    int provided;
    int rank;
    int size;
    int status;

    double t0, t1, t2, t3, t4, t5;

    int i, j, k;

    int bufPow, bufSize;
    int msgPow, msgSize;

    double* m1;
    double* b1;
    double* b2;
    MPI_Win w1;

    int target;
    double dt, bw;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    bufPow = (argc > 1 ? atoi(argv[1]) : 25);
    bufSize = pow(2,bufPow);
    if (rank == 0) printf("%d: bufSize = %d doubles\n", rank, bufSize);

    /* allocate RMA buffers for windows */

    status = MPI_Alloc_mem(bufSize * sizeof(double), MPI_INFO_NULL, &m1);
    assert(status==MPI_SUCCESS);

    for (i = 0; i < bufSize; i++)
    {
        m1[i] = (double)0;
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* register remote pointers */

    status = MPI_Win_create(m1,
                            bufSize * sizeof(double),
                            sizeof(double),
                            MPI_INFO_NULL,
                            MPI_COMM_WORLD,
                            &w1);
    assert(status==MPI_SUCCESS);

    MPI_Barrier(MPI_COMM_WORLD);

    /* allocate RMA buffers */
    status = MPI_Alloc_mem(bufSize * sizeof(double), MPI_INFO_NULL, &b1);
    assert(status==MPI_SUCCESS);

    status = MPI_Alloc_mem(bufSize * sizeof(double), MPI_INFO_NULL, &b2);
    assert(status==MPI_SUCCESS);

    for (k = 0; k < bufSize; k++)
    {
        b2[k] = (double)rank;
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* begin test */

    if (rank == 0)
    {
        printf("MPI_Get performance test for buffer size = %d doubles\n",
               bufSize);
        printf("host      target       msg. size (doubles)     get (sec)     BW (MB/s)\n");
        printf("======================================================================\n");
        fflush(stdout);

        for (i = 1; i < bufPow; i++)
        {
            msgPow = i;
            msgSize = pow(2,msgPow);

            for (j = 1; j < size; j++)
            {
                target = j;

                for (k = 0; k < msgSize; k++)
                {
                    b1[k] = -1.0*rank;
                }

                /* this communication is just to initialize the remote buffer */

                status = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,
                                      target,
                                      MPI_MODE_NOCHECK,
                                      w1);
                assert(status==MPI_SUCCESS);

                status = MPI_Put(b1,
                                 msgSize,
                                 MPI_DOUBLE,
                                 target,
                                 0,
                                 msgSize,
                                 MPI_DOUBLE,
                                 w1);
                assert(status==MPI_SUCCESS);

                status = MPI_Win_unlock(target, w1);
                assert(status==MPI_SUCCESS);

                /* this is the real communication to time */

                t0 = MPI_Wtime();

                status = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,
                                      target,
                                      MPI_MODE_NOCHECK,
                                      w1);
                assert(status==MPI_SUCCESS);

                t1 = MPI_Wtime();

                status = MPI_Accumulate(b2,
                                 msgSize,
                                 MPI_DOUBLE,
                                 target,
                                 0,
                                 msgSize,
                                 MPI_DOUBLE,
                                 MPI_SUM,
                                 w1);
                assert(status==MPI_SUCCESS);

                t2 = MPI_Wtime();

                status = MPI_Win_unlock(target, w1);
                assert(status==MPI_SUCCESS);

                t3 = MPI_Wtime();

                /* this communication is just to verify the remote buffer */

                status = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,
                                      target,
                                      MPI_MODE_NOCHECK,
                                      w1);
                assert(status==MPI_SUCCESS);

                status = MPI_Get(b1,
                                 msgSize,
                                 MPI_DOUBLE,
                                 target,
                                 0,
                                 msgSize,
                                 MPI_DOUBLE,
                                 w1);
                assert(status==MPI_SUCCESS);

                status = MPI_Win_unlock(target, w1);
                assert(status==MPI_SUCCESS);

                for (k = 0; k < msgSize; k++)
                {
                    assert( b2[k]==0.0 );
                }

                dt = t3 - t0;
                bw = (double) msgSize * sizeof(double) * (1e-6) / dt;

                printf("%4d     %4d     %4d       %9.6f     %9.3f\n", rank, target, msgSize, dt, bw);
                fflush(stdout);

            }
            printf("======================================================================\n");
            fflush(stdout);
        }
    }
    MPI_Barrier(MPI_COMM_WORLD);

    status = MPI_Win_free(&w1);
    assert(status==MPI_SUCCESS);

    status = MPI_Free_mem(b2);
    assert(status==MPI_SUCCESS);

    status = MPI_Free_mem(b1);
    assert(status==MPI_SUCCESS);

    status = MPI_Free_mem(m1);
    assert(status==MPI_SUCCESS);

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) printf("%d: MPI_Finalize\n", rank);
    MPI_Finalize();

    return (0);
}
Example 10
int main(int argc, char **argv) {
	int ret;
	int provided = 0;
	struct mdhim_t *md;
	struct mdhim_brm_t *brm;
	struct mdhim_bgetrm_t *bgrm;
        mdhim_options_t *db_opts;
	struct plfs_record *rec = NULL;
	FILE *file;
	unsigned long long int key;
	MPI_Comm comm;

	ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
	if (ret != MPI_SUCCESS) {
		printf("Error initializing MPI with threads\n");
		exit(1);
	}

	if (provided != MPI_THREAD_MULTIPLE) {
                printf("Not able to enable MPI_THREAD_MULTIPLE mode\n");
                exit(1);
        }

	//Set MDHIM options
        db_opts = mdhim_options_init();
        mdhim_options_set_db_path(db_opts, "./");
        mdhim_options_set_db_name(db_opts, "mdhimTstDB");
        mdhim_options_set_db_type(db_opts, LEVELDB);
        mdhim_options_set_key_type(db_opts, MDHIM_LONG_INT_KEY);
	mdhim_options_set_debug_level(db_opts, MLOG_CRIT);
	mdhim_options_set_max_recs_per_slice(db_opts, SLICE_SIZE);
        mdhim_options_set_server_factor(db_opts, 10);
	mdhim_options_set_value_append(db_opts, 1);

	//Initialize MDHIM
	comm = MPI_COMM_WORLD;
	md = mdhimInit(&comm, db_opts);
	if (!md) {
		printf("Error initializing MDHIM\n");
		exit(1);
	}	

	file = open_output(md->mdhim_rank);
	if (!file) {
		printf("Error opening file\n");
		goto done;
	}
	
	rec = parse_input(file);
	if (!rec) {
		printf("Error parsing file\n");
		goto done;
	}
	key = get_key(rec->logical_offset);
	printf("Inserting key: %llu\n", key);
	brm = mdhimPut(md, &key, sizeof(key), 
		      rec, sizeof(struct plfs_record), NULL, NULL);
	if (!brm || brm->error) {
		printf("Error inserting key/value into MDHIM\n");
	} else {
		printf("Successfully inserted key/value into MDHIM\n");
	}
	
	mdhim_full_release_msg(brm);
	//Commit the database
	ret = mdhimCommit(md, md->primary_index);
	if (ret != MDHIM_SUCCESS) {
		printf("Error committing MDHIM database\n");
	} else {
		printf("Committed MDHIM database\n");
	}

	bgrm = mdhimGet(md, md->primary_index, &key, sizeof(key), 
			MDHIM_GET_EQ);
	if (!bgrm || bgrm->error) {
		printf("Error getting value for key: %llu from MDHIM\n", key);
	} else if (bgrm->value_lens[0]) {
		printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0]));
	}

	mdhim_full_release_msg(bgrm);

done:
	ret = mdhimClose(md);
	free(rec);
	mdhim_options_destroy(db_opts);
	if (ret != MDHIM_SUCCESS) {
		printf("Error closing MDHIM\n");
	}

	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Finalize();

	return 0;
}
Example 11
int main(int argc, char *argv[])
{
  FILE *parameterfile = NULL;
  int j, i, ix = 0, isample = 0, op_id = 0;
  char datafilename[206];
  char parameterfilename[206];
  char conf_filename[50];
  char * input_filename = NULL;
  char * filename = NULL;
  double plaquette_energy;
  struct stout_parameters params_smear;
  spinor **s, *s_;

#ifdef _KOJAK_INST
#pragma pomp inst init
#pragma pomp inst begin(main)
#endif

#if (defined SSE || defined SSE2 || defined SSE3)
  signal(SIGILL, &catch_ill_inst);
#endif

  DUM_DERI = 8;
  DUM_MATRIX = DUM_DERI + 5;
#if ((defined BGL && defined XLC) || defined _USE_TSPLITPAR)
  NO_OF_SPINORFIELDS = DUM_MATRIX + 3;
#else
  NO_OF_SPINORFIELDS = DUM_MATRIX + 3;
#endif

  verbose = 0;
  g_use_clover_flag = 0;

#ifdef MPI

#  ifdef OMP
  int mpi_thread_provided;
  MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided);
#  else
  MPI_Init(&argc, &argv);
#  endif

  MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id);
#else
  g_proc_id = 0;
#endif

  process_args(argc,argv,&input_filename,&filename);
  set_default_filenames(&input_filename, &filename);

  /* Read the input file */
  if( (j = read_input(input_filename)) != 0) {
    fprintf(stderr, "Could not find input file: %s\nAborting...\n", input_filename);
    exit(-1);
  }

#ifdef OMP
  init_openmp();
#endif

  /* this DBW2 stuff is not needed for the inversion ! */
  if (g_dflgcr_flag == 1) {
    even_odd_flag = 0;
  }
  g_rgi_C1 = 0;
  if (Nsave == 0) {
    Nsave = 1;
  }

  if (g_running_phmc) {
    NO_OF_SPINORFIELDS = DUM_MATRIX + 8;
  }

  tmlqcd_mpi_init(argc, argv);

  g_dbw2rand = 0;

  /* starts the single and double precision random number */
  /* generator                                            */
  start_ranlux(rlxd_level, random_seed);

  /* we need to make sure that we don't have even_odd_flag = 1 */
  /* if any of the operators doesn't use it                    */
  /* in this way even/odd can still be used by other operators */
  for(j = 0; j < no_operators; j++) if(!operator_list[j].even_odd_flag) even_odd_flag = 0;

#ifndef MPI
  g_dbw2rand = 0;
#endif

#ifdef _GAUGE_COPY
  j = init_gauge_field(VOLUMEPLUSRAND, 1);
#else
  j = init_gauge_field(VOLUMEPLUSRAND, 0);
#endif
  if (j != 0) {
    fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n");
    exit(-1);
  }
  j = init_geometry_indices(VOLUMEPLUSRAND);
  if (j != 0) {
    fprintf(stderr, "Not enough memory for geometry indices! Aborting...\n");
    exit(-1);
  }
  if (no_monomials > 0) {
    if (even_odd_flag) {
      j = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag);
    }
    else {
      j = init_monomials(VOLUMEPLUSRAND, even_odd_flag);
    }
    if (j != 0) {
      fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n");
      exit(-1);
    }
  }
  if (even_odd_flag) {
    j = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS);
  }
  else {
    j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS);
  }
  if (j != 0) {
    fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n");
    exit(-1);
  }

  if (g_running_phmc) {
    j = init_chi_spinor_field(VOLUMEPLUSRAND / 2, 20);
    if (j != 0) {
      fprintf(stderr, "Not enough memory for PHMC Chi fields! Aborting...\n");
      exit(-1);
    }
  }

  g_mu = g_mu1;

  if (g_cart_id == 0) {
    /*construct the filenames for the observables and the parameters*/
    strncpy(datafilename, filename, 200);
    strcat(datafilename, ".data");
    strncpy(parameterfilename, filename, 200);
    strcat(parameterfilename, ".para");

    parameterfile = fopen(parameterfilename, "w");
    write_first_messages(parameterfile, "invert", git_hash);
    fclose(parameterfile);
  }

  /* define the geometry */
  geometry();

  /* define the boundary conditions for the fermion fields */
  boundary(g_kappa);

  phmc_invmaxev = 1.;

  init_operators();

  /* list and initialize measurements*/
  if(g_proc_id == 0) {
    printf("\n");
    for(int j = 0; j < no_measurements; j++) {
      printf("# measurement id %d, type = %d\n", j, measurement_list[j].type);
    }
  }
  init_measurements();  

  /* this could be maybe moved to init_operators */
#ifdef _USE_HALFSPINOR
  j = init_dirac_halfspinor();
  if (j != 0) {
    fprintf(stderr, "Not enough memory for halffield! Aborting...\n");
    exit(-1);
  }
  if (g_sloppy_precision_flag == 1) {
    j = init_dirac_halfspinor32();
    if (j != 0)
    {
      fprintf(stderr, "Not enough memory for 32-bit halffield! Aborting...\n");
      exit(-1);
    }
  }
#  if (defined _PERSISTENT)
  if (even_odd_flag)
    init_xchange_halffield();
#  endif
#endif

  for (j = 0; j < Nmeas; j++) {
    sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore);
    if (g_cart_id == 0) {
      printf("#\n# Trying to read gauge field from file %s in %s precision.\n",
            conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double"));
      fflush(stdout);
    }
    if( (i = read_gauge_field(conf_filename,g_gauge_field)) !=0) {
      fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename);
      exit(-2);
    }


    if (g_cart_id == 0) {
      printf("# Finished reading gauge field.\n");
      fflush(stdout);
    }
#ifdef MPI
    xchange_gauge(g_gauge_field);
#endif

    /*compute the energy of the gauge field*/
    plaquette_energy = measure_plaquette( (const su3**) g_gauge_field);

    if (g_cart_id == 0) {
      printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc));
      fflush(stdout);
    }

    if (use_stout_flag == 1){
      params_smear.rho = stout_rho;
      params_smear.iterations = stout_no_iter;
/*       if (stout_smear((su3_tuple*)(g_gauge_field[0]), &params_smear, (su3_tuple*)(g_gauge_field[0])) != 0) */
/*         exit(1) ; */
      g_update_gauge_copy = 1;
      plaquette_energy = measure_plaquette( (const su3**) g_gauge_field);

      if (g_cart_id == 0) {
        printf("# The plaquette value after stouting is %e\n", plaquette_energy / (6.*VOLUME*g_nproc));
        fflush(stdout);
      }
    }

    /* if any measurements are defined in the input file, do them here */
    measurement * meas;
    for(int imeas = 0; imeas < no_measurements; imeas++){
      meas = &measurement_list[imeas];
      if (g_proc_id == 0) {
        fprintf(stdout, "#\n# Beginning online measurement.\n");
      }
      meas->measurefunc(nstore, imeas, even_odd_flag);
    }

    if (reweighting_flag == 1) {
      reweighting_factor(reweighting_samples, nstore);
    }

    /* Compute minimal eigenvalues, if wanted */
    if (compute_evs != 0) {
      eigenvalues(&no_eigenvalues, 5000, eigenvalue_precision,
                  0, compute_evs, nstore, even_odd_flag);
    }
    if (phmc_compute_evs != 0) {
#ifdef MPI
      MPI_Finalize();
#endif
      return(0);
    }

    /* Compute the mode number or topological susceptibility using spectral projectors, if wanted*/

    if(compute_modenumber != 0 || compute_topsus !=0){
      
      s_ = calloc(no_sources_z2*VOLUMEPLUSRAND+1, sizeof(spinor));
      s  = calloc(no_sources_z2, sizeof(spinor*));
      if(s_ == NULL) { 
	printf("Not enough memory in %s: %d",__FILE__,__LINE__); exit(42); 
      }
      if(s == NULL) { 
	printf("Not enough memory in %s: %d",__FILE__,__LINE__); exit(42); 
      }
      
      
      for(i = 0; i < no_sources_z2; i++) {
#if (defined SSE3 || defined SSE2 || defined SSE)
        s[i] = (spinor*)(((unsigned long int)(s_)+ALIGN_BASE)&~ALIGN_BASE)+i*VOLUMEPLUSRAND;
#else
        s[i] = s_+i*VOLUMEPLUSRAND;
#endif
	
        random_spinor_field_lexic(s[i], reproduce_randomnumber_flag,RN_Z2);
	
/* 	what is this here needed for?? */
/*         spinor *aux_,*aux; */
/* #if ( defined SSE || defined SSE2 || defined SSE3 ) */
/*         aux_=calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); */
/*         aux = (spinor *)(((unsigned long int)(aux_)+ALIGN_BASE)&~ALIGN_BASE); */
/* #else */
/*         aux_=calloc(VOLUMEPLUSRAND, sizeof(spinor)); */
/*         aux = aux_; */
/* #endif */
	
        if(g_proc_id == 0) {
          printf("source %d \n", i);
        }
	
        if(compute_modenumber != 0){
          mode_number(s[i], mstarsq);
        }
	
        if(compute_topsus !=0) {
          top_sus(s[i], mstarsq);
        }
      }
      free(s);
      free(s_);
    }


    /* move to operators as well */
    if (g_dflgcr_flag == 1) {
      /* set up deflation blocks */
      init_blocks(nblocks_t, nblocks_x, nblocks_y, nblocks_z);

      /* this can stay here for now, but later we probably need */
      /* something like init_dfl_solver called somewhere else  */
      /* create set of approximate lowest eigenvectors ("global deflation subspace") */

      /*       g_mu = 0.; */
      /*       boundary(0.125); */
      generate_dfl_subspace(g_N_s, VOLUME, reproduce_randomnumber_flag);
      /*       boundary(g_kappa); */
      /*       g_mu = g_mu1; */

      /* Compute little Dirac operators */
      /*       alt_block_compute_little_D(); */
      if (g_debug_level > 0) {
        check_projectors(reproduce_randomnumber_flag);
        check_local_D(reproduce_randomnumber_flag);
      }
      if (g_debug_level > 1) {
        check_little_D_inversion(reproduce_randomnumber_flag);
      }

    }
    if(SourceInfo.type == 1) {
      index_start = 0;
      index_end = 1;
    }

    g_precWS=NULL;
    if(use_preconditioning == 1){
      /* todo load fftw wisdom */
#if (defined HAVE_FFTW ) && !( defined MPI)
      loadFFTWWisdom(g_spinor_field[0],g_spinor_field[1],T,LX);
#else
      use_preconditioning=0;
#endif
    }

    if (g_cart_id == 0) {
      fprintf(stdout, "#\n"); /*Indicate starting of the operator part*/
    }
    for(op_id = 0; op_id < no_operators; op_id++) {
      boundary(operator_list[op_id].kappa);
      g_kappa = operator_list[op_id].kappa; 
      g_mu = 0.;

      if(use_preconditioning==1 && PRECWSOPERATORSELECT[operator_list[op_id].solver]!=PRECWS_NO ){
        printf("# Using preconditioning with treelevel preconditioning operator: %s \n",
              precWSOpToString(PRECWSOPERATORSELECT[operator_list[op_id].solver]));
        /* initial preconditioning workspace */
        operator_list[op_id].precWS=(spinorPrecWS*)malloc(sizeof(spinorPrecWS));
        spinorPrecWS_Init(operator_list[op_id].precWS,
                  operator_list[op_id].kappa,
                  operator_list[op_id].mu/2./operator_list[op_id].kappa,
                  -(0.5/operator_list[op_id].kappa-4.),
                  PRECWSOPERATORSELECT[operator_list[op_id].solver]);
        g_precWS = operator_list[op_id].precWS;

        if(PRECWSOPERATORSELECT[operator_list[op_id].solver] == PRECWS_D_DAGGER_D) {
          fitPrecParams(op_id);
        }
      }

      for(isample = 0; isample < no_samples; isample++) {
        for (ix = index_start; ix < index_end; ix++) {
          if (g_cart_id == 0) {
            fprintf(stdout, "#\n"); /*Indicate starting of new index*/
          }
          /* we use g_spinor_field[0-7] for sources and props for the moment */
          /* 0-3 in case of 1 flavour  */
          /* 0-7 in case of 2 flavours */
          prepare_source(nstore, isample, ix, op_id, read_source_flag, source_location);
          //randomize initial guess for eigcg if needed-----experimental
          if( (operator_list[op_id].solver == INCREIGCG) && (operator_list[op_id].solver_params.eigcg_rand_guess_opt) ){ //randomize the initial guess
              gaussian_volume_source( operator_list[op_id].prop0, operator_list[op_id].prop1,isample,ix,0); //need to check this
          } 
          operator_list[op_id].inverter(op_id, index_start, 1);
        }
      }


      if(use_preconditioning==1 && operator_list[op_id].precWS!=NULL ){
        /* free preconditioning workspace */
        spinorPrecWS_Free(operator_list[op_id].precWS);
        free(operator_list[op_id].precWS);
      }

      if(operator_list[op_id].type == OVERLAP){
        free_Dov_WS();
      }

    }
    nstore += Nsave;
  }

#ifdef OMP
  free_omp_accumulators();
#endif
  free_blocks();
  free_dfl_subspace();
  free_gauge_field();
  free_geometry_indices();
  free_spinor_field();
  free_moment_field();
  free_chi_spinor_field();
  free(filename);
  free(input_filename);
#ifdef MPI
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Finalize();
#endif
  return(0);
#ifdef _KOJAK_INST
#pragma pomp inst end(main)
#endif
}
Example 12
int main(int argc, char **argv)
{
    int provided;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);
    assert(provided==MPI_THREAD_SINGLE);

    int me;
    int nproc;
    MPI_Comm_rank(MPI_COMM_WORLD,&me);
    MPI_Comm_size(MPI_COMM_WORLD,&nproc);

    int status;
    double t0,t1,t2,t3,t4,t5;
    double tt0,tt1,tt2,tt3,tt4;

    int bufSize = ( argc>1 ? atoi(argv[1]) : 1000000 );
    if (me==0) printf("%d: bufSize = %d doubles\n",me,bufSize);

    /* allocate RMA buffers for windows */
    double* m1;
    double* m2;
    status = MPI_Alloc_mem(bufSize * sizeof(double), MPI_INFO_NULL, &m1);
    status = MPI_Alloc_mem(bufSize * sizeof(double), MPI_INFO_NULL, &m2);

    /* register remote pointers */
    MPI_Win w1;
    MPI_Win w2;
    status = MPI_Win_create(m1, bufSize * sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &w1);
    status = MPI_Win_create(m2, bufSize * sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &w2);
    MPI_Barrier(MPI_COMM_WORLD);

    /* allocate RMA buffers */
    double* b1;
    double* b2;
    status = MPI_Alloc_mem(bufSize * sizeof(double), MPI_INFO_NULL, &b1);
    status = MPI_Alloc_mem(bufSize * sizeof(double), MPI_INFO_NULL, &b2);

    /* initialize buffers */
    int i;
    for (i=0;i<bufSize;i++) b1[i]=1.0*me;
    for (i=0;i<bufSize;i++) b2[i]=-1.0;

    status = MPI_Win_fence( MPI_MODE_NOPRECEDE | MPI_MODE_NOSTORE , w1 );
    status = MPI_Win_fence( MPI_MODE_NOPRECEDE | MPI_MODE_NOSTORE , w2);
    status = MPI_Put(b1, bufSize, MPI_DOUBLE, me, 0, bufSize, MPI_DOUBLE, w1);
    status = MPI_Put(b2, bufSize, MPI_DOUBLE, me, 0, bufSize, MPI_DOUBLE, w2);
    status = MPI_Win_fence( MPI_MODE_NOSTORE , w1);
    status = MPI_Win_fence( MPI_MODE_NOSTORE , w2);

    int target;
    int j;
    double dt,bw;
    MPI_Barrier(MPI_COMM_WORLD);
    if (me==0){
        printf("MPI_Get performance test for buffer size = %d doubles\n",bufSize);
        printf("  jump    host   target       get (s)       BW (MB/s)\n");
        printf("===========================================================\n");
        fflush(stdout);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    for (j=0;j<nproc;j++){
        target = (me+j) % nproc;
        MPI_Barrier(MPI_COMM_WORLD);
        t0 = MPI_Wtime();
        status = MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, MPI_MODE_NOCHECK, w1);
        t1 = MPI_Wtime();
        status = MPI_Get(b2, bufSize, MPI_DOUBLE, target, 0, bufSize, MPI_DOUBLE, w1);
        t2 = MPI_Wtime();
        status = MPI_Win_unlock(target, w1);
        t3 = MPI_Wtime();
        for (i=0;i<bufSize;i++) assert( b2[i]==(1.0*target) );
        dt = t3 - t0;
        bw = (double)bufSize*sizeof(double)*(1e-6)/dt;
        printf("%4d     %4d     %4d       %9.6f     %9.3f\n",j,me,target,dt,bw);
        fflush(stdout);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    status = MPI_Win_free(&w2);
    status = MPI_Win_free(&w1);

    status = MPI_Free_mem(b2);
    status = MPI_Free_mem(b1);

    status = MPI_Free_mem(m2);
    status = MPI_Free_mem(m1);

    MPI_Barrier(MPI_COMM_WORLD);

    if (me==0) printf("%d: MPI_Finalize\n",me);
    MPI_Finalize();

    return(0);
}
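
The benchmark above times one-sided transfers under passive-target synchronization: each pass locks the target's window, issues MPI_Get, and unlocks, so the transfer is known complete when MPI_Win_unlock returns. Below is a minimal, hedged sketch of that lock/get/unlock pattern in isolation; the buffer size, the right-neighbour choice, and all variable names are illustrative and not taken from the benchmark.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int me, nproc;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    enum { N = 1024 };                 /* illustrative buffer size */
    double *win_buf, local[N];
    MPI_Alloc_mem(N * sizeof(double), MPI_INFO_NULL, &win_buf);
    for (int i = 0; i < N; i++) win_buf[i] = (double)me;

    MPI_Win win;
    MPI_Win_create(win_buf, N * sizeof(double), sizeof(double),
                   MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    MPI_Barrier(MPI_COMM_WORLD);       /* every rank has filled its window */

    int target = (me + 1) % nproc;     /* read from the right neighbour */
    MPI_Win_lock(MPI_LOCK_SHARED, target, 0, win);
    MPI_Get(local, N, MPI_DOUBLE, target, 0, N, MPI_DOUBLE, win);
    MPI_Win_unlock(target, win);       /* the MPI_Get is complete after unlock */

    printf("rank %d read %.1f from rank %d\n", me, local[0], target);

    MPI_Win_free(&win);
    MPI_Free_mem(win_buf);
    MPI_Finalize();
    return 0;
}
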
Ejemplo n.º 13
0
int main(int argc, char *argv[])
{
    int rc, i, done, do_put_answer, work_unit_size;
    int my_world_rank, nranks, num_work_units, num_answers, provided;
    int work_prio, work_type, work_handle[PP_HANDLE_SIZE], work_len, answer_rank;
    int *num_handled_by_rank, num_handled_by_me;
    int dbgprintf_flag = 1, use_prio_for_reserve_flag = 0;
  
    int req_types[4];
    int num_types = 2;
    int type_vect[4] = {WORK,ANSWER};
    int num_types_in_req;

    char thread_type[32];
    char *work_unit_buf;

    double temptime, time_for_fake_work;
    double start_job_time, end_put_time, start_work_time, end_work_time;
    double total_work_time, total_loop_time;
    double total_reserve_time, total_get_time;

    do_put_answer      = DEFAULT_DO_PUT_ANSWER;  /* will halt by exhaustion after 5 secs */
    work_unit_size     = DEFAULT_WORK_UNIT_SIZE;
    num_work_units     = DEFAULT_NUM_WORK_UNITS;
    time_for_fake_work = DEFAULT_NSECS_FAKE_WORK;
    total_work_time = 0.0;
    total_loop_time = 0.0;
    total_reserve_time = 0.0;
    total_get_time = 0.0;

    for (i=1; i < argc; i++)
    {        
        // printf("av %s\n",argv[i]);
        if (strcmp(argv[i],"-a") == 0)
            do_put_answer = 1;
        else if (strcmp(argv[i],"-n") == 0)
            num_work_units = atoi(argv[++i]);
        else if (strcmp(argv[i],"-s") == 0)
            work_unit_size = atoi(argv[++i]);
        else if (strcmp(argv[i],"-t") == 0)
            time_for_fake_work = atof(argv[++i]);
        else
        {
            printf("st1: unrecognized cmd-line arg at %d :%s:\n",i,argv[i]);
            exit(-1);
        }
    }

    rc = MPI_Init_thread(NULL,NULL,MPI_THREAD_MULTIPLE,&provided);
    if (rc != MPI_SUCCESS)
    {
        printf("st1: MPI_Init_thread failed with rc=%d\n",rc);
        exit(-1);
    }
    switch (provided)
    {
        case MPI_THREAD_SINGLE: strcpy(thread_type,"MPI_THREAD_SINGLE"); break;
        case MPI_THREAD_FUNNELED: strcpy(thread_type,"MPI_THREAD_FUNNELED"); break;
        case MPI_THREAD_SERIALIZED: strcpy(thread_type,"MPI_THREAD_SERIALIZED"); break;
        case MPI_THREAD_MULTIPLE: strcpy(thread_type,"MPI_THREAD_MULTIPLE"); break;
        default: strcpy(thread_type,"UNKNOWN"); break;
    }
    printf("st1: MPI provides %s\n",thread_type);
    MPI_Comm_size(MPI_COMM_WORLD,&nranks);
    MPI_Comm_rank(MPI_COMM_WORLD,&my_world_rank);

    num_handled_by_me = 0;
    if (my_world_rank == 0)
        num_handled_by_rank = malloc(nranks * sizeof(int));
    else
        num_handled_by_rank = NULL;
  
    work_unit_buf = malloc(work_unit_size);
  
    rc = PP_Init(SRVR_MAX_MALLOC_AMT,num_types,type_vect);
  
    rc = MPI_Barrier( MPI_COMM_WORLD );
    start_job_time = MPI_Wtime();
    end_work_time  = MPI_Wtime();  /* dummy val until set below */
  
    if ( my_world_rank == 0 )  /* if master app, put work */
    {
        num_answers = 0;
        for (i=0; i < num_work_units; i++)
        {
            memset(work_unit_buf,'X',work_unit_size);
            if (work_unit_size >= 18)
                sprintf(work_unit_buf,"workunit %d",i);
            rc = PP_Put( work_unit_buf, work_unit_size, WORK, -1, -1, work_handle );
            // dbgprintf( 1, "put work_unit %d  rc %d\n", i, rc );
        }
        // dbgprintf(1,"st1: all work submitted after %f secs\n",MPI_Wtime()-start_job_time);
        printf("st1: all work submitted after %f secs\n",MPI_Wtime()-start_job_time);
    }
    rc = MPI_Barrier( MPI_COMM_WORLD );
    end_put_time = start_work_time = MPI_Wtime();
  
    done = 0;
    while ( !done )
    {
        if (do_put_answer)
        {
            if (my_world_rank == 0)
            {
                req_types[0] = ANSWER;
                req_types[1] = WORK;
                num_types_in_req = 2;
            }
            else
            {
                req_types[0] = WORK;
                num_types_in_req = 1;
            }
        }
        else
        {
            num_types_in_req = 0;
        }
        // dbgprintf( 1, "st1: reserving work\n" );
        temptime = MPI_Wtime();
        rc = PP_FindAndReserve(num_types_in_req,req_types,&work_len,
                               &work_type,&answer_rank,work_handle);
        // dbgprintf( 1, "st1: after reserve rc %d len %d type %d\n", rc, work_len, work_type );
        if ( rc == PP_EXHAUSTION )
        {
            // dbgprintf( 1, "st1: done by exhaustion\n" );
            printf( "st1: done by exhaustion\n" );
            break;
        }
        else if ( rc == PP_NO_MORE_WORK )
        {
            // dbgprintf( 1, "st1: done by no more work\n" );
            printf( "st1: done by no more work\n" );
            break;
        }
        else if (rc < 0)
        {
            // dbgprintf( 1, "st1: ** reserve failed, rc = %d\n", rc );
            printf( "st1: ** reserve failed, rc = %d\n", rc );
            ADLB_Abort(-1);
        }
        else if (work_type == WORK) 
        {
            total_reserve_time += MPI_Wtime() - temptime;  /* only count for work */
            temptime = MPI_Wtime();
            rc = PP_Get( work_unit_buf, work_handle );
            total_get_time += MPI_Wtime() - temptime;
            if (rc == PP_NO_MORE_WORK)
            {
                // dbgprintf( 1, "st1: no more work on get_reserved\n" );
                printf( "st1: no more work on get_reserved\n" );
                break;
            }
            else   /* got good work */
            {
                /* do dummy/fake work */
                num_handled_by_me++;
                if (time_for_fake_work == 0.0)
                {
                    // dbgprintf(1,"st1: worktime 0.0\n");
                }
                else
                {
                    temptime = MPI_Wtime();
                    while (1)
                    {
                        for (i=0; i < 1000000; i++)
                            ;
                        if (MPI_Wtime()-temptime > time_for_fake_work)
                            break;
                    }
                    // dbgprintf(1,"st1: worktime %f\n",MPI_Wtime()-temptime);
                }
                if (do_put_answer)
                {
                    rc = PP_Put( NULL, 0, ANSWER, -1, 0, work_handle );
                }
            }
            end_work_time = MPI_Wtime();  /* chgs on each work unit */
        }
        else if ( work_type == ANSWER) 
        {
            num_answers++;
            // dbgprintf(1111,"GENBATCH: GOT ANSWER %d\n",num_answers);
            if (num_answers >= num_work_units)
                PP_Set_problem_done();
        }
        else
        {
            // dbgprintf( 1, "st1: ** unexpected work type %d\n", work_type );
            printf( "st1: ** unexpected work type %d\n", work_type );
            PP_Abort( -1 );
        }
    }
    rc = MPI_Barrier( MPI_COMM_WORLD );
    // total_loop_time can be misleading since we have to wait for exhaustion
    // total_loop_time = MPI_Wtime() - start_work_time;
    // dbgprintf(1,"st1: total loop time %f\n",total_loop_time);
    /****
    total_work_time = end_work_time - start_work_time;
    dbgprintf(1,"st1: num handled by me %d\n",num_handled_by_me);
    dbgprintf(1,"st1: last end_work_time %f\n",end_work_time);
    dbgprintf(1,"st1: total work_time %f ; avg work time %f\n",
            total_work_time,total_work_time/((float)num_handled_by_me));
    dbgprintf(1,"st1: total reserve time %f ; avg reserve time %f\n",
            total_reserve_time,total_reserve_time/((float)num_handled_by_me));
    dbgprintf(1,"st1: total get time %f ; avg get time %f\n",
            total_get_time,total_get_time/((float)num_handled_by_me));
    ****/
    printf("st1: num handled by me %d\n",num_handled_by_me);
    printf("st1: last end_work_time %f\n",end_work_time);
    printf("st1: total work_time %f ; avg work time %f\n",
            total_work_time,total_work_time/((float)num_handled_by_me));
    printf("st1: total reserve time %f ; avg reserve time %f\n",
            total_reserve_time,total_reserve_time/((float)num_handled_by_me));
    printf("st1: total get time %f ; avg get time %f\n",
            total_get_time,total_get_time/((float)num_handled_by_me));
    MPI_Gather(&num_handled_by_me,1,MPI_INT,
               num_handled_by_rank,1,MPI_INT,
               0,MPI_COMM_WORLD);
    if (my_world_rank == 0)
    {
        for (i=0; i < nranks; i++)
            // dbgprintf(1,"st1: num handled by rank %d : total %d  per sec %.0f\n",
            printf("st1: num handled by rank %d : total %d  per sec %.0f\n",
                   i,num_handled_by_rank[i],
                   ((float)num_handled_by_rank[i])/total_work_time);
    }

    PP_Finalize();
    // printf("st1: calling mpi_finalize\n");
    rc = MPI_Finalized(&i);
    if ( ! i)
        MPI_Finalize();
    // printf("st1: past mpi_finalize\n");
  
    return 0;
}
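
The loop above pulls work through the PP_/ADLB calls (PP_FindAndReserve, PP_Get, PP_Put), whose exact signatures depend on that library. As a hedged illustration only, the sketch below reproduces the same master/worker shape with plain MPI point-to-point messages; the tags, the buffer size, and the round-robin hand-out are invented for the sketch and are not how the PP_ library distributes work.

#include <mpi.h>
#include <stdio.h>

#define TAG_WORK 1
#define TAG_STOP 2

int main(int argc, char **argv)
{
    int me, nproc;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    enum { NUM_UNITS = 100, UNIT_SIZE = 64 };   /* illustrative sizes */
    char unit[UNIT_SIZE];

    if (me == 0) {
        /* master: hand out the units round-robin, then send a stop tag */
        for (int i = 0; i < NUM_UNITS && nproc > 1; i++) {
            snprintf(unit, sizeof(unit), "workunit %d", i);
            MPI_Send(unit, UNIT_SIZE, MPI_CHAR, 1 + i % (nproc - 1),
                     TAG_WORK, MPI_COMM_WORLD);
        }
        for (int r = 1; r < nproc; r++)
            MPI_Send(unit, 0, MPI_CHAR, r, TAG_STOP, MPI_COMM_WORLD);
    } else {
        /* worker: receive until the stop tag arrives */
        int handled = 0;
        for (;;) {
            MPI_Status st;
            MPI_Recv(unit, UNIT_SIZE, MPI_CHAR, 0, MPI_ANY_TAG,
                     MPI_COMM_WORLD, &st);
            if (st.MPI_TAG == TAG_STOP) break;
            handled++;                          /* fake work would go here */
        }
        printf("rank %d handled %d units\n", me, handled);
    }

    MPI_Finalize();
    return 0;
}
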
Ejemplo n.º 14
0
int main(int argc, char* argv[])
{
    bool mig, sub;
    int it, nt, ix, nx, iz, nz, nx2, nz2, nzx, nzx2, ih, nh, nh2;
    int im, i, j, m2, it1, it2, its, ikz, ikx, ikh, n2, nk, snap;
    float dt, dx, dz, c, old, dh;
    float *curr, *prev, **img, **dat, **lft, **rht, **wave;
    sf_complex *cwave, *cwavem;
    sf_file data, image, left, right, snaps;

    /*MPI related*/
    int cpuid,numprocs;
    int provided;
    int n_local, o_local, nz_local;
    int ozx2;
    float *sendbuf, *recvbuf, *wave_all;
    int *rcounts, *displs;

    /*wall time*/
    double startTime, elapsedTime;
    double clockZero = 0.0;

    MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED,&provided);
    threads_ok = provided >= MPI_THREAD_FUNNELED;

    sf_init(argc,argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &cpuid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    if (!sf_getbool("mig",&mig)) mig=false;
    /* if n, modeling; if y, migration */

    if (!sf_getint("snap",&snap)) snap=0;
    /* interval for snapshots */

    snaps = (snap > 0)? sf_output("snaps"): NULL;
    /* (optional) snapshot file */

    if (mig) { /* migration */
	data = sf_input("input");
	image = sf_output("output");

	if (!sf_histint(data,"n1",&nh)) sf_error("No n1=");
	if (!sf_histfloat(data,"d1",&dh)) sf_error("No d1=");

	if (!sf_histint(data,"n2",&nx)) sf_error("No n2=");
	if (!sf_histfloat(data,"d2",&dx)) sf_error("No d2=");

	if (!sf_histint(data,"n3",&nt)) sf_error("No n3=");
	if (!sf_histfloat(data,"d3",&dt)) sf_error("No d3=");

	if (!sf_getint("nz",&nz)) sf_error("Need nz=");
	/* time samples (if migration) */
	if (!sf_getfloat("dz",&dz)) sf_error("Need dz=");
	/* time sampling (if migration) */
        
        if (cpuid==0) {
	sf_putint(image,"o1",0.);
	sf_putint(image,"n1",nz);
	sf_putfloat(image,"d1",dz);
	sf_putstring(image,"label1","Depth");
	sf_putint(image,"o2",0.);
	sf_putint(image,"n2",nx);
	sf_putfloat(image,"d2",dx);
	sf_putstring(image,"label2","Midpoint");
	sf_putint(image,"n3",1); /* stack for now */
        }
    } else { /* modeling */
	image = sf_input("input");
	data = sf_output("output");

	if (!sf_histint(image,"n1",&nz)) sf_error("No n1=");
	if (!sf_histfloat(image,"d1",&dz)) sf_error("No d1=");

	if (!sf_histint(image,"n2",&nx)) sf_error("No n2=");
	if (!sf_histfloat(image,"d2",&dx)) sf_error("No d2=");

	if (!sf_getint("nt",&nt)) sf_error("Need nt=");
	/* time samples (if modeling) */
	if (!sf_getfloat("dt",&dt)) sf_error("Need dt=");
	/* time sampling (if modeling) */

	if (!sf_getint("nh",&nh)) sf_error("Need nh=");
        /* offset samples (if modeling) */
	if (!sf_getfloat("dh",&dh)) sf_error("Need dh=");
	/* offset sampling (if modeling) */

        if (cpuid==0) {
	sf_putint(data,"n1",nh);
	sf_putfloat(data,"d1",dh);
	sf_putstring(data,"label1","Half-Offset");
	sf_putint(data,"o2",0.);
	sf_putint(data,"n2",nx);
	sf_putfloat(data,"d2",dx);
	sf_putstring(data,"label2","Midpoint");
	sf_putint(data,"n3",nt);
	sf_putfloat(data,"d3",dt);
	sf_putstring(data,"label3","Time");
	sf_putstring(data,"unit3","s");
        }
    }

    if (cpuid==0) {
    if (NULL != snaps) {
      sf_putint(snaps,"n1",nh);
      sf_putfloat(snaps,"d1",dh);
      sf_putstring(snaps,"label1","Half-Offset");

      sf_putint(snaps,"n2",nx);
      sf_putfloat(snaps,"d2",dx);
      sf_putstring(snaps,"label2","Midpoint");

      sf_putint(snaps,"n3",nz);
      sf_putfloat(snaps,"d3",dz);
      sf_putstring(snaps,"label3","Depth");

      sf_putint(snaps,"n4",nt/snap);
      sf_putfloat(snaps,"d4",dt*snap);
      if (mig) {
        sf_putfloat(snaps,"o4",(nt-1)*dt);
      } else {
        sf_putfloat(snaps,"o4",0.);
      }
      sf_putstring(snaps,"label4","Time");
    }
    }

    /* Mark the starting time. */
    startTime = walltime( &clockZero );

    nk = mcfft3_init(1,nh,nx,nz,&nh2,&nx2,&nz2,&n_local,&o_local);
    nz_local = (n_local < nz-o_local)? n_local:nz-o_local;
    sf_warning("Cpuid=%d,n2=%d,n1=%d,n0=%d,local_n0=%d,local_0_start=%d,nz_local=%d",cpuid,nh2,nx2,nz2,n_local,o_local,nz_local);
    if (cpuid==0)
      if (o_local!=0) sf_error("Cpuid and o_local inconsistant!");

    nzx = nz*nx*nh;
    //nzx2 = nz2*nx2*nh2;
    nzx2 = n_local*nx2*nh2;
    ozx2 = o_local*nx2*nh2;

    img = sf_floatalloc2(nz,nx);
    dat = sf_floatalloc2(nh,nx);

    /* propagator matrices */
    left = sf_input("left");
    right = sf_input("right");

    if (!sf_histbool(left,"sub",&sub) && !sf_getbool("sub",&sub)) sub=true;
    /* if -1 is included in the matrix */

    if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx);
    if (!sf_histint(left,"n2",&m2))  sf_error("No n2= in left");
    
    if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2);
    if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk);
 
    lft = sf_floatalloc2(nzx,m2);
    rht = sf_floatalloc2(m2,nk);

    sf_floatread(lft[0],nzx*m2,left);
    sf_floatread(rht[0],m2*nk,right);

    curr = sf_floatalloc(nzx2);
    prev = sf_floatalloc(nzx2);

    cwave  = sf_complexalloc(nk);
    cwavem = sf_complexalloc(nk);
    wave = sf_floatalloc2(nzx2,m2);

#ifdef _OPENMP
#pragma omp parallel for default(shared) private(iz)
#endif
    for (iz=0; iz < nzx2; iz++) {
	curr[iz]=0.;
	prev[iz]=0.;
    }

    sendbuf = prev;
    if (cpuid==0) {
      wave_all = sf_floatalloc(nh2*nx2*nz2);
      recvbuf = wave_all;
      rcounts = sf_intalloc(numprocs);
      displs  = sf_intalloc(numprocs);
    } else {
      wave_all = NULL;
      recvbuf = NULL;
      rcounts = NULL;
      displs = NULL;
    }

    MPI_Gather(&nzx2, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Gather(&ozx2, 1, MPI_INT, displs, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (mig) { /* migration */
	/* step backward in time */
	it1 = nt-1;
	it2 = -1;
	its = -1;	
    } else { /* modeling */
	sf_floatread(img[0],nz*nx,image);

	/* transpose and initialize at zero offset */
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(iz,ix)
#endif
	for (iz=0; iz < nz_local; iz++) {
	    for (ix=0; ix < nx; ix++) {
		curr[nh2*(ix+iz*nx2)]=img[ix][iz+o_local];
	    }
	}
	
	/* step forward in time */
	it1 = 0;
	it2 = nt;
	its = +1;
    }

    /* time stepping */
    for (it=it1; it != it2; it += its) {
	sf_warning("it=%d;",it);

	if (mig) { /* migration <- read data */
	    sf_floatread(dat[0],nx*nh,data);
	} else {
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(ix,ih)
#endif
	    for (ix=0; ix < nx; ix++) {
		for (ih=0; ih < nh; ih++) {
		    dat[ix][ih] = 0.;
		}
	    }
	}
	
	if (NULL != snaps && 0 == it%snap) {
          MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);
          if (cpuid==0) {
            for (iz = 0; iz < nz; iz++)
              for (ix = 0; ix < nx; ix++)
                sf_floatwrite(wave_all+nh2*(ix+nx2*iz),nh,snaps);
          }
        }

	/* at z=0 */
        if (cpuid==0) {
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(ix,ih)
#endif
	for (ix=0; ix < nx; ix++) {
	    for (ih=0; ih < nh; ih++) {
		if (mig) {
		    curr[ix*nh2+ih] += dat[ix][ih];
		} else {
		    dat[ix][ih] = curr[ix*nh2+ih];
		}
	    }
	}
        }

	/* matrix multiplication */
	mcfft3(curr,cwave);

	for (im = 0; im < m2; im++) {
          //for (ik = 0; ik < nk; ik++) {
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(ikz,ikx,ikh,i,j)
#endif
          for (ikz = 0; ikz < n_local; ikz++) {
            for (ikx = 0; ikx < nx2; ikx++) {
              for (ikh = 0; ikh < nh2; ikh++) {
                i = ikh + ikx*nh2 + (o_local+ikz)*nx2*nh2;
                j = ikh + ikx*nh2 + ikz*nx2*nh2;
#ifdef SF_HAS_COMPLEX_H
		cwavem[j] = cwave[j]*rht[i][im];
#else
		cwavem[j] = sf_crmul(cwave[j],rht[i][im]);
#endif
              }
            }
          }
          imcfft3(wave[im],cwavem);
	}

#ifdef _OPENMP
#pragma omp parallel for default(shared) private(ix,iz,ih,i,j,im,old,c)
#endif
        for (iz=0; iz < nz_local; iz++) {
	    for (ix = 0; ix < nx; ix++) {
		for (ih=0; ih < nh; ih++) {	
                    i = ih + ix*nh + (o_local+iz)*nx*nh;  /* original grid */
                    j = ih + ix*nh2+ iz*nx2*nh2; /* padded grid */
		
		    old = curr[j];

		    c = sub? 2*old: 0.0f;

		    c -= prev[j];

		    prev[j] = old;

		    for (im = 0; im < m2; im++) {
			c += lft[im][i]*wave[im][j];
		    }
		    
		    curr[j] = c;
		}
	    }
	}
	
	if (!mig) { /* modeling -> write out data */
          if (cpuid==0)
	    sf_floatwrite(dat[0],nx*nh,data);
	}
    }
    sf_warning(".");

    if (mig) {
      sendbuf = curr;
      MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);

      if (cpuid==0) {
        /* transpose */
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(iz,ix)
#endif
        for (iz=0; iz < nz; iz++) {
          for (ix=0; ix < nx; ix++) {
            img[ix][iz] = wave_all[nh2*(ix+iz*nx2)];
          }
        }
	sf_floatwrite(img[0],nz*nx,image);
      }

    }

    mcfft3_finalize();

    /* Work's done. Get the elapsed wall time. */
    elapsedTime = walltime( &startTime );
    /* Print the wall time and terminate. */
    if (cpuid==0)
      printf("\nwall time = %.5fs\n", elapsedTime);

    MPI_Finalize();

    exit(0);
}
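
The gather at the end of the migration branch above assembles unevenly sized per-rank slabs: each rank's element count (nzx2) and offset (ozx2) are first collected with MPI_Gather, and the slabs themselves are then combined with MPI_Gatherv. A minimal sketch of that pattern follows; the counts here are invented, and the displacements are computed on the root from the gathered counts rather than gathered directly, so treat it as an illustration rather than a drop-in replica.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int me, nproc;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    int mycount = 10 + me;             /* per-rank slab size (illustrative) */
    float *local = malloc(mycount * sizeof(float));
    for (int i = 0; i < mycount; i++) local[i] = (float)me;

    int *rcounts = NULL, *displs = NULL;
    float *full = NULL;
    if (me == 0) {
        rcounts = malloc(nproc * sizeof(int));
        displs  = malloc(nproc * sizeof(int));
    }

    /* collect every rank's count, then build displacements on the root */
    MPI_Gather(&mycount, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD);
    int total = 0;
    if (me == 0) {
        for (int r = 0; r < nproc; r++) { displs[r] = total; total += rcounts[r]; }
        full = malloc(total * sizeof(float));
    }

    MPI_Gatherv(local, mycount, MPI_FLOAT,
                full, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);

    if (me == 0) printf("assembled %d elements on rank 0\n", total);

    free(local); free(rcounts); free(displs); free(full);
    MPI_Finalize();
    return 0;
}
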
Ejemplo n.º 15
0
int main (int argc, char **argv){

// DOES NOT WORK!
//	// setup signal trap to catch Ctrl-C
//	struct sigaction new_action, old_action;
//	new_action.sa_handler = ctrlc_handler;
//	sigemptyset(&new_action.sa_mask);
//	new_action.sa_flags = 0;
////    if( sigaction (SIGINT, NULL, &old_action) == -1)
////            perror("Failed to retrieve old handle");
////    if (old_action.sa_handler != SIG_IGN)
////            if( sigaction (SIGINT, &new_action, NULL) == -1)
////                    perror("Failed to set new Handle");
//    if( sigaction (SIGTERM, NULL, &old_action) == -1)
//            perror("Failed to retrieve old handle");
//    if (old_action.sa_handler != SIG_IGN)
//            if( sigaction (SIGTERM, &new_action, NULL) == -1)
//                    perror("Failed to set new Handle");

	atexit(exit_handler);

	// real work,
	long long t3, t4;
	t3= cci::common::event::timestampInUS();
	int threading_provided;
	int err  = MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &threading_provided);
	MPI_Comm comm = MPI_COMM_WORLD;

	int rank=-1;
	MPI_Comm_rank(comm, &rank);

	time_t now = time(0);
	// Convert now to tm struct for local timezone
	tm* localtm = localtime(&now);
	if (rank == 0) cci::common::Debug::print("The START local date and time is: %s\n", asctime(localtm));

	if (rank == 0) cci::common::Debug::print("initialized MPI\n");
	// IMPORTANT: need to initialize random number generator right now.
	srand(rank*113 + 1);
	//srand(cci::common::event::timestampInUS());


	long long t1, t2;

	t1 = cci::common::event::timestampInUS();
	conf = new cci::rt::syntest::SynDataConfiguratorFull(argc, argv);
	cci::common::LogSession *logsession = NULL;
	if (conf->getLogger() != NULL) logsession = conf->getLogger()->getSession("setup");

	cci::rt::Process *p = new cci::rt::Process(comm, argc, argv, conf);
	p->setup();
	t2 = cci::common::event::timestampInUS();
	if (logsession != NULL) logsession->log(cci::common::event(0, std::string("proc setup"), t1, t2, std::string(), ::cci::common::event::NETWORK_WAIT));

	p->run();

	t1 = cci::common::event::timestampInUS();
	p->teardown();
	t2 = cci::common::event::timestampInUS();
	if (logsession != NULL) logsession->log(cci::common::event(0, std::string("proc teardown"), t1, t2, std::string(), ::cci::common::event::NETWORK_WAIT));

	if (p != NULL) delete p;
	MPI_Barrier(comm);

	t4= cci::common::event::timestampInUS();
	if (rank ==0) cci::common::Debug::print("finished processing in %lu us.\n", long(t4-t3));

	writeLog();


	exit(0);

	return 0;

}
Ejemplo n.º 16
0
int main(int argc, char **argv) {
	int ret;
	int provided;
	int i;
	struct mdhim_t *md;
	int total = 0;
	struct mdhim_brm_t *brm, *brmp;
	struct mdhim_bgetrm_t *bgrm, *bgrmp;
	struct timeval start_tv, end_tv;
	char     *db_path = "./";
	char     *db_name = "mdhimTstDB-";
	int      dbug = MLOG_DBG; //MLOG_CRIT=1, MLOG_DBG=2
	mdhim_options_t *db_opts; // Local variable for db create options to be passed
	int db_type = LEVELDB; //(data_store.h) 
	long double put_time = 0;
	long double get_time = 0;
	struct index_t *secondary_local_index;
	struct secondary_bulk_info *secondary_info;
	int num_keys[KEYS];
	MPI_Comm comm;

	// Create options for DB initialization
	db_opts = mdhim_options_init();
	mdhim_options_set_db_path(db_opts, db_path);
	mdhim_options_set_db_name(db_opts, db_name);
	mdhim_options_set_db_type(db_opts, db_type);
	mdhim_options_set_key_type(db_opts, MDHIM_LONG_INT_KEY);
	mdhim_options_set_max_recs_per_slice(db_opts, SLICE_SIZE);
        mdhim_options_set_server_factor(db_opts, 4);
	mdhim_options_set_debug_level(db_opts, dbug);

	//Initialize MPI with multiple thread support
	ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
	if (ret != MPI_SUCCESS) {
		printf("Error initializing MPI with threads\n");
		exit(1);
	}

	//Quit if MPI didn't initialize with multiple threads
	if (provided != MPI_THREAD_MULTIPLE) {
                printf("Not able to enable MPI_THREAD_MULTIPLE mode\n");
                exit(1);
        }

	gettimeofday(&start_tv, NULL);

	//Initialize MDHIM
	comm = MPI_COMM_WORLD;
	md = mdhimInit(&comm, db_opts);
	if (!md) {
		printf("Error initializing MDHIM\n");
		MPI_Abort(MPI_COMM_WORLD, ret);
		exit(1);
	}	

	key_lens = malloc(sizeof(int) * KEYS);
	value_lens = malloc(sizeof(int) * KEYS);
	keys = malloc(sizeof(uint64_t *) * KEYS);
	values = malloc(sizeof(uint64_t *) * KEYS);
	secondary_key_lens = malloc(sizeof(int *) * KEYS);
	secondary_keys = malloc(sizeof(uint64_t **) * KEYS);
	memset(secondary_keys, 0, sizeof(uint64_t **) * KEYS);

	/* Primary and secondary key entries */
	MPI_Barrier(MPI_COMM_WORLD);	
	total = 0;
	secondary_local_index = create_local_index(md, LEVELDB, 
						   MDHIM_LONG_INT_KEY, NULL);
	for (i = 0; i < KEYS; i++) {
		num_keys[i] = 1;
	}
	while (total != TOTAL_KEYS) {
		//Populate the primary keys and values to insert
		gen_keys_values(md->mdhim_rank, total);
		secondary_info = mdhimCreateSecondaryBulkInfo(secondary_local_index, 
							      (void ***) secondary_keys, 
							      secondary_key_lens, num_keys, 
							      SECONDARY_LOCAL_INFO);
		//record the start time
		start_record(&start_tv);	
		//Insert the primary keys into MDHIM
		brm = mdhimBPut(md, (void **) keys, key_lens,  
				(void **) values, value_lens, KEYS, 
				NULL, secondary_info);
		//Record the end time
		end_record(&end_tv);
		//Add the final time
		add_time(&start_tv, &end_tv, &put_time);
                if (!brm || brm->error) {
                        printf("Rank - %d: Error inserting keys/values into MDHIM\n", md->mdhim_rank);
                } 
		while (brm) {
			if (brm->error < 0) {
				printf("Rank: %d - Error inserting key/values info MDHIM\n", md->mdhim_rank);
			}
	
			brmp = brm->next;
			//Free the message
			mdhim_full_release_msg(brm);
			brm = brmp;
		}
	
		free_key_values();
		mdhimReleaseSecondaryBulkInfo(secondary_info);
		total += KEYS;
	}

	/* End primary and secondary entries */


	MPI_Barrier(MPI_COMM_WORLD);
	/* End secondary key entries */

	//Commit the database
	ret = mdhimCommit(md, md->primary_index);
	if (ret != MDHIM_SUCCESS) {
		printf("Error committing MDHIM database\n");
	} else {
		printf("Committed MDHIM database\n");
	}

	//Get the stats for the secondary index so the client figures out who to query
	ret = mdhimStatFlush(md, secondary_local_index);
	if (ret != MDHIM_SUCCESS) {
		printf("Error getting stats\n");
	} else {
		printf("Got stats\n");
	}

	MPI_Barrier(MPI_COMM_WORLD);
	//Retrieve the primary key's values from the secondary key
	total = 0;
	while (total != TOTAL_KEYS) {
		//Populate the keys and values to retrieve
		gen_keys_values(md->mdhim_rank, total);
		start_record(&start_tv);
		//Get the values back for each key inserted
		for (i = 0; i < KEYS; i++) {
			bgrm = mdhimBGet(md, secondary_local_index, 
					 (void **) secondary_keys[i], secondary_key_lens[i], 
					 1, MDHIM_GET_PRIMARY_EQ);
		}

		end_record(&end_tv);
		add_time(&start_tv, &end_tv, &get_time);
		while (bgrm) {
			/*	if (!bgrm || bgrm->error) {
				printf("Rank: %d - Error retrieving values starting at: %llu", 
				       md->mdhim_rank, (long long unsigned int) *keys[0]);
				       } */
	
			//Validate that the data retrieved is the correct data
			for (i = 0; i < bgrm->num_keys && !bgrm->error; i++) {   				
				if (!bgrm->value_lens[i]) {
					printf("Rank: %d - Got an empty value for key: %llu", 
					       md->mdhim_rank, *(long long unsigned int *)bgrm->keys[i]);
					continue;
				}
			}

			bgrmp = bgrm;
			bgrm = bgrm->next;
			mdhim_full_release_msg(bgrmp);
		}

		free_key_values();
		total += KEYS;
	}

	free(key_lens);
	free(keys);
	free(values);
	free(value_lens);
	free(secondary_key_lens);
	free(secondary_keys);
	MPI_Barrier(MPI_COMM_WORLD);

	//Quit MDHIM
	ret = mdhimClose(md);
	gettimeofday(&end_tv, NULL);
	if (ret != MDHIM_SUCCESS) {
		printf("Error closing MDHIM\n");
	}
	
	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Finalize();
	printf("Took: %Lf seconds to put %d keys\n", 
	       put_time, TOTAL_KEYS * 2);
	printf("Took: %Lf seconds to get %d keys/values\n", 
	       get_time, TOTAL_KEYS * 2);

	return 0;
}
Ejemplo n.º 17
0
Archivo: pinit.c Proyecto: hansec/petsc
/*@C
   PetscInitialize - Initializes the PETSc database and MPI.
   PetscInitialize() calls MPI_Init() if that has yet to be called,
   so this routine should always be called near the beginning of
   your program -- usually the very first line!

   Collective on MPI_COMM_WORLD or PETSC_COMM_WORLD if it has been set

   Input Parameters:
+  argc - count of number of command line arguments
.  args - the command line arguments
.  file - [optional] PETSc database file; also checks ~username/.petscrc and .petscrc. Use NULL to not check for a
          code-specific file. Use -skip_petscrc in the code-specific file to skip the .petscrc files
-  help - [optional] Help message to print, use NULL for no message

   If you wish PETSc code to run ONLY on a subcommunicator of MPI_COMM_WORLD, create that
   communicator first and assign it to PETSC_COMM_WORLD BEFORE calling PetscInitialize(). Thus if you are running a
   four process job and two processes will run PETSc and have PetscInitialize() and PetscFinalize() and two process will not,
   then do this. If ALL processes in the job are using PetscInitialize() and PetscFinalize() then you don't need to do this, even
   if different subcommunicators of the job are doing different things with PETSc.

   Options Database Keys:
+  -start_in_debugger [noxterm,dbx,xdb,gdb,...] - Starts program in debugger
.  -on_error_attach_debugger [noxterm,dbx,xdb,gdb,...] - Starts debugger when error detected
.  -on_error_emacs <machinename> causes emacsclient to jump to error file
.  -on_error_abort calls abort() when error detected (no traceback)
.  -on_error_mpiabort calls MPI_abort() when error detected
.  -error_output_stderr prints error messages to stderr instead of the default stdout
.  -error_output_none does not print the error messages (but handles errors in the same way as if this was not called)
.  -debugger_nodes [node1,node2,...] - Indicates nodes to start in debugger
.  -debugger_pause [sleeptime] (in seconds) - Pauses debugger
.  -stop_for_debugger - Print message on how to attach debugger manually to
                        process and wait (-debugger_pause) seconds for attachment
.  -malloc - Indicates use of PETSc error-checking malloc (on by default for debug version of libraries)
.  -malloc no - Indicates not to use error-checking malloc
.  -malloc_debug - check for memory corruption at EVERY malloc or free
.  -malloc_test - like -malloc_dump -malloc_debug, but only active for debugging builds
.  -fp_trap - Stops on floating point exceptions (Note that on the
              IBM RS6000 this slows code by at least a factor of 10.)
.  -no_signal_handler - Indicates not to trap error signals
.  -shared_tmp - indicates /tmp directory is shared by all processors
.  -not_shared_tmp - each processor has own /tmp
.  -tmp - alternative name of /tmp directory
.  -get_total_flops - returns total flops done by all processors
.  -memory_info - Print memory usage at end of run
-  -server <port> - start PETSc webserver (default port is 8080)

   Options Database Keys for Profiling:
   See the <a href="../../docs/manual.pdf#nameddest=ch_profiling">profiling chapter of the users manual</a> for details.
+  -info <optional filename> - Prints verbose information to the screen
.  -info_exclude <null,vec,mat,pc,ksp,snes,ts> - Excludes some of the verbose messages
.  -log_sync - Log the synchronization in scatters, inner products and norms
.  -log_trace [filename] - Print traces of all PETSc calls to the screen (useful to determine where a program
        hangs without running in the debugger).  See PetscLogTraceBegin().
.  -log_summary [filename] - Prints summary of flop and timing information to screen. If the filename is specified the
        summary is written to the file.  See PetscLogView().
.  -log_summary_python [filename] - Prints data on flop and timing usage to a file or screen. See PetscLogPrintSViewPython().
.  -log_all [filename] - Logs extensive profiling information. See PetscLogDump().
.  -log [filename] - Logs basic profiling information. See PetscLogDump().
-  -log_mpe [filename] - Creates a logfile viewable by the utility Jumpshot (in MPICH distribution)

    Only one of -log_trace, -log_summary, -log_all, -log, or -log_mpe may be used at a time

   Environmental Variables:
+   PETSC_TMP - alternative tmp directory
.   PETSC_SHARED_TMP - tmp is shared by all processes
.   PETSC_NOT_SHARED_TMP - each process has its own private tmp
.   PETSC_VIEWER_SOCKET_PORT - socket number to use for socket viewer
-   PETSC_VIEWER_SOCKET_MACHINE - machine to use for socket viewer to connect to


   Level: beginner

   Notes:
   If for some reason you must call MPI_Init() separately, call
   it before PetscInitialize().

   Fortran Version:
   In Fortran this routine has the format
$       call PetscInitialize(file,ierr)

+   ierr - error return code
-  file - [optional] PETSc database file; also checks ~username/.petscrc and .petscrc. Use NULL_CHARACTER to not check for a
          code-specific file. Use -skip_petscrc in the code-specific file to skip the .petscrc files

   Important Fortran Note:
   In Fortran, you MUST use NULL_CHARACTER to indicate a
   null character string; you CANNOT just use NULL as
   in the C version. See the <a href="../../docs/manual.pdf">users manual</a> for details.

   If your main program is C but you call Fortran code that also uses PETSc you need to call PetscInitializeFortran() soon after
   calling PetscInitialize().

   Concepts: initializing PETSc

.seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscInitializeNoArguments()

@*/
PetscErrorCode  PetscInitialize(int *argc,char ***args,const char file[],const char help[])
{
  PetscErrorCode ierr;
  PetscMPIInt    flag, size;
  PetscInt       nodesize;
  PetscBool      flg;
  char           hostname[256];

  PetscFunctionBegin;
  if (PetscInitializeCalled) PetscFunctionReturn(0);

  /* these must be initialized in a routine, not as a constant declaration*/
  PETSC_STDOUT = stdout;
  PETSC_STDERR = stderr;

  ierr = PetscOptionsCreate();CHKERRQ(ierr);

  /*
     We initialize the program name here (before MPI_Init()) because MPICH has a bug
     that sets args[0] on all processors to the value of args[0] on the first processor.
  */
  if (argc && *argc) {
    ierr = PetscSetProgramName(**args);CHKERRQ(ierr);
  } else {
    ierr = PetscSetProgramName("Unknown Name");CHKERRQ(ierr);
  }

  ierr = MPI_Initialized(&flag);CHKERRQ(ierr);
  if (!flag) {
    if (PETSC_COMM_WORLD != MPI_COMM_NULL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"You cannot set PETSC_COMM_WORLD if you have not initialized MPI first");
#if defined(PETSC_HAVE_MPI_INIT_THREAD)
    {
      PetscMPIInt provided;
      ierr = MPI_Init_thread(argc,args,MPI_THREAD_FUNNELED,&provided);CHKERRQ(ierr);
    }
#else
    ierr = MPI_Init(argc,args);CHKERRQ(ierr);
#endif
    PetscBeganMPI = PETSC_TRUE;
  }
  if (argc && args) {
    PetscGlobalArgc = *argc;
    PetscGlobalArgs = *args;
  }
  PetscFinalizeCalled = PETSC_FALSE;

  if (PETSC_COMM_WORLD == MPI_COMM_NULL) PETSC_COMM_WORLD = MPI_COMM_WORLD;
  ierr = MPI_Comm_set_errhandler(PETSC_COMM_WORLD,MPI_ERRORS_RETURN);CHKERRQ(ierr);

  /* Done after init due to a bug in MPICH-GM? */
  ierr = PetscErrorPrintfInitialize();CHKERRQ(ierr);

  ierr = MPI_Comm_rank(MPI_COMM_WORLD,&PetscGlobalRank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(MPI_COMM_WORLD,&PetscGlobalSize);CHKERRQ(ierr);

  MPIU_BOOL = MPI_INT;
  MPIU_ENUM = MPI_INT;

  /*
     Initialize the global complex variable; with shared libraries the
     constructors for global variables are not called, at least on IRIX.
  */
#if defined(PETSC_HAVE_COMPLEX)
  {
#if defined(PETSC_CLANGUAGE_CXX)
    PetscComplex ic(0.0,1.0);
    PETSC_i = ic;
#elif defined(PETSC_CLANGUAGE_C)
    PETSC_i = _Complex_I;
#endif
  }

#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
  ierr = MPI_Type_contiguous(2,MPI_DOUBLE,&MPIU_C_DOUBLE_COMPLEX);CHKERRQ(ierr);
  ierr = MPI_Type_commit(&MPIU_C_DOUBLE_COMPLEX);CHKERRQ(ierr);
  ierr = MPI_Type_contiguous(2,MPI_FLOAT,&MPIU_C_COMPLEX);CHKERRQ(ierr);
  ierr = MPI_Type_commit(&MPIU_C_COMPLEX);CHKERRQ(ierr);
#endif
#endif /* PETSC_HAVE_COMPLEX */

  /*
     Create the PETSc MPI reduction operator that sums the first
     half of the entries and maxes the second half.
  */
  ierr = MPI_Op_create(PetscMaxSum_Local,1,&PetscMaxSum_Op);CHKERRQ(ierr);

#if defined(PETSC_USE_REAL___FLOAT128)
  ierr = MPI_Type_contiguous(2,MPI_DOUBLE,&MPIU___FLOAT128);CHKERRQ(ierr);
  ierr = MPI_Type_commit(&MPIU___FLOAT128);CHKERRQ(ierr);
#if defined(PETSC_HAVE_COMPLEX)
  ierr = MPI_Type_contiguous(4,MPI_DOUBLE,&MPIU___COMPLEX128);CHKERRQ(ierr);
  ierr = MPI_Type_commit(&MPIU___COMPLEX128);CHKERRQ(ierr);
#endif
  ierr = MPI_Op_create(PetscMax_Local,1,&MPIU_MAX);CHKERRQ(ierr);
  ierr = MPI_Op_create(PetscMin_Local,1,&MPIU_MIN);CHKERRQ(ierr);
#endif

#if (defined(PETSC_HAVE_COMPLEX) && !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)) || defined(PETSC_USE_REAL___FLOAT128)
  ierr = MPI_Op_create(PetscSum_Local,1,&MPIU_SUM);CHKERRQ(ierr);
#endif

  ierr = MPI_Type_contiguous(2,MPIU_SCALAR,&MPIU_2SCALAR);CHKERRQ(ierr);
  ierr = MPI_Type_commit(&MPIU_2SCALAR);CHKERRQ(ierr);
  ierr = MPI_Op_create(PetscADMax_Local,1,&PetscADMax_Op);CHKERRQ(ierr);
  ierr = MPI_Op_create(PetscADMin_Local,1,&PetscADMin_Op);CHKERRQ(ierr);

#if defined(PETSC_USE_64BIT_INDICES) || !defined(MPI_2INT)
  ierr = MPI_Type_contiguous(2,MPIU_INT,&MPIU_2INT);CHKERRQ(ierr);
  ierr = MPI_Type_commit(&MPIU_2INT);CHKERRQ(ierr);
#endif

  /*
     Attributes to be set on PETSc communicators
  */
  ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelCounter,&Petsc_Counter_keyval,(void*)0);CHKERRQ(ierr);
  ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelComm,&Petsc_InnerComm_keyval,(void*)0);CHKERRQ(ierr);
  ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelComm,&Petsc_OuterComm_keyval,(void*)0);CHKERRQ(ierr);

  /*
     Build the options database
  */
  ierr = PetscOptionsInsert(argc,args,file);CHKERRQ(ierr);


  /*
     Print main application help message
  */
  ierr = PetscOptionsHasName(NULL,"-help",&flg);CHKERRQ(ierr);
  if (help && flg) {
    ierr = PetscPrintf(PETSC_COMM_WORLD,help);CHKERRQ(ierr);
  }
  ierr = PetscOptionsCheckInitial_Private();CHKERRQ(ierr);

  /* SHOULD PUT IN GUARDS: Make sure logging is initialized, even if we do not print it out */
#if defined(PETSC_USE_LOG)
  ierr = PetscLogBegin_Private();CHKERRQ(ierr);
#endif

  /*
     Load the dynamic libraries (on machines that support them), this registers all
     the solvers etc. (On non-dynamic machines this initializes the PetscDraw and PetscViewer classes)
  */
  ierr = PetscInitialize_DynamicLibraries();CHKERRQ(ierr);

  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  ierr = PetscInfo1(0,"PETSc successfully started: number of processors = %d\n",size);CHKERRQ(ierr);
  ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
  ierr = PetscInfo1(0,"Running on machine: %s\n",hostname);CHKERRQ(ierr);

  ierr = PetscOptionsCheckInitial_Components();CHKERRQ(ierr);
  /* Check the options database for options related to the options database itself */
  ierr = PetscOptionsSetFromOptions();CHKERRQ(ierr);

#if defined(PETSC_USE_PETSC_MPI_EXTERNAL32)
  /*
      Tell MPI about our own data representation converter, this would/should be used if extern32 is not supported by the MPI

      Currently not used because it is not supported by MPICH.
  */
#if !defined(PETSC_WORDS_BIGENDIAN)
  ierr = MPI_Register_datarep((char*)"petsc",PetscDataRep_read_conv_fn,PetscDataRep_write_conv_fn,PetscDataRep_extent_fn,NULL);CHKERRQ(ierr);
#endif
#endif

  ierr = PetscOptionsGetInt(NULL,"-hmpi_spawn_size",&nodesize,&flg);CHKERRQ(ierr);
  if (flg) {
#if defined(PETSC_HAVE_MPI_COMM_SPAWN)
    ierr = PetscHMPISpawn((PetscMPIInt) nodesize);CHKERRQ(ierr); /* worker nodes never return from here; they go directly to PetscEnd() */
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PETSc built without MPI 2 (MPI_Comm_spawn) support, use -hmpi_merge_size instead");
#endif
  } else {
    ierr = PetscOptionsGetInt(NULL,"-hmpi_merge_size",&nodesize,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = PetscHMPIMerge((PetscMPIInt) nodesize,NULL,NULL);CHKERRQ(ierr);
      if (PetscHMPIWorker) { /* if worker then never enter user code */
        PetscInitializeCalled = PETSC_TRUE;
        PetscEnd();
      }
    }
  }

#if defined(PETSC_HAVE_CUDA)
  {
    PetscMPIInt p;
    for (p = 0; p < PetscGlobalSize; ++p) {
      if (p == PetscGlobalRank) cublasInit();
      ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr);
    }
  }
#endif

  ierr = PetscOptionsHasName(NULL,"-python",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscInitializeCalled = PETSC_TRUE;
    ierr = PetscPythonInitialize(NULL,NULL);CHKERRQ(ierr);
  }

  ierr = PetscThreadCommInitializePackage();CHKERRQ(ierr);

  /*
      Setup building of stack frames for all function calls
  */
#if defined(PETSC_USE_DEBUG)
  PetscThreadLocalRegister((PetscThreadKey*)&petscstack); /* Creates petscstack_key if needed */
  ierr = PetscStackCreate();CHKERRQ(ierr);
#endif

#if defined(PETSC_SERIALIZE_FUNCTIONS)
  ierr = PetscFPTCreate(10000);CHKERRQ(ierr);
#endif

  /*
      Once we are completely initialized we can set this variable
  */
  PetscInitializeCalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
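
As the documentation block above states, PetscInitialize() should be roughly the first call in a program and PetscFinalize() the last PETSc call. A minimal hedged sketch of that calling convention, in the style of the PETSc examples of this era, is shown below; the help string and the printout are made up for illustration.

#include <petscsys.h>

static char help[] = "Minimal PetscInitialize()/PetscFinalize() example.\n";

int main(int argc, char **argv)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  ierr = PetscInitialize(&argc, &argv, (char*)0, help);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD, "PETSc initialized on %d MPI processes\n", size);CHKERRQ(ierr);
  /* ... create vectors, matrices, and solvers here ... */
  ierr = PetscFinalize();
  return 0;
}
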
Ejemplo n.º 18
0
int main(int argc, char **argv)
{
#ifdef HAVE_MPI
    int required_thread_support=MPI_THREAD_SINGLE;
    int provided_thread_support;
    MPI_Init_thread(&argc, &argv, required_thread_support, &provided_thread_support);
    assert(required_thread_support==provided_thread_support);
#endif

    bool verbose = false;
    if(argc>1) {
        verbose = std::string(argv[1])=="-v";
    }

#ifdef HAVE_VTK
    Mesh<double> *mesh=VTKTools<double>::import_vtu("../data/box10x10x10.vtu");
    mesh->create_boundary();

    MetricField<double,3> metric_field(*mesh);

    size_t NNodes = mesh->get_number_nodes();

    std::vector<double> psi(NNodes);
    for(size_t i=0; i<NNodes; i++)
        psi[i] =
            pow(mesh->get_coords(i)[0], 4) +
            pow(mesh->get_coords(i)[1], 4) +
            pow(mesh->get_coords(i)[2], 4);

    metric_field.add_field(&(psi[0]), 0.001);
    metric_field.update_mesh();

    Refine<double,3> adapt(*mesh);

    double tic = get_wtime();
    for(int i=0; i<2; i++)
        adapt.refine(sqrt(2.0));
    double toc = get_wtime();

    if(verbose)
        mesh->verify();

    mesh->defragment();

    VTKTools<double>::export_vtu("../data/test_refine_3d", mesh);

    double qmean = mesh->get_qmean();
    double qmin = mesh->get_qmin();
    int nelements = mesh->get_number_elements();

    if(verbose)
        std::cout<<"Refine loop time:    "<<toc-tic<<std::endl
                 <<"Number elements:     "<<nelements<<std::endl
                 <<"Quality mean:        "<<qmean<<std::endl
                 <<"Quality min:         "<<qmin<<std::endl;

    long double area = mesh->calculate_area();
    long double volume = mesh->calculate_volume();

    long double ideal_area(6), ideal_volume(1);
    std::cout<<"Checking area == 6: ";
    if(std::abs(area-ideal_area)/std::max(area, ideal_area)<DBL_EPSILON)
        std::cout<<"pass"<<std::endl;
    else
        std::cout<<"fail (area="<<area<<")"<<std::endl;

    std::cout<<"Checking volume == 1: ";
    if(std::abs(volume-ideal_volume)/std::max(volume, ideal_volume)<DBL_EPSILON)
        std::cout<<"pass"<<std::endl;
    else
        std::cout<<"fail (volume="<<volume<<")"<<std::endl;

    delete mesh;
#else
    std::cerr<<"Pragmatic was configured without VTK"<<std::endl;
#endif

#ifdef HAVE_MPI
    MPI_Finalize();
#endif

    return 0;
}
Ejemplo n.º 19
0
int main(int argc,char *argv[])
{
  int j,j_max,k,k_max = 1;
#ifdef HAVE_LIBLEMON
  paramsXlfInfo *xlfInfo;
#endif
  int status = 0;
  
  static double t1,t2,dt,sdt,dts,qdt,sqdt;
  double antioptaway=0.0;

#ifdef TM_USE_MPI
  static double dt2;
  
  DUM_DERI = 6;
  DUM_MATRIX = DUM_DERI+8;
  NO_OF_SPINORFIELDS = DUM_MATRIX+2;

#  ifdef TM_USE_OMP
  int mpi_thread_provided;
  MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided);
#  else
  MPI_Init(&argc, &argv);
#  endif
  MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id);

#else
  g_proc_id = 0;
#endif

  g_rgi_C1 = 1.; 

    /* Read the input file */
  if((status = read_input("benchmark.input")) != 0) {
    fprintf(stderr, "Could not find input file: benchmark.input\nAborting...\n");
    exit(-1);
  }

#ifdef TM_USE_OMP
  init_openmp();
#endif

  tmlqcd_mpi_init(argc, argv);


  
  if(g_proc_id==0) {
#ifdef SSE
    printf("# The code was compiled with SSE instructions\n");
#endif
#ifdef SSE2
    printf("# The code was compiled with SSE2 instructions\n");
#endif
#ifdef SSE3
    printf("# The code was compiled with SSE3 instructions\n");
#endif
#ifdef P4
    printf("# The code was compiled for Pentium4\n");
#endif
#ifdef OPTERON
    printf("# The code was compiled for AMD Opteron\n");
#endif
#ifdef _GAUGE_COPY
    printf("# The code was compiled with -D_GAUGE_COPY\n");
#endif
#ifdef BGL
    printf("# The code was compiled for Blue Gene/L\n");
#endif
#ifdef BGP
    printf("# The code was compiled for Blue Gene/P\n");
#endif
#ifdef _USE_HALFSPINOR
    printf("# The code was compiled with -D_USE_HALFSPINOR\n");
#endif    
#ifdef _USE_SHMEM
    printf("# The code was compiled with -D_USE_SHMEM\n");
#  ifdef _PERSISTENT
    printf("# The code was compiled for persistent MPI calls (halfspinor only)\n");
#  endif
#endif
#ifdef TM_USE_MPI
#  ifdef _NON_BLOCKING
    printf("# The code was compiled for non-blocking MPI calls (spinor and gauge)\n");
#  endif
#endif
    printf("\n");
    fflush(stdout);
  }
  
  
#ifdef _GAUGE_COPY
  init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
#else
  init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0);
#endif
  init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand);

  if(even_odd_flag) {
    j = init_spinor_field(VOLUMEPLUSRAND/2, 2*k_max+1);
  }
  else {
    j = init_spinor_field(VOLUMEPLUSRAND, 2*k_max);
  }

  if ( j!= 0) {
    fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n");
    exit(0);
  }
  j = init_moment_field(VOLUME, VOLUMEPLUSRAND + g_dbw2rand);
  if ( j!= 0) {
    fprintf(stderr, "Not enough memory for moment fields! Aborting...\n");
    exit(0);
  }
  
  if(g_proc_id == 0) {
    fprintf(stdout,"# The number of processes is %d \n",g_nproc);
    printf("# The lattice size is %d x %d x %d x %d\n",
	   (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ));
    printf("# The local lattice size is %d x %d x %d x %d\n", 
	   (int)(T), (int)(LX), (int)(LY),(int) LZ);
    if(even_odd_flag) {
      printf("# benchmarking the even/odd preconditioned Dirac operator\n");
    }
    else {
      printf("# benchmarking the standard Dirac operator\n");
    }
    fflush(stdout);
  }
  
  /* define the geometry */
  geometry();
  /* define the boundary conditions for the fermion fields */
  boundary(g_kappa);

#ifdef _USE_HALFSPINOR
  j = init_dirac_halfspinor();
  if ( j!= 0) {
    fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n");
    exit(0);
  }
  if(g_sloppy_precision_flag == 1) {
    g_sloppy_precision = 1;
    j = init_dirac_halfspinor32();
    if ( j!= 0) {
      fprintf(stderr, "Not enough memory for 32-Bit halfspinor fields! Aborting...\n");
      exit(0);
    }
  }
#  if (defined _PERSISTENT)
  init_xchange_halffield();
#  endif
#endif  

  status = check_geometry();
  if (status != 0) {
    fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n");
    exit(1);
  }
#if (defined TM_USE_MPI && !(defined _USE_SHMEM))
  check_xchange(); 
#endif

  start_ranlux(1, 123456);
  random_gauge_field(reproduce_randomnumber_flag, g_gauge_field);

#ifdef TM_USE_MPI
  /*For parallelization: exchange the gaugefield */
  xchange_gauge(g_gauge_field);
#endif

  if(even_odd_flag) {
    sdt=0.; sqdt=0.0;
    /*initialize the pseudo-fermion fields*/
    for (k = 0; k < k_max; k++) {
      random_spinor_field_eo(g_spinor_field[k], reproduce_randomnumber_flag, RN_GAUSS);
    }
    
    j_max=512;
    antioptaway=0.0;
    /* compute approximately how many applications we need to do to get a reliable measurement */
#ifdef TM_USE_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
    t1 = gettime();
    for (j=0;j<j_max;j++) {
      for (k=0;k<k_max;k++) {
        Hopping_Matrix(0, g_spinor_field[k+k_max], g_spinor_field[k]);
        Hopping_Matrix(1, g_spinor_field[2*k_max], g_spinor_field[k+k_max]);
        antioptaway+=creal(g_spinor_field[2*k_max][0].s0.c0);
      }
    }
    dt = gettime()-t1;
    // division by g_nproc because we will average over processes
    j = (int)(ceil(j_max*31.0/dt/g_nproc));
#ifdef TM_USE_MPI
    MPI_Allreduce(&j,&j_max, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
#else
    j_max = j;
#endif



    /* perform the actual benchmark */
#ifdef TM_USE_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
    t1 = gettime();
    antioptaway=0.0;
    for (j=0;j<j_max;j++) {
      for (k=0;k<k_max;k++) {
        Hopping_Matrix(0, g_spinor_field[k+k_max], g_spinor_field[k]);
        Hopping_Matrix(1, g_spinor_field[2*k_max], g_spinor_field[k+k_max]);
        antioptaway+=creal(g_spinor_field[2*k_max][0].s0.c0);
      }
    }
    dt = gettime()-t1;
#ifdef TM_USE_MPI
    MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
    sdt = dt;
#endif
    
    qdt=dt*dt;
#ifdef TM_USE_MPI
    MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
    sqdt = qdt;
#endif

    sdt=sdt/((double)g_nproc);
    sqdt=sqrt(sqdt/g_nproc-sdt*sdt);
     
    dts=dt;
    sdt=1.0e6f*sdt/((double)(k_max*j_max*(VOLUME)));
    sqdt=1.0e6f*sqdt/((double)(k_max*j_max*(VOLUME)));
    
    if(g_proc_id==0) {
      printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway);
      printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n", sdt, sqdt, j_max);
#ifdef TM_USE_MPI
      printf("# Communication switched on: \n");
#endif
      printf("\n%12d Mflops(total) %8d Mflops(process)", (int)(g_nproc*1608.0f/sdt),(int)(1608.0f/sdt));
#ifdef TM_USE_OMP
      printf(" %8d Mflops(thread)",(int)(1608.0f/(omp_num_threads*sdt)));
#endif
      printf(" [ %d bit arithmetic ]\n\n",(int)(sizeof(spinor)/3)); 
      fflush(stdout);
    }
    
#ifdef TM_USE_MPI
    /* isolated computation */
    t1 = gettime();
    antioptaway=0.0;
    for (j=0;j<j_max;j++) {
      for (k=0;k<k_max;k++) {
        Hopping_Matrix_nocom(0, g_spinor_field[k+k_max], g_spinor_field[k]);
        Hopping_Matrix_nocom(1, g_spinor_field[2*k_max], g_spinor_field[k+k_max]);
        antioptaway += creal(g_spinor_field[2*k_max][0].s0.c0);
      }
    }
    t2 = gettime();
    dt2 = t2-t1;
    /* compute the bandwidth */
    dt=dts-dt2;
    MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    sdt=sdt/((double)g_nproc);
    MPI_Allreduce (&dt2, &dt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    dt=dt/((double)g_nproc);
    dt=1.0e6f*dt/((double)(k_max*j_max*(VOLUME)));
    if(g_proc_id==0) {
      printf("# The following result is printed just to make sure that the calculation is not optimized away: %e\n",antioptaway);
      printf("# Communication switched off: \n\n%12d Mflops(total) %8d Mflops(process)", (int)(g_nproc*1608.0f/dt),(int)(1608.0f/dt));
#ifdef TM_USE_OMP
      printf(" %8d Mflops(thread)",(int)(1608.0f/(omp_num_threads*dt)));
#endif
      printf(" [ %d bit arithmetic ]\n\n",(int)(sizeof(spinor)/3)); 
      fflush(stdout);
    }
    sdt=sdt/((double)k_max);
    sdt=sdt/((double)j_max);
    sdt=sdt/((double)(2*SLICE));
    if(g_proc_id==0) {
      printf("# The size of the package is %d bytes.\n",(SLICE)*192);
#ifdef _USE_HALFSPINOR
      printf("# The bandwidth is %5.2f + %5.2f MB/sec\n", 192./sdt/1024/1024, 192./sdt/1024./1024);
#else
      printf("# The bandwidth is %5.2f + %5.2f MB/sec\n", 2.*192./sdt/1024/1024, 2.*192./sdt/1024./1024);
#endif
    }
#endif
    fflush(stdout);
  }
  else {
    /* the non even/odd case now */
    /*initialize the pseudo-fermion fields*/
    j_max=128;
    sdt=0.;
    for (k=0;k<k_max;k++) {
      random_spinor_field_lexic(g_spinor_field[k], reproduce_randomnumber_flag, RN_GAUSS);
    }
    
    /* estimate a reasonable number of applications to get a reliable measurement */
#ifdef TM_USE_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
    t1 = gettime();
    for (j=0;j<j_max;j++) {
      for (k=0;k<k_max;k++) {
        D_psi(g_spinor_field[k+k_max], g_spinor_field[k]);
        antioptaway+=creal(g_spinor_field[k+k_max][0].s0.c0);
      }
    }
    t2 = gettime();
    dt=t2-t1;
    // division by g_nproc because we will average over processes using  MPI_SUM
    j = (int)(ceil(j_max*31.0/dt/g_nproc));
#ifdef TM_USE_MPI
    MPI_Allreduce(&j,&j_max, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
#else
    j_max = j;
#endif

    /* perform the actual measurement */
#ifdef TM_USE_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
    t1 = gettime();
    for (j=0;j<j_max;j++) {
      for (k=0;k<k_max;k++) {
        D_psi(g_spinor_field[k+k_max], g_spinor_field[k]);
        antioptaway+=creal(g_spinor_field[k+k_max][0].s0.c0);
      }
    }
    t2 = gettime();
    dt=t2-t1;
#ifdef TM_USE_MPI
    MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
    sdt = dt;
#endif
    qdt=dt*dt;
#ifdef TM_USE_MPI
    MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
    sqdt = qdt;
#endif
    sdt=sdt/((double)g_nproc);
    sqdt=sqrt(sqdt/g_nproc-sdt*sdt);
    dts=dt;
    sdt=1.0e6f*sdt/((double)(k_max*j_max*(VOLUME)));
    sqdt=1.0e6f*sqdt/((double)(k_max*j_max*(VOLUME)));

    if(g_proc_id==0) {
      printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway);
      printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n\n", sdt, sqdt, j_max);
      printf(" %12d Mflops(total) %8d Mflops(process)", (int)(1680.0f*g_nproc/sdt),(int)(1680.0f/sdt));
#ifdef TM_USE_OMP
      printf(" %8d Mflops(thread)",(int)(1680.0f/(omp_num_threads*sdt)));
#endif
      printf(" [ %d bit arithmetic ]\n\n",(int)(sizeof(spinor)/3)); 
      fflush(stdout);
    }
  }

#ifdef HAVE_LIBLEMON
  if(g_proc_id==0) {
    printf("# Performing parallel IO test ...\n");
  }
  xlfInfo = construct_paramsXlfInfo(0.5, 0);
  write_gauge_field( "conf.test", 64, xlfInfo);
  free(xlfInfo);
  if(g_proc_id==0) {
    printf("# done ...\n");
  }
#endif


#ifdef TM_USE_OMP
  free_omp_accumulators();
#endif
  free_gauge_field();
  free_geometry_indices();
  free_spinor_field();
  free_moment_field();
#ifdef TM_USE_MPI
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Finalize();
#endif
  return(0);
}
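
The benchmark above follows a simple timing pattern: synchronize with MPI_Barrier, time a batch of operator applications, accumulate a value (antioptaway) so the compiler cannot remove the loop, and average the per-process times with an MPI_Allreduce sum divided by g_nproc. A hedged, self-contained sketch of just that timing skeleton follows; the busy loop merely stands in for the Dirac operator.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank, nproc;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    volatile double sink = 0.0;          /* keeps the loop from being optimized away */
    MPI_Barrier(MPI_COMM_WORLD);         /* start all processes together */
    double t1 = MPI_Wtime();
    for (long i = 0; i < 50000000L; i++) sink += 1e-9;
    double dt = MPI_Wtime() - t1;

    double sdt;
    MPI_Allreduce(&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    sdt /= (double)nproc;                /* average time per process */

    if (rank == 0)
        printf("anti-opt value %e, mean kernel time %e s over %d processes\n",
               (double)sink, sdt, nproc);

    MPI_Finalize();
    return 0;
}
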
Ejemplo n.º 20
0
FORT_DLL_SPEC void FORT_CALL mpi_init_thread_ ( MPI_Fint *v1, MPI_Fint *v2, MPI_Fint *ierr ){
    mpirinitf_(); MPIR_F_NeedInit = 0;
    *ierr = MPI_Init_thread( 0, 0, (int)*v1, v2 );
}
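
The Fortran binding above simply forwards to MPI_Init_thread(). On the C side the usual convention, sketched below, is to request a thread level and then check the level the library actually provided; the MPI standard orders the levels MPI_THREAD_SINGLE < FUNNELED < SERIALIZED < MULTIPLE. The FUNNELED request here is only an example, not something taken from the wrapper.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int required = MPI_THREAD_FUNNELED, provided;
    MPI_Init_thread(&argc, &argv, required, &provided);

    /* provided may be lower or higher than requested */
    if (provided < required)
        fprintf(stderr, "warning: requested thread level not available\n");

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (rank == 0) printf("provided thread level: %d\n", provided);

    MPI_Finalize();
    return 0;
}
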
Ejemplo n.º 21
0
int main ( int argc, char* argv[] )
{
    /* parse command line arguments */
    std::string anArg;
    std::string modelRoot;
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || \
	defined(USE_MPI_GEMS) || defined(USE_MPI_KRC)
#ifdef OGS_FEM_IPQC
    int nb_ddc=0; //number of cores for DDC related processes
#endif
#endif

    for( int i = 1; i < argc; i++ )
    {
        anArg = std::string( argv[i] );
        if( anArg == "--help" || anArg == "-h")
        {
            std::cout << "Usage: ogs [MODEL_ROOT] [OPTIONS]\n"
                      << "Where OPTIONS are:\n"
                      << "  -h [--help]               print this message and exit\n"
                      << "  -b [--build-info]         print build info and exit\n"
                      << "  --output-directory DIR    put output files into DIR\n"
                      << "  --version                 print ogs version and exit" << "\n";
            continue;
        }
        if( anArg == "--build-info" || anArg == "-b" )
        {
            std::cout << "ogs version: " << BuildInfo::OGS_VERSION << "\n"
                      << "ogs date: " << BuildInfo::OGS_DATE << "\n";
            std::cout << "git commit info: " << BuildInfo::GIT_COMMIT_INFO << "\n";
            std::cout << "build timestamp: " << BuildInfo::BUILD_TIMESTAMP << "\n";
            continue;
        }
        if( anArg == "--version" )
        {
            std::cout << BuildInfo::OGS_VERSION << "\n";
            continue;
        }
        if( anArg == "--model-root" || anArg == "-m" )
        {
            if (i+1 >= argc) {
                std::cerr << "Error: Parameter " << anArg << " needs an additional argument" << std::endl;
                std::exit(EXIT_FAILURE);
            }
            modelRoot = std::string( argv[++i] );
            continue;
        }
        if (anArg == "--output-directory")
        {
            if (i+1 >= argc) {
                std::cerr << "Error: Parameter " << anArg << " needs an additional argument" << std::endl;
                std::exit(EXIT_FAILURE);
            }
            std::string path = argv[++i];

            if (! path.empty()) defaultOutputPath = path;
            continue;
        }
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || \
	defined(USE_MPI_GEMS) || defined(USE_MPI_KRC)
#ifdef OGS_FEM_IPQC
        std::string decompositions;
        if( anArg == "--domain-decomposition" || anArg == "-ddc" )
        {
            decompositions = std::string( argv[++i] );
            nb_ddc = atoi(decompositions.c_str());
            continue;
        }
#endif
#endif
        // anything left over must be the model root, unless already found
        if ( modelRoot == "" )
            modelRoot = std::string( argv[i] );
    } // end of parse argc loop

    if( argc > 1 && modelRoot == "" ) // non-interactive mode and no model given
        exit(0);             // e.g. just wanted the build info

    std::string solver_pkg_name = BuildInfo::SOLVER_PACKAGE_NAME;
    // Warn if a linear solver package other than the OGS default is in use.
    if(solver_pkg_name.find("Default") == std::string::npos)
    {
        std::cout << "\nWarning: " << solver_pkg_name
                  << " is in use instead of the OGS default solver package." << std::endl;
        std::cout << "         The solver settings may need to be adjusted for the required solution accuracy!" << std::endl;
    }

    char* dateiname(NULL);
#ifdef SUPERCOMPUTER
// *********************************************************************
// buffered output ... important for performance on cray
// (unbuffered output is limited to 10 bytes per second)
// [email protected] 11.10.2007

    char buf[1024 * 1024];
    int bsize;

    bsize = 1024 * 1024; // question: what happens if the buffer is full?
    // According to the documentation the buffer is flushed when full.
    // If there is a lot of output, increasing the buffer size is useful.
    if(bsize > 0)
//        bufstd = malloc(bsize);
        setvbuf(stdout, buf, _IOFBF, bsize);
    //**********************************************************************
#endif
    /*---------- MPI Initialization ----------------------------------*/
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || \
	defined(USE_MPI_GEMS) || defined(USE_MPI_KRC)
    printf("Before MPI_Init\n");
#if defined(USE_MPI_GEMS)
    int prov;
    MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED, &prov);
#else
    MPI_Init(&argc,&argv);
#endif
    MPI_Barrier (MPI_COMM_WORLD); // 12.09.2007 WW
    elapsed_time_mpi = -MPI_Wtime(); // 12.09.2007 WW
#ifdef OGS_FEM_IPQC
    bool splitcomm_flag;
    int np;
    MPI_Comm_size(MPI_COMM_WORLD, &np);
    splitcomm_flag = SplitMPI_Communicator::CreateCommunicator(MPI_COMM_WORLD, np, nb_ddc);
    time_ele_paral = 0.0;
#else
    comm_DDC= MPI_COMM_WORLD;
#endif
#endif
    /*---------- MPI Initialization ----------------------------------*/


#ifdef USE_PETSC
    int rank, r_size;
    PetscLogDouble v1,v2;
    char help[] = "OGS with PETSc \n";
    //PetscInitialize(argc, argv, help);
    PetscInitialize(&argc,&argv,(char *)0,help);
    //kg44 quick fix to compile PETSC with version PETSCV3.4
#ifdef USEPETSC34
    PetscTime(&v1);
#else
    PetscGetTime(&v1);
#endif
    MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
    MPI_Comm_size(PETSC_COMM_WORLD, &r_size);
    PetscSynchronizedPrintf(PETSC_COMM_WORLD, "===\nUse PETSc solver");
    PetscSynchronizedPrintf(PETSC_COMM_WORLD, "Number of CPUs: %d, rank: %d\n", r_size, rank);
#endif

    /*---------- LIS solver -----------------------------------------*/
#ifdef LIS
    //Initialization of the lis solver.
    lis_initialize(&argc, &argv);
#endif
    /*========================================================================*/
    /* Interaction with the operating system */
    /* Start the timer for the total run time */
#ifdef TESTTIME
    TStartTimer(0);
#endif
    /* Print the intro message */
#if defined(USE_MPI) //WW
    if(myrank == 0)
#endif
#ifdef USE_PETSC
        if(rank == 0 )
#endif

            DisplayStartMsg();
    /* Initialize memory management */
    if (!InitMemoryTest())
    {
        DisplayErrorMsg("Fehler: Speicherprotokoll kann nicht erstellt werden!");
        DisplayErrorMsg("        Programm vorzeitig beendet!");
        return 1; // LB changed from 0 to 1 because 0 is indicating success
    }
    if( argc == 1 )               // interactive mode

        dateiname = ReadString();
    else                         // non-interactive mode
    {
        if ( argc == 2 )     // a model root was supplied
        {
            dateiname = (char*) Malloc((int)strlen(argv[1]) + 1);
            dateiname = strcpy(dateiname,argv[1]);
        }
        else                // several args supplied
            if( modelRoot != "")
            {
                dateiname = (char*) Malloc( (int) modelRoot.size() + 1 );
                dateiname = strcpy( dateiname, modelRoot.c_str() );
            }
        DisplayMsgLn(dateiname);
    }
    //WW  DisplayMsgLn("");
    //WW  DisplayMsgLn("");
    // ----------23.02.2009. WW-----------------

    // LB Check if file exists
    std::string tmpFilename = dateiname;
    tmpFilename.append(".pcs");
    if(!IsFileExisting(tmpFilename))
    {
        std::cout << " Error: Cannot find file " << dateiname << "\n";
        return 1;
    }

    // If no option is given, output files are placed in the same directory as the input files
    if (defaultOutputPath.empty()) defaultOutputPath = pathDirname(std::string(dateiname));

    FileName = dateiname;
    size_t indexChWin, indexChLinux;
    indexChWin = indexChLinux = 0;
    indexChWin = FileName.find_last_of('\\');
    indexChLinux = FileName.find_last_of('/');
    //
    if(indexChWin != std::string::npos)
        FilePath = FileName.substr(0,indexChWin) + "\\";
    else if(indexChLinux != std::string::npos)
        FilePath = FileName.substr(0,indexChLinux) + "/";
    // ---------------------------WW
    Problem* aproblem = new Problem(dateiname);
#ifdef USE_PETSC
    aproblem->setRankandSize(rank, r_size);
#endif
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || defined(USE_MPI_GEMS)  || defined(USE_MPI_KRC)
    aproblem->setRankandSize(myrank, mysize);

    if (myrank != MPI_UNDEFINED)
    {
#endif
        aproblem->Euler_TimeDiscretize();
        delete aproblem;
        aproblem = NULL;
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || defined(USE_MPI_GEMS)  || defined(USE_MPI_KRC)
    }

#ifdef OGS_FEM_IPQC
    //sending killing signals to ranks of group_IPQC, only when the group exists
    if (splitcomm_flag == true) {
        int signal = -1, rank_IPQC, mysize_IPQC = np - nb_ddc;
        for (int i=0; i< mysize_IPQC; i++) {
            rank_IPQC = mysize + i;
            MPI_Send(&signal, 1, MPI_INT, rank_IPQC, 0, MPI_COMM_WORLD);
        }
    }
#endif

#endif


    if(ClockTimeVec.size()>0)
        ClockTimeVec[0]->PrintTimes();  //CB time
    DestroyClockTime();
#ifdef TESTTIME
#if defined(USE_MPI)
    if(myrank == 0)
#endif
#if defined(USE_PETSC)
        if(rank == 0)
#endif
            std::cout << "Simulation time: " << TGetTimer(0) << "s" << "\n";
#endif
    /* Print the closing message */
    /*--------- MPI Finalize ------------------*/
#if defined(USE_MPI) || defined(USE_MPI_PARPROC) || defined(USE_MPI_REGSOIL) || defined(USE_MPI_KRC)
    elapsed_time_mpi += MPI_Wtime(); // 12.09.2007 WW
    std::cout << "\n *** Total CPU time of parallel modeling: " << elapsed_time_mpi <<
              "\n";                                                                          //WW
    // Count CPU time of post time loop WW
    MPI_Finalize();
#endif
    /*--------- MPI Finalize ------------------*/
    /*--------- LIS Finalize ------------------*/
#ifdef LIS
    lis_finalize();
#endif
    /*--------- LIS Finalize ------------------*/

    free(dateiname);

#ifdef USE_PETSC
    //kg44 quick fix to compile PETSC with version PETSCV3.4
#ifdef USEPETSC34
    PetscTime(&v2);
#else
    PetscGetTime(&v2);
#endif


    PetscPrintf(PETSC_COMM_WORLD,"\t\n>>Total elapsed time by using PETSC:%f s\n",v2-v1);

    PetscFinalize();
#endif

    return 0;
}
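In the OGS example the split between DDC ranks and IPQC ranks is delegated to SplitMPI_Communicator::CreateCommunicator, whose implementation is not shown in the listing. A minimal sketch of the general idea using plain MPI_Comm_split; the partition rule used here is an assumption, not the one OGS actually applies:

/* Hedged sketch: put the first nb_ddc ranks in a "compute" communicator and
 * the remaining ranks in a "service" communicator (assumed partition rule). */
#include <mpi.h>

MPI_Comm split_world(int nb_ddc) {
  int rank;
  MPI_Comm sub;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  int color = (rank < nb_ddc) ? 0 : 1;   /* 0 = compute group, 1 = service group */
  MPI_Comm_split(MPI_COMM_WORLD, color, rank, &sub);
  return sub;
}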
Example No. 22
0
int main(int argc, char** argv) {
  int provided;
  MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
  rokko::grid_1d g;
  
  std::cout.precision(10);
  options opt(argc, argv, 16, solver_type::default_solver(), g.get_myrank() == 0);
  if (!opt.valid) MPI_Abort(MPI_COMM_WORLD, 1);
  boost::timer tm;
  MPI_Barrier(g.get_comm());
  double t1 = tm.elapsed();

  // lattice structure
  int n = opt.N;
  int ibond = n;
  std::vector<int> ipair;
  for (int i = 0; i < ibond; ++i) {
    ipair.push_back(i);
    ipair.push_back((i + 1) % n);
  }

  // Hamiltonian parameters
  std::vector<double> bondwt(ibond, -1);
  std::vector<double> zrtio(ibond, 1);

  // table of configurations and Hamiltonian operator
  subspace ss(n, 0);
  hamiltonian hop(ss, ipair, bondwt, zrtio);
  solver_type solver(opt.solver);
  solver.initialize(argc, argv);

  // Hamiltonian matrix
  rokko::distributed_crs_matrix mat(hop.dimension(), hop.dimension(), solver);
  elm2_mpi(hop, mat);
  MPI_Barrier(g.get_comm());
  double t2 = tm.elapsed();
  
  // Eigenvalues
  int nev = 10;
  int blockSize = 5;
  int maxIters = 500;
  double tol = 1.0e-8;
  solver.diagonalize(mat, nev, blockSize, maxIters, tol);
  double t3 = tm.elapsed();
  
  if (g.get_myrank() == 0) {
    std::cout << "[Number of converged eigenpairs]\n\t" << solver.num_conv() << std::endl;
    // std::cout << "[Iteration number]\n\t" << itr << std::endl;
    std::cout << "[Eigenvalues]\n";
    for (int i = 0; i < solver.num_conv(); ++i) std::cout << '\t' << solver.eigenvalue(i);
    std::cout << std::endl;
  }

  // Ground-state eigenvector
  rokko::distributed_vector eigvec;
  solver.eigenvector(0, eigvec);
  if (g.get_myrank() == 0) std::cout << "[Eigenvector components (selected)]";
  std::cout << std::flush;
  MPI_Barrier(g.get_comm());
  int count = 0;
  int step = ss.dimension() / 20;
  if (step < 1) step = 1;  // guard against a zero stride for small dimensions
  for (int i = 12; i < ss.dimension(); i += step, ++count) {
    if (eigvec.is_gindex(i)) {
      if (count % 4 == 0) std::cout << std::endl;
      std::cout << '\t' << eigvec.get_global(i);
    }
    std::cout << std::flush;
    MPI_Barrier(g.get_comm());
  }
  if (g.get_myrank() == 0) std::cout << std::endl;
  std::cout << std::flush;
  MPI_Barrier(g.get_comm());

  // Precision check and correlation functions
  // double Hexpec = check2(mat, x, 0, v, 0);

  // std::vector<int> npair;
  // npair.push_back(1);
  // npair.push_back(2);
  // std::vector<double> sxx(1), szz(1);
  // xcorr(ss, npair, x, 0, sxx);
  // zcorr(ss, npair, x, 0, szz);
  // std::cout << "[Nearest neighbor correlation functions]\n\t" 
  //           << "sxx : " << sxx[0]
  //           << ", szz : " << szz[0] << std::endl;

  if (g.get_myrank() == 0) {
    std::cerr << "initialize      " << (t2-t1) << " sec\n"
              << "diagonalization " << (t3-t2) << " sec\n";
    // << "check           " << (t4-t3) << " sec\n"
    // << "correlation     " << (t5-t4) << " sec\n";
  }

  MPI_Finalize();
}
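The eigenvector printout in the example relies on a barrier per index so that only the owning rank writes and the output stays roughly ordered. A minimal sketch of the same rank-ordered printing idea for a generic distributed array; owns() and value() are hypothetical stand-ins for the real ownership test and element accessor:

/* Hedged sketch: print selected global entries of a distributed array in
 * index order, synchronizing ranks after each entry. */
#include <mpi.h>
#include <stdio.h>

void print_selected(int dim, int stride, int (*owns)(int), double (*value)(int)) {
  for (int i = 0; i < dim; i += stride) {
    if (owns(i)) printf("\t%g", value(i));   /* only the owning rank writes */
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);             /* keep output roughly ordered */
  }
}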
Example No. 23
0
int main(int argc, char *argv[]) {

   int i, j, rank, nranks;
   int xdim, ydim;
   long bufsize;
   double **buffer;
   double t_start=0.0, t_stop=0.0;
   int count[2], src_stride, trg_stride, stride_level, peer;
   double expected, actual;
   int provided;

   MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    if (nranks < 2) {
        printf("%s: Must be run with at least 2 processes\n", argv[0]);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

   ARMCI_Init_args(&argc, &argv);
   
   bufsize = MAX_XDIM * MAX_YDIM * sizeof(double);
   buffer = (double **) malloc(sizeof(double *) * nranks);
   ARMCI_Malloc((void **) buffer, bufsize);

   for(i=0; i< bufsize/sizeof(double); i++) {
       *(buffer[rank] + i) = 1.0 + rank;
   }

   if(rank == 0) {
     printf("ARMCI_PutS Latency - local and remote completions - in usec \n");
     printf("%30s %22s %22s\n", "Dimensions(array of doubles)", "Latency-LocalCompeltion", "Latency-RemoteCompletion");
     fflush(stdout);
   }

   src_stride = MAX_YDIM*sizeof(double);
   trg_stride = MAX_YDIM*sizeof(double);
   stride_level = 1;

   ARMCI_Barrier();

   for(xdim=1; xdim<=MAX_XDIM; xdim*=2) {

      count[1] = xdim;

      for(ydim=1; ydim<=MAX_YDIM; ydim*=2) {

        count[0] = ydim*sizeof(double); 
      
        if(rank == 0) 
        {
          peer = 1;          
 
          for(i=0; i<ITERATIONS+SKIP; i++) { 

             if(i == SKIP)
                 t_start = MPI_Wtime();

             ARMCI_PutS((void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride, count, stride_level, peer); 
 
          }
          t_stop = MPI_Wtime();
          ARMCI_Fence(peer);
          char temp[32];
          snprintf(temp, sizeof(temp), "%dX%d", xdim, ydim);
          printf("%30s %20.2f", temp, ((t_stop-t_start)*1000000)/ITERATIONS);
          fflush(stdout);

          ARMCI_Barrier();

          ARMCI_Barrier();

          for(i=0; i<ITERATIONS+SKIP; i++) {
  
             if(i == SKIP)
                t_start = MPI_Wtime();

             ARMCI_PutS((void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride, count, stride_level, peer); 
             ARMCI_Fence(peer);

          }
          t_stop = MPI_Wtime();
          printf("%20.2f \n", ((t_stop-t_start)*1000000)/ITERATIONS);
          fflush(stdout);

          ARMCI_Barrier();

          ARMCI_Barrier();
        }
        else
        {
            peer = 0;

            expected = (1.0 + (double) peer);

            ARMCI_Barrier();
            if (rank == 1)
            {
              for(i=0; i<xdim; i++)
              {
                for(j=0; j<ydim; j++)
                {
                  actual = *(buffer[rank] + i*MAX_YDIM + j);
                  if(actual != expected)
                  {
                    printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n",
                        i, j, expected, actual);
                    fflush(stdout);
                    ARMCI_Error("Bailing out", 1);
                  }
                }
              }
            }
            for(i=0; i< bufsize/sizeof(double); i++) {
              *(buffer[rank] + i) = 1.0 + rank;
            }

            ARMCI_Barrier();

            ARMCI_Barrier();
            if (rank == 1)
            {
              for(i=0; i<xdim; i++)
              {
                for(j=0; j<ydim; j++)
                {
                  actual = *(buffer[rank] + i*MAX_YDIM + j);
                  if(actual != expected)
                  {
                    printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n",
                        i, j, expected, actual);
                    fflush(stdout);
                    ARMCI_Error("Bailing out", 1);
                  }
                }
              }

              for(i=0; i< bufsize/sizeof(double); i++) {
                *(buffer[rank] + i) = 1.0 + rank;
              }
            }
            ARMCI_Barrier();

        }
        
      }

   }

   ARMCI_Barrier();

   ARMCI_Free((void *) buffer[rank]);
   free(buffer);

   ARMCI_Finalize();

   MPI_Finalize();

   return 0;
}
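The ARMCI benchmark above discards the first SKIP iterations as warm-up before starting the clock, then averages over ITERATIONS timed repetitions. A minimal, library-agnostic sketch of that warm-up pattern; do_put() is a hypothetical stand-in for the measured operation, and WARMUP/REPS play the roles of SKIP and ITERATIONS:

/* Hedged sketch: warm-up iterations followed by a timed loop, reporting the
 * average latency in microseconds. */
#include <mpi.h>

#define WARMUP 10
#define REPS   100

double time_op(void (*do_put)(void)) {
  double t_start = 0.0;
  for (int i = 0; i < REPS + WARMUP; i++) {
    if (i == WARMUP) t_start = MPI_Wtime();  /* start timing after warm-up */
    do_put();
  }
  return (MPI_Wtime() - t_start) * 1.0e6 / REPS;
}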
Example No. 24
0
int main( int argc, char ** argv) {

  int me, NTasks;
#ifndef FORCE_NOMPI
  int required, provided;
#endif
  INTS id, m;
  /* CSC Data */
  INTS n, dof;
  INTL nnzeros, edgenbr;
  COEF val;
  /* Local Data */
  INTS localnodenbr;
  INTS * nodelist;
  INTS root;
  INTS base;
  COEF * lrhs;
  COEF * globrhs;
  COEF * globrhs_recv;
  COEF * globx;
  COEF * globx2;
  COEF * globprod;
  /* Other data */
  COEF * matElem;
  double prec, xmin, xmax, sum1, sum2;
  INTS i, j, k;
  INTS solver;
  INTS zero=0;
  INTS one=1;
  INTS nb_threads;

  root = -1;
  base = 1;

#ifndef FORCE_NOMPI
  required=MPI_THREAD_MULTIPLE;
  MPI_Init_thread(&argc, &argv, required, &provided);

  MPI_Comm_size(MPI_COMM_WORLD, &NTasks);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  n = dof = 0;
#else
  NTasks = 1;
  me = 0;
#endif
  if (argc >= 3) { /* need both <size> and <DofNumber> */
    n = atoi(argv[1]);
    dof = atoi(argv[2]);
  } else {
    if (me == 0) {
      fprintf(stderr, "Usage: %s <size> <DofNumber>\n", argv[0]);
      return 1;
    }
  }

  xmin = 0.0;
  xmax = 1.0;

  /* Starting MURGE*/
  CALL_MURGE(MURGE_Initialize(2));
  id = 0;

  /* Set Options */
  prec = 1e-7;
  /*
   Call MURGE_Get_Solver(solver)
   */
  solver = MURGE_SOLVER_PASTIX;




  if ( solver == MURGE_SOLVER_PASTIX ) {
    CALL_MURGE(MURGE_SetDefaultOptions(id, zero));
    CALL_MURGE(MURGE_SetOptionINT(id, IPARM_VERBOSE, API_VERBOSE_NO));
    CALL_MURGE(MURGE_SetOptionINT(id, IPARM_MATRIX_VERIFICATION, API_YES));
    /* CSCd Required for product in verification */
    CALL_MURGE(MURGE_SetOptionINT(id, IPARM_FREE_CSCUSER, API_CSC_PRESERVE));

    nb_threads = 1;
#ifdef _OPENMP
#pragma omp parallel shared(nb_threads)
    {
      nb_threads = omp_get_num_threads();
    }
#endif /* _OPENMP */

    if (me == 0) {
      fprintf(stdout, "Running on %ld threads and %d MPI Tasks\n",
              (long)nb_threads, NTasks);
    }
    CALL_MURGE(MURGE_SetOptionINT(id, IPARM_THREAD_NBR, nb_threads));
  } else if (solver == MURGE_SOLVER_HIPS) {
#ifdef HIPS
    if ( method == 1 ) {
      CALL_MURGE(MURGE_SetDefaultOptions(id, HIPS_ITERATIVE));
    } else {
      CALL_MURGE(MURGE_SetDefaultOptions(id, HIPS_HYBRID));
      CALL_MURGE(MURGE_SetOptionINT(id, HIPS_PARTITION_TYPE, zero));
      CALL_MURGE(MURGE_SetOptionINT(id, HIPS_DOMSIZE, domsize));
    }
    CALL_MURGE(MURGE_SetOptionINT(id, HIPS_SYMMETRIC, zero));
    CALL_MURGE(MURGE_SetOptionINT(id, HIPS_LOCALLY, zero));
    CALL_MURGE(MURGE_SetOptionINT(id, HIPS_ITMAX, itmax));
    CALL_MURGE(MURGE_SetOptionINT(id, HIPS_KRYLOV_RESTART, restart));
    CALL_MURGE(MURGE_SetOptionINT(id, HIPS_VERBOSE, verbose));
    CALL_MURGE(MURGE_SetOptionINT(id, HIPS_DOMNBR, NTasks));
    CALL_MURGE(MURGE_SetOptionINT(id, HIPS_CHECK_GRAPH, one));
    CALL_MURGE(MURGE_SetOptionINT(id, HIPS_CHECK_MATRIX, one));
#endif
  }
  CALL_MURGE(MURGE_SetOptionINT(id, MURGE_IPARAM_DOF, dof));
  CALL_MURGE(MURGE_SetOptionINT(id, MURGE_IPARAM_SYM, MURGE_BOOLEAN_FALSE));
  CALL_MURGE(MURGE_SetOptionINT(id, MURGE_IPARAM_BASEVAL, base));

  CALL_MURGE(MURGE_SetOptionREAL(id, MURGE_RPARAM_EPSILON_ERROR, prec));
  /* Set the graph: all processors enter some edges of the
   graph that correspond to non-zero locations in the matrix */

  /****************************************
   ** Enter the matrix non-zero pattern  **
   ** you can use any distribution       **
   ****************************************/

  /* this processor enters the A(myfirstrow:mylastrow, *)
   part of the matrix non-zero pattern */
  if (me == 0) {
    edgenbr = 3*n-4;
    CALL_MURGE(MURGE_GraphBegin(id, n, edgenbr));

    /* Dirichlet boundary condition */
    CALL_MURGE(MURGE_GraphEdge(id, one, one));
    CALL_MURGE(MURGE_GraphEdge(id, n, n));

    /* Interior */
    for (i = 2; i < n; i++) {
      for (j = -1; j <= 1; j++) {
        CALL_MURGE(MURGE_GraphEdge(id, i, i+j));
        /* if (j != 0) {
         MURGE_GraphEdge(id, j+i, i);
         } */
      }
    }
  } else {
    edgenbr = 0;
    CALL_MURGE(MURGE_GraphBegin(id, n, edgenbr));
  }
  CALL_MURGE(MURGE_GraphEnd(id));


  /*  Get Local nodes */
  CALL_MURGE(MURGE_GetLocalNodeNbr(id, &localnodenbr));
  nodelist = (INTS*)malloc(localnodenbr*sizeof(INTS));

  CALL_MURGE(MURGE_GetLocalNodeList(id, nodelist));

  /* compute the number of non-zeros; */
  nnzeros = 0;
  for (m = 0; m < localnodenbr; m++) {
    i = nodelist[m];
    if (i == 1 || i == n) {
      /*  Boundaries */
      nnzeros = nnzeros + 1;
    } else {
      /*  Interior */
      for (k = -1; k <= 1; k++) {
        nnzeros = nnzeros + 1;
      }
    }
  }
  /*  We are using dof so a non zero is in fact a block of size dof**2 */
  nnzeros = nnzeros * dof*dof;

  /* You can enter only coefficient (i,j) that are in A(nodelist, nodelist)
   on this processor */

  /* We enter the lower and upper triangular part of the matrix so sym = 0 */

  /* matElem is the identity matrix of size 'dof' stored by line */

  CALL_MURGE(MURGE_AssemblyBegin(id, n, nnzeros,
                                 MURGE_ASSEMBLY_OVW, MURGE_ASSEMBLY_OVW,
                                 MURGE_ASSEMBLY_FOOL, MURGE_BOOLEAN_FALSE));

#ifdef _OPENMP
#pragma omp parallel default(none) private(m, i, k, matElem)      \
  shared(dof, localnodenbr, n, xmin, xmax, nodelist, id, stderr)
#endif /* _OPENMP */
  {
    matElem = (COEF*)malloc(dof*dof*sizeof(COEF));
#ifdef _OPENMP
#pragma omp for
#endif /* _OPENMP */
    for (m = 0; m < localnodenbr; m++) {
      i = nodelist[m];
      if ( i == 1 || i == n ) {
        /*  Boundaries */
        GetCoef(matElem, i,i,xmin,xmax,n, dof);
        CALL_MURGE(MURGE_AssemblySetNodeValues(id, i, i, matElem));
      } else {
        for (k = -1; k <= 1; k++) {
          GetCoef(matElem,i+k,i,xmin,xmax,n, dof);
          CALL_MURGE(MURGE_AssemblySetNodeValues(id, i, i+k, matElem));
        }
      }
    }
    free(matElem);
  }

  CALL_MURGE(MURGE_AssemblyEnd(id));


  /* Now set up the right-hand side */
  lrhs = (COEF*)malloc(localnodenbr*dof*sizeof(COEF));
  globrhs = (COEF*)malloc(n*dof*sizeof(COEF));
  for (k = 0; k < n*dof; k++)
    globrhs[k] = 0.0;

  for (m = 0; m < localnodenbr; m++) {
    GetRhs(&val,nodelist[m],xmin,xmax,n);
    for (k = 0; k < dof; k++)
      globrhs[(nodelist[m]-1)*dof+k] = val;
    for (k = 0; k < dof; k++)
      lrhs[m*dof+k] = val;
  }
  globrhs_recv = (COEF*)malloc(n*dof*sizeof(COEF));
#ifndef FORCE_NOMPI
  MPI_Allreduce(globrhs, globrhs_recv, n*dof, MURGE_MPI_COEF,
                MPI_SUM, MPI_COMM_WORLD);
#else
  memcpy(globrhs_recv, globrhs, n*dof*sizeof(COEF));
#endif
  free(globrhs);
  CALL_MURGE(MURGE_SetLocalRHS(id, lrhs, MURGE_ASSEMBLY_OVW, MURGE_ASSEMBLY_OVW));

  /* Get the global solution without refinement */
  globx = (COEF*)malloc(n*dof*sizeof(COEF));
  fprintf(stdout, "> Getting solution without refinement <\n");
  CALL_MURGE(MURGE_SetOptionINT(id, IPARM_MURGE_MAY_REFINE, API_YES));
  CALL_MURGE(MURGE_SetOptionINT(id, IPARM_MURGE_REFINEMENT, API_NO));
  CALL_MURGE(MURGE_GetGlobalSolution(id, globx, root));

  /* Get the global solution with refinement and using MURGE_GetSolution()
   * Test chaining two get solution.
   */
  fprintf(stdout, "> Getting solution with refinement <\n");
  globx2 = (COEF*)malloc(n*dof*sizeof(COEF));
  CALL_MURGE(MURGE_SetOptionINT(id, IPARM_MURGE_REFINEMENT, API_YES));
  CALL_MURGE(MURGE_GetSolution(id, n, nodelist, globx2, MURGE_ASSEMBLY_RESPECT));

  /* note that we cannot check solution in-between getsolution calls because
   * setting X would overwrite RHS...
   * Maybe this will be handled in next generation of murge interface... */
  fprintf(stdout, "> Check solution without refinement <\n");
  CALL_MURGE(MURGE_SetGlobalRHS(id, globx, -one, MURGE_ASSEMBLY_OVW));
  globprod = (COEF*)malloc(n*dof*sizeof(COEF));
  CALL_MURGE(MURGE_GetGlobalProduct(id, globprod, -one));
  sum1 = 0;
  sum2 = 0;
  for (k = 0; k < n*dof; k++) {
    sum1 = sum1 + globprod[k]*globprod[k];
    sum2 = sum2 + (globprod[k] - globrhs_recv[k])*(globprod[k]-globrhs_recv[k]);
  }
  fprintf(stdout, "||AX - B||/||AX||  : %.15g\n", sqrt(sum2/sum1));

  fprintf(stdout, "> Check solution with refinement <\n");
  CALL_MURGE(MURGE_SetGlobalRHS(id, globx2, -one, MURGE_ASSEMBLY_OVW));
  CALL_MURGE(MURGE_GetGlobalProduct(id, globprod, -one));
  sum1 = 0;
  sum2 = 0;
  for (k = 0; k < n*dof; k++) {
    sum1 = sum1 + globprod[k]*globprod[k];
    sum2 = sum2 + (globprod[k] - globrhs_recv[k])*(globprod[k]-globrhs_recv[k]);
  }
  fprintf(stdout, "||AX - B||/||AX||  : %.15g\n", sqrt(sum2/sum1));

  /* Store in a file */
  if (me == 0)
    store(globx,xmin,xmax,n,dof);

  {
    int iter;
    INTS  n_coefs = n/NTasks;
    INTS *coef_idx;
    COEF *coef_vals;

    if (me < n%NTasks)
      n_coefs++;

    fprintf(stdout, "Now using MURGE_SetRHS and MURGE_ASSEMBLY_FOOL\n");
    coef_idx  = malloc(n_coefs*sizeof(INTS));
    coef_vals = malloc(n_coefs*dof*sizeof(COEF));
    /* cyclic distribution of RHS */
    for (iter = 0; iter < n_coefs; iter++)
      {
        coef_idx[iter]  = me + iter*NTasks + 1; /* baseval == 1 */
        for (k = 0; k < dof; k++)
          coef_vals[iter*dof+k] = globrhs_recv[(me + iter*NTasks)*dof + k];

      }
    CALL_MURGE(MURGE_SetRHS(id, n_coefs, coef_idx, coef_vals, MURGE_ASSEMBLY_OVW,
                            MURGE_ASSEMBLY_OVW, MURGE_ASSEMBLY_FOOL));
    free(coef_vals);
    free(coef_idx);
    CALL_MURGE(MURGE_GetGlobalSolution(id, globx, root));

    CALL_MURGE(MURGE_SetGlobalRHS(id, globx, -one, MURGE_ASSEMBLY_OVW));
    CALL_MURGE(MURGE_GetGlobalProduct(id, globprod, -one));
    sum1 = 0;
    sum2 = 0;
    for (k = 0; k < n*dof; k++) {
      sum1 = sum1 + globprod[k]*globprod[k];
      sum2 = sum2 + (globprod[k] - globrhs_recv[k])*(globprod[k]-globrhs_recv[k]);
    }
    fprintf(stdout, "||AX - B||/||AX||  : %.15g\n", sqrt(sum2/sum1));
  }

  /* I'm Free  */
  CALL_MURGE(MURGE_Clean(id));
  CALL_MURGE(MURGE_Finalize());
#ifndef FORCE_NOMPI
  MPI_Finalize();
#endif
  free(nodelist);
  free(lrhs);
  free(globx);
  free(globx2);
  free(globprod);
  free(globrhs_recv);
  return 0;
}
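Example No. 24 wraps every call in a CALL_MURGE macro whose definition is not reproduced in the listing. Below is a plausible sketch of such an error-checking wrapper, assuming murge.h provides INTS and MURGE_SUCCESS; this is an assumption, not the macro the example actually ships with:

/* Hedged sketch of an error-checking wrapper: abort on any status other than
 * MURGE_SUCCESS, reporting file and line (assumes murge.h is included). */
#include <stdio.h>
#include <stdlib.h>

#define CALL_MURGE(call)                                              \
  do {                                                                \
    INTS murge_err_ = (call);                                         \
    if (murge_err_ != MURGE_SUCCESS) {                                \
      fprintf(stderr, "%s:%d: MURGE error %ld\n",                     \
              __FILE__, __LINE__, (long)murge_err_);                  \
      exit(EXIT_FAILURE);                                             \
    }                                                                 \
  } while (0)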
Example No. 25
0
LibMeshInit::LibMeshInit (int argc, const char * const * argv,
                          MPI_Comm COMM_WORLD_IN)
#endif
{
  // should _not_ be initialized already.
  libmesh_assert (!libMesh::initialized());

  // Build a command-line parser.
  command_line.reset (new GetPot (argc, argv));

  // Disable performance logging upon request
  {
    if (libMesh::on_command_line ("--disable-perflog"))
      libMesh::perflog.disable_logging();
  }

  // Build a task scheduler
  {
    // Get the requested number of threads, defaults to 1 to avoid MPI and
    // multithreading competition.  If you would like to use MPI and multithreading
    // at the same time then (n_mpi_processes_per_node)x(n_threads) should be the
    //  number of processing cores per node.
    std::vector<std::string> n_threads(2);
    n_threads[0] = "--n_threads";
    n_threads[1] = "--n-threads";
    libMesh::libMeshPrivateData::_n_threads =
      libMesh::command_line_value (n_threads, 1);

    // If there's no threading model active, force _n_threads==1
#if !LIBMESH_USING_THREADS
    if (libMesh::libMeshPrivateData::_n_threads != 1)
      {
        libMesh::libMeshPrivateData::_n_threads = 1;
        libmesh_warning("Warning: You requested --n-threads>1 but no threading model is active!\n"
                        << "Forcing --n-threads==1 instead!");
      }
#endif

    // Set the number of OpenMP threads to the same as the number of threads libMesh is going to use
#ifdef LIBMESH_HAVE_OPENMP
    omp_set_num_threads(libMesh::libMeshPrivateData::_n_threads);
#endif

    task_scheduler.reset (new Threads::task_scheduler_init(libMesh::n_threads()));
  }

  // Construct singletons who may be at risk of the
  // "static initialization order fiasco"
  Singleton::setup();

  // Make sure the construction worked
  libmesh_assert(remote_elem);

#if defined(LIBMESH_HAVE_MPI)

  // Allow the user to bypass MPI initialization
  if (!libMesh::on_command_line ("--disable-mpi"))
    {
      // Check whether the calling program has already initialized
      // MPI, and avoid duplicate Init/Finalize
      int flag;
      libmesh_call_mpi(MPI_Initialized (&flag));

      if (!flag)
        {
          int mpi_thread_provided;
          const int mpi_thread_requested = libMesh::n_threads() > 1 ?
            MPI_THREAD_FUNNELED :
            MPI_THREAD_SINGLE;

          libmesh_call_mpi
            (MPI_Init_thread (&argc, const_cast<char ***>(&argv),
                              mpi_thread_requested, &mpi_thread_provided));

          if ((libMesh::n_threads() > 1) &&
              (mpi_thread_provided < MPI_THREAD_FUNNELED))
            {
              libmesh_warning("Warning: MPI failed to guarantee MPI_THREAD_FUNNELED\n"
                              << "for a threaded run.\n"
                              << "Be sure your library is funneled-thread-safe..."
                              << std::endl);

              // Ideally, if an MPI stack tells us it's unsafe for us
              // to use threads, we shouldn't use threads.
              // In practice, we've encountered one MPI stack (an
              // mvapich2 configuration) that returned
              // MPI_THREAD_SINGLE as a proper warning, two stacks
              // that handle MPI_THREAD_FUNNELED properly, and two
              // current stacks plus a couple old stacks that return
              // MPI_THREAD_SINGLE but support libMesh threaded runs
              // anyway.

              // libMesh::libMeshPrivateData::_n_threads = 1;
              // task_scheduler.reset (new Threads::task_scheduler_init(libMesh::n_threads()));
            }
          libmesh_initialized_mpi = true;
        }

      // Duplicate the input communicator for internal use
      // And get a Parallel::Communicator copy too, to use
      // as a default for that API
      this->_comm = COMM_WORLD_IN;

      libMesh::GLOBAL_COMM_WORLD = COMM_WORLD_IN;

      //MPI_Comm_set_name not supported in at least SGI MPT's MPI implementation
      //MPI_Comm_set_name (libMesh::COMM_WORLD, "libMesh::COMM_WORLD");

      libMeshPrivateData::_processor_id =
        cast_int<processor_id_type>(this->comm().rank());
      libMeshPrivateData::_n_processors =
        cast_int<processor_id_type>(this->comm().size());

      // Set up an MPI error handler if requested.  This helps us get
      // into a debugger with a proper stack when an MPI error occurs.
      if (libMesh::on_command_line ("--handle-mpi-errors"))
        {
          libmesh_call_mpi
            (MPI_Comm_create_errhandler(libMesh_MPI_Handler, &libmesh_errhandler));
          libmesh_call_mpi
            (MPI_Comm_set_errhandler(libMesh::GLOBAL_COMM_WORLD, libmesh_errhandler));
          libmesh_call_mpi
            (MPI_Comm_set_errhandler(MPI_COMM_WORLD, libmesh_errhandler));
        }
    }

  // Could we have gotten bad values from the above calls?
  libmesh_assert_greater (libMeshPrivateData::_n_processors, 0);

  // The cast_int already tested _processor_id>=0
  // libmesh_assert_greater_equal (libMeshPrivateData::_processor_id, 0);

  // Let's be sure we properly initialize on every processor at once:
  libmesh_parallel_only(this->comm());

#endif

#if defined(LIBMESH_HAVE_PETSC)

  // Allow the user to bypass PETSc initialization
  if (!libMesh::on_command_line ("--disable-petsc")

#if defined(LIBMESH_HAVE_MPI)
      // If the user bypassed MPI, we'd better be safe and assume that
      // PETSc was built to require it; otherwise PETSc initialization
      // dies.
      && !libMesh::on_command_line ("--disable-mpi")
#endif
      )
    {
      int ierr=0;

      PETSC_COMM_WORLD = libMesh::GLOBAL_COMM_WORLD;

      // Check whether the calling program has already initialized
      // PETSc, and avoid duplicate Initialize/Finalize
      PetscBool petsc_already_initialized;
      ierr = PetscInitialized(&petsc_already_initialized);
      CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
      if (petsc_already_initialized != PETSC_TRUE)
        libmesh_initialized_petsc = true;
# if defined(LIBMESH_HAVE_SLEPC)

      // If SLEPc allows us to check whether the calling program
      // has already initialized it, we do that, and avoid
      // duplicate Initialize/Finalize.
      // We assume that SLEPc will handle PETSc appropriately,
      // which it does in the versions we've checked.
      if (!SlepcInitializeCalled)
        {
          ierr = SlepcInitialize  (&argc, const_cast<char ***>(&argv), nullptr, nullptr);
          CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
          libmesh_initialized_slepc = true;
        }
# else
      if (libmesh_initialized_petsc)
        {
          ierr = PetscInitialize (&argc, const_cast<char ***>(&argv), nullptr, nullptr);
          CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
        }
# endif
#if !PETSC_RELEASE_LESS_THAN(3,3,0)
      // Register the reference implementation of DMlibMesh
#if PETSC_RELEASE_LESS_THAN(3,4,0)
      ierr = DMRegister(DMLIBMESH, PETSC_NULL, "DMCreate_libMesh", DMCreate_libMesh); CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
#else
      ierr = DMRegister(DMLIBMESH, DMCreate_libMesh); CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
#endif

#endif
    }
#endif

#if defined(LIBMESH_HAVE_MPI) && defined(LIBMESH_HAVE_VTK)
  // Do MPI initialization for VTK.
  _vtk_mpi_controller = vtkMPIController::New();
  _vtk_mpi_controller->Initialize(&argc, const_cast<char ***>(&argv), /*initialized_externally=*/1);
  _vtk_mpi_controller->SetGlobalController(_vtk_mpi_controller);
#endif

  // Re-parse the command-line arguments.  Note that PETSc and MPI
  // initialization above may have removed command line arguments
  // that are not relevant to this application in the above calls.
  // We don't want a false-positive by detecting those arguments.
  //
  // Note: this seems overly paranoid/like it should be unnecessary,
  // plus we were doing it wrong for many years and not clearing the
  // existing GetPot object before re-parsing the command line, so all
  // the command line arguments appeared twice in the GetPot object...
  command_line.reset (new GetPot (argc, argv));

  // The following line is an optimization when simultaneous
  // C and C++ style access to output streams is not required.
  // The amount of benefit which occurs is probably implementation
  // defined, and may be nothing.  On the other hand, I have seen
  // some IO tests where IO performance improves by a factor of two.
  if (!libMesh::on_command_line ("--sync-with-stdio"))
    std::ios::sync_with_stdio(false);

  // Honor the --separate-libmeshout command-line option.
  // When this is specified, the library uses an independent ostream
  // for libMesh::out/libMesh::err messages, and
  // std::cout and std::cerr are untouched by any other options
  if (libMesh::on_command_line ("--separate-libmeshout"))
    {
      // Redirect.  We'll share streambufs with cout/cerr for now, but
      // presumably anyone using this option will want to replace the
      // bufs later.
      std::ostream * newout = new std::ostream(std::cout.rdbuf());
      libMesh::out = *newout;
      std::ostream * newerr = new std::ostream(std::cerr.rdbuf());
      libMesh::err = *newerr;
    }

  // Process command line arguments for redirecting stdout/stderr.
  bool
    cmdline_has_redirect_stdout = libMesh::on_command_line ("--redirect-stdout"),
    cmdline_has_redirect_output = libMesh::on_command_line ("--redirect-output");

  // The --redirect-stdout command-line option has been deprecated in
  // favor of "--redirect-output basename".
  if (cmdline_has_redirect_stdout)
    libmesh_warning("The --redirect-stdout command line option has been deprecated. "
                    "Use '--redirect-output basename' instead.");

  // Honor the "--redirect-stdout" and "--redirect-output basename"
  // command-line options.  When one of these is specified, each
  // processor sends libMesh::out/libMesh::err messages to
  // stdout.processor.#### (default) or basename.processor.####.
  if (cmdline_has_redirect_stdout || cmdline_has_redirect_output)
    {
      std::string basename = "stdout";

      // Look for following argument if using new API
      if (cmdline_has_redirect_output)
        {
          // Set the cursor to the correct location in the list of command line arguments.
          command_line->search(1, "--redirect-output");

          // Get the next option on the command line as a string.
          std::string next_string = "";
          next_string = command_line->next(next_string);

          // If the next string starts with a dash, we assume it's
          // another flag and not a file basename requested by the
          // user.
          if (next_string.size() > 0 && next_string.find_first_of("-") != 0)
            basename = next_string;
        }

      std::ostringstream filename;
      filename << basename << ".processor." << libMesh::global_processor_id();
      _ofstream.reset (new std::ofstream (filename.str().c_str()));

      // Redirect, saving the original streambufs!
      out_buf = libMesh::out.rdbuf (_ofstream->rdbuf());
      err_buf = libMesh::err.rdbuf (_ofstream->rdbuf());
    }

  // redirect libMesh::out to nothing on all
  // other processors unless explicitly told
  // not to via the --keep-cout command-line argument.
  if (libMesh::global_processor_id() != 0)
    if (!libMesh::on_command_line ("--keep-cout"))
      libMesh::out.rdbuf (nullptr);

  // Similarly, the user can request to drop cerr on all non-0 ranks.
  // By default, errors are printed on all ranks, but this can lead to
  // interleaved/unpredictable outputs when doing parallel regression
  // testing, which this option is designed to support.
  if (libMesh::global_processor_id() != 0)
    if (libMesh::on_command_line ("--drop-cerr"))
      libMesh::err.rdbuf (nullptr);

  // Check command line to override printing
  // of reference count information.
  if (libMesh::on_command_line("--disable-refcount-printing"))
    ReferenceCounter::disable_print_counter_info();

#ifdef LIBMESH_ENABLE_EXCEPTIONS
  // Set our terminate handler to write stack traces in the event of a
  // crash
  old_terminate_handler = std::set_terminate(libmesh_terminate_handler);
#endif


  if (libMesh::on_command_line("--enable-fpe"))
    libMesh::enableFPE(true);

  if (libMesh::on_command_line("--enable-segv"))
    libMesh::enableSEGV(true);

  // The library is now ready for use
  libMeshPrivateData::_is_initialized = true;


  // Make sure these work.  Library methods
  // depend on these being implemented properly,
  // so this is a good time to test them!
  libmesh_assert (libMesh::initialized());
  libmesh_assert (!libMesh::closed());
}
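Stripped of the libMesh wrappers, the MPI part of LibMeshInit boils down to: initialize MPI only if the host program has not already done so, and request MPI_THREAD_FUNNELED when more than one thread will be used. A minimal C sketch of that pattern (the function name init_mpi_if_needed is illustrative):

/* Hedged sketch: initialize MPI only if the caller has not, requesting
 * MPI_THREAD_FUNNELED when more than one thread will be used. */
#include <mpi.h>

int init_mpi_if_needed(int *argc, char ***argv, int n_threads) {
  int already = 0, provided = MPI_THREAD_SINGLE;
  MPI_Initialized(&already);
  if (!already) {
    int requested = (n_threads > 1) ? MPI_THREAD_FUNNELED : MPI_THREAD_SINGLE;
    MPI_Init_thread(argc, argv, requested, &provided);
    return 1;  /* we own the eventual MPI_Finalize */
  }
  return 0;    /* the caller initialized MPI; do not finalize it here */
}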