Esempio n. 1
0
int
main (int argc, char** argv)
{
  QMP_status_t status;
  int this_node;
  QMP_thread_level_t req, prv;

  /* Start QMP */
  req = QMP_THREAD_SINGLE;
  status = QMP_init_msg_passing (&argc, &argv, req, &prv);

  if (status != QMP_SUCCESS) {
    QMP_error ("QMP_init failed: %s\n", QMP_error_string(status));
    QMP_abort(1);
  }

  /* Get my logical node number */
  this_node = QMP_get_node_number();

  /* Print the result */
  printf("%04d",this_node);

  /* Quit */
  QMP_finalize_msg_passing ();

  return 0;
}
Esempio n. 2
0
void initCommsQuda(int argc, char **argv, const int *X, const int nDim) {

  if (nDim != 4) errorQuda("Comms dimensions %d != 4", nDim);

#ifdef MULTI_GPU

#ifdef QMP_COMMS
  QMP_thread_level_t tl;
  QMP_init_msg_passing(&argc, &argv, QMP_THREAD_SINGLE, &tl);

  QMP_declare_logical_topology(X, nDim);
#elif defined(MPI_COMMS)
  MPI_Init (&argc, &argv);  

  int volume = 1;
  for (int d=0; d<nDim; d++) volume *= X[d];
  int size = -1;
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  if (volume != size)
    errorQuda("Number of processes %d must match requested MPI volume %d",
	      size, volume);

  comm_set_gridsize(X[0], X[1], X[2], X[3]);  
  comm_init();
#endif

#endif
}
Esempio n. 3
0
int
main(int argc, char *argv[])
{
    QMP_thread_level_t qmp_unused_1;
    int                i;

    QMP_init_msg_passing(&argc, &argv, QMP_THREAD_SINGLE, &qmp_unused_1);
    
    if (argc != 14) {
	printf("usage: %s Lx Ly Lz Lt Ls Nx Ny Nz Nt nx ny nz nt\n", argv[0]);
	return 1;
    }

    for (i = 0; i < 5; i++)
	lattice_size[i] = atoi(argv[1 + i]);

    for (i = 0; i < 4; i++)
	network[i] = atoi(argv[6 + i]);

    for (i = 0; i < 4; i++)
	node[i] = atoi(argv[10 + i]);

    printf("lattice %d %d %d %d %d\n",
	   lattice_size[0],
	   lattice_size[1],
	   lattice_size[2],
	   lattice_size[3],
	   lattice_size[4]);
    printf("network %d %d %d %d\n",
	   network[0],
	   network[1],
	   network[2],
	   network[3]);
    printf("node %d %d %d %d\n",
	   node[0],
	   node[1],
	   node[2],
	   node[3]);
    
    L3(DWF_init)(lattice_size, NULL, NULL);

    dump_table("even_odd", &even_odd);
    dump_table("odd_even", &odd_even);
    
    return 0;
}
Esempio n. 4
0
int main (int argc, char** argv)
{
  int verbose;
  QMP_status_t status;
  QMP_thread_level_t req, prv;

  verbose = 0;
  if (argc > 1 && strcmp (argv[1], "-v") == 0)
    verbose = 1;
  
  QMP_verbose (verbose);
  req = QMP_THREAD_SINGLE;
  status = QMP_init_msg_passing (&argc, &argv, req, &prv);

  if (status != QMP_SUCCESS) {
    QMP_fprintf(stderr, "QMP_init failed\n");
    return -1;
  }

  {
    char p[288];
    int i;

    for(i=0; i < 288;++i)
      p[i] = 0;

    if (QMP_is_primary_node())
      for(i=0; i < 288;++i)
	p[i] = 65;

#if 1
    for(i=0; i < 10; ++i)
      QMP_broadcast(p, 288);
#else
    for(i=0; i < 10; ++i)
      stupid_broadcast(p, 288);
#endif

    QMP_info("result = %c",p[0]);
  }


  QMP_finalize_msg_passing ();

  return 0;
}
Esempio n. 5
0
int main(int argc, char **argv) {

#ifdef QMP_COMMS
  int ndim=4, dims[4];
  QMP_thread_level_t tl;
  QMP_init_msg_passing(&argc, &argv, QMP_THREAD_SINGLE, &tl);
  dims[0] = dims[1] = dims[2] = 1;
  dims[3] = QMP_get_number_of_nodes();
  QMP_declare_logical_topology(dims, ndim);
#endif  

  SU3Test();

#ifdef QMP_COMMS
  QMP_finalize_msg_passing();
#endif

  return 0;
}
Esempio n. 6
0
int qio_test(int output_volfmt, int output_serpar, int ildgstyle, 
	     int input_volfmt, int input_serpar, int argc, char *argv[]){

  float array_in[NARRAY], array_out[NARRAY];
  float *field_in[NREAL], *subset_in[NREAL], 
    *field_out[NREAL], *subset_out[NREAL];
  suN_matrix *field_su3_out[NMATRIX], *field_su3_in[NMATRIX];
  QIO_Writer *outfile;
  QIO_Reader *infile;
  float diff_field = 0, diff_array = 0, diff_su3 = 0, diff_subset = 0;
  QMP_thread_level_t provided;
  int status;
  int sites_on_node = 0;
  int i,volume;
  char filename[] = "binary_test";
  int dim = 4;
  int lower[4] = {1, 0, 0, 2};
  int upper[4] = {2, 3, 3, 2};
  char myname[] = "qio_test";
  
  /* Start message passing */
  QMP_init_msg_passing(&argc, &argv, QMP_THREAD_SINGLE, &provided);

  this_node = mynode();
  printf("%s(%d) QMP_init_msg_passing done\n",myname,this_node);

  /* Lattice dimensions */
  lattice_dim = 4;
  lattice_size[0] = 8;
  lattice_size[1] = 4;
  lattice_size[2] = 4;
  lattice_size[3] = 4;

  volume = 1;
  for(i = 0; i < lattice_dim; i++){
    volume *= lattice_size[i];
  }

  /* Set the mapping of coordinates to nodes */
  if(setup_layout(lattice_size, 4, QMP_get_number_of_nodes())!=0)
    return 1;
  printf("%s(%d) layout set for %d nodes\n",myname,this_node,
	 QMP_get_number_of_nodes());
  sites_on_node = num_sites(this_node);

  /* Build the layout structure */
  layout.node_number     = node_number;
  layout.node_index      = node_index;
  layout.get_coords      = get_coords;
  layout.num_sites       = num_sites;
  layout.latsize         = lattice_size;
  layout.latdim          = lattice_dim;
  layout.volume          = volume;
  layout.sites_on_node   = sites_on_node;
  layout.this_node       = this_node;
  layout.number_of_nodes = QMP_get_number_of_nodes();

  /* Open the test output file */
  outfile = open_test_output(filename, output_volfmt, output_serpar, 
			     ildgstyle, myname);
  if(outfile == NULL)return 1;

  /* If this is not the ILDG file test */
  if(ildgstyle == QIO_ILDGNO){
    /* Create the test output field */
    status = vcreate_R(field_out, NREAL);
    if(status)return status;
    
    /* Set some values for the field */
    vset_R(field_out, NREAL);
    
    /* Write the real test field */
    status = write_real_field(outfile, NREAL, field_out, myname);
    if(status)return status;
    
    /* Write a subset of the real test field */
    status = write_real_field_subset(outfile, NREAL, field_out, 
				     lower, upper, dim, myname);
    if(status)return status;
    
    /* Set some values for the global array */
    for(i = 0; i < NARRAY; i++)
      array_out[i] = i;
    
    /* Write the real global array */
    status = write_real_global(outfile, NARRAY, array_out, myname);
    if(status)return status;
  }

  /* Create the test output su3 field */
  status = vcreate_M(field_su3_out, NMATRIX);
  if(status)return status;

  /* Set some values for the su3 field */
  vset_M(field_su3_out, NMATRIX);

  /* Write the su3 test field */
  status = write_su3_field(outfile, NMATRIX, field_su3_out, myname);
  if(status)return status;

  /* Close the file */
  QIO_close_write(outfile);
  printf("%s(%d): Closed file for writing\n",myname,this_node);

  /* Set up a dummy input field */
  status = vcreate_R(field_in, NREAL);
  if(status)return status;
    
  /* Set up a dummy input field for subset */
  status = vcreate_R(subset_in, NREAL);
  if(status)return status;
    
  /* Set up a dummy input SU(N) field */
  status = vcreate_M(field_su3_in, NMATRIX);
  if(status)return status;

  /* Open the test file for reading */
  infile = open_test_input(filename, input_volfmt, input_serpar, myname);
  if(infile == NULL)return 1;

  if(ildgstyle == QIO_ILDGNO){
    /* Peek at the field record */
    status = peek_record_info(infile, myname);
    if(status != QIO_SUCCESS)return status;
    /* Skip the record */

#if(0)
    
    /* Skip the field */
    status = QIO_next_record(infile);
    if(status != QIO_SUCCESS)return status;
    
#else
    
    /* Read the field record */
    printf("%s(%d) reading real field\n",myname,this_node); fflush(stdout);
    status = read_real_field(infile, NREAL, field_in, myname);
    if(status)return status;
    
#endif

    /* Read the subset of the field */
    printf("%s(%d) reading subset of real field\n",
	   myname,this_node); fflush(stdout);
    status = read_real_field_subset(infile, NREAL, subset_in, myname);
    if(status)return status;
    
    /* Read the global array record */
    printf("%s(%d) reading global field\n",myname,this_node); fflush(stdout);
    status = read_real_global(infile, NARRAY, array_in, myname);
    if(status)return status;

  }    

  /* Read the su3 field record */
  printf("%s(%d) reading su3 field\n",myname,this_node); fflush(stdout);
  status = read_su3_field(infile, NMATRIX, field_su3_in, myname);
  if(status)return status;

  /* Close the file */
  QIO_close_read(infile);
  printf("%s(%d): Closed file for reading\n",myname,this_node);

  if(ildgstyle == QIO_ILDGNO){

    /* Compare the input and output fields */
    diff_field = vcompare_R(field_out, field_in, NREAL);
    if(this_node == 0){
      printf("%s(%d): Comparison of in and out real fields |in - out|^2 = %e\n",
	     myname,this_node,diff_field);
    }
    
    /* Create the subset output field */
    status = vcreate_R(subset_out, NREAL);
    if(status)return status;

    /* Copy the subset */
    vsubset_R(subset_out, field_out, lower, upper, NREAL);
    
    /* Compare the input and output subsets */
    diff_subset = vcompare_R(subset_out, subset_in, NREAL);
    if(this_node == 0){
      printf("%s(%d): Comparison of subsets of in and out real fields |in - out|^2 = %e\n",
	     myname,this_node,diff_subset);
    }
    
    /* Compare the input and output global arrays */
    diff_array = vcompare_r(array_out, array_in, NREAL);
    if(this_node == 0){
      printf("%s(%d): Comparison of in and out real global arrays |in - out|^2 = %e\n",
	     myname, this_node, diff_array);
    }
  }

  /* Compare the input and output suN fields */
  diff_su3 = vcompare_M(field_su3_out, field_su3_in, NMATRIX);
  if(this_node == 0){
    printf("%s(%d): Comparison of in and out suN fields |in - out|^2 = %e\n",
	   myname, this_node, diff_field);
  }

  /* Clean up */
  if(ildgstyle == QIO_ILDGNO){
    vdestroy_R(field_out, NREAL);
    vdestroy_R(field_in, NREAL);
    vdestroy_R(subset_in, NREAL);
    vdestroy_R(subset_out, NREAL);
  }
  vdestroy_M(field_su3_in, NMATRIX);
  vdestroy_M(field_su3_out, NMATRIX);

  /* Shut down QMP */
  QMP_finalize_msg_passing();

  /* Report result */
  if(diff_field + diff_subset + diff_su3 + diff_array > 0){
    printf("%s(%d): Test failed\n",myname,this_node);
    return 1;
  }
  printf("%s(%d): Test passed\n",myname,this_node);

  return 0;
}
Esempio n. 7
0
int
main(int argc, char *argv[])
{
  struct QOP_MDWF_State *mdwf_state = NULL;
  QMP_thread_level_t qt = QMP_THREAD_SINGLE;
  int status = 1;
  int vx[4];
  int i;

  if (QMP_init_msg_passing(&argc, &argv, qt, &qt) != QMP_SUCCESS) {
    fprintf(stderr, "QMP_init() failed\n");
    return 1;
  }

  for (i = 0; i < NELEM(b5); i++) {
    b5[i] = 0.1 * i * (NELEM(b5) - i);
    c5[i] = 0.1 * i * i * (NELEM(b5) - i);
  }

  self = QMP_get_node_number();
  primary = QMP_is_primary_node();
  for (i = 0; i < argc; i++)
    zprint("arg[%d]=%s", i, argv[i]);
  if (argc != 14) {
    zprint("14 arguments expected, found %d", argc);
    QMP_finalize_msg_passing();
    return 1;
  }

  for (i = 0; i < 4; i++) {
    mynetwork[i] = atoi(argv[i+1]);
    mylocal[i] = atoi(argv[i+5]);
    vx[i] = atoi(argv[i+10]);
    mylattice[i] = mylocal[i] * mynetwork[i];
  }
  mylocal[4] = mylattice[4] = atoi(argv[9]);

  zshowv4("network", mynetwork);
  zshowv5("local lattice", mylocal);
  zshowv5("lattice", mylattice);

  getv(mynode, 0, 4, vx);

  xshowv("node", mynode);

  if (QOP_MDWF_init(&mdwf_state,
		    mylattice, mynetwork, mynode, primary,
		    getsub, NULL)) {
    zprint("MDWF_init() failed");
    goto end;
  }

  zprint("MDWF_init() done");

  dump_state(mdwf_state);

  QOP_MDWF_fini(&mdwf_state);

  status = 0;
 end:
  QMP_finalize_msg_passing();
  return status;
}
Esempio n. 8
0
int
main(int argc, char *argv[])
{
  struct QOP_MDWF_State *mdwf_state = NULL;
  struct QOP_MDWF_Parameters *mdwf_params = NULL;
  QMP_thread_level_t qt = QMP_THREAD_SINGLE;
  int status = 1;
  int i;

  if (QMP_init_msg_passing(&argc, &argv, qt, &qt) != QMP_SUCCESS) {
    fprintf(stderr, "QMP_init() failed\n");
    return 1;
  }

  for (i = 0; i < NELEM(b5); i++) {
    b5[i] = 0.1 * i * (NELEM(b5) - i);
    c5[i] = 0.1 * i * i * (NELEM(b5) - i);
  }

  self = QMP_get_node_number();
  primary = QMP_is_primary_node();
  if (argc != 7) {
    zprint("7 arguments expected, found %d", argc);
    zprint("usage: localheat Lx Ly Lz Lt Ls time");
    QMP_finalize_msg_passing();
    return 1;
  }

  for (i = 0; i < 4; i++) {
    mynetwork[i] = 1;
    mylocal[i] = atoi(argv[i+1]);
    mylattice[i] = mylocal[i] * mynetwork[i];
  }
  mylocal[4] = mylattice[4] = atoi(argv[5]);
  total_sec = atoi(argv[6]);

  zshowv4("network", mynetwork);
  zshowv5("local lattice", mylocal);
  zshowv5("lattice", mylattice);
  zprint("total requested runtime %.0f sec", total_sec);

#if 0
  if (QMP_declare_logical_topology(mynetwork, 4) != QMP_SUCCESS) {
    zprint("declare_logical_top failed");
    goto end;
  }

  getv(mynode, 0, QMP_get_logical_number_of_dimensions(),
       QMP_get_logical_coordinates());
#else
  { int i;
    for (i = 0; i < 4; i++)
       mynode[i] = 0;
  }
#endif

  if (QOP_MDWF_init(&mdwf_state,
		    mylattice, mynetwork, mynode, primary,
		    getsub, NULL)) {
    zprint("MDWF_init() failed");
    goto end;
  }

  zprint("MDWF_init() done");

  if (QOP_MDWF_set_generic(&mdwf_params, mdwf_state, b5, c5, 0.123, 0.05)) {
    zprint("MDW_set_generic() failed");
    goto end;
  }
  zprint("MDWF_set_generic() done");

  if (do_run(mdwf_state, mdwf_params)) {
    zprint("float test failed");
    goto end;
  }

  QOP_MDWF_fini(&mdwf_state);

  zprint("Heater test finished");
  status = 0;
 end:
  QMP_finalize_msg_passing();
  return status;
}
Esempio n. 9
0
void init_qmp(int * argc, char ***argv) {

#if 0
  printf("init_qmp(%d %p)\n",*argc,*argv);
  for(int i = 0; i<*argc;i++){
    printf("argv[%d](before)=%s\n",i,(*argv)[i]); 
  }
#endif

#if 0
   spi_init();
#endif
  
    QMP_thread_level_t prv;
#ifndef UNIFORM_SEED_NO_COMMS
    QMP_status_t init_status = QMP_init_msg_passing(argc, argv, QMP_THREAD_SINGLE, &prv);
    if (init_status) printf("QMP_init_msg_passing returned %d\n",init_status);
    peRank = QMP_get_node_number();
    peNum = QMP_get_number_of_nodes();
    if(!peRank)printf("QMP_init_msg_passing returned %d\n",init_status);

    if (init_status != QMP_SUCCESS) {
      QMP_error("%s\n",QMP_error_string(init_status));
    }

    // check QMP thread level
    // Added by Hantao
    if(peRank == 0) {
        switch(prv) {
        case QMP_THREAD_SINGLE:
            printf("QMP thread level = QMP_THREAD_SINGLE\n");
            break;
        case QMP_THREAD_FUNNELED:
            printf("QMP thread level = QMP_THREAD_FUNNELED\n");
            break;
        case QMP_THREAD_SERIALIZED:
            printf("QMP thread level = QMP_THREAD_SERIALIZED\n");
            break;
        case QMP_THREAD_MULTIPLE:
            printf("QMP thread level = QMP_THREAD_MULTIPLE\n");
            break;
        default:
            printf("QMP thread level = no idea what this is, boom!\n");
        }
    }

    //Check to make sure that this machine is a GRID machine
    //Exit if not GRID machine
    QMP_ictype qmp_type = QMP_get_msg_passing_type();

    //Get information about the allocated machine
    peNum = QMP_get_number_of_nodes();
    NDIM = QMP_get_allocated_number_of_dimensions();
    peGrid = QMP_get_allocated_dimensions();
    pePos = QMP_get_allocated_coordinates();

    if(peRank==0){
      for(int i = 0; i<*argc;i++){
        printf("argv[%d])(after)=%s\n",i,(*argv)[i]); 
      }
    }
#else
    QMP_status_t init_status = QMP_SUCCESS;
    peRank=0;
    peNum=1;
    NDIM=4;
#endif

//#if (TARGET == BGL) || (TARGET == BGP)
  if (NDIM>5){
    peNum = 1;
    for(int i = 0;i<5;i++)
	peNum *= peGrid[i];
    peRank = peRank % peNum;
  }
  int if_print=1;
  for(int i = 0;i<NDIM;i++)
  if (pePos[i]>=2) if_print=0;

  if (if_print){
      printf("Rank=%d Num=%d NDIM=%d\n",peRank,peNum,NDIM);
      printf("dim:");
      for(int i = 0;i<NDIM;i++)
        printf(" %d",peGrid[i]);
      printf("\n");
      printf("pos:");
      for(int i = 0;i<NDIM;i++)
        printf(" %d",pePos[i]);
      printf("\n");

#if 0
    int rc;
    BGLPersonality pers;
    rts_get_personality(&pers, sizeof(pers));
    printf("from personality: %d %d %d %d\n",pers.xCoord,pers.yCoord,pers.zCoord,rts_get_processor_id());
#endif
  }


//     printf("from personality:\n");

#if 0
    if ( (qmp_type!= QMP_GRID) && (qmp_type !=QMP_MESH)  ) {
      QMP_error("CPS on QMP only implemented for GRID or MESH, not (%d) machines\n",qmp_type);
    }
#endif

//     printf("QMP_declare_logical_topology(peGrid, NDIM)\n");
#ifndef UNIFORM_SEED_NO_COMMS
    //Declare the logical topology (Redundant for GRID machines)
    if (QMP_declare_logical_topology(peGrid, NDIM) != QMP_SUCCESS) {
      QMP_error("Node %d: Failed to declare logical topology\n",peRank);
      exit(-4);
    }
#endif
    initialized = true;
  printf("Rank=%d init_qmp() done\n",peRank);
    
  }
Esempio n. 10
0
int main(int argc, char** argv)
{
  for (int i = 1; i < argc; i++) {

    if(process_command_line_option(argc, argv, &i) == 0){
      continue;
    }   

    if( strcmp(argv[i], "--tol") == 0){
      float tmpf;
      if (i+1 >= argc){
        usage(argv);
      }
      sscanf(argv[i+1], "%f", &tmpf);
      if (tmpf <= 0){
        printf("ERROR: invalid tol(%f)\n", tmpf);
        usage(argv);
      }
      tol = tmpf;
      i++;
      continue;
    }

    if( strcmp(argv[i], "--cpu_prec") == 0){
      if (i+1 >= argc){
        usage(argv);
      }
      cpu_prec= get_prec(argv[i+1]);
      i++;
      continue;
    }

    printf("ERROR: Invalid option:%s\n", argv[i]);
    usage(argv);
  }

  if (prec_sloppy == QUDA_INVALID_PRECISION){
    prec_sloppy = prec;
  }
  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID){
    link_recon_sloppy = link_recon;
  }


  // initialize QMP or MPI
#if defined(QMP_COMMS)
  QMP_thread_level_t tl;
  QMP_init_msg_passing(&argc, &argv, QMP_THREAD_SINGLE, &tl);
#elif defined(MPI_COMMS)
  MPI_Init(&argc, &argv);
#endif

  // call srand() with a rank-dependent seed
  initRand();

  display_test_info();

  int ret = invert_test();

  display_test_info();


  // finalize the communications layer
#if defined(QMP_COMMS)
  QMP_finalize_msg_passing();
#elif defined(MPI_COMMS)
  MPI_Finalize();
#endif

  return ret;
}
Esempio n. 11
0
int
main (int argc, char** argv)
{
  int             i, nc;
  QMP_status_t      status;
  int       **smem, **rmem;
  QMP_msgmem_t    *recvmem;
  QMP_msghandle_t *recvh;
  QMP_msgmem_t    *sendmem;
  QMP_msghandle_t *sendh;
  struct perf_argv pargv;
  QMP_thread_level_t req, prv;

  /** 
   * Simple point to point topology 
   */
  int dims[4] = {2,2,2,2};
  int ndims = 1;

  //if(QMP_get_node_number()==0)
  //printf("starting init\n"); fflush(stdout);
  req = QMP_THREAD_SINGLE;
  status = QMP_init_msg_passing (&argc, &argv, req, &prv);
  if (status != QMP_SUCCESS) {
    fprintf (stderr, "QMP_init failed\n");
    return -1;
  }
  if(QMP_get_node_number()==0)
    printf("finished init\n"); fflush(stdout);

  if (parse_options (argc, argv, &pargv) == -1) {
    if(QMP_get_node_number()==0)
      usage (argv[0]);
    exit (1);
  }

  {
    int maxdims = 4;
    int k=0;
    int nodes = QMP_get_number_of_nodes();
    ndims = 0;
    while( (nodes&1) == 0 ) {
      if(ndims<maxdims) ndims++;
      else {
	dims[k] *= 2;
	k++;
	if(k>=maxdims) k = 0;
      }
      nodes /= 2;
    }
    if(nodes != 1) {
      QMP_error("invalid number of nodes %i", QMP_get_number_of_nodes());
      QMP_error(" must power of 2");
      QMP_abort(1);
    }
    pargv.ndims = ndims;
  }

  status = QMP_declare_logical_topology (dims, ndims);
  if (status != QMP_SUCCESS) {
    fprintf (stderr, "Cannot declare logical grid\n");
    return -1;
  }

  /* do a broadcast of parameter */
  if (QMP_broadcast (&pargv, sizeof (pargv)) != QMP_SUCCESS) {
    QMP_printf ("Broadcast parameter failed\n");
    exit (1);
  }

  {
    int k=1;
    const int *lc = QMP_get_logical_coordinates();
    for(i=0; i<ndims; i++) k += lc[i];
    pargv.sender = k&1;
  }

  QMP_printf("%s options: num_channels[%d] verify[%d] option[%d] datasize[%d] numloops[%d] sender[%d] strided_send[%i] strided_recv[%i] strided_array_send[%i] ",
	     argv[0], pargv.num_channels, pargv.verify, 
	     pargv.option, pargv.size, pargv.loops, pargv.sender,
	     strided_send, strided_recv, strided_array_send);
  fflush(stdout);


  /**
   * Create memory
   */
  nc = pargv.num_channels;
  smem = (int **)malloc(nc*sizeof (int *));
  rmem = (int **)malloc(nc*sizeof (int *));
  sendmem = (QMP_msgmem_t *)malloc(ndims*nc*sizeof (QMP_msgmem_t));
  recvmem = (QMP_msgmem_t *)malloc(ndims*nc*sizeof (QMP_msgmem_t));
  sendh = (QMP_msghandle_t *)malloc(nc*sizeof (QMP_msghandle_t));
  recvh = (QMP_msghandle_t *)malloc(nc*sizeof (QMP_msghandle_t));

  QMP_barrier();
  if(QMP_get_node_number()==0) printf("\n"); fflush(stdout);
  if(pargv.option & TEST_SIMUL) {
    int opts = pargv.option;
    pargv.option = TEST_SIMUL;
    if(QMP_get_node_number()==0)
      QMP_printf("starting simultaneous sends"); fflush(stdout);
    for(i=pargv.minsize; i<=pargv.maxsize; i*=pargv.facsize) {
      pargv.size = i;
      create_msgs(smem, rmem, sendmem, recvmem, sendh, recvh, ndims, nc, i, &pargv);
      test_simultaneous_send (smem, rmem, sendh, recvh, &pargv);
      check_mem(rmem, ndims, nc, i);
      free_msgs(smem, rmem, sendmem, recvmem, sendh, recvh, ndims, nc);
    }
    if(QMP_get_node_number()==0)
      QMP_printf("finished simultaneous sends\n"); fflush(stdout);
    pargv.option = opts;
  }

  if(pargv.option & TEST_PINGPONG) {
    int opts = pargv.option;
    pargv.option = TEST_PINGPONG;
    if(QMP_get_node_number()==0)
      QMP_printf("starting ping pong sends"); fflush(stdout);
    for(i=pargv.minsize; i<=pargv.maxsize; i*=pargv.facsize) {
      pargv.size = i;
      create_msgs(smem, rmem, sendmem, recvmem, sendh, recvh, ndims, nc, i, &pargv);
      if(pargv.verify)
	test_pingpong_verify(smem, rmem, sendh, recvh, &pargv);
      else
	test_pingpong(smem, rmem, sendh, recvh, &pargv);
      check_mem(rmem, ndims, nc, i);
      free_msgs(smem, rmem, sendmem, recvmem, sendh, recvh, ndims, nc);
    }
    if(QMP_get_node_number()==0)
      QMP_printf("finished ping pong sends\n"); fflush(stdout);
    pargv.option = opts;
  }

  if(pargv.option & TEST_ONEWAY) {
    int opts = pargv.option;
    pargv.option = TEST_ONEWAY;
    if(QMP_get_node_number()==0)
      QMP_printf("starting one way sends"); fflush(stdout);
    for(i=pargv.minsize; i<=pargv.maxsize; i*=pargv.facsize) {
      pargv.size = i;
      create_msgs(smem, rmem, sendmem, recvmem, sendh, recvh, ndims, nc, i, &pargv);
      test_oneway (smem, rmem, sendh, recvh, &pargv);
      if(!pargv.sender) check_mem(rmem, ndims, nc, i);
      free_msgs(smem, rmem, sendmem, recvmem, sendh, recvh, ndims, nc);
    }
    if(QMP_get_node_number()==0)
      QMP_printf("finished one way sends"); fflush(stdout);
    pargv.option = opts;
  }


  /**
   * Free memory 
   */
  free (smem);
  free (rmem);

  free (sendh);
  free (recvh);

  free (sendmem);
  free (recvmem);

  QMP_finalize_msg_passing ();

  return 0;
}