Пример #1
0
int main(int argc, char **argv) {
  int help=0;
  int arg=1;

  gasnet_handlerentry_t htable[] = { 
    { hidx_ping_shorthandler,  ping_shorthandler  },
    { hidx_pong_shorthandler,  pong_shorthandler  },
    { hidx_ping_medhandler,    ping_medhandler    },
    { hidx_pong_medhandler,    pong_medhandler    },
    { hidx_ping_longhandler,   ping_longhandler   },
    { hidx_pong_longhandler,   pong_longhandler   },

    { hidx_ping_shorthandler_flood,  ping_shorthandler_flood  },
    { hidx_pong_shorthandler_flood,  pong_shorthandler_flood  },
    { hidx_ping_medhandler_flood,    ping_medhandler_flood    },
    { hidx_pong_medhandler_flood,    pong_medhandler_flood    },
    { hidx_ping_longhandler_flood,   ping_longhandler_flood   },
    { hidx_pong_longhandler_flood,   pong_longhandler_flood   },

    { hidx_done_shorthandler,  done_shorthandler  }
  };

  GASNET_Safe(gasnet_init(&argc, &argv));

  mynode = gasnet_mynode();
  numnode = gasnet_nodes();

  arg = 1;
  while (argc > arg) {
    if (!strcmp(argv[arg], "-p")) {
#if GASNET_PAR
      pollers = atoi(argv[arg+1]);
      arg += 2;
#else
      if (0 == mynode) {
        fprintf(stderr, "testam %s\n", GASNET_CONFIG_STRING);
        fprintf(stderr, "ERROR: The -p option is only available in the PAR configuration.\n");
        fflush(NULL);
      }
      sleep(1);
      gasnet_exit(1);
#endif
    } else if (!strcmp(argv[arg], "-in")) {
      insegment = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-out")) {
      insegment = 0;
      ++arg;
    } else if (!strcmp(argv[arg], "-c")) {
      crossmachinemode = 1;
      ++arg;
    } else if (argv[arg][0] == '-') {
      help = 1;
      ++arg;
    } else break;
  }

  if (argc > arg) { iters = atoi(argv[arg]); ++arg; }
  if (!iters) iters = 1000;
  if (argc > arg) { maxsz = atoi(argv[arg]); ++arg; }
  if (!maxsz) maxsz = 2*1024*1024;
  if (argc > arg) { TEST_SECTION_PARSE(argv[arg]); ++arg; }

  GASNET_Safe(gasnet_attach(htable, sizeof(htable)/sizeof(gasnet_handlerentry_t),
                            TEST_SEGSZ_REQUEST, TEST_MINHEAPOFFSET));
#if GASNET_PAR
  test_init("testam", 1, "[options] (iters) (maxsz) (test_sections)\n"
               "  The '-in' or '-out' option selects whether the requestor's\n"
               "    buffer is in the GASNet segment or not (default is 'in').\n"
               "  The -p option gives the number of polling threads, specified as\n"
               "    a non-negative integer argument (default is no polling threads).\n"
               "  The -c option enables cross-machine pairing (default is nearest neighbor).\n");
#else
  test_init("testam", 1, "[options] (iters) (maxsz) (test_sections)\n"
               "  The '-in' or '-out' option selects whether the requestor's\n"
               "    buffer is in the GASNet segment or not (default is 'in').\n"
               "  The -c option enables cross-machine pairing (default is nearest neighbor).\n");
#endif
  if (help || argc > arg) test_usage();

  TEST_PRINT_CONDUITINFO();

  if (insegment) {
    myseg = TEST_MYSEG();
  } else {
    char *space = test_malloc(alignup(maxsz,PAGESZ) + PAGESZ);
    myseg = alignup_ptr(space, PAGESZ);
  }

  maxmed = MIN(maxsz, gasnet_AMMaxMedium());
  maxlongreq = MIN(maxsz, gasnet_AMMaxLongRequest());
  maxlongrep = MIN(maxsz, gasnet_AMMaxLongReply());

  if (crossmachinemode) {
    if ((numnode%2) && (mynode == numnode-1)) {
      sender = 1;
      peer = mynode;
    } else {
      gasnet_node_t half = numnode / 2;
      sender = (mynode < half);
      peer = sender ? (mynode + half) : (mynode - half);
    }
  } else {
    peer = mynode ^ 1;
    sender = mynode % 2 == 0;
    if (peer == numnode) {
      peer = mynode;
    }
  }

  recvr = !sender || (peer == mynode);

  peerseg = TEST_SEG(peer);

  BARRIER();

  if (mynode == 0) {
      printf("Running %sAM performance test with %i iterations"
#if GASNET_PAR
             " and %i extra recvr polling threads"
#endif
             "...\n",
             (crossmachinemode ? "cross-machine ": ""),
             iters
#if GASNET_PAR
             ,pollers
#endif
            );
      printf("   Msg Sz  Description                             Total time   Avg. time   Bandwidth\n"
             "   ------  -----------                             ----------   ---------   ---------\n");
      fflush(stdout);
  }
#if GASNET_PAR
  if (pollers)
    test_createandjoin_pthreads(pollers+1,doAll,NULL,0);
  else
#endif
    doAll(NULL);

  MSG("done.");

  gasnet_exit(0);
  return 0;
}
Пример #2
0
void process_tasks(long process_id)
{
    Task *t ;

    t = DEQUEUE_TASK( taskqueue_id[process_id], QUEUES_VISITED, process_id ) ;

retry_entry:
    while( t )
    {
        switch( t->task_type )
        {
        case TASK_MODELING:
            process_model( t->task.model.model, t->task.model.type, process_id ) ;
            break ;
        case TASK_BSP:
            define_patch( t->task.bsp.patch, t->task.bsp.parent, process_id ) ;
            break ;
        case TASK_FF_REFINEMENT:
            ff_refine_elements( t->task.ref.e1, t->task.ref.e2, 0, process_id ) ;
            break ;
        case TASK_RAY:
            process_rays( t->task.ray.e, process_id ) ;
            break ;
        case TASK_VISIBILITY:
            visibility_task( t->task.vis.e, t->task.vis.inter,
                             t->task.vis.n_inter, t->task.vis.k, process_id ) ;
            break ;
        case TASK_RAD_AVERAGE:
            radiosity_averaging( t->task.rad.e, t->task.rad.mode, process_id ) ;
            break ;
        default:
            fprintf( stderr, "Panic:process_tasks:Illegal task type\n" );
        }

        /* Free the task */
        free_task( t, process_id ) ;

        /* Get next task */
        t = DEQUEUE_TASK( taskqueue_id[process_id], QUEUES_VISITED, process_id ) ;
    }


    /* Barrier. While waiting for other processors to finish, poll the task
       queues and resume processing if there is any task */

    LOCK(global->pbar_lock);
    /* Reset the barrier counter if not initialized */
    if( global->pbar_count >= n_processors )
        global->pbar_count = 0 ;

    /* Increment the counter */
    global->pbar_count++ ;
    UNLOCK(global->pbar_lock);

    /* barrier spin-wait loop */
    while( global->pbar_count < n_processors )
    {
        /* Wait for a while and then retry dequeue */
        if( _process_task_wait_loop() )
            break ;

        /* Waited for a while but other processors are still running.
           Poll the task queue again */
        t = DEQUEUE_TASK( taskqueue_id[process_id], QUEUES_VISITED, process_id ) ;
        if( t )
        {
            /* Task found. Exit the barrier and work on it */
            LOCK(global->pbar_lock);
            global->pbar_count-- ;
            UNLOCK(global->pbar_lock);
            goto retry_entry ;
        }

    }

    BARRIER(global->barrier, n_processors);
}
Пример #3
0
void Render_Loop()
{
    long step,i;
    PIXEL *local_image_address;
    MPIXEL *local_mask_image_address;
    char outfile[FILENAME_STRING_SIZE];
    long image_partition,mask_image_partition;
    float inv_num_nodes;
    long my_node;

    LOCK(Global->IndexLock);
    my_node = Global->Index++;
    UNLOCK(Global->IndexLock);
    my_node = my_node%num_nodes;

    BARINCLUDE(Global->TimeBarrier);
    BARINCLUDE(Global->SlaveBarrier);

    /*  POSSIBLE ENHANCEMENT:  Here's where one might bind the process to a
        processor, if one wanted to.
    */

    inv_num_nodes = 1.0/(float)num_nodes;
    image_partition = ROUNDUP(image_length*inv_num_nodes);
    mask_image_partition = ROUNDUP(mask_image_length*inv_num_nodes);

#ifdef DIM
    int dim;
    for (dim=0; dim<NM; dim++) {
#endif

        for (step=0; step<ROTATE_STEPS; step++) { /* do rotation sequence */

            /*  POSSIBLE ENHANCEMENT:  Here is where one might reset statistics, if
                		one wanted to.
            */

            frame = step;
            /* initialize images here */
            local_image_address = image_address + image_partition * my_node;
            local_mask_image_address = mask_image_address +
                                       mask_image_partition * my_node;

            BARRIER(Global->SlaveBarrier,num_nodes);

            if (my_node == num_nodes-1) {
                for (i=image_partition*my_node; i<image_length; i++)
                    *local_image_address++ = background;
                if (adaptive)
                    for (i=mask_image_partition*my_node; i<mask_image_length; i++)
                        *local_mask_image_address++ = NULL_PIXEL;
            }
            else {
                for (i=0; i<image_partition; i++)
                    *local_image_address++ = background;
                if (adaptive)
                    for (i=0; i<mask_image_partition; i++)
                        *local_mask_image_address++ = NULL_PIXEL;
            }

            if (my_node == ROOT) {
#ifdef DIM
                Select_View((float)STEP_SIZE, dim);
#else
                Select_View((float)STEP_SIZE, Y);
#endif
            }

            BARRIER(Global->SlaveBarrier,num_nodes);

            Global->Counter = num_nodes;
            Global->Queue[num_nodes][0] = num_nodes;
            Global->Queue[my_node][0] = 0;

            Render(my_node);

            if (my_node == ROOT) {
                if (ROTATE_STEPS > 1) {
#ifdef DIM
                    sprintf(outfile, "%s_%ld",filename, 1000+dim*ROTATE_STEPS+step);
#else
                    sprintf(outfile, "%s_%ld.tiff",filename, 1000+step);
#endif
                    /*	  Store_Image(outfile);
                              p = image_address;
                              for (zz = 0;zz < image_length;zz++) {
                                tiff_image[zz] = (long) ((*p)*256*256*256 + (*p)*256*256 +
                    				    (*p)*256 + (*p));
                    	    p++;
                              }
                    tiff_save_rgba(outfile,tiff_image,image_len[X],image_len[Y]);  */
                    WriteGrayscaleTIFF(outfile, image_len[X],image_len[Y],image_len[X], image_address);
                } else {
                    /*	  Store_Image(filename);
                    	  p = image_address;
                              for (zz = 0;zz < image_length;zz++) {
                                tiff_image[zz] = (long) ((*p)*256*256*256 + (*p)*256*256 +
                    				    (*p)*256 + (*p));
                    	    p++;
                              }
                    tiff_save_rgba(filename,tiff_image,image_len[X],image_len[Y]);    */
                    strcat(filename,".tiff");
                    WriteGrayscaleTIFF(filename, image_len[X],image_len[Y],image_len[X], image_address);
                }
            }
        }
#ifdef DIM
    }
#endif
}
Пример #4
0
int main(int argc, char **argv) {
  int iters = 0;
  int arg;
  void *alloc = NULL;
  int firstlastmode = 0;
  int fullduplexmode = 0;
  int crossmachinemode = 0;
  int singlesender = 0;
  int help = 0;   

  /* call startup */
  GASNET_Safe(gasnet_init(&argc, &argv));

  /* parse arguments */
  arg = 1;
  while (argc > arg) {
    if (!strcmp(argv[arg], "-in")) {
      insegment = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-out")) {
      insegment = 0;
      ++arg;
    } else if (!strcmp(argv[arg], "-sl")) {
      ++arg;
      if (argc > arg) { stridelevels = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-mincontig")) {
      ++arg;
      if (argc > arg) { min_contig = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-maxcontig")) {
      ++arg;
      if (argc > arg) { max_contig = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-contigfactor")) {
      ++arg;
      if (argc > arg) { contigfactor = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-mindata")) {
      ++arg;
      if (argc > arg) { min_payload = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-maxdata")) {
      ++arg;
      if (argc > arg) { max_payload = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-datafactor")) {
      ++arg;
      if (argc > arg) { datafactor = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-densitysteps")) {
      ++arg;
      if (argc > arg) { densitysteps = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-f")) {
      firstlastmode = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-c")) {
      crossmachinemode = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-a")) {
      fullduplexmode = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-p")) {
      dogets = 0; doputs = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-g")) {
      dogets = 1; doputs = 0;
      ++arg;
    } else if (!strcmp(argv[arg], "-r")) {
      remotecontig = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-l")) {
      localcontig = 1;
      ++arg;
    } else if (argv[arg][0] == '-') {
      help = 1;
      ++arg;
    } else break;
  }

  if (argc > arg) { iters = atoi(argv[arg]); arg++; }
  if (!iters) iters = 1000;
  if (argc > arg) { TEST_SECTION_PARSE(argv[arg]); arg++; }
  if (min_contig && max_contig && min_contig > max_contig) { ERR("min_contig > max_contig"); help = 1; }
  if (min_payload && max_payload && min_payload > max_payload) { ERR("min_payload > max_payload"); help = 1; }
  if (min_payload && min_contig && min_payload < min_contig) { ERR("min_payload < min_contig"); help = 1; }
  if (max_contig && max_payload && max_contig > max_payload) { ERR("max_contig > max_payload"); help = 1; }
  if (contigfactor < 2) { ERR("contigfactor < 2"); help = 1; }
  if (datafactor < 2) { ERR("datafactor < 2"); help = 1; }

  if (!max_payload) max_payload = 2*1024*1024; /* 2 MB default */
  #ifdef GASNET_SEGMENT_EVERYTHING
    maxsz = gasnet_getMaxGlobalSegmentSize();
  #else
    maxsz = 16*1024*1024;
  #endif
  max_payload = (int)MIN(maxsz, max_payload);
  maxsz = MIN(((uint64_t)max_payload) * densitysteps,maxsz);
  if (!min_contig) min_contig = 8;
  if (!max_contig) max_contig = MIN(256*1024,max_payload);
  if (!min_payload) min_payload = min_contig;

  GASNET_Safe(gasnet_attach(NULL, 0, TEST_SEGSZ_REQUEST, TEST_MINHEAPOFFSET));
  test_init("testvisperf",1, "[options] (iters) (test_sections)\n"
             "  -p/-g     selects puts only or gets only (default is both).\n"
             "  -r/-l     selects remotely contiguous or locally contiguous (default is neither).\n"
             "  -mindata/-maxdata <sz>   \n"
             "            selects sz as min/max data payload per operation.\n"
             "  -mincontig/-maxcontig <sz>   \n"
             "            selects sz as min/max contig size.\n"
             "  -datafactor/-contigfactor <f>   \n"
             "            selects f as growth factor for data/contig sizes.\n"
             "  -densitysteps <d>   \n"
             "            selects d density steps, inclusive from 100%..100/d%\n"
             "  -sl <n>   selects n striding levels (default is 2).\n"
             "  -in/-out  selects whether the initiator-side\n"
             "            memory is in the GASNet segment or not (default is not).\n"
             "  -a        enables full-duplex mode, where all nodes send.\n"
             "  -c        enables cross-machine pairing, default is nearest neighbor.\n"
             "  -f        enables 'first/last' mode, where the first/last\n"
             "            nodes communicate with each other, while all other nodes sit idle.");
  if (help || argc > arg) test_usage();

  /* get SPMD info */
  myproc = gasnet_mynode();
  numprocs = gasnet_nodes();

  if (!firstlastmode) {
    /* Only allow 1 or even number for numprocs */
    if (numprocs > 1 && numprocs % 2 != 0) {
      MSG("WARNING: This test requires a unary or even number of nodes. Test skipped.\n");
      gasnet_exit(0); /* exit 0 to prevent false negatives in test harnesses for smp-conduit */
    }
  }

  /* Setting peer thread rank */
  if (firstlastmode) {
    peerproc = (myproc == 0 ? numprocs-1 : 0);
    iamsender = (fullduplexmode ? myproc == 0 || myproc == numprocs-1 : myproc == 0);
  } else if (numprocs == 1) {
    peerproc = 0;
    iamsender = 1;
  } else if (crossmachinemode) {
    if (myproc < numprocs / 2) {
      peerproc = myproc + numprocs/2;
      iamsender = 1;
    } else {
      peerproc = myproc - numprocs/2;
      iamsender = fullduplexmode;
    }
  } else { 
    peerproc = (myproc % 2) ? (myproc - 1) : (myproc + 1);
    iamsender = (fullduplexmode || myproc % 2 == 0);
  }
  singlesender = (numprocs == 1) || ((numprocs == 2 || firstlastmode) && !fullduplexmode);

  Rbase = TEST_SEG(peerproc);

  if (insegment) {
    Lbase = TEST_SEG(myproc);
  } else {
    alloc = test_calloc(maxsz+PAGESZ,1); /* use calloc to prevent valgrind warnings */
    Lbase = alignup_ptr(alloc, PAGESZ); /* ensure page alignment of base */
  }
  assert(((uintptr_t)Lbase) % PAGESZ == 0);

  if (myproc == 0) {
    MSG0("Running %i iterations of %s%s%snon-contiguous put/get%s%s\n local data %s-segment for sizes: %i...%i\n", 
    iters, 
    (firstlastmode ? "first/last " : ""),
    (fullduplexmode ? "full-duplex ": ""),
    (crossmachinemode ? "cross-machine ": ""),
    (remotecontig?"(remotely-contiguous)":""),
    (localcontig?"(locally-contiguous)":""),
    insegment ? "in" : "out", 
    min_payload, max_payload);
    printf("rows are databytes/op : bandwidth values in MB/s\n");
  }
  BARRIER();

  { int contigsz;
    int rawdatasz;
    int isget;
    test_vis_t viscat;
    for (viscat = TEST_V; viscat <= TEST_S; viscat++) {
    for (isget = 0; isget < 2; isget++) {
      if (TEST_SECTION_BEGIN_ENABLED()) {
        if (isget && !dogets) continue;
        if (!isget && !doputs) continue;
        if (!dovis[viscat]) continue;
        for (contigsz = min_contig; contigsz <= max_contig; contigsz *= contigfactor) {
          int di;
          size_t lastdatasz = 0;
          if (contigsz > max_payload) continue;
          if (!myproc) {
            printf("\n%c: %s %s CONTIGSZ = %i\n", TEST_SECTION_NAME(),
                        visdesc[(int)viscat], (isget?"GET":"PUT"), contigsz);
            printf(" density:");
            for (di = 0; di < densitysteps; di++) {
              printf("%8i%%", (int)((densitysteps-di)*100.0/densitysteps));
            }
            printf("\n");
          }
          for (rawdatasz = min_payload; rawdatasz <= max_payload; rawdatasz *= datafactor) {
            char mystr[255];
            size_t datasz = aligndown(rawdatasz,contigsz);
            if (datasz == lastdatasz) continue;
            lastdatasz = datasz;
            if (singlesender) sprintf(mystr,"%8i: ", (int)datasz);
            else  sprintf(mystr,"P%i: %6i: ", myproc, (int)datasz);
            for (di = 0; di < densitysteps; di++) {
              gasnett_tick_t begin, end;
              size_t Lcnt = (localcontig ? 1 : datasz/contigsz);
              size_t Rcnt = (remotecontig? 1 : datasz/contigsz);
              size_t Lsz = datasz/Lcnt;
              size_t Rsz = datasz/Rcnt;
              void **Lilist = NULL;
              void **Rilist = NULL;
              gasnet_memvec_t *Lvlist = NULL;
              gasnet_memvec_t *Rvlist = NULL;
              size_t *Lstrides = NULL;
              size_t *Rstrides = NULL;
              size_t *LRcount = NULL;
              size_t stride = contigsz*(((double)densitysteps)/(densitysteps-di));
              if (stride * MAX(Lcnt,Rcnt) > maxsz) { strcat(mystr,"    -   "); continue; }

              if (iamsender) { /* setup metadata */
                switch (viscat) {
                  case TEST_V: 
                    Lvlist = make_vlist(Lbase, stride, Lcnt, Lsz);
                    Rvlist = make_vlist(Rbase, stride, Rcnt, Rsz);
                    break;
                  case TEST_I: 
                    Lilist = make_ilist(Lbase, stride, Lcnt, Lsz);
                    Rilist = make_ilist(Rbase, stride, Rcnt, Rsz);
                    break;
                  case TEST_S: {
                    size_t chunkcnt = datasz/contigsz;
                    int dim;
                    Lstrides = test_malloc(sizeof(size_t)*stridelevels);
                    Rstrides = test_malloc(sizeof(size_t)*stridelevels);
                    LRcount = test_malloc(sizeof(size_t)*(stridelevels+1));
                    LRcount[0] = contigsz;
                    Lstrides[0] = (localcontig ? contigsz : stride);
                    Rstrides[0] = (remotecontig ? contigsz : stride);
                    for (dim = 1; dim < stridelevels; dim++) {
                      size_t factor = 1, fi;
                      for (fi = 1; fi <= chunkcnt/(2*(stridelevels-dim)); fi++) /* choose a reasonable factor */
                        if (chunkcnt/fi*fi == chunkcnt) factor = fi;
                      LRcount[dim] = factor;
                      chunkcnt /= factor;
                      Lstrides[dim] = LRcount[dim]*Lstrides[dim-1];
                      Rstrides[dim] = LRcount[dim]*Rstrides[dim-1];
                    }
                    LRcount[stridelevels] = chunkcnt;
                    { size_t tmp = 1;
                      for (dim = 0; dim <= stridelevels; dim++) tmp *= LRcount[dim];
                      assert(tmp == datasz);
                    }
                    break;
                  }
                }
              }
              #define DOIT(iters) do {                                                           \
                int i;                                                                           \
                switch (viscat) {                                                                \
                  case TEST_V:                                                                   \
                    for (i = 0; i < iters; i++) {                                                \
                      if (isget) gasnet_getv_nbi_bulk(Lcnt,Lvlist,peerproc,Rcnt,Rvlist);         \
                      else gasnet_putv_nbi_bulk(peerproc,Rcnt,Rvlist,Lcnt,Lvlist);               \
                    }                                                                            \
                    break;                                                                       \
                  case TEST_I:                                                                   \
                    for (i = 0; i < iters; i++) {                                                \
                      if (isget) gasnet_geti_nbi_bulk(Lcnt,Lilist,Lsz,peerproc,Rcnt,Rilist,Rsz); \
                      else gasnet_puti_nbi_bulk(peerproc,Rcnt,Rilist,Rsz,Lcnt,Lilist,Lsz);       \
                    }                                                                            \
                    break;                                                                       \
                  case TEST_S:                                                                   \
                    for (i = 0; i < iters; i++) {                                                \
                      if (isget) gasnet_gets_nbi_bulk(Lbase,Lstrides,peerproc,Rbase,Rstrides,    \
                                                      LRcount,stridelevels);                     \
                      else gasnet_puts_nbi_bulk(peerproc,Rbase,Rstrides,Lbase,Lstrides,          \
                                                LRcount,stridelevels);                           \
                    }                                                                            \
                    break;                                                                       \
                }                                                                                \
                gasnet_wait_syncnbi_all();                                                       \
              } while (0)
              if (iamsender) DOIT(1); /* pay some warm-up costs */
              BARRIER();
              if (iamsender) { 
  	        begin = gasnett_ticks_now();
                DOIT(iters);
	        end = gasnett_ticks_now();
              }
              BARRIER();
              if (iamsender) { 
                char tmp[80];
                double secs = gasnett_ticks_to_ns(end - begin)/1.0E9;
                double dataMB = ((double)datasz) * iters / (1024*1024);
                sprintf(tmp, " %8.3f", dataMB / secs);
                strcat(mystr, tmp);
              }
              if (Lilist) test_free(Lilist);
              if (Rilist) test_free(Rilist);
              if (Lvlist) test_free(Lvlist);
              if (Rvlist) test_free(Rvlist);
              if (Lstrides) test_free(Lstrides);
              if (Rstrides) test_free(Rstrides);
              if (LRcount) test_free(LRcount);
            }
            if (iamsender) { printf("%s\n", mystr); fflush(stdout); }
            BARRIER();
          }
        }
      }
    }
    }
  }

  BARRIER();
  if (alloc) test_free(alloc);

  gasnet_exit(0);

  return 0;
}
Пример #5
0
int main(int argc, char **argv)
{
    int outer_iterations = 0;
    int inner_iterations = 0;
    int seedoffset = 0;
    int numprocs, myproc;
#if PLATFORM_COMPILER_CLANG
    // The addition of a gratuitous 'volatile' here has been observed to
    // work-around Bug 3630 in which Clang 4 and newer misoptimize this test.
    volatile int peerproc;
#else
    int peerproc;
#endif
    int sender_p;
    char *shadow_region_1, *shadow_region_2;
    int i,j;
    char *local_base, *target_base;

    /* call startup */
    GASNET_Safe(gex_Client_Init(&myclient, &myep, &myteam, "testslice", &argc, &argv, 0));

    /* get SPMD info */
    myproc = gex_TM_QueryRank(myteam);
    numprocs = gex_TM_QuerySize(myteam);

    if (argc > 1) segsize = atoi(argv[1]);
    if (!segsize) segsize = 1024*1000;
    if (argc > 2) outer_iterations = atoi(argv[2]);
    if (!outer_iterations) outer_iterations = 10;
    if (argc > 3) inner_iterations = atoi(argv[3]);
    if (!inner_iterations) inner_iterations = 10;
    if (argc > 4) seedoffset = atoi(argv[4]);

    GASNET_Safe(gex_Segment_Attach(&mysegment, myteam, TEST_SEGSZ));

    test_init("testslice",0, "(segsize) (iterations) (# of sizes per iteration) (seed)");

    /* parse arguments */
    if (argc > 5) test_usage();
    
    if(numprocs & 1) {
        MSG0("WARNING: This test requires an even number of nodes. Test skipped.\n");
        gasnet_exit(0); /* exit 0 to prevent false negatives in test harnesses for smp-conduit */
    }
    sender_p = !(myproc & 1);
    peerproc = myproc ^ 1;

    if (seedoffset == 0) {
      seedoffset = (((unsigned int)TIME()) & 0xFFFF);
      TEST_BCAST(&seedoffset, 0, &seedoffset, sizeof(&seedoffset));
    }
    TEST_SRAND(myproc+seedoffset);

    MSG0("Running with segment size = %d outer iterations=%d inner iterations=%d seed=%d",
         segsize,outer_iterations, inner_iterations, seedoffset);

    BARRIER();

    /* Allocate two shadow regions the same size as the segment */
    shadow_region_1 = (char *) test_malloc(segsize);
    shadow_region_2 = (char *) test_malloc(segsize);
   
    /* Fill up the shadow region with random data */
    for(i=0;i < segsize;i++) {
      shadow_region_1[i] = (char) TEST_RAND(0,255);
    }
    memset(shadow_region_2,0,segsize);

    /* Big loop performing the following */
    for(i=0;i < outer_iterations;i++) {
      if(sender_p) {
        /* Pick a starting point anywhere in the segment */
        int starting_point = TEST_RAND(0,(segsize-1));

        local_base = TEST_SEG(myproc);
        target_base = TEST_SEG(peerproc);
 
        for(j=0;j < inner_iterations;j++) {
          /* Pick a length */
          int len = TEST_RAND(1,segsize-starting_point);
          int remote_starting_point = TEST_RAND(0,segsize-len);
          int local_starting_point_1 = TEST_RAND(0,segsize-len);
          int local_starting_point_2 = TEST_RAND(0,segsize-len);

          /* Perform operations */
          /* Out of segment put from shadow_region 1 to remote */
          gex_RMA_PutBlocking(myteam, peerproc,target_base+remote_starting_point,shadow_region_1 + starting_point,len, 0); 
  
          /* In segment get from remote to local segment */
          gex_RMA_GetBlocking(myteam, local_base+local_starting_point_1,peerproc,target_base+remote_starting_point,len, 0); 
  
          /* Verify */
          assert_eq(shadow_region_1 + starting_point, local_base + local_starting_point_1, len,starting_point,i,j,"Out of segment put + in segment get");
  
          /* Out of segment get from remote to shadow_region_2 (starting from 0) */
          gex_RMA_GetBlocking(myteam, shadow_region_2+local_starting_point_2,peerproc,target_base+remote_starting_point,len, 0); 
  
          /* Verify */
          assert_eq(shadow_region_2+local_starting_point_2, shadow_region_1 + starting_point, len,starting_point,i,j,"Out of segment get");
        }
        TEST_PROGRESS_BAR(i,outer_iterations);
      }
      BARRIER();
    }
    if(sender_p && !failures) {
      MSG("testslice PASSED");
    }
    gasnet_exit(0);

    return 0;

}
Пример #6
0
EXTERN_ENV
#include <stdio.h>

#include "parameters.h"
#include "mdvar.h"
#include "water.h"
#include "wwpot.h"
#include "cnst.h"
#include "mddata.h"
#include "fileio.h"
#include "split.h"
#include "global.h"

/************************************************************************/

/* routine that implements the time-steps. Called by main routine and calls others */
double MDMAIN(long NSTEP, long NPRINT, long NSAVE, long NORD1, long ProcID)
{
    double XTT;
    long i;
    double POTA,POTR,POTRF;
    double XVIR,AVGT,TEN;
    double TTMV = 0.0, TKIN = 0.0, TVIR = 0.0;

    /*.......ESTIMATE ACCELERATION FROM F/M */
    INTRAF(&gl->VIR,ProcID);

    BARRIER(gl->start, NumProcs);

    INTERF(ACC,&gl->VIR,ProcID);

    BARRIER(gl->start, NumProcs);

    /* MOLECULAR DYNAMICS LOOP OVER ALL TIME-STEPS */

    for (i=1;i <= NSTEP; i++) {
        TTMV=TTMV+1.00;

        /* reset simulator stats at beginning of second time-step */

        /* POSSIBLE ENHANCEMENT:  Here's where one start measurements to avoid
           cold-start effects.  Recommended to do this at the beginning of the
           second timestep; i.e. if (i == 2).
           */

        /* initialize various shared sums */
        if (ProcID == 0) {
            long dir;
            if (i >= 2) {
                CLOCK(gl->trackstart);
            }
            gl->VIR = 0.0;
            gl->POTA = 0.0;
            gl->POTR = 0.0;
            gl->POTRF = 0.0;
            for (dir = XDIR; dir <= ZDIR; dir++)
                gl->SUM[dir] = 0.0;
        }

        if ((ProcID == 0) && (i >= 2)) {
            CLOCK(gl->intrastart);
        }

        BARRIER(gl->start, NumProcs);
        PREDIC(TLC,NORD1,ProcID);
        INTRAF(&gl->VIR,ProcID);
        BARRIER(gl->start, NumProcs);

        if ((ProcID == 0) && (i >= 2)) {
            CLOCK(gl->intraend);
            gl->intratime += gl->intraend - gl->intrastart;
        }


        if ((ProcID == 0) && (i >= 2)) {
            CLOCK(gl->interstart);
        }

        INTERF(FORCES,&gl->VIR,ProcID);

        if ((ProcID == 0) && (i >= 2)) {
            CLOCK(gl->interend);
            gl->intertime += gl->interend - gl->interstart;
        }

        if ((ProcID == 0) && (i >= 2)) {
            CLOCK(gl->intrastart);
        }

        CORREC(PCC,NORD1,ProcID);

        BNDRY(ProcID);

        KINETI(gl->SUM,HMAS,OMAS,ProcID);

        BARRIER(gl->start, NumProcs);

        if ((ProcID == 0) && (i >= 2)) {
            CLOCK(gl->intraend);
            gl->intratime += gl->intraend - gl->intrastart;
        }

        TKIN=TKIN+gl->SUM[0]+gl->SUM[1]+gl->SUM[2];
        TVIR=TVIR-gl->VIR;

        /*  check if potential energy is to be computed, and if
            printing and/or saving is to be done, this time step.
            Note that potential energy is computed once every NPRINT
            time-steps */

        if (((i % NPRINT) == 0) || ( (NSAVE > 0) && ((i % NSAVE) == 0))){

            if ((ProcID == 0) && (i >= 2)) {
                CLOCK(gl->interstart);
            }

            /*  call potential energy computing routine */
            POTENG(&gl->POTA,&gl->POTR,&gl->POTRF,ProcID);
            BARRIER(gl->start, NumProcs);

            if ((ProcID == 0) && (i >= 2)) {
                CLOCK(gl->interend);
                gl->intertime += gl->interend - gl->interstart;
            }

            POTA=gl->POTA*FPOT;
            POTR=gl->POTR*FPOT;
            POTRF=gl->POTRF*FPOT;

            /* compute some values to print */
            XVIR=TVIR*FPOT*0.50/TTMV;
            AVGT=TKIN*FKIN*TEMP*2.00/(3.00*TTMV);
            TEN=(gl->SUM[0]+gl->SUM[1]+gl->SUM[2])*FKIN;
            XTT=POTA+POTR+POTRF+TEN;

            if ((i % NPRINT) == 0 && ProcID == 0) {
                fprintf(six,"     %5ld %14.5lf %12.5lf %12.5lf  \
                %12.5lf\n %16.3lf %16.5lf %16.5lf\n",
                        i,TEN,POTA,POTR,POTRF,XTT,AVGT,XVIR);
            }
        }

        /* wait for everyone to finish time-step */
        BARRIER(gl->start, NumProcs);

        if ((ProcID == 0) && (i >= 2)) {
            CLOCK(gl->trackend);
            gl->tracktime += gl->trackend - gl->trackstart;
        }
    } /* for i */
Пример #7
0
void
ParallelExecute ()
{
   long my_id;
   long num_boxes;
   unsigned long start, finish = 0;
   time_info *local_time;
   long time_all = 0;
   time_info *timing;
   unsigned long local_init_done = 0;

   THREAD_INIT_FREE();

   BARINCLUDE(G_Memory->synch);
   local_time = (time_info *) malloc(sizeof(struct _Time_Info) * MAX_TIME_STEPS);
   BARRIER(G_Memory->synch, Number_Of_Processors);
   LOCK(G_Memory->count_lock);
     my_id = G_Memory->id;
     G_Memory->id++;
   UNLOCK(G_Memory->count_lock);


/* POSSIBLE ENHANCEMENT:  Here is where one might pin processes to
   processors to avoid migration */

   if (my_id == 0) {
     time_all = 1;
   } else if (do_stats) {
     time_all = 1;
   }

   if (my_id == 0) {
      /* have to allocate extra space since it will construct the grid by
       * itself for the first time step */
      CreateParticleList(my_id, Total_Particles);
      InitParticleList(my_id, Total_Particles, 0);
   }
   else {
      CreateParticleList(my_id, ((Total_Particles * PDF)
				 / Number_Of_Processors));
      InitParticleList(my_id, 0, 0);
   }
   num_boxes = 1.333 * (Total_Particles / (OCCUPANCY * MAX_PARTICLES_PER_BOX));
   if (my_id == 0)
      CreateBoxes(my_id, TOLERANCE * num_boxes);
   else
      CreateBoxes(my_id, TOLERANCE * num_boxes * BDF / Number_Of_Processors);

   if (my_id == 0) {
      LockedPrint("Starting FMM with %d processor%s\n", Number_Of_Processors,
		  (Number_Of_Processors == 1) ? "" : "s");
   }
   BARRIER(G_Memory->synch, Number_Of_Processors);
   Local[my_id].Time = 0.0;
   for (MY_TIME_STEP = 0; MY_TIME_STEP < Time_Steps; MY_TIME_STEP++) {

      if (MY_TIME_STEP == 2) {
/* POSSIBLE ENHANCEMENT:  Here is where one might reset the
   statistics that one is measuring about the parallel execution */
         // Reset Models
         CarbonEnableModels();
      }

      if (MY_TIME_STEP == 2) {
        if (do_stats || my_id == 0) {
          CLOCK(local_init_done);
        }
      }

      if (MY_TIME_STEP == 0) {
	 CLOCK(start);
      }
      else
	 start = finish;
      ConstructGrid(my_id,local_time,time_all);
      ConstructLists(my_id,local_time,time_all);
      PartitionGrid(my_id,local_time,time_all);
      StepSimulation(my_id,local_time,time_all);
      DestroyGrid(my_id,local_time,time_all);
      CLOCK(finish);
      Local[my_id].Time += Timestep_Dur;
      MY_TIMING[MY_TIME_STEP].total_time = finish - start;
   }
   if (my_id == 0) {
      CLOCK(endtime);
   }
   BARRIER(G_Memory->synch, Number_Of_Processors);
   for (MY_TIME_STEP = 0; MY_TIME_STEP < Time_Steps; MY_TIME_STEP++) {
     timing = &(MY_TIMING[MY_TIME_STEP]);
     timing->other_time = local_time[MY_TIME_STEP].other_time;
     timing->construct_time = local_time[MY_TIME_STEP].construct_time;
     timing->list_time = local_time[MY_TIME_STEP].list_time;
     timing->partition_time = local_time[MY_TIME_STEP].partition_time;
     timing->pass_time = local_time[MY_TIME_STEP].pass_time;
     timing->inter_time = local_time[MY_TIME_STEP].inter_time;
     timing->barrier_time = local_time[MY_TIME_STEP].barrier_time;
     timing->intra_time = local_time[MY_TIME_STEP].intra_time;
   }
   Local[my_id].init_done_times = local_init_done;
   BARRIER(G_Memory->synch, Number_Of_Processors);
}
Пример #8
0
void INTERF(long DEST, double *VIR, long ProcID)
{
    /* This routine gets called both from main() and from mdmain().
       When called from main(), it is used to estimate the initial
       accelerations by computing intermolecular forces.  When called
       from mdmain(), it is used to compute intermolecular forces.
       The parameter DEST specifies whether results go into the
       accelerations or the forces. Uses routine UPDATE_FORCES in this
       file, and routine CSHIFT in file cshift.U */
    /*
      .....this routine calculates inter-molecular interaction forces
      the distances are arranged in the order  M-M, M-H1, M-H3, H1-M,
      H3-M, H1-H3, H1-H1, H3-H1, H3-H3, O-O, O-H1, O-H3, H1-O, H3-O,
      where the M are "centers" of the molecules.
      */

    long mol, comp, dir, icomp;
    long comp_last, half_mol;
    long    KC, K;
    double YL[15], XL[15], ZL[15], RS[15], FF[15], RL[15]; /* per-
                                                              interaction arrays that hold some computed distances */
    double  FTEMP;
    double LVIR = 0.0;
    double *temp_p;

    { /* initialize PFORCES array */

        long ct1,ct2,ct3;

        for (ct1 = 0; ct1<NMOL; ct1++)
            for (ct2 = 0; ct2<NDIR; ct2++)
                for (ct3 = 0; ct3<NATOM; ct3++)
                    PFORCES[ProcID][ct1][ct2][ct3] = 0;

    }

    half_mol = NMOL/2;
    for (mol = StartMol[ProcID]; mol < StartMol[ProcID+1]; mol++) {
        comp_last = mol + half_mol;
        if (NMOL%2 == 0) {
            if ((half_mol <= mol) && (mol%2 == 0)) {
                comp_last--;
            }
            if ((mol < half_mol) && (comp_last%2 == 1)) {
                comp_last--;
            }
        }
        for (icomp = mol+1; icomp <= comp_last; icomp++) {
            comp = icomp;
            if (comp > NMOL1) comp = comp%NMOL;

            /*  compute some intermolecular distances */

            CSHIFT(VAR[mol].F[DISP][XDIR],VAR[comp].F[DISP][XDIR],
                   VAR[mol].VM[XDIR],VAR[comp].VM[XDIR],XL,BOXH,BOXL);
            CSHIFT(VAR[mol].F[DISP][YDIR],VAR[comp].F[DISP][YDIR],
                   VAR[mol].VM[YDIR],VAR[comp].VM[YDIR],YL,BOXH,BOXL);
            CSHIFT(VAR[mol].F[DISP][ZDIR],VAR[comp].F[DISP][ZDIR],
                   VAR[mol].VM[ZDIR],VAR[comp].VM[ZDIR],ZL,BOXH,BOXL);

            KC=0;
            for (K = 0; K < 9; K++) {
                RS[K]=XL[K]*XL[K]+YL[K]*YL[K]+ZL[K]*ZL[K];
                if (RS[K] > CUT2)
                    KC++;
            } /* for K */

            if (KC != 9) {
                for (K = 0; K < 14; K++)
                    FF[K]=0.0;
                if (RS[0] < CUT2) {
                    FF[0]=QQ4/(RS[0]*sqrt(RS[0]))+REF4;
                    LVIR = LVIR + FF[0]*RS[0];
                } /* if */
                for (K = 1; K < 5; K++) {
                    if (RS[K] < CUT2) {
                        FF[K]= -QQ2/(RS[K]*sqrt(RS[K]))-REF2;
                        LVIR = LVIR + FF[K]*RS[K];
                    } /* if */
                    if (RS[K+4] <= CUT2) {
                        RL[K+4]=sqrt(RS[K+4]);
                        FF[K+4]=QQ/(RS[K+4]*RL[K+4])+REF1;
                        LVIR = LVIR + FF[K+4]*RS[K+4];
                    } /* if */
                } /* for K */
                if (KC == 0) {
                    RS[9]=XL[9]*XL[9]+YL[9]*YL[9]+ZL[9]*ZL[9];
                    RL[9]=sqrt(RS[9]);
                    FF[9]=AB1*exp(-B1*RL[9])/RL[9];
                    LVIR = LVIR + FF[9]*RS[9];
                    for (K = 10; K < 14; K++) {
                        FTEMP=AB2*exp(-B2*RL[K-5])/RL[K-5];
                        FF[K-5]=FF[K-5]+FTEMP;
                        LVIR= LVIR+FTEMP*RS[K-5];
                        RS[K]=XL[K]*XL[K]+YL[K]*YL[K]+ZL[K]*ZL[K];
                        RL[K]=sqrt(RS[K]);
                        FF[K]=(AB3*exp(-B3*RL[K])-AB4*exp(-B4*RL[K]))/RL[K];
                        LVIR = LVIR + FF[K]*RS[K];
                    } /* for K */
                } /* if KC == 0 */

                UPDATE_FORCES(mol, comp, XL, YL, ZL, FF, ProcID);

            }  /* if KC != 9 */
        } /* for comp */
    } /* for mol */
    /*  accumulate the running sum from private
        per-interaction partial sums   */
    LOCK(gl->InterfVirLock);
    *VIR = *VIR + LVIR;
    UNLOCK(gl->InterfVirLock);

    /* at the end of the above force-computation, comp_last */
    /* contains the number of the last molecule (no modulo) */
    /* that this process touched                            */

    if (comp_last > NMOL1) {
        for (mol = StartMol[ProcID]; mol < NMOL; mol++) {
            ALOCK(gl->MolLock, mol % MAXLCKS);
            for ( dir = XDIR; dir  <= ZDIR; dir++) {
                temp_p = VAR[mol].F[DEST][dir];
                temp_p[H1] += PFORCES[ProcID][mol][dir][H1];
                temp_p[O]  += PFORCES[ProcID][mol][dir][O];
                temp_p[H2] += PFORCES[ProcID][mol][dir][H2];
            }
            AULOCK(gl->MolLock, mol % MAXLCKS);
        }
        comp = comp_last % NMOL;
        for (mol = 0; ((mol <= comp) && (mol < StartMol[ProcID])); mol++) {
            ALOCK(gl->MolLock, mol % MAXLCKS);
            for ( dir = XDIR; dir  <= ZDIR; dir++) {
                temp_p = VAR[mol].F[DEST][dir];
                temp_p[H1] += PFORCES[ProcID][mol][dir][H1];
                temp_p[O]  += PFORCES[ProcID][mol][dir][O];
                temp_p[H2] += PFORCES[ProcID][mol][dir][H2];
            }
            AULOCK(gl->MolLock, mol % MAXLCKS);
        }
    }
    else{
        for (mol = StartMol[ProcID]; mol <= comp_last; mol++) {
            ALOCK(gl->MolLock, mol % MAXLCKS);
            for ( dir = XDIR; dir  <= ZDIR; dir++) {
                temp_p = VAR[mol].F[DEST][dir];
                temp_p[H1] += PFORCES[ProcID][mol][dir][H1];
                temp_p[O]  += PFORCES[ProcID][mol][dir][O];
                temp_p[H2] += PFORCES[ProcID][mol][dir][H2];
            }
            AULOCK(gl->MolLock, mol % MAXLCKS);
        }
    }

    /* wait till all forces are updated */

    BARRIER(gl->InterfBar, NumProcs);

    /* divide final forces by masses */

    for (mol = StartMol[ProcID]; mol < StartMol[ProcID+1]; mol++) {
        for ( dir = XDIR; dir  <= ZDIR; dir++) {
            temp_p = VAR[mol].F[DEST][dir];
            temp_p[H1] = temp_p[H1] * FHM;
            temp_p[O]  = temp_p[O] * FOM;
            temp_p[H2] = temp_p[H2] * FHM;
        } /* for dir */
    } /* for mol */

}/* end of subroutine INTERF */