예제 #1
0
/*
 * Fetch (and thereby cache) the memory key of every memheap segment for
 * every remote PE, then, if the heap segment has a valid shmid, synchronize
 * all PEs and remove the SysV segment id when the segment is of type
 * MAP_SEGMENT_ALLOC_SHM.
 *
 * Does nothing when mca_memheap_base_key_exchange is not set.
 * Aborts through oshmem_shmem_abort(-1) when any key lookup returns NULL.
 */
void mca_memheap_modex_recv_all(void)
{
    int pe;
    int seg;
    int pe_count, self_pe;
    oshmem_proc_t *peer;
    mca_spml_mkey_t *cached_key;
    void* unused_rva;

    if (!mca_memheap_base_key_exchange) {
        return;
    }

    pe_count = oshmem_num_procs();
    self_pe = oshmem_my_proc_id();

    /*
     * The keys are requested PE by PE (the original author notes this is
     * done over rml until a problem with grpcomm.modex and barrier is
     * understood). The local PE is skipped.
     */
    for (pe = 0; pe < pe_count; pe++) {
        if (pe != self_pe) {
            peer = oshmem_proc_group_find(oshmem_group_all, pe);

            seg = 0;
            while (seg < memheap_map->n_segments) {
                /* Only the success of the lookup matters; the returned
                 * remote address is discarded. */
                cached_key = mca_memheap_base_get_cached_mkey(
                        pe,
                        memheap_map->mem_segs[seg].start,
                        peer->transport_ids[0],
                        &unused_rva);
                if (NULL == cached_key) {
                    MEMHEAP_ERROR("Failed to receive mkeys");
                    oshmem_shmem_abort(-1);
                }
                seg++;
            }
        }
    }

    /*
     * Design note carried over from the original author:
     * orte_grpcomm.barrier is deliberately not used here. With ess/pmi under
     * slurm srun the barrier ends up in PMI_Barrier(), an external library
     * call that never runs opal_progress(); slow PEs would then send
     * MEMHEAP_RKEY_REQ to fast PEs blocked in the barrier and never receive
     * MEMHEAP_RKEY_RESP. Listed alternatives were: count remote requests and
     * wait with ORTE_PROGRESSED_WAIT, use shmem_barrier_all(), rework the pmi
     * barrier to call opal_progress(), or use orte_grpcomm.barrier with care.
     */

    if (MEMHEAP_SHM_INVALID == memheap_map->mem_segs[HEAP_SEG_INDEX].shmid) {
        return;
    }

    /*
     * Every PE must have attached to our segment before its id is removed,
     * hence the barrier. The original author notes that this path is only
     * taken on older Linux systems (-mca shmalloc_use_hugepages 3|4).
     */
    shmem_barrier_all();

    /* Keys exchanged and segments attached: the id can be marked for removal. */
    if (MAP_SEGMENT_ALLOC_SHM == memheap_map->mem_segs[HEAP_SEG_INDEX].type) {
        shmctl(memheap_map->mem_segs[HEAP_SEG_INDEX].shmid, IPC_RMID, NULL);
    }
}
예제 #2
0
/*
 * Test case: every PE repeatedly calls FUNC_VALUE() on the symmetric word
 * shmem_addr of PE 0 (peer_proc is never changed from 0), the values returned
 * to all PEs are gathered with shmem_collect32(), and the gathered array is
 * scanned for each PE number 0..num_proc-1.
 *
 * Returns TC_PASS when at most one PE number is missing from the gathered
 * values in every cycle, TC_FAIL when more than one is missing, and
 * TC_SETUP_FAIL when any symmetric allocation fails.
 */
static int test_item3(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE* check_arr = NULL;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;
    int i = 0;
    int j = 0;
    int k = 0;
    int flag = 0;
    int missed_values = 0;
    static long* pSync = NULL;

    num_proc = _num_pes();
    my_proc = _my_pe();

    shmem_addr = shmalloc(sizeof(*shmem_addr));
    check_arr = shmalloc(sizeof(*check_arr) * num_proc);

    pSync = shmalloc(sizeof(*pSync) * _SHMEM_COLLECT_SYNC_SIZE);
    /* Only touch the sync array once the allocation is known to have
     * succeeded; a failed shmalloc() is reported as TC_SETUP_FAIL below. */
    if (pSync)
    {
        for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i++) {
            pSync[i] = _SHMEM_SYNC_VALUE;
        }
    }

    if (shmem_addr && pSync && check_arr)
    {
        /* Source buffer of shmem_collect32(); kept static as in the
         * original so that it has a fixed address for the collective. */
        static TYPE_VALUE value = 0;

        /* Store my value */
        my_value = (TYPE_VALUE)my_proc;
        *shmem_addr = DEFAULT_VALUE;

        shmem_barrier_all();
        for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
        {
            missed_values = 0;
            my_value = (TYPE_VALUE)my_proc;

            value = FUNC_VALUE(shmem_addr, my_value, peer_proc);

            shmem_barrier_all();
            /* Element count is the size of one value rounded up to 32-bit words. */
            shmem_collect32(check_arr, &value, (sizeof(value) + 3 ) / 4, 0, 0, num_proc, pSync);
            shmem_barrier_all();

            /* Count how many PE numbers do not occur among the gathered values. */
            for (j = 0; j < num_proc ; j++)
            {
                flag = 0;
                for (k = 0; k < num_proc; k++)
                {
                    if (sys_fcompare(check_arr[k], j))
                    {
                        flag = 1;
                        break;
                    }
                }
                if (flag == 0)
                {
                    missed_values++;
                }
                if (missed_values > 1)
                {
                    rc = TC_FAIL;
                    break;
                }
            }
        }
        shmem_barrier_all();

        log_debug(OSH_TC, "my(#%d:%lld) missed_values expected = 1 vs missed_values = %d\n",
                           my_proc, (INT64_TYPE)my_value, missed_values);
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (shmem_addr)
    {
        shfree(shmem_addr);
    }
    if (pSync)
    {
        shfree(pSync);
    }
    /* The gather buffer was previously never released. */
    if (check_arr)
    {
        shfree(check_arr);
    }

    return rc;
}
예제 #3
0
/*
 * Functional test of the OpenSHMEM put family.
 *
 * Every PE fills local source buffers with its own PE number and puts them
 * into the dest* objects of PE (me + 1) % npes. After each round PE 0 checks
 * that its dest* objects hold npes - 1 (the number of the PE that targets
 * PE 0) and prints "Passed"/"Failed" per routine plus a final summary.
 *
 * The test is skipped with a message when fewer than two PEs are running.
 * The process always returns 0; results are reported on stdout only.
 *
 * Note on naming: the successN variables are really failure flags. They are
 * cleared to 0 before a round and set to 1 on the first mismatch, and
 * "Passed" is printed when the flag is still 0.
 */
int
main (int argc, char **argv)
{
    int i;
    int nextpe;
    int me, npes;
    int success1, success2, success3, success4, success5, success6, success7,
        success8;

    short src1[N];
    int src2[N];
    long src3[N];
    long double src4[N];
    long long src5[N];
    double src6[N];
    float src7[N];
    /* Kept on the stack like the other sources: the former malloc() result
       was dereferenced without a NULL check and never freed. */
    char src8[N];
    short src9;
    int src10;
    long src11;
    double src12;
    float src13;

    int fail_count = 0;

    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    if (npes > 1) {

        success1 = 0;
        success2 = 0;
        success3 = 0;
        success4 = 0;
        success5 = 0;
        success6 = 0;
        success7 = 0;
        success8 = 0;

        /* Every source element carries the number of the sending PE. */
        for (i = 0; i < N; i += 1) {
            src1[i] = (short) me;
            src2[i] = me;
            src3[i] = (long) me;
            src4[i] = (long double) me;
            src5[i] = (long long) me;
            src6[i] = (double) me;
            src7[i] = (float) me;
            src8[i] = (char) me;
        }
        src9 = (short) me;
        src10 = me;
        src11 = (long) me;
        src12 = (double) me;
        src13 = (float) me;


        /* -9 marks destination elements that have not been written. */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest4[i] = -9;
            dest5[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9.0;
            dest8[i] = -9;
        }
        dest9 = -9;
        dest10 = -9;
        dest11 = -9;
        dest12 = -9;
        dest13 = -9.0;

        nextpe = (me + 1) % npes;

        /* Testing shmem_short_put, shmem_int_put,
           shmem_long_put, shmem_longdouble_put, shmem_longlong_put,
           shmem_double_put, shmem_float_put, shmem_putmem */
        shmem_barrier_all ();

        shmem_short_put (dest1, src1, N, nextpe);
        shmem_int_put (dest2, src2, N, nextpe);
        shmem_long_put (dest3, src3, N, nextpe);
        shmem_longdouble_put (dest4, src4, N, nextpe);
        shmem_longlong_put (dest5, src5, N, nextpe);
        shmem_double_put (dest6, src6, N, nextpe);
        shmem_float_put (dest7, src7, N, nextpe);
        shmem_putmem (dest8, src8, N * sizeof (char), nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            for (i = 0; i < N; i += 1) {
                if (dest1[i] != (npes - 1)) {
                    success1 = 1;
                }
                if (dest2[i] != (npes - 1)) {
                    success2 = 1;
                }
                if (dest3[i] != (npes - 1)) {
                    success3 = 1;
                }
                if (dest4[i] != (npes - 1)) {
                    success4 = 1;
                }
                if (dest5[i] != (npes - 1)) {
                    success5 = 1;
                }
                if (dest6[i] != (npes - 1)) {
                    success6 = 1;
                }
                if (dest7[i] != (npes - 1)) {
                    success7 = 1;
                }
                if (dest8[i] != (npes - 1)) {
                    success8 = 1;
                }
            }

            if (success1 == 0)
                printf ("Test shmem_short_put: Passed\n");
            else {
                printf ("Test shmem_short_put: Failed\n");
                fail_count++;
            }
            if (success2 == 0)
                printf ("Test shmem_int_put: Passed\n");
            else {
                printf ("Test shmem_int_put: Failed\n");
                fail_count++;
            }
            if (success3 == 0)
                printf ("Test shmem_long_put: Passed\n");
            else {
                printf ("Test shmem_long_put: Failed\n");
                fail_count++;
            }
            if (success4 == 0)
                printf ("Test shmem_longdouble_put: Passed\n");
            else {
                printf ("Test shmem_longdouble_put: Failed\n");
                fail_count++;
            }
            if (success5 == 0)
                printf ("Test shmem_longlong_put: Passed\n");
            else {
                printf ("Test shmem_longlong_put: Failed\n");
                fail_count++;
            }
            if (success6 == 0)
                printf ("Test shmem_double_put: Passed\n");
            else {
                printf ("Test shmem_double_put: Failed\n");
                fail_count++;
            }
            if (success7 == 0)
                printf ("Test shmem_float_put: Passed\n");
            else {
                printf ("Test shmem_float_put: Failed\n");
                fail_count++;
            }
            if (success8 == 0)
                printf ("Test shmem_putmem: Passed\n");
            else {
                printf ("Test shmem_putmem: Failed\n");
                fail_count++;
            }
        }
        shmem_barrier_all ();

        /* Testing shmem_put32, shmem_put64, shmem_put128.
           The buffers are picked by sizeof (int): with a 4-byte int the
           int/long/long double arrays are used, with an 8-byte int the
           short/int/long arrays. Any other int size skips this round. */
        if (sizeof (int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all ();

            shmem_put32 (dest2, src2, N, nextpe);
            shmem_put64 (dest3, src3, N, nextpe);
            shmem_put128 (dest4, src4, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }
                    if (dest4[i] != (npes - 1)) {
                        success4 = 1;
                    }
                }
                if (success2 == 0)
                    printf ("Test shmem_put32: Passed\n");
                else {
                    printf ("Test shmem_put32: Failed\n");
                    fail_count++;
                }

                if (success3 == 0)
                    printf ("Test shmem_put64: Passed\n");
                else {
                    printf ("Test shmem_put64: Failed\n");
                    fail_count++;
                }

                if (success4 == 0)
                    printf ("Test shmem_put128: Passed\n");
                else {
                    printf ("Test shmem_put128: Failed\n");
                    fail_count++;
                }
            }
        }
        else if (sizeof (int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all ();

            shmem_put32 (dest1, src1, N, nextpe);
            shmem_put64 (dest2, src2, N, nextpe);
            shmem_put128 (dest3, src3, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest1[i] != (npes - 1)) {
                        success1 = 1;
                    }
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }

                }
                if (success1 == 0)
                    printf ("Test shmem_put32: Passed\n");
                else {
                    printf ("Test shmem_put32: Failed\n");
                    fail_count++;
                }

                if (success2 == 0)
                    printf ("Test shmem_put64: Passed\n");
                else {
                    printf ("Test shmem_put64: Failed\n");
                    fail_count++;
                }

                if (success3 == 0)
                    printf ("Test shmem_put128: Passed\n");
                else {
                    printf ("Test shmem_put128: Failed\n");
                    fail_count++;
                }
            }
        }
        /* Testing shmem_iput32, shmem_iput64, shmem_iput128.
           N / 2 elements are sent with target stride 1 and source stride 2,
           so only the first N / 2 destination elements are verified. */
        shmem_barrier_all ();
        if (sizeof (int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all ();

            shmem_iput32 (dest2, src2, 1, 2, N / 2, nextpe);
            shmem_iput64 (dest3, src3, 1, 2, N / 2, nextpe);
            shmem_iput128 (dest4, src4, 1, 2, N / 2, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }
                    if (dest4[i] != (npes - 1)) {
                        success4 = 1;
                    }
                }
                if (success2 == 0)
                    printf ("Test shmem_iput32: Passed\n");
                else {
                    printf ("Test shmem_iput32: Failed\n");
                    fail_count++;
                }

                if (success3 == 0)
                    printf ("Test shmem_iput64: Passed\n");
                else {
                    printf ("Test shmem_iput64: Failed\n");
                    fail_count++;
                }

                if (success4 == 0)
                    printf ("Test shmem_iput128: Passed\n");
                else {
                    printf ("Test shmem_iput128: Failed\n");
                    fail_count++;
                }
            }
        }
        else if (sizeof (int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all ();

            shmem_iput32 (dest1, src1, 1, 2, N / 2, nextpe);
            shmem_iput64 (dest2, src2, 1, 2, N / 2, nextpe);
            shmem_iput128 (dest3, src3, 1, 2, N / 2, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest1[i] != (npes - 1)) {
                        success1 = 1;
                    }
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }

                }
                if (success1 == 0)
                    printf ("Test shmem_iput32: Passed\n");
                else {
                    printf ("Test shmem_iput32: Failed\n");
                    fail_count++;
                }

                if (success2 == 0)
                    printf ("Test shmem_iput64: Passed\n");
                else {
                    printf ("Test shmem_iput64: Failed\n");
                    fail_count++;
                }

                if (success3 == 0)
                    printf ("Test shmem_iput128: Passed\n");
                else {
                    printf ("Test shmem_iput128: Failed\n");
                    fail_count++;
                }
            }
        }

        /* Testing shmem_short_iput, shmem_int_iput, shmem_long_iput,
           shmem_double_iput, shmem_float_iput */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9;
        }
        success1 = 0;
        success2 = 0;
        success3 = 0;
        success6 = 0;
        success7 = 0;

        shmem_barrier_all ();

        shmem_short_iput (dest1, src1, 1, 2, N / 2, nextpe);
        shmem_int_iput (dest2, src2, 1, 2, N / 2, nextpe);
        shmem_long_iput (dest3, src3, 1, 2, N / 2, nextpe);
        shmem_double_iput (dest6, src6, 1, 2, N / 2, nextpe);
        shmem_float_iput (dest7, src7, 1, 2, N / 2, nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            for (i = 0; i < N / 2; i += 1) {
                if (dest1[i] != (npes - 1)) {
                    success1 = 1;
                }
                if (dest2[i] != (npes - 1)) {
                    success2 = 1;
                }
                if (dest3[i] != (npes - 1)) {
                    success3 = 1;
                }
                if (dest6[i] != (npes - 1)) {
                    success6 = 1;
                }
                if (dest7[i] != (npes - 1)) {
                    success7 = 1;
                }
            }

            if (success1 == 0)
                printf ("Test shmem_short_iput: Passed\n");
            else {
                printf ("Test shmem_short_iput: Failed\n");
                fail_count++;
            }
            if (success2 == 0)
                printf ("Test shmem_int_iput: Passed\n");
            else {
                printf ("Test shmem_int_iput: Failed\n");
                fail_count++;
            }
            if (success3 == 0)
                printf ("Test shmem_long_iput: Passed\n");
            else {
                printf ("Test shmem_long_iput: Failed\n");
                fail_count++;
            }
            if (success6 == 0)
                printf ("Test shmem_double_iput: Passed\n");
            else {
                printf ("Test shmem_double_iput: Failed\n");
                fail_count++;
            }
            if (success7 == 0)
                printf ("Test shmem_float_iput: Passed\n");
            else {
                printf ("Test shmem_float_iput: Failed\n");
                fail_count++;
            }
        }

        /* Testing shmem_double_p, shmem_float_p, shmem_int_p, shmem_long_p,
           shmem_short_p */
        shmem_barrier_all ();

        shmem_short_p (&dest9, src9, nextpe);
        shmem_int_p (&dest10, src10, nextpe);
        shmem_long_p (&dest11, src11, nextpe);
        shmem_double_p (&dest12, src12, nextpe);
        shmem_float_p (&dest13, src13, nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            if (dest9 == (npes - 1))
                printf ("Test shmem_short_p: Passed\n");
            else {
                printf ("Test shmem_short_p: Failed\n");
                fail_count++;
            }
            if (dest10 == (npes - 1))
                printf ("Test shmem_int_p: Passed\n");
            else {
                printf ("Test shmem_int_p: Failed\n");
                fail_count++;
            }
            if (dest11 == (npes - 1))
                printf ("Test shmem_long_p: Passed\n");
            else {
                printf ("Test shmem_long_p: Failed\n");
                fail_count++;
            }
            if (dest12 == (npes - 1))
                printf ("Test shmem_double_p: Passed\n");
            else {
                printf ("Test shmem_double_p: Failed\n");
                fail_count++;
            }
            if (dest13 == (npes - 1))
                printf ("Test shmem_float_p: Passed\n");
            else {
                printf ("Test shmem_float_p: Failed\n");
                fail_count++;
            }
        }

        shmem_barrier_all ();

        /* Summary, printed by PE 0 only (the only PE that counted failures). */
        if (me == 0) {
            if (fail_count == 0)
                printf("All Tests Passed\n");
            else
                printf("%d Tests Failed\n", fail_count);
        }
    }
    else {
        printf ("Number of PEs must be > 1 to test shmem put, test skipped\n");
    }

    shmem_finalize ();

    return 0;
}
예제 #4
0
/*
 * SHMEM ping-pong benchmark between PE pairs (proc, proc ^ 1).
 *
 * Usage, as parsed below: [-n reps] [-e] [-h] [minWords [maxWords [incWords]]]
 *   -n reps   number of repetitions per message size (must be > 0)
 *   -e        enable printing (each occurrence increments doprint)
 *   -h        call help()
 *
 * For every message size from minWords to maxWords (stepping by incWords, or
 * doubling when incWords is 0) the pair exchanges `reps` messages with
 * shmem_long_put() and waits on the last word of the receive buffer; the
 * elapsed time divided by 2 * reps is handed to printStats().
 *
 * Returns 0; exits with status 1 when a buffer cannot be allocated.
 */
int
main (int argc, char *argv[])
{
  double t, tv[2];

  int reps = 10000;
  int doprint = 0;
  char *progName;
  int minWords = 1;
  int maxWords = 1;
  int incWords;
  int nwords;
  int nproc;
  int proc;
  int peer;
  int c;
  int r;
  int i;
  long *rbuf;
  long *tbuf;

  start_pes (0);

  proc = _my_pe ();
  nproc = _num_pes ();

  /* Strip any leading directory components from argv[0]. */
  for (progName = argv[0] + strlen (argv[0]);
       progName > argv[0] && *(progName - 1) != '/'; progName--)
    ;

  while ((c = getopt (argc, argv, "n:eh")) != -1)
    switch (c)
      {
      case 'n':
        if ((reps = getSize (optarg)) <= 0)
          usage (progName);
        break;

      case 'e':
        doprint++;
        break;

      case 'h':
        help (progName);
        /* fallthrough: kept from the original. NOTE(review): help() is
           presumably expected not to return; if it does, usage() runs too. */

      default:
        usage (progName);
      }

  /* Optional positional arguments: minWords, maxWords, incWords. */
  if (optind == argc)
    minWords = 1;
  else if ((minWords = getSize (argv[optind++])) <= 0)
    usage (progName);

  if (optind == argc)
    maxWords = minWords;
  else if ((maxWords = getSize (argv[optind++])) < minWords)
    usage (progName);

  if (optind == argc)
    incWords = 0;
  else if ((incWords = getSize (argv[optind++])) < 0)
    usage (progName);

  /* Receive buffer lives on the symmetric heap so that the peer can put
     into it; it starts out zeroed because zero means "nothing arrived". */
  if (!(rbuf = (long *) shmalloc (maxWords * sizeof (long))))
    {
      perror ("Failed memory allocation");
      exit (1);
    }
  memset (rbuf, 0, maxWords * sizeof (long));
  shmem_barrier_all ();

  /* Transmit buffer is only read locally, so ordinary heap memory is used. */
  if (!(tbuf = (long *) malloc (maxWords * sizeof (long))))
    {
      perror ("Failed memory allocation");
      exit (1);
    }

  if (nproc == 1)
    {
      /* Nothing to measure with a single PE; release both buffers instead
         of leaking them on this early exit. */
      free (tbuf);
      shfree (rbuf);
      return 0;
    }

  /* Payload values are all non-zero, so an arrival is always observable. */
  for (i = 0; i < maxWords; i++)
    tbuf[i] = 1000 + (i & 255);

  if (doprint)
    printf
      ("%d(%d): Shmem PING reps %d minWords %d maxWords %d incWords %d\n",
       proc, nproc, reps, minWords, maxWords, incWords);

  shmem_barrier_all ();

  /* PEs are paired (0,1), (2,3), ...; with an odd PE count the last PE has
     no partner, stays silent and only takes part in the barriers. */
  peer = proc ^ 1;
  if (peer >= nproc)
    doprint = 0;

  for (nwords = minWords;
       nwords <= maxWords;
       nwords = incWords ? nwords + incWords : nwords ? 2 * nwords : 1)
    {
      r = reps;

      shmem_barrier_all ();

      tv[0] = gettime ();

      if (peer < nproc)
        {
          /* The odd PE of a pair receives first ... */
          if (proc & 1)
            {
              r--;
              shmem_wait (&rbuf[nwords - 1], 0);
              rbuf[nwords - 1] = 0;
            }

          /* Send, wait until the last word of the reply is non-zero, then
             re-arm the flag word for the next round. */
          while (r-- > 0)
            {
              shmem_long_put (rbuf, tbuf, nwords, peer);
              shmem_wait (&rbuf[nwords - 1], 0);
              rbuf[nwords - 1] = 0;
            }

          /* ... and therefore sends the final reply. */
          if (proc & 1)
            shmem_long_put (rbuf, tbuf, nwords, peer);
        }

      tv[1] = gettime ();


      /* Each repetition is a round trip, hence the factor of two. */
      t = dt (&tv[1], &tv[0]) / (2 * reps);

      shmem_barrier_all ();

      printStats (proc, peer, doprint, nwords, t);
    }

  shmem_barrier_all ();

  free (tbuf);
  shfree (rbuf);

  return 0;
}
예제 #5
0
파일: shmem_ShMem.c 프로젝트: ORNL/ompi
/*
 * JNI native method that forwards straight to shmem_barrier_all().
 * Neither JNI argument is used.
 */
JNIEXPORT void JNICALL Java_shmem_ShMem_barrierAll(JNIEnv *env, jclass clazz)
{
    (void)env;
    (void)clazz;

    shmem_barrier_all();
}
예제 #6
0
static int test_item7(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* target_addr = NULL;
    TYPE_VALUE* source_addr = NULL;
    TYPE_VALUE source_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;

    num_proc = _num_pes();
    my_proc = _my_pe();

    target_addr = (TYPE_VALUE*)shmalloc(sizeof(*target_addr) * __max_buffer_size);
    source_addr = (TYPE_VALUE*)shmalloc(sizeof(*source_addr) * __max_buffer_size);
    if (target_addr && source_addr)
    {
        TYPE_VALUE value = DEFAULT_VALUE;
        int i = 0;
        int j = 0;
        long cur_buf_size = 0;

        for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
        {
            cur_buf_size = sys_max(1, (i + 1) * __max_buffer_size / __cycle_count);
            pWrk = shmalloc(sizeof(*pWrk) * sys_max(cur_buf_size/2 + 1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE));
            if (pWrk)
            {
                /* Set initial target value */
                value = DEFAULT_VALUE;
                fill_buffer((void *)target_addr, cur_buf_size, (void *)&value, sizeof(value));

                /* Give some time to all PE for setting their values */
                shmem_barrier_all();

                /* Set my value */
                source_value = (TYPE_VALUE)(BASE_VALUE + my_proc);
                fill_buffer((void *)source_addr, cur_buf_size, (void *)&source_value, sizeof(source_value));

                /* Define expected value */
                expect_value = ( my_proc % 2 ? DEFAULT_VALUE : BASE_VALUE );

                /* This guarantees that PE set initial value before peer change one */
                for ( j = 0; j < _SHMEM_REDUCE_SYNC_SIZE; j++ )
                {
                    pSync[j] = _SHMEM_SYNC_VALUE;
                }
                shmem_barrier_all();

                /* Put value to peer */
                FUNC_VALUE(target_addr, source_addr, cur_buf_size, 0, 1, ((num_proc / 2) + (num_proc % 2)), pWrk, pSync);

                /* Get value put by peer:
                 * These routines start the remote transfer and may return before the data
                 * is delivered to the remote PE
                 */
                shmem_barrier_all();
                {
                    int wait = WAIT_COUNT;

                    while (wait--)
                    {
                        value = *target_addr;
                        if (expect_value == value) break;
                        sleep(1);
                    }
                }

                rc = (!compare_buffer_with_const(target_addr, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL);

                log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld buffer size = %lld\n",
                                   my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)value, (INT64_TYPE)cur_buf_size);

                if (rc)
                {
                    TYPE_VALUE* check_addr = target_addr;
                    int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value));
                    int show_index = (odd_index > 1 ? odd_index - 2 : 0);
                    int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1);

                    log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
                    log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
                    show_buffer(check_addr + show_index, show_size);
                }

                shfree(pWrk);
            } else {
                rc = TC_SETUP_FAIL;
            }
        }
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (source_addr)
    {
        shfree(source_addr);
    }

    if (target_addr)
    {
        shfree(target_addr);
    }

    return rc;
}
예제 #7
0
static int test_item8(void)
{
    int rc = TC_PASS;
    static TYPE_VALUE target_addr[MAX_BUFFER_SIZE * 2];
    static TYPE_VALUE source_addr[MAX_BUFFER_SIZE * 2];
    TYPE_VALUE source_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;
    long* pSyncMult = NULL;
    TYPE_VALUE* pWrkMult = NULL;
    int pSyncNum = 2;
    int pWrkNum = 2;

    num_proc = _num_pes();
    my_proc = _my_pe();

    pSyncMult = shmalloc(sizeof(*pSyncMult) * pSyncNum * _SHMEM_REDUCE_SYNC_SIZE);
    if (pSyncMult)
    {
        TYPE_VALUE value = DEFAULT_VALUE;
        int i = 0;
        int j = 0;
        long cur_buf_size = 0;

        for ( j = 0; j < pSyncNum * _SHMEM_REDUCE_SYNC_SIZE; j++ )
        {
            pSyncMult[j] = _SHMEM_SYNC_VALUE;
        }

        /* Give some time to all PE for setting their values */
        shmem_barrier_all();

        pWrkMult = shmalloc(sizeof(*pWrkMult) * pWrkNum * sys_max(MAX_BUFFER_SIZE, _SHMEM_REDUCE_MIN_WRKDATA_SIZE));
        if (pWrkMult)
        {
            value = DEFAULT_VALUE;
            source_value = (TYPE_VALUE)(BASE_VALUE + my_proc);
            fill_buffer((void *)source_addr, MAX_BUFFER_SIZE * 2, (void *)&source_value, sizeof(source_value));
            fill_buffer((void *)target_addr, MAX_BUFFER_SIZE * 2, (void *)&value, sizeof(value));
            shmem_barrier_all();
            for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
            {
                cur_buf_size = sys_max(1, (i + 1) * MAX_BUFFER_SIZE / __cycle_count);
                /* Set initial target value */
                value = DEFAULT_VALUE;

                /* Set my value */
                source_value = (TYPE_VALUE)(BASE_VALUE + my_proc);

                /* Define expected value */
                expect_value = ( my_proc % 2 ? DEFAULT_VALUE : BASE_VALUE );

                /* Put value to peer */
                FUNC_VALUE(target_addr + (i % 2) * MAX_BUFFER_SIZE, source_addr + (i % 2) * MAX_BUFFER_SIZE, cur_buf_size, 0, 1, ((num_proc / 2) + (num_proc % 2)), pWrkMult + (i % pWrkNum) * sys_max(MAX_BUFFER_SIZE, _SHMEM_REDUCE_MIN_WRKDATA_SIZE),  pSyncMult + (i % pSyncNum) * _SHMEM_REDUCE_SYNC_SIZE);
                rc = (!compare_buffer_with_const(target_addr + (i % 2) * MAX_BUFFER_SIZE, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL);

                log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld buffer size = %lld\n",
                                   my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)value, (INT64_TYPE)cur_buf_size);

                if (rc)
                {
                    TYPE_VALUE* check_addr = target_addr + (i % 2) * MAX_BUFFER_SIZE;
                    int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value));
                    int show_index = (odd_index > 1 ? odd_index - 2 : 0);
                    int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1);

                    log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
                    log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
                    show_buffer(check_addr + show_index, show_size);
                }
                fill_buffer((void *)(source_addr + (i % 2) * MAX_BUFFER_SIZE), cur_buf_size, (void *)&source_value, sizeof(source_value));
                fill_buffer((void *)(target_addr + (i % 2) * MAX_BUFFER_SIZE ), cur_buf_size, (void *)&value, sizeof(value));
            }
            shfree(pWrkMult);
        } else {
            rc = TC_SETUP_FAIL;
        }
        shfree(pSyncMult);
    } else {
        rc = TC_SETUP_FAIL;
    }

    return rc;
}
예제 #8
0
파일: shmem_2dheat.c 프로젝트: coti/oshmpi
/*
 * One sweep of the "sor" method over this PE's rows.
 *
 * Three symmetric row buffers are allocated with shmalloc on every call
 * and released before returning.  When more than one PE is running, every
 * PE except the last puts its bottom row into U_Curr_Above on PE
 * my_rank + 1, and every PE except ROOT puts its top row into U_Curr_Below
 * on PE my_rank - 1; each exchange is followed by shmem_barrier_all().
 *
 * The grid is then updated in two passes: cells with (i + j) odd are
 * computed from current_ptr, after which cells with (i + j) even are
 * computed using the neighbours already stored in next_ptr.
 *
 * current_ptr: rows of the current iterate owned by this PE (read).
 * next_ptr:    rows of the next iterate owned by this PE (written).
 */
void
sor (float **current_ptr, float **next_ptr)
{
  const int ncols = (int) floor (WIDTH / H);	/* cells per row */
  int col, row, first_row, last_row, local_rows;
  /* ghost row received from the PE above */
  float *U_Curr_Above = (float *) shmalloc ((sizeof (float)) * ncols);
  /* ghost row received from the PE below */
  float *U_Curr_Below = (float *) shmalloc ((sizeof (float)) * ncols);
  /* staging area for the row currently being sent */
  float *U_Send_Buffer = (float *) shmalloc ((sizeof (float)) * ncols);
  float W = 1.5;

  first_row = get_start (my_rank);
  last_row = get_end (my_rank);
  local_rows = get_num_rows (my_rank);

  /* Ghost rows only need exchanging when there is a neighbour */
  if (p > 1)
    {
      /* bottom row goes down to the next PE */
      if (my_rank < (p - 1))
	{
	  for (col = 0; col < ncols; col++)
	    U_Send_Buffer[col] = current_ptr[local_rows - 1][col];

	  shmem_float_put (U_Curr_Above, U_Send_Buffer, ncols, my_rank + 1);
	}
      shmem_barrier_all ();

      /* top row goes up to the previous PE */
      if (my_rank > ROOT)
	{
	  for (col = 0; col < ncols; col++)
	    U_Send_Buffer[col] = current_ptr[0][col];

	  shmem_float_put (U_Curr_Below, U_Send_Buffer, ncols, my_rank - 1);
	}
      shmem_barrier_all ();
    }

  /* first pass: cells with (col + row) odd, read from current_ptr */
  for (row = first_row; row <= last_row; row++)
    {
      for (col = 0; col < ncols; col++)
	{
	  if ((col + row) % 2 == 0)
	    continue;

	  next_ptr[row - first_row][col] =
	    get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, col, row)
	    + (W / 4) * (get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, col - 1, row)
			 + get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, col + 1, row)
			 + get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, col, row - 1)
			 + get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, col, row + 1)
			 - 4 * (get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, col, row))
			 - (pow (H, 2) * f (col, row)));
	  enforce_bc_par (next_ptr, my_rank, col, row);
	}
    }

  /* second pass: cells with (col + row) even, neighbours read from next_ptr */
  for (row = first_row; row <= last_row; row++)
    {
      for (col = 0; col < ncols; col++)
	{
	  if ((col + row) % 2 != 0)
	    continue;

	  next_ptr[row - first_row][col] =
	    get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, col, row)
	    + (W / 4) * (get_val_par (U_Curr_Above, next_ptr, U_Curr_Below, my_rank, col - 1, row)
			 + get_val_par (U_Curr_Above, next_ptr, U_Curr_Below, my_rank, col + 1, row)
			 + get_val_par (U_Curr_Above, next_ptr, U_Curr_Below, my_rank, col, row - 1)
			 + get_val_par (U_Curr_Above, next_ptr, U_Curr_Below, my_rank, col, row + 1)
			 - 4 * (get_val_par (U_Curr_Above, next_ptr, U_Curr_Below, my_rank, col, row))
			 - (pow (H, 2) * f (col, row)));
	  enforce_bc_par (next_ptr, my_rank, col, row);
	}
    }

  shfree (U_Send_Buffer);
  shfree (U_Curr_Below);
  shfree (U_Curr_Above);
}
예제 #9
0
/*
 * Driver for the OR-reduction test.
 *
 * Iterates over every (PE_start, logPE_stride, removed-tail) combination up
 * to MAXSTART / MAXSTRIDE / MAXRMLAST.  For each non-empty active set it
 * runs or_int() on PEs inside the set and, on PEs outside it, checks that
 * source/target arrays were left untouched.  Six cases are exercised per
 * active set (static, global and symmetric-heap arrays; source != target
 * and source == target), each deliberately pairing the data arrays with a
 * work array and sync array from a different storage class.
 *
 * Always returns 0.
 */
int main()
{
   int start,stride,rmlast,rstride,np_aset,inset;
   int my_pe,n_pes;
   int i;
   /* Failure counters.  They must start at zero: nasfail is accumulated
    * with += and was previously read uninitialised. */
   int asfail=0,nasfail=0;
   char Case[40];
   
   static int sSource_int[NREDUCE];
   static int sTarget_int[NREDUCE];
   static int spWrk_int[PWRKELEM];
   
   static long spSync[_SHMEM_REDUCE_SYNC_SIZE];


   shmem_init();
   my_pe = shmem_my_pe();
   n_pes = shmem_n_pes();

   /* One sync array per storage class: global, heap (dynamic) and static */
   dpSync=shmem_malloc(_SHMEM_REDUCE_SYNC_SIZE*sizeof(long));
   for(i=0;i<_SHMEM_REDUCE_SYNC_SIZE;i++) {
      gpSync[i]=_SHMEM_SYNC_VALUE;
      dpSync[i]=_SHMEM_SYNC_VALUE;
      spSync[i]=_SHMEM_SYNC_VALUE;
   }
      
   dSource_int=shmem_malloc(NREDUCE*sizeof(int));
   dTarget_int=shmem_malloc(NREDUCE*sizeof(int));
   dpWrk_int=shmem_malloc((NREDUCE/2+1 > _SHMEM_REDUCE_MIN_WRKDATA_SIZE ? NREDUCE/2+1 : _SHMEM_REDUCE_MIN_WRKDATA_SIZE)*sizeof(int));

   for(start=0;start<=MAXSTART;start++) {
      rstride=1;    /* real stride, kept equal to 2^stride */
      for(stride=0;stride<=MAXSTRIDE;stride++) {
         for(rmlast=0;rmlast<=MAXRMLAST;rmlast++)
         {
            np_aset=(n_pes+rstride-1-start)/rstride-rmlast; /* number of processes in the active set */
            if(np_aset > 0) /* if active set is not empty */
            {
               if(my_pe==0) printf("\nActive set triplet: PE_start=%d,logPE_stride=%d,PE_size=%d \n",start,stride,np_aset);
               if((my_pe>=start) && ((my_pe-start)%rstride==0) && ((my_pe-start)/rstride<np_aset)) inset=1;
               else inset=0;

/* Initialize Source and Target arrays */
               for(i=0;i<NREDUCE;i++) {
                  sSource_int[i]=SINIT;
                  sTarget_int[i]=TINIT;
                  gSource_int[i]=SINIT;
                  gTarget_int[i]=TINIT;
                  dSource_int[i]=SINIT;
                  dTarget_int[i]=TINIT;
               }
               shmem_barrier_all();

/* CASE: static arrays, source is different from target */
               sprintf(Case,"static, source!=target");
               if(inset) 
                  asfail=or_int(sSource_int,sTarget_int,start,stride,np_aset,rstride,0,dpWrk_int,gpSync,Case);
               else {	/* check that values of source and target have not been changed */
                  nasfail+=check_sval_notchanged(sSource_int,Case);
                  nasfail+=check_tval_notchanged(sTarget_int,Case);
               }
                  
               
/* CASE: global arrays, source is different from target */
               sprintf(Case,"global, source!=target");
               if(inset)
                  asfail=or_int(gSource_int,gTarget_int,start,stride,np_aset,rstride,0,spWrk_int,dpSync,Case);
               else {	/* check that values of source and target have not been changed */
                  nasfail+=check_sval_notchanged(gSource_int,Case);
                  nasfail+=check_tval_notchanged(gTarget_int,Case);
               }
               
/* CASE: symmetric heap arrays, source is different from target */
               sprintf(Case,"sym heap, source!=target");
               if(inset)
                  asfail=or_int(dSource_int,dTarget_int,start,stride,np_aset,rstride,0,gpWrk_int,spSync,Case);
               else {	/* check that values of source and target have not been changed */
                  nasfail+=check_sval_notchanged(dSource_int,Case);
                  nasfail+=check_tval_notchanged(dTarget_int,Case);
               }
               

/* Reinitialize Source arrays for new tests */
               for(i=0;i<NREDUCE;i++) {
                  sSource_int[i]=SINIT;
                  gSource_int[i]=SINIT;
                  dSource_int[i]=SINIT;
               }
               shmem_barrier_all();

/* CASE: static arrays, source and target are the same array */
               sprintf(Case,"static, source==target");
               if(inset)
                  asfail=or_int(sSource_int,sSource_int,start,stride,np_aset,rstride,1,gpWrk_int,dpSync,Case);
               else 	/* check that values of source have not been changed */
                  nasfail+=check_sval_notchanged(sSource_int,Case);

/* CASE: global arrays, source and target are the same array */
               sprintf(Case,"global, source==target");
               if(inset)
                  asfail=or_int(gSource_int,gSource_int,start,stride,np_aset,rstride,1,dpWrk_int,spSync,Case);
               else 	/* check that values of source have not been changed */
                  nasfail+=check_sval_notchanged(gSource_int,Case);

/* CASE: symmetric heap arrays, source and target are the same array */
               sprintf(Case,"sym heap, source==target");
               if(inset)
                  asfail=or_int(dSource_int,dSource_int,start,stride,np_aset,rstride,1,spWrk_int,gpSync,Case);
               else 	/* check that values of source have not been changed */
                  nasfail+=check_sval_notchanged(dSource_int,Case);

               
            }	/* end of if active set is not empty */
         } 	/* end of for loop on rmlast */
         rstride*=2;
      } 	/* end of for loop on stride */
   } 		/* end of for loop on start */

   /* NOTE(review): asfail/nasfail are collected but not reflected in the
    * exit status; presumably or_int()/check_*() report failures themselves. */
   (void)asfail;
   (void)nasfail;

   shmem_barrier_all();  
#ifdef NEEDS_FINALIZE
   shmem_finalize();
#endif
   return(0);
}
예제 #10
0
/*
 * Run the SHMEM RandomAccess (GUPS) benchmark and its verification pass.
 *
 * The table size is the largest power of two that fits in
 * NumProcs * params->HPLMaxProcMem bytes (HPLMaxProcMem is forced to
 * 200000 here).  Only power-of-two PE counts are supported; otherwise a
 * message is printed by PE 0 and the function returns immediately.
 *
 * params: benchmark parameter/result block.  The SHMEMGUPs,
 *         SHMEMRandomAccess_* and Failure fields are written; most of them
 *         only on PE 0, SHMEMGUPs on every PE.
 *
 * Returns 0 in all cases; failures are reported through params and the
 * PE 0 output stream.
 */
int
HPCC_SHMEMRandomAccess(HPCC_Params *params) {
  s64Int i;
  static s64Int NumErrors, GlbNumErrors;

  int NumProcs, logNumProcs, MyProc;
  u64Int GlobalStartMyProc;
  int Remainder;            /* Number of processors with (LocalTableSize + 1) entries */
  u64Int Top;               /* Number of table entries in top of Table */
  s64Int LocalTableSize;    /* Local table width */
  u64Int MinLocalTableSize; /* Integer ratio TableSize/NumProcs */
  u64Int logTableSize, TableSize;

  double RealTime;              /* Real time to update table */

  double TotalMem;
  static int sAbort, rAbort;
  int PowerofTwo;

  u64Int NumUpdates_Default; /* Number of updates to table (suggested: 4x number of table entries) */
  u64Int NumUpdates;  /* actual number of updates to table - may be smaller than
                       * NumUpdates_Default due to execution time bounds */
  s64Int ProcNumUpdates; /* number of updates per processor */

  /*
   * Symmetric scratch space for the collectives.  Reductions need
   * _SHMEM_REDUCE_SYNC_SIZE sync words and at least
   * _SHMEM_REDUCE_MIN_WRKDATA_SIZE work elements (nreduce is 1 here);
   * the broadcast needs _SHMEM_BCAST_SYNC_SIZE sync words.  Each
   * collective gets an array of the matching size.
   */
  static long bcastSync[_SHMEM_BCAST_SYNC_SIZE];
  static long llpSync[_SHMEM_REDUCE_SYNC_SIZE];
  static long long int llpWrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];

  static long ipSync[_SHMEM_REDUCE_SYNC_SIZE];
  static int ipWrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];

  /* Symmetric staging buffers for broadcasting the GUPS figure; *params
   * belongs to the caller and is not guaranteed to be symmetric. */
  static double gupsSrc, gupsDst;

  FILE *outFile = NULL;
  double *GUPs;


  for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1){
        bcastSync[i] = _SHMEM_SYNC_VALUE;
  }
  for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1){
        ipSync[i] = _SHMEM_SYNC_VALUE;
        llpSync[i] = _SHMEM_SYNC_VALUE;
  }


  params->SHMEMGUPs = -1;
  GUPs = &params->SHMEMGUPs;

  NumProcs = shmem_n_pes();
  MyProc = shmem_my_pe();

  if (0 == MyProc) {
    outFile = stdout;
    setbuf(outFile, NULL);
  }

  params->HPLMaxProcMem = 200000;

  TotalMem = params->HPLMaxProcMem; /* max single node memory */
  TotalMem *= NumProcs;             /* max memory in NumProcs nodes */

  TotalMem /= sizeof(u64Int);

  /* calculate TableSize --- the size of update array (must be a power of 2) */
  for (TotalMem *= 0.5, logTableSize = 0, TableSize = 1;
       TotalMem >= 1.0;
       TotalMem *= 0.5, logTableSize++, TableSize <<= 1)
    ; /* EMPTY */


  /* determine whether the number of processors is a power of 2 */
  if ( (NumProcs & (NumProcs -1)) == 0) {
    PowerofTwo = HPCC_TRUE;
    Remainder = 0;
    Top = 0;
    MinLocalTableSize = (TableSize / NumProcs);
    LocalTableSize = MinLocalTableSize;
    GlobalStartMyProc = (MinLocalTableSize * MyProc);

    /* log2(NumProcs); exact because NumProcs is a power of two */
    for (logNumProcs = 0; (1 << logNumProcs) < NumProcs; logNumProcs++)
      ; /* EMPTY */
  }
  else {
    if(MyProc == 0) {
        printf("Number of processes must be power of 2\n");

    }
    return 0;
  }
  sAbort = 0;
  HPCC_Table = HPCC_XMALLOC( s64Int, LocalTableSize );

  if (! HPCC_Table) sAbort = 1;



  /* Agree across all PEs on whether any allocation failed */
  shmem_barrier_all();
  shmem_int_sum_to_all(&rAbort, &sAbort, 1, 0, 0, NumProcs, ipWrk, ipSync);
  shmem_barrier_all();

  if (rAbort > 0) {
    if (MyProc == 0) fprintf(outFile, "Failed to allocate memory for the main table.\n");
    /* check all allocations in case there are new added and their order changes */
    if (HPCC_Table) HPCC_free( HPCC_Table );
    goto failed_table;
  }

  params->SHMEMRandomAccess_N = (s64Int)TableSize;

  /* Default number of global updates to table: 4x number of table entries */
  NumUpdates_Default = 4 * TableSize;
  ProcNumUpdates = 4*LocalTableSize;
  NumUpdates = NumUpdates_Default;

  if (MyProc == 0) {
    fprintf( outFile, "Running on %d processors%s\n", NumProcs, PowerofTwo ? " (PowerofTwo)" : "");
    fprintf( outFile, "Total Main table size = 2^" FSTR64 " = " FSTR64 " words\n",
             logTableSize, TableSize );
    if (PowerofTwo)
        fprintf( outFile, "PE Main table size = 2^" FSTR64 " = " FSTR64 " words/PE\n",
                 (logTableSize - logNumProcs), TableSize/NumProcs );
      else
        fprintf( outFile, "PE Main table size = (2^" FSTR64 ")/%d  = " FSTR64 " words/PE MAX\n",
                 logTableSize, NumProcs, LocalTableSize);

    fprintf( outFile, "Default number of updates (RECOMMENDED) = " FSTR64 "\n", NumUpdates_Default);
    params->SHMEMRandomAccess_ExeUpdates = NumUpdates;
  }

  /* Initialize main table */
  for (i=0; i<LocalTableSize; i++)
    HPCC_Table[i] = i + GlobalStartMyProc;

  shmem_barrier_all();

  /* Begin timed section */
  RealTime = -RTSEC();

  Power2NodesRandomAccessUpdate(logTableSize, TableSize, LocalTableSize,
                                     MinLocalTableSize, GlobalStartMyProc, Top,
                                     logNumProcs, NumProcs, Remainder,
                                     MyProc, ProcNumUpdates);

  shmem_barrier_all();

  /* End timed section */

  RealTime += RTSEC();

  /* Print timing results */
  if (MyProc == 0){
    params->SHMEMRandomAccess_time = RealTime;
    *GUPs = 1e-9*NumUpdates / RealTime;
    fprintf( outFile, "Real time used = %.6f seconds\n", RealTime );
    fprintf( outFile, "%.9f Billion(10^9) Updates    per second [GUP/s]\n",
             *GUPs );
    fprintf( outFile, "%.9f Billion(10^9) Updates/PE per second [GUP/s]\n",
             *GUPs / NumProcs );
    /* No longer reporting per CPU number */
    /* *GUPs /= NumProcs; */
  }
  /* distribute result to all nodes; the broadcast does not write the
     root's own target, so only non-root PEs copy the value back */
  gupsSrc = *GUPs;
  shmem_barrier_all();
  shmem_broadcast64(&gupsDst,&gupsSrc,1,0,0,0,NumProcs,bcastSync);
  shmem_barrier_all();
  if (MyProc != 0) *GUPs = gupsDst;

  /* Verification phase */

  /* Begin timing here */

  RealTime = -RTSEC();


  HPCC_Power2NodesSHMEMRandomAccessCheck(logTableSize, TableSize, LocalTableSize,
                                    GlobalStartMyProc,
                                    logNumProcs, NumProcs,
                                    MyProc, ProcNumUpdates,
                                    &NumErrors);

  shmem_barrier_all(); 
  shmem_longlong_sum_to_all( &GlbNumErrors,  &NumErrors, 1, 0,0, NumProcs,llpWrk, llpSync);
  shmem_barrier_all(); 

  /* End timed section */

  RealTime += RTSEC();

  if(MyProc == 0){
    params->SHMEMRandomAccess_CheckTime = RealTime;

    /* Up to 1% of table locations may be wrong and still pass */
    fprintf( outFile, "Verification:  Real time used = %.6f seconds\n", RealTime);
    fprintf( outFile, "Found " FSTR64 " errors in " FSTR64 " locations (%s).\n",
             GlbNumErrors, TableSize, (GlbNumErrors <= 0.01*TableSize) ?
             "passed" : "failed");
    if (GlbNumErrors > 0.01*TableSize) params->Failure = 1;
    params->SHMEMRandomAccess_Errors = (s64Int)GlbNumErrors;
    params->SHMEMRandomAccess_ErrorsFraction = (double)GlbNumErrors / (double)TableSize;
    params->SHMEMRandomAccess_Algorithm = 1;
  }
  /* End verification phase */


  /* Deallocate memory (in reverse order of allocation which should
     help fragmentation) */

  HPCC_free( HPCC_Table );
  failed_table:

  /* outFile is a standard stream this function did not open: flush it
     rather than closing it, so later output from PE 0 still works */
  if (0 == MyProc) {
    if (outFile != stdout && outFile != stderr) fclose( outFile );
    else fflush( outFile );
  }

  shmem_barrier_all();

  return 0;
}
예제 #11
0
파일: shmem_2dheat.c 프로젝트: coti/oshmpi
/*
 * 2D heat-equation driver.
 *
 * Parses the command line (-e epsilon, -h height, -w width, -m method,
 * -t timing, -v verbose), selects the iteration method (1 jacobi,
 * 2 gauss_seidel, 3 sor), allocates this PE's slice of the current and
 * next grids, and iterates until the globally reduced convergence value
 * drops to EPSILON or below.  PE ROOT prints the elapsed time.
 *
 * Returns 77 without running when started on more than 8 PEs; otherwise
 * terminates through exit(EXIT_SUCCESS).
 */
int
main (int argc, char **argv)
{
  /* arrays used to contain each PE's rows - specify cols, no need to spec rows */
  float **U_Curr;
  float **U_Next;
  /* helper variables */
  int i, j, k;
  int my_start_row, my_end_row, my_num_rows;
  int verbose = 0;
  int show_time = 0;
  double t, tv[2];

  /*OpenSHMEM initilization*/
  start_pes (0);
  p = _num_pes ();
  my_rank = _my_pe ();

  if (p > 8) {
      fprintf(stderr, "Ignoring test when run with more than 8 pes\n");
      return 77;
  }

  /* argument processing done by everyone */
  int c;
  int errflg = 0;		/* count of bad options; was previously uninitialised */
  extern char *optarg;
  extern int optind, optopt;

  while ((c = getopt (argc, argv, "e:h:m:tw:v")) != -1)
    {
      switch (c)
        {
        case 'e':
          EPSILON = atof (optarg);
          break;
        case 'h':
          HEIGHT = atoi (optarg);
          break;
        case 'm':
          /* selects the numerical methods */
          switch (atoi (optarg))
            {
            case 1:		/* jacobi */
              meth = 1;
              break;
            case 2:		/* gauss-seidel */
              meth = 2;
              break;
            case 3:		/* sor */
              meth = 3;
              break;
            }
          break;
        case 't':
          show_time++;		/* overridden by -v (verbose) */
          break;
        case 'w':
          WIDTH = atoi (optarg);
          break;
        case 'v':
          verbose++;
          break;
          /* handle bad arguments */
        case ':':		/* -h or -w without operand */
          /* NOTE(review): getopt only returns ':' when the option string
             starts with ':'; with the string above a missing operand is
             reported through the '?' case instead. */
          if (ROOT == my_rank)
            fprintf (stderr, "Option -%c requires an operand\n", optopt);
          errflg++;
          break;
        case '?':
          if (ROOT == my_rank)
            fprintf (stderr, "Unrecognized option: -%c\n", optopt);
          errflg++;
          break;
        }
    }

  if (ROOT == my_rank && argc < 2)
    {
      printf ("Using defaults: -h 20 -w 20 -m 2\n");
    }

  /* Bad options are counted but deliberately not fatal here. */
  (void) errflg;
  (void) show_time;


  /* prepare the sync array used by the collectives below */

  for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1)
    pSync[i] = _SHMEM_SYNC_VALUE;

  shmem_barrier_all ();


  /* broadcast method to use  */

  shmem_broadcast32 (&meth, &meth, 1, 0, 0, 0, p, pSync);
  switch (meth)
    {
    case 1:
      method = &jacobi;
      break;
    case 2:
      method = &gauss_seidel;
      break;
    case 3:
      method = &sor;
      break;
    }

  /* let each processor decide what rows(s) it owns */
  my_start_row = get_start (my_rank);
  my_end_row = get_end (my_rank);
  my_num_rows = get_num_rows (my_rank);

  if (0 < verbose)
    printf ("proc %d contains (%d) rows %d to %d\n", my_rank, my_num_rows,
            my_start_row, my_end_row);
  fflush (stdout);

  /* allocate 2d array: one contiguous slab plus a table of row pointers */
  U_Curr = (float **) malloc (sizeof (float *) * my_num_rows);
  U_Curr[0] =
    (float *) malloc (sizeof (float) * my_num_rows * (int) floor (WIDTH / H));
  for (i = 1; i < my_num_rows; i++)
    {
      U_Curr[i] = U_Curr[i - 1] + (int) floor (WIDTH / H);
    }

  /* allocate 2d array: same layout for the next iterate */
  U_Next = (float **) malloc (sizeof (float *) * my_num_rows);
  U_Next[0] =
    (float *) malloc (sizeof (float) * my_num_rows * (int) floor (WIDTH / H));
  for (i = 1; i < my_num_rows; i++)
    {
      U_Next[i] = U_Next[i - 1] + (int) floor (WIDTH / H);
    }

  /* initialize global grid */
  init_domain (U_Curr, my_rank);
  init_domain (U_Next, my_rank);

  /* iterate for solution */
  if (my_rank == ROOT)
    {

      tv[0] = gettime ();
    }
  k = 1;
  while (1)
    {
      method (U_Curr, U_Next);

      local_convergence_sqd = get_convergence_sqd (U_Curr, U_Next, my_rank);

      /* sum the per-PE squared differences over all PEs */
      shmem_barrier_all ();
      shmem_float_sum_to_all (&convergence_sqd, &local_convergence_sqd, 1, 0,
                              0, p, pWrk, pSync);
      if (my_rank == ROOT)
        {
          convergence = sqrt (convergence_sqd);
          if (verbose == 1)
            {
              printf ("L2 = %f\n", convergence);
            }
        }

      /* broadcast ROOT's convergence value so every PE takes the same exit */

      shmem_barrier_all ();
      shmem_broadcast32 (&convergence, &convergence, 1, 0, 0, 0, p, pSync);
      if (convergence <= EPSILON)
        {
          break;
        }

      /* copy U_Next to U_Curr */
      for (j = my_start_row; j <= my_end_row; j++)
        {
          for (i = 0; i < (int) floor (WIDTH / H); i++)
            {
              U_Curr[j - my_start_row][i] = U_Next[j - my_start_row][i];
            }
        }
      k++;
      shmem_barrier_all ();
    }


  /* say something at the end */
  if (my_rank == ROOT)
    {
      tv[1] = gettime ();
      t = dt (&tv[1], &tv[0]);
      printf
        ("Estimated time to convergence in %d iterations using %d processors on a %dx%d grid is %f seconds\n",
         k, p, (int) floor (WIDTH / H), (int) floor (HEIGHT / H),
         t / 1000000.0);
    }

  exit (EXIT_SUCCESS);
  return 0;
}
/*
 * Lock test: emulates a fetch-and-increment of PE 0's 'count' with
 * shmem_set_lock / get / put / shmem_clear_lock.
 *
 * Every PE except 0 performs ITER locked increments.  The value fetched
 * each time selects a slot x[oldj % (n_pes - 1)] on PE 0, which is then
 * bumped with shmem_long_finc.  Afterwards PE 0 checks that each of the
 * n_pes - 1 slots was incremented exactly ITER times and prints a FAIL
 * line for any slot that was not.
 *
 * Exits with status 1 when run on a single PE; otherwise returns 0.
 */
int main(int argc, char **argv)
{
  int i,j;
  long modj,oldj,newcount;
  int my_pe,n_pes;
  size_t max_elements_bytes;
  static long *x;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
#ifdef HAVE_SET_CACHE_INV
  shmem_set_cache_inv();
#endif

/*  fail if trying to use only one processor  */
  if ( n_pes  <= 1 ){
        fprintf(stderr, "FAIL - test requires at least two PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_lock_set_clear(%s) n_pes=%d\n", argv[0],n_pes);

/*  shmalloc x on all pes (only use the one on PE 0)  */

  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  x = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    x[i] = 0;
  count = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      /* emulate  oldj = shmem_long_finc(&count, 0); */
      shmem_set_lock(&lock);
      shmem_long_get(&oldj,&count,1,0);   /* get oldj from PE 0's count */
      newcount = oldj+1;
      shmem_long_put(&count,&newcount,1,0);  /* update count on PE 0 */
      /* The put must be complete before the lock is released, otherwise
         the next lock holder may read a stale count.  This has to be a
         call: a bare 'shmem_quiet;' is an expression with no effect. */
      shmem_quiet();
      shmem_clear_lock(&lock);
      /* end of emulation */
      modj = (oldj % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* increment value in x[modj] */
      (void) shmem_long_finc(&x[modj], 0);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {         /* check x[j] array on PE 0 */
    for(j=1 ; j<n_pes; j++) {
      if (x[j-1] != (long) ITER)
        fprintf(stderr, "FAIL PE %d of %d: x[%d] = %ld expected = %ld\n", 
                         my_pe, n_pes, j-1, x[j-1], (long) ITER);
    }
  }

  shmem_barrier_all();
#ifdef NEEDS_FINALIZE
  shmem_finalize(); 
#endif
  return 0;
}
예제 #13
0
/*
 * Broadcast timing test.
 *
 * Options: -v verbose (repeatable), -e element count, -l loop count,
 * -p number of pSync sets to rotate through, -s barrier after every
 * broadcast, -h usage.
 *
 * Runs 'loops' shmem_broadcast32 calls of 'elements' ints rooted at PE 0
 * over all PEs, stepping to the next pSync set after each call, and
 * accumulates the elapsed time.  PE 0 prints the totals when verbose.
 *
 * Returns 0 on success or after -h, 1 on a bad option or option value.
 */
int
main(int argc, char **argv)
{
    int i,ps,ps_cnt=2;
    int *target;
    int *source;
    int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS;
    char *pgm;
    double start_time, time_taken;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    /* program name without its directory part, for messages */
    if ((pgm=strrchr(argv[0],'/')))
        pgm++;
    else
        pgm = argv[0];

    while ((i = getopt (argc, argv, "hve:l:p:s")) != EOF) {
        switch (i)
        {
          case 'v':
              Verbose++;
              break;
          case 'e':
              if ((elements = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad elements count %d\n",elements);
                  shmem_finalize();
                  return 1;
              }
              break;
          case 'l':
              if ((loops = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad loop count %d\n",loops);
                  shmem_finalize();
                  return 1;
              }
              break;
          case 'p':
              if ((ps_cnt = atoi_scaled(optarg)) <= 0) {
                  /* report the rejected pSync count, not the loop count */
                  fprintf(stderr,"ERR: Bad pSync[] elements %d\n",ps_cnt);
                  shmem_finalize();
                  return 1;
              }
              break;
          case 's':
              Serialize++;
              break;
          case 'h':
              if (me == 0)
                  usage(pgm);
              /* finalize here too, like every other exit path */
              shmem_finalize();
              return 0;
          default:
              if (me == 0) {
                  fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i);
                  usage(pgm);
              }
              shmem_finalize();
              return 1;
        }
    }

    /* ps_cnt becomes the total number of longs: one full sync array per set */
    ps_cnt *= _SHMEM_BCAST_SYNC_SIZE;
    pSync = shmem_malloc( ps_cnt * sizeof(long) );

    for (i = 0; i < ps_cnt; i++)
        pSync[i] = _SHMEM_SYNC_VALUE;

    source = (int *) shmem_malloc( elements * sizeof(*source) );

    target = (int *) shmem_malloc( elements * sizeof(*target) );
    for (i = 0; i < elements; i += 1) {
        source[i] = i + 1;
        target[i] = -90;
    }

    if (me==0 && Verbose)
        fprintf(stderr,"ps_cnt %d loops %d nElems %d\n",
                        ps_cnt,loops,elements);

    shmem_barrier_all();

    for(time_taken = 0.0, ps = i = 0; i < loops; i++) {

        start_time = shmemx_wtime();

        shmem_broadcast32(target, source, elements, 0, 0, 0, npes, &pSync[ps]);

        if (Serialize) shmem_barrier_all();

        time_taken += (shmemx_wtime() - start_time);

        /* advance to the next sync set, wrapping at the end */
        if (ps_cnt > 1 ) {
            ps += _SHMEM_BCAST_SYNC_SIZE;
            if ( ps >= ps_cnt ) ps = 0;
        }
    }

    if(me == 0 && Verbose) {
        /* the byte count is a size_t, so print it with %zu */
        printf("%d loops of Broadcast32(%zu bytes) over %d PEs: %7.3f secs\n",
            loops, (elements*sizeof(*source)), npes, time_taken);
        /* from here on 'elements' holds the total volume in whole MiB */
        elements = (elements * loops * sizeof(*source)) / (1024*1024);
        printf("  %7.5f secs per broadcast() @ %7.4f MB/sec\n",
               (time_taken/(double)loops), ((double)elements / time_taken) );
    }

    if (Verbose > 1)  fprintf(stderr,"[%d] pre B1\n",me);

    shmem_barrier_all();

    if (Verbose > 1)  fprintf(stderr,"[%d] post B1\n",me);

    shmem_free(pSync);
    shmem_free(target);
    shmem_free(source);

    shmem_finalize();

    return 0;
}
예제 #14
0
/*
 * STREAM-style memory bandwidth benchmark driven through OpenSHMEM.
 *
 * Three symmetric arrays a, b and c are allocated on every PE.  Work is
 * handed out in chunks of `blocksize` elements: a PE processes a chunk only
 * when the ticket it last fetched from `gcounter` on PE ROOT (next_p) equals
 * the running chunk number (count_p).  After each chunk every PE calls
 * shmem_barrier_all() and flat_tree() on that chunk.
 *
 * The four timed kernels are c = a, b = scalar * c, c = a + b and
 * a = b + scalar * c, each repeated NTIMES times.  PE 0 prints the summary
 * and runs checkSTREAMresults().
 *
 * Returns 0 on success.  Exits with status -1 when the number of PEs is not
 * a power of two or when a symmetric allocation fails.
 */
int
main ()
{
  int quantum = -1, checktick ();
  int BytesPerWord;
  int k;
  ssize_t j, i;
  STREAM_TYPE scalar;


  /* --- SETUP --- determine precision and check timing --- */

  /* NOTE: these banner lines run before start_pes(), so every process
   * prints them. */
  printf (HLINE);
  printf ("STREAM version $Revision: 5.10 $\n");
  printf (HLINE);
  BytesPerWord = sizeof (STREAM_TYPE);
  printf ("This system uses %d bytes per array element.\n", BytesPerWord);
  /* SHMEM initialize */
  start_pes (0);
  _world_size = _num_pes ();
  _world_rank = _my_pe ();

  STREAM_TYPE *a =
    (STREAM_TYPE *) shmalloc ((STREAM_ARRAY_SIZE + OFFSET) *
			      sizeof (STREAM_TYPE));
  STREAM_TYPE *b =
    (STREAM_TYPE *) shmalloc ((STREAM_ARRAY_SIZE + OFFSET) *
			      sizeof (STREAM_TYPE));
  STREAM_TYPE *c =
    (STREAM_TYPE *) shmalloc ((STREAM_ARRAY_SIZE + OFFSET) *
			      sizeof (STREAM_TYPE));

  /* The arrays are written unconditionally below, so a failed symmetric
   * allocation must stop the run here instead of dereferencing NULL. */
  if (a == NULL || b == NULL || c == NULL)
    {
      printf ("Failed to allocate symmetric memory for the STREAM arrays\n");
      exit (-1);
    }

  /* Put every synchronization array into its initial state.
   * NOTE(review): pSync0 is later passed to reduction routines while this
   * loop covers _SHMEM_BARRIER_SYNC_SIZE entries -- confirm the arrays are
   * declared large enough for reductions and are fully initialized. */
  for (j = 0; j < _SHMEM_BARRIER_SYNC_SIZE; j++)
    {
      pSync0[j] = pSync1[j] = pSync2[j] = _SHMEM_SYNC_VALUE;
    }

  /* Only power-of-two PE counts are accepted. */
  int size = _world_size;
  if (size == 0 || (size & (size - 1)))
    {
      printf ("Program only works for a PE size of power-of-2\n");
      exit (-1);
    }

  if (_world_rank == 0)
    {
      printf (HLINE);
#ifdef N
      printf ("*****  WARNING: ******\n");
      printf
	("      It appears that you set the preprocessor variable N when compiling this code.\n");
      printf
	("      This version of the code uses the preprocesor variable STREAM_ARRAY_SIZE to control the array size\n");
      printf ("      Reverting to default value of STREAM_ARRAY_SIZE=%llu\n",
	      (unsigned long long) STREAM_ARRAY_SIZE);
      printf ("*****  WARNING: ******\n");
#endif

      printf ("Array size = %llu (elements), Offset = %d (elements)\n",
	      (unsigned long long) STREAM_ARRAY_SIZE, OFFSET);
      printf ("Memory per array = %.1f MiB (= %.1f GiB).\n",
	      BytesPerWord * ((double) STREAM_ARRAY_SIZE / 1024.0 / 1024.0),
	      BytesPerWord * ((double) STREAM_ARRAY_SIZE / 1024.0 / 1024.0 /
			      1024.0));
      printf ("Total memory required = %.1f MiB (= %.1f GiB).\n",
	      (3.0 * BytesPerWord) * ((double) STREAM_ARRAY_SIZE / 1024.0 /
				      1024.),
	      (3.0 * BytesPerWord) * ((double) STREAM_ARRAY_SIZE / 1024.0 /
				      1024. / 1024.));
      printf ("Each kernel will be executed %d times.\n", NTIMES);
      printf
	(" The *best* time for each kernel (excluding the first iteration)\n");
      printf (" will be used to compute the reported bandwidth.\n");
      printf ("Number of SHMEM PEs requested = %i\n", _world_size);
    }

  /* The chunked loops below step by blocksize and assume it divides the
   * array size exactly. */
  int blocksize = 10000;
  assert (STREAM_ARRAY_SIZE % blocksize == 0);

  /* Give every element of the three arrays a known starting value. */
  for (j = 0; j < STREAM_ARRAY_SIZE; j++)
    {
      a[j] = 1.0;
      b[j] = 2.0;
      c[j] = 0.0;
    }

  printf (HLINE);

  /* Only PE 0 measures the timer granularity; quantum is read again only
   * inside the rank-0 report further down. */
  if (_world_rank == 0)
    {
      if ((quantum = checktick ()) >= 1)
	printf ("Your clock granularity/precision appears to be "
		"%d microseconds.\n", quantum);
      else
	{
	  printf ("Your clock granularity appears to be "
		  "less than one microsecond.\n");
	  quantum = 1;
	}
    }

  shmem_barrier_all ();
  // assign fixed iterations per PE

  // since we know default STREAM array size
  // we are hardcoding this, but if the value
  // changes, then this blocking factor must
  // also change
  // basically, each PE works on this block
  // size at a time

  /* Rough cost estimate: time one chunk-distributed pass of a[i] *= 2.
   * next_p is the ticket fetched from gcounter on PE ROOT; count_p counts
   * chunks and is not reset anywhere in this function. */
  time_start = mysecond ();
  next_p = shmem_int_fadd (&gcounter, 1, ROOT);
  for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize)
    {
      if (next_p == count_p)
	{
	  for (i = j; i < (j + blocksize); i++)
	    {
	      a[i] = 2.0E0 * a[i];
	    }
	  next_p = shmem_int_fadd (&gcounter, 1, ROOT);
	}
      count_p++;
    }
  time_end = mysecond ();
  clock_time_PE = time_end - time_start;
  shmem_double_sum_to_all (&total_clock_time, &clock_time_PE, 1,
			   0, 0, _world_size, pWrk0, pSync0);

  if (_world_rank == 0)
    {
      printf ("Each test below will take on the order"
	      " of %d microseconds.\n", (int) (total_clock_time * 1.0E6));
      printf ("   (= %d clock ticks)\n",
	      (int) ((1.0E6 * total_clock_time) / quantum));
      printf ("Increase the size of the arrays if this shows that\n");
      printf ("you are not getting at least 20 clock ticks per test.\n");

      printf (HLINE);

      printf ("WARNING -- The above is only a rough guideline.\n");
      printf ("For best results, please be sure you know the\n");
      printf ("precision of your system timer.\n");
      printf (HLINE);
    }
  /*      --- MAIN LOOP --- repeat test cases NTIMES times --- */

  // reduction required, as each PE only fills a,b,c partially
  scalar = 3.0;

  for (k = 0; k < NTIMES; k++)
    {
      /* Untimed: reset the arrays chunk by chunk (a ends up as 2.0). */
      for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize)
	{
	  if (next_p == count_p)
	    {
	      for (i = j; i < (j + blocksize); i++)
		{
		  a[i] = 1.0;
		  b[i] = 2.0;
		  c[i] = 0.0;
		  a[i] = 2.0E0 * a[i];

		}
	      next_p = shmem_int_fadd (&gcounter, 1, ROOT);
	    }
	  count_p++;
	  //shmem_double_max_to_all (a + j, a + j, blocksize, 0,
	  //                       0, _world_size, pWrk1, pSync1);
	  shmem_barrier_all ();
	  /* flat_tree is defined elsewhere; presumably it shares the chunk
	   * with the other PEs -- TODO confirm. */
	  flat_tree (a + j, a + j, blocksize);
	}
      shmem_barrier_all ();

      /* Kernel 0: c = a.  From here on time_end holds the elapsed time. */
      time_start = mysecond ();
      for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize)
	{
	  if (next_p == count_p)
	    {
	      for (i = j; i < (j + blocksize); i++)
		{
		  c[i] = a[i];
		}
	      next_p = shmem_int_fadd (&gcounter, 1, ROOT);
	    }
	  count_p++;
	  //shmem_double_max_to_all (c + j, c + j, blocksize, 0,
	  //                       0, _world_size, pWrk1, pSync1);
	  shmem_barrier_all ();
	  flat_tree (c + j, c + j, blocksize);
	}
      shmem_barrier_all ();
      time_end = mysecond () - time_start;
      /* NOTE(review): this kernel reduces with max while the other three
       * reduce with sum -- confirm which one is intended. */
      shmem_double_max_to_all (&times[0][k], &time_end, 1,
			       0, 0, _world_size, pWrk0, pSync0);

      /* Kernel 1: b = scalar * c. */
      time_start = mysecond ();
      for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize)
	{
	  if (next_p == count_p)
	    {
	      for (i = j; i < (j + blocksize); i++)
		{
		  b[i] = scalar * c[i];
		}
	      next_p = shmem_int_fadd (&gcounter, 1, ROOT);
	    }
	  count_p++;
	  //shmem_double_max_to_all (b + j, b + j, blocksize, 0,
	  //                       0, _world_size, pWrk1, pSync1);
	  shmem_barrier_all ();
	  flat_tree (b + j, b + j, blocksize);
	}
      shmem_barrier_all ();
      time_end = mysecond () - time_start;
      shmem_double_sum_to_all (&times[1][k], &time_end, 1,
			       0, 0, _world_size, pWrk0, pSync0);

      /* Kernel 2: c = a + b. */
      time_start = mysecond ();
      for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize)
	{
	  if (next_p == count_p)
	    {
	      for (i = j; i < (j + blocksize); i++)
		{
		  c[i] = a[i] + b[i];
		}
	      next_p = shmem_int_fadd (&gcounter, 1, ROOT);
	    }
	  count_p++;
	  //shmem_double_max_to_all (c + j, c + j, blocksize, 0,
	  //                        0, _world_size, pWrk1, pSync1);
	  shmem_barrier_all ();
	  flat_tree (c + j, c + j, blocksize);
	}
      shmem_barrier_all ();
      time_end = mysecond () - time_start;
      shmem_double_sum_to_all (&times[2][k], &time_end, 1,
			       0, 0, _world_size, pWrk0, pSync0);

      /* Kernel 3: a = b + scalar * c. */
      time_start = mysecond ();
      for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize)
	{
	  if (next_p == count_p)
	    {
	      for (i = j; i < (j + blocksize); i++)
		{
		  a[i] = b[i] + scalar * c[i];
		}
	      next_p = shmem_int_fadd (&gcounter, 1, ROOT);
	    }
	  count_p++;
	  //shmem_double_max_to_all (a + j, a + j, blocksize, 0,
	  //                      0, _world_size, pWrk1, pSync1);
	  shmem_barrier_all ();
	  flat_tree (a + j, a + j, blocksize);
	}
      shmem_barrier_all ();
      time_end = mysecond () - time_start;
      shmem_double_sum_to_all (&times[3][k], &time_end, 1,
			       0, 0, _world_size, pWrk0, pSync0);
    }

  shmem_barrier_all ();

  /*      --- SUMMARY --- */

  /* Every PE accumulates the statistics; only PE 0 prints them. */
  for (k = 1; k < NTIMES; k++)	/* note -- skip first iteration */
    {
      for (j = 0; j < 4; j++)
	{
	  avgtime[j] = avgtime[j] + times[j][k];
	  mintime[j] = MIN (mintime[j], times[j][k]);
	  maxtime[j] = MAX (maxtime[j], times[j][k]);
	}
    }

  if (_world_rank == 0)
    {
      printf
	("Function    Best Rate MB/s  Avg time     Min time     Max time\n");
      for (j = 0; j < 4; j++)
	{
	  avgtime[j] = avgtime[j] / (double) (NTIMES - 1);

	  printf ("%s%12.1f  %11.6f  %11.6f  %11.6f\n", label[j],
		  1.0E-06 * bytes[j] / mintime[j],
		  avgtime[j], mintime[j], maxtime[j]);
	}
      printf (HLINE);
    }
  /* --- Check Results --- */
  if (_world_rank == 0)
    {
      checkSTREAMresults (a, b, c);
      printf (HLINE);
    }

  shfree (a);
  shfree (b);
  shfree (c);

  return 0;
}
예제 #15
0
파일: quick.c 프로젝트: rutayanp/sorting
/*
 * One exchange round of a parallel quicksort on OpenSHMEM.
 *
 * Usage: <program> N   (N = total element count, split evenly over the PEs)
 *
 * Every PE fills N/npes random values into A and sorts them locally.  PE 0's
 * first element is broadcast as the shared pivot and uplowPartition() returns
 * the split index `check` of the local array around it.  PEs in the lower half
 * (me < npes/2) send A[check..] to PE me + npes/2; PEs in the upper half send
 * A[0..check) to PE me - npes/2.  Each PE then appends the elements it kept
 * behind the imported ones in temp_arr and sorts the merged array.
 *
 * Returns 0 on success, 1 on a usage error or allocation failure.
 */
int main(int argc, char *argv[]){

	int i,j,n,pivot,check,size;
	int keep_from,keep_cnt;
	/* pSync and the broadcast target are accessed by the collective on
	 * other PEs, so they must be symmetric objects: static storage rather
	 * than stack locals. */
	static long pSync[_SHMEM_BCAST_SYNC_SIZE];
	static int next_pivot;

	for (i=0; i < _SHMEM_BCAST_SYNC_SIZE; i++) {
		pSync[i] = _SHMEM_SYNC_VALUE;
	}

	start_pes(0);
	me = shmem_my_pe();
	npes = shmem_n_pes();
	shmem_barrier_all();
	srand (me+time(NULL));

	if (argc < 2) {
		if (me == 0)
			printf("usage: quick <total number of elements>\n");
		shmem_finalize();
		return 1;
	}
	N = atoi(argv[1]);
	n = N/npes;

	printf("%d: Size = %d with np=%d\n",me,N,npes);

	/* A[0] is read below as the pivot candidate, so every PE needs at
	 * least one element. */
	if (n < 1) {
		if (me == 0)
			printf("need at least one element per PE\n");
		shmem_finalize();
		return 1;
	}

	/* temp_arr ends up holding the imported elements (at most n) followed
	 * by the locally kept ones (at most n), so it needs room for 2*n. */
	A = (int *)shmalloc(n*sizeof(int));
	temp_arr = (int *)shmalloc(2*n*sizeof(int));
	if(A==NULL || temp_arr==NULL){
		printf("\nOut of memory");
		shmem_finalize();
		return 1;
	}

	for(i=0;i<n;i++){
		A[i] = rand()%(10000-0);
	}
	printf("\nprocess %d elements:",me);
	for(i=0;i<n;i++){
		printf("%d, ", A[i]);
	}

	/* The broadcast does not write the root's own target, so PE 0 keeps
	 * this local value while every other PE has it overwritten. */
	next_pivot = A[0];

	//the step two of algo.....broadcast the new pivot
	shmem_broadcast32(&next_pivot,A,1,0,0,0,npes,pSync);
	shmem_barrier_all();
	pivot = quicksort(A, 0, n-1);
	printf("Process %d the pivot:%d",me, pivot);

	shmem_barrier_all(); //just for the sake of clear display...can be removed in the end
	printf("\nThe sorted list is of process %d: ",me);
	for(i=0;i<n;i++){
		printf("%d,  ",A[i]);
	}
	printf("\n");
	printf("the new pivot of process %d: %d\n",me,next_pivot); // to check the broadcast of new pivots

	//to check the division of the sorted arrays according to the new pivot.
	shmem_barrier_all();
	check = uplowPartition(next_pivot);
	shmem_barrier_all();
	printf("(%d)",me);
	for(j=0;j<n;j++){
		printf("%d, ",A[j]);
	}
	printf("new partition: %d",check);
	shmem_barrier_all();

	/* Display only: stage and print the part this PE is about to send.
	 * temp_arr is overwritten by the partner's put further down. */
	if(me < npes/2)
	{
		printf("\n");
		i=0;
		for(j=check;j<n;j++){
			temp_arr[i] = A[j];
			i++;
		}
		i=0;
		printf("(%d)",me);
		for(j=check;j<n;j++){
			printf("%d, ",temp_arr[i]) ;
			i++;
		}
	}

	shmem_barrier_all();
	if(me >= npes/2)
	{
		printf("\n");
		for(j=0;j<check;j++){
			temp_arr[j] = A[j];
		}

		printf("(%d)",me);
		for(j=0;j<check;j++){
			printf("%d, ",temp_arr[j]) ;
		}
	}
	shmem_barrier_all();
	printf("\n");

	/* Lower half: ship the upper part A[check..n) to the partner. */
	if(me < npes/2){
		printf("\n");
		pe = me +npes/2;
		nelems[0] = n - check;
		printf (" process %d pe : %d nelems : %d\n",me,pe,nelems[0]);//to test the value

		/* %p with a void * cast: the address was printed with %d before. */
		printf("(%d) addr = %p , value = %d , pe = %d\n ",me, (void *)&nelems_import[0],nelems[0],pe);//to test the value

		shmem_int_p(nelems_import,nelems[0],pe);
		shmem_quiet();
		shmem_int_put(temp_arr,&A[check],nelems[0],pe);
	}

	shmem_barrier_all();//check if the entire barrier is needed
	/* Upper half: ship the lower part A[0..check) to the partner. */
	if(me >= npes/ 2){

		pe = me-npes/2;//check if it is synced
		nelems[0]= check;
		printf (" process %d pe : %d nelems : %d\n",me,pe,nelems[0]);//to test the value
		shmem_int_p(nelems_import,nelems[0],pe);
		shmem_quiet();
		shmem_int_put(temp_arr,A,nelems[0],pe);
	}

	shmem_barrier_all();//again sync is required...check it with profiling
	//this snippet is to check if the processors have got the high and low lists respectively
	printf("(%d) nelems_import = %d\n",me,nelems_import[0]);//to test the value
	printf("(%d) new elements = ",me);
	for(i=0;i<nelems_import[0];i++){
		printf("%d, ",temp_arr[i]);
	}
	printf("\n");

	/* Merge: append the locally kept elements behind the imported ones.
	 * The lower half keeps A[0..check), the upper half keeps A[check..n). */
	if(me < npes/2){
		keep_from = 0;
		keep_cnt = check;
	} else {
		keep_from = check;
		keep_cnt = n - check;
	}
	for(i=0;i<keep_cnt;i++){
		temp_arr[nelems_import[0] + i] = A[keep_from + i];
	}
	size = nelems_import[0] + keep_cnt;

	shmem_barrier_all(); //to test if the arrays are merged properly
	printf("(%d) merged array:",me);
	for(j=0;j<size;j++){
		printf("%d, ",temp_arr[j]);
	}
	printf("\n");

	/* Sort the merged array. */
	quicksort(temp_arr,0,size-1);

	//sorting routine checked...once program is done we can remove this part
	shmem_barrier_all();//test purpose only
	printf("(%d) sorted list: ",me);
	for(i=0;i<size;i++){
		printf("%d, ",temp_arr[i]);
	}
	printf("\n");

	shfree(temp_arr);
	shfree(A);
	shmem_finalize();
	return 0;
}
예제 #16
0
/* Performance test for shmem_XX_put (latency and bandwidth) */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <shmem.h>
#include <sys/time.h>

/* Accumulated put time in microseconds; main() also passes it as both the
 * source and the target of shmem_longdouble_sum_to_all. */
long double time_taken;

/* Synchronization array and scratch space handed to
 * shmem_longdouble_sum_to_all in main(). */
long pSync[_SHMEM_REDUCE_SYNC_SIZE];
long double pWrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];

//#define N_ELEMENTS 25600/*Data size chosen to be able to capture time required*/
  int
main(void)
{
  int i,j,k;
  int *target;
  int *source;
  int me, npes;
  int nxtpe;
  struct timeval start, end;
  long double start_time,end_time;

  int N_ELEMENTS = (4194304*2)/sizeof(int);

  start_pes(0);
  me = _my_pe();
  npes = _num_pes();

  for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1)
  {
    pSync[i] = _SHMEM_SYNC_VALUE;
  }
  nxtpe = (me+1)%npes;
  source = (int *) shmalloc( N_ELEMENTS * sizeof(*source) );
  target = (int *) shmalloc( N_ELEMENTS * sizeof(*target) );

  if(me == 0)
    printf("Put performance test results:\nSize (Bytes)\t\tTime (Microseconds)\t\tBandwidth (Bytes/Second)\n");

  for (i = 0; i < N_ELEMENTS; i += 1) {
    source[i] = i + 1;
    target[i] = -90;
  }
  shmem_barrier_all();

  /*For int put we take average of all the times realized by a pair of PEs, thus
   * reducing effects of physical location of PEs*/
  for (i=1;i<=N_ELEMENTS;i=i*2)
  {
    time_taken = 0;

    for(j=0;j<10000;j++){
      gettimeofday(&start, NULL);

      start_time = (start.tv_sec * 1000000.0) + start.tv_usec;

      shmem_int_put(target, source, i,nxtpe);

      gettimeofday(&end, NULL);

      end_time = (end.tv_sec * 1000000.0) + end.tv_usec;

      time_taken = time_taken + (end_time - start_time);

    }
    shmem_longdouble_sum_to_all(&time_taken, &time_taken,1, 0, 0, npes, pWrk, pSync);


    if(me == 0){
      time_taken = time_taken/(npes*10000); /*Average time across all PEs for one put*/
      if (i*sizeof(i) < 1048576)
        printf("%ld \t\t\t\t %lf\t\t\t\t %lf\n",i*sizeof(i),
               (double)time_taken,(double)((i*sizeof(i))/(time_taken)));
      else
        printf("%ld \t\t\t %lf\t\t\t\t %lf\n",i*sizeof(i),
               (double)time_taken,(double)((i*sizeof(i))/(time_taken)));

    }

  }
  shmem_barrier_all();

  shfree(target);
  shfree(source);
  return 0;
}
예제 #17
0
/*
 * Reduction over all PEs where target and source are the same symmetric
 * object.  Each PE contributes its own PE number; the result is compared
 * against an expected value of 0.
 *
 * Returns TC_PASS, TC_FAIL, or TC_SETUP_FAIL when an allocation fails.
 */
static int test_item4(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* target_addr = NULL;
    TYPE_VALUE* source_addr = NULL;
    TYPE_VALUE source_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = _num_pes();
    int my_proc = _my_pe();

    /* The data buffer is requested only when the work array is available. */
    pWrk = shmalloc(sizeof(*pWrk) * sys_max(1/2 + 1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE));
    if (pWrk != NULL)
    {
        source_addr = shmalloc(sizeof(*source_addr));
        target_addr = source_addr;
    }

    if (target_addr == NULL || source_addr == NULL)
    {
        rc = TC_SETUP_FAIL;
    }
    else
    {
        TYPE_VALUE observed = DEFAULT_VALUE;
        int slot;
        int spins;

        /* Contribute my PE number */
        source_value = (TYPE_VALUE)my_proc;
        *source_addr = source_value;

        /* Expected outcome of the reduction */
        expect_value = 0;

        /* Reset pSync, then make sure every PE has stored its value */
        for (slot = 0; slot < _SHMEM_REDUCE_SYNC_SIZE; slot++)
        {
            pSync[slot] = _SHMEM_SYNC_VALUE;
        }
        shmem_barrier_all();

        /* In-place reduction across all PEs */
        FUNC_VALUE(target_addr, source_addr, 1, 0, 0, num_proc, pWrk, pSync);

        /* Poll for a bounded time while the target still holds DEFAULT_VALUE */
        shmem_barrier_all();
        for (spins = 0;
             *target_addr == DEFAULT_VALUE && spins < 1000 * WAIT_COUNT;
             spins++)
        {
            usleep(1);
        }
        observed = *target_addr;

        if (expect_value == observed)
        {
            rc = TC_PASS;
        }
        else
        {
            rc = TC_FAIL;
        }

        log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld\n",
                           my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)observed);
    }

    /* Release whatever was obtained */
    if (source_addr != NULL)
    {
        shfree(source_addr);
    }

    if (pWrk != NULL)
    {
        shfree(pWrk);
        pWrk = NULL;
    }

    return rc;
}
예제 #18
0
/*
 * Repeated single-value write to the right-hand neighbour.
 *
 * For each of COUNT_VALUE iterations every PE writes
 * (+/-)(i * STEP_VALUE) into the symmetric word of PE (my_proc + 1) % num_proc
 * (positive for odd target PEs, negative for even ones) and then checks that
 * its own word holds the value matching its own parity.
 *
 * Returns TC_PASS when every iteration matched, TC_FAIL when at least one
 * iteration mismatched, TC_SETUP_FAIL when the allocation failed.
 */
static int test_item3(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;

    num_proc = _num_pes();
    my_proc = _my_pe();

    shmem_addr = shmalloc(sizeof(*shmem_addr));
    if (shmem_addr)
    {
        TYPE_VALUE value = -1;
        INT64_TYPE i = 0;

        /* Set my value */
        my_value = (-1);
        *shmem_addr = my_value;
        for (i = 0; i < COUNT_VALUE; i++)
        {
            /* Define peer and it value */
            peer_proc = (my_proc + 1) % num_proc;
            peer_value = (peer_proc % 2 ? 1 : -1) * (i * STEP_VALUE);

            /* Define expected value */
            expect_value = (my_proc % 2 ? 1 : -1) * (i * STEP_VALUE);

            /* This guarantees that PE set initial value before peer change one */
            shmem_barrier_all();

            /* Write value to peer */
            FUNC_VALUE(shmem_addr, peer_value, peer_proc);

            /* Get value put by peer:
             * These routines start the remote transfer and may return before the data
             * is delivered to the remote PE
             */
            wait_for_put_completion(peer_proc,10 /* wait for 10 secs */);
            value = *shmem_addr;

            /* A failure is sticky: a later matching iteration must not hide
             * an earlier mismatch.  The loop still runs to completion so the
             * barrier calls stay matched across PEs. */
            if (expect_value != value)
            {
                rc = TC_FAIL;
            }

            log_debug(OSH_TC, "my(#%d:%lld) peer(#%d:%lld) expected = %lld vs got = %lld\n",
                      my_proc, (INT64_TYPE)my_value, peer_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value, (INT64_TYPE)value);
        }
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (shmem_addr)
    {
        shfree(shmem_addr);
    }

    return rc;
}
예제 #19
0
/****************************************************************************
 * Test Case processing procedure
 ***************************************************************************/
/*
 * Entry point of the reduce test case: parses the options, allocates the
 * shared pSync array and runs test items 1..8 in order.  The sequence stops
 * at the first item that does not return TC_PASS; every executed item is
 * logged and followed by a barrier.
 *
 * Returns TC_SETUP_FAIL when option parsing or the pSync allocation fails,
 * otherwise the result of the last item that ran.
 */
int osh_reduce_tc23(const TE_NODE *node, int argc, const char *argv[])
{
    int rc = __parse_opt(node, argc, argv);

    if (rc != TC_PASS)
    {
        rc = TC_SETUP_FAIL;
    }
    else
    {
        pSync = shmalloc(sizeof(*pSync) * _SHMEM_REDUCE_SYNC_SIZE);
        if (pSync == NULL)
        {
            rc = TC_SETUP_FAIL;
        }
    }

    do
    {
        if (rc != TC_PASS)
            break;

        /* Item 1: every PE reduces a single symmetric value to itself */
        rc = test_item1();
        log_item(node, 1, rc);
        shmem_barrier_all();
        if (rc != TC_PASS)
            break;

        /* Item 2: all PEs reduce a single value */
        rc = test_item2();
        log_item(node, 2, rc);
        shmem_barrier_all();
        if (rc != TC_PASS)
            break;

        /* Item 3: like item 1, with target and source being the same array */
        rc = test_item3();
        log_item(node, 3, rc);
        shmem_barrier_all();
        if (rc != TC_PASS)
            break;

        /* Item 4: like item 2, with target and source being the same array */
        rc = test_item4();
        log_item(node, 4, rc);
        shmem_barrier_all();
        if (rc != TC_PASS)
            break;

        /* Item 5: every PE reduces a symmetric buffer to itself */
        rc = test_item5();
        log_item(node, 5, rc);
        shmem_barrier_all();
        if (rc != TC_PASS)
            break;

        /* Item 6: all PEs reduce the buffer */
        rc = test_item6();
        log_item(node, 6, rc);
        shmem_barrier_all();
        if (rc != TC_PASS)
            break;

        /* Item 7: only the even PEs reduce the buffer */
        rc = test_item7();
        log_item(node, 7, rc);
        shmem_barrier_all();
        if (rc != TC_PASS)
            break;

        /* Item 8: reduce calls in a loop, alternating between several pSync
         * and pWrk arrays, with no barrier between iterations */
        rc = test_item8();
        log_item(node, 8, rc);
        shmem_barrier_all();
    } while (0);

    if (pSync)
    {
        shfree(pSync);
    }

    return rc;
}
예제 #20
0
/*
 * Symmetric-heap allocation consistency loop.
 *
 * Options: -p doubles nWords on every loop (starting at 1, 21 loops),
 * -v raises verbosity, -h prints usage.  Optional positional arguments are
 * nWords, loops and the per-loop nWords increment.
 *
 * Each loop allocates the result, target and source buffers, fills them with
 * 1, 2 and 3 respectively, synchronizes all PEs, verifies that every buffer
 * still holds its fill value, and frees them again.
 */
int
main(int argc, char **argv)
{
    int me, nProcs, c, l;
    int nWords, loops, incWords;
    int Verbose = 0, power2 = 0, modulo = 5;
    DataType *dp;
    DataType *stop;

    /* Program name without its directory part */
    pgm = strrchr(argv[0],'/');
    pgm = pgm ? pgm + 1 : argv[0];

    shmem_init();
    me = shmem_my_pe();
    nProcs = shmem_n_pes();

    for (c = getopt (argc, argv, "hpv"); c != -1; c = getopt (argc, argv, "hpv")) {
        if (c == 'p')
            power2++;
        else if (c == 'v')
            Verbose++;
        else
            usage();    /* 'h' and anything unrecognized */
    }

    /* Positional arguments, each falling back to its default when absent */
    if (optind != argc) {
        nWords = getSize (argv[optind++]);
        if (nWords <= 0)
            usage ();
    }
    else
        nWords = DFLT_NWORDS;

    if (optind != argc) {
        loops = getSize (argv[optind++]);
        if (loops < 0)
            usage ();
    }
    else
        loops = DFLT_LOOPS;

    if (optind != argc) {
        incWords = getSize (argv[optind++]);
        if (incWords < 0)
            usage ();
    }
    else
        incWords = DFLT_INCR;

    /* -p overrides the sizes given on the command line */
    if (power2) {
        nWords = 1;
        modulo = 1;
        loops = 21;
    }

    if (Verbose && me == 0) {
        if (!power2)
            printf("%s: nWords(%d) loops(%d) nWords-incr-per-loop(%d)\n",
                pgm, nWords, loops, incWords);
        else
            printf("%s: nWords(1) << 1 per loop.\n", pgm);
    }

    for(l=0; l < loops; l++) {

        /* result buffer, filled with 1 */
        result_sz = (nProcs-1) * (nWords * sizeof(DataType));
        result = (DataType *)shmem_malloc(result_sz);
        if (result == NULL)
        {
            perror ("Failed result memory allocation");
            shmem_finalize();
            exit (1);
        }
        stop = &result[(result_sz/sizeof(DataType))];
        for(dp=result; dp < stop; dp++)
            *dp = 1;

        /* target buffer, filled with 2 */
        target_sz = nWords * sizeof(DataType);
        target = (DataType *)shmem_malloc(target_sz);
        if (target == NULL)
        {
            perror ("Failed target memory allocation");
            shmem_finalize();
            exit (1);
        }
        stop = &target[(target_sz / sizeof(DataType))];
        for(dp=target; dp < stop; dp++)
            *dp = 2;

        /* source buffer, filled with 3 */
        source_sz = 2 * nWords * sizeof(DataType);
        source = (DataType *)shmem_malloc(source_sz);
        if (source == NULL)
        {
            perror ("Failed source memory allocation");
            shmem_finalize();
            exit (1);
        }
        stop = &source[(source_sz / sizeof(DataType))];
        for(dp=source; dp < stop; dp++)
            *dp = 3;
#if 0
        printf("[%d] source %p target %p result %p\n",
            me, (void*)source,(void*)target,(void*)result);
        shmem_barrier_all();
#endif

        shmem_barrier_all(); /* sync sender and receiver */

        /* Verify and release in the order source, target, result; only the
         * first mismatch of each buffer is reported. */
        stop = &source[(source_sz / sizeof(DataType))];
        for(dp=source; dp < stop; dp++) {
            if (*dp != 3 ) {
                printf("source not consistent @ 3?\n");
                break;
            }
        }
        shmem_free(source);

        stop = &target[(target_sz / sizeof(DataType))];
        for(dp=target; dp < stop; dp++) {
            if (*dp != 2 ) {
                printf("target not consistent @ 2?\n");
                break;
            }
        }
        shmem_free(target);

        stop = &result[(result_sz / sizeof(DataType))];
        for(dp=result; dp < stop; dp++) {
            if (*dp != 1 ) {
                printf("result not consistent @ 1?\n");
                break;
            }
        }
        shmem_free(result);

        /* A single-loop run neither reports progress nor grows nWords */
        if (loops <= 1)
            continue;

        if (Verbose && me == 0 && (l == 0 || (l % modulo == 0)))
            printf("End loop %3d nWords(%d)\n",(l+1),nWords);

        if (power2)
            nWords <<= 1;
        else
            nWords += incWords; // watch for double inc.
    }

    shmem_finalize();

    return 0;
}
예제 #21
0
/*
 * Shrinking active set reduction test.
 *
 * Every PE contributes its own PE number in all NELEM elements of src.  In
 * iteration i the active set is PEs i..npes-1, so PE `me` takes part in
 * iterations 0..me.  After each max reduction every element must equal
 * npes-1, and after each min reduction every element must equal i.
 *
 * Returns 0 when no mismatch was seen on this PE, 1 otherwise.
 */
int main(void)
{
    int i, me, npes;
    int errors = 0;

    shmem_init();

    me = shmem_my_pe();
    npes = shmem_n_pes();

    for (i = 0; i < NELEM; i++) {
        src[i] = me;
        dst_max[i] = -1;
        dst_min[i] = -1;
    }

    /* Both synchronization arrays must hold SHMEM_SYNC_VALUE before their
     * first use; min_psync is the one handed to the min reduction below. */
    for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) {
        max_psync[i] = SHMEM_SYNC_VALUE;
        min_psync[i] = SHMEM_SYNC_VALUE;
    }

    if (me == 0)
        printf("Shrinking active set test\n");

    /* Makes the initialized psync arrays visible on every PE before any
     * reduction starts. */
    shmem_barrier_all();

    /* A total of npes tests are performed, where the active set in each test
     * includes PEs i..npes-1 */
    for (i = 0; i <= me; i++) {
        int j;

        /* The first PE of each active set announces the test */
        if (me == i)
            printf(" + PE_start=%d, logPE_stride=0, PE_size=%d\n", i, npes-i);

        shmem_long_max_to_all(dst_max, src, NELEM, i, 0, npes-i, max_pwrk, max_psync);

        /* Validate reduced data */
        for (j = 0; j < NELEM; j++) {
            long expected = npes-1;
            if (dst_max[j] != expected) {
                printf("%d: Max expected dst_max[%d] = %ld, got dst_max[%d] = %ld, iteration %d\n",
                       me, j, expected, j, dst_max[j], i);
                errors++;
            }
        }

        shmem_long_min_to_all(dst_min, src, NELEM, i, 0, npes-i, min_pwrk, min_psync);

        /* Validate reduced data */
        for (j = 0; j < NELEM; j++) {
            long expected = i;
            if (dst_min[j] != expected) {
                printf("%d: Min expected dst_min[%d] = %ld, got dst_min[%d] = %ld, iteration %d\n",
                       me, j, expected, j, dst_min[j], i);
                errors++;
            }
        }

    }

    shmem_finalize();

    return errors != 0;
}
예제 #22
0
/*
 * Buffer get from the right-hand neighbour with a growing buffer size.
 *
 * In each cycle every PE fills its symmetric buffer with a value derived
 * from its own parity and the cycle number, fetches the neighbour's buffer
 * into a private receive buffer and checks that every element equals the
 * value the neighbour is expected to have written.  The loop stops after the
 * first failing cycle.
 *
 * Returns TC_PASS, TC_FAIL, or TC_SETUP_FAIL when an allocation fails.
 */
static int test_item4(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE* recv_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = _num_pes();
    int my_proc = _my_pe();
    int peer_proc = 0;

    shmem_addr = (TYPE_VALUE*)shmalloc(sizeof(*shmem_addr) * __max_buffer_size);
    recv_addr = (TYPE_VALUE*)sys_malloc(sizeof(*recv_addr) * __max_buffer_size);

    if (!shmem_addr || !recv_addr)
    {
        rc = TC_SETUP_FAIL;
    }
    else
    {
        INT64_TYPE cycle = 0;
        long cur_buf_size = 0;

        my_value = 0;
        while ((cycle < __cycle_count) && (rc == TC_PASS))
        {
            /* Neighbour to read from */
            peer_proc = (my_proc + 1) % num_proc;

            /* Publish my value in the first cur_buf_size elements */
            my_value = (my_proc % 2 ? 1 : -1) * (cycle * (MAX_VALUE / __cycle_count));
            cur_buf_size = sys_max(1, (cycle + 1) * __max_buffer_size / __cycle_count);
            fill_buffer((void *)shmem_addr, cur_buf_size, (void *)&my_value, sizeof(my_value));

            /* Let every PE finish writing before anyone reads */
            shmem_barrier_all();

            /* The value the neighbour wrote is also what must be read back */
            peer_value = (peer_proc % 2 ? 1 : -1) * (cycle * (MAX_VALUE / __cycle_count));
            expect_value = peer_value;

            /* Fetch the neighbour's buffer */
            FUNC_VALUE(recv_addr, shmem_addr, cur_buf_size, peer_proc);

            if (compare_buffer_with_const(recv_addr, cur_buf_size, &expect_value, sizeof(expect_value)) == 0)
            {
                rc = TC_PASS;
            }
            else
            {
                rc = TC_FAIL;
            }

            log_debug(OSH_TC, "my(#%d:%lld) peer(#%d:%lld) expected = %lld buffer size = %lld\n",
                               my_proc, (INT64_TYPE)my_value, peer_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value, (INT64_TYPE)cur_buf_size);

            /* On a mismatch, dump a few elements around the offending index */
            if (rc)
            {
                TYPE_VALUE* check_addr = recv_addr;
                int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value));
                int show_index = 0;
                int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1);

                if (odd_index > 1)
                {
                    show_index = odd_index - 2;
                }

                log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
                log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
                show_buffer(check_addr + show_index, show_size);
            }

            shmem_barrier_all();
            cycle++;
        }
    }

    if (recv_addr)
    {
        sys_free(recv_addr);
    }

    if (shmem_addr)
    {
        shfree(shmem_addr);
    }

    return rc;
}
예제 #23
0
파일: spam.c 프로젝트: stjohnt/sandia-shmem
/*
 * Driver for the put/get/collective stress tests.
 *
 * Options:
 *   -a -b -c -m -n   run only the all-to-all, broadcast, collect, many and
 *                    neighbor tests respectively (all of them when none of
 *                    these is given)
 *   -e <count>       element count of the buffers (must be > 0)
 *   -l <count>       loop count (must be > 0)
 *   -v / -q          raise verbosity / set it to 0
 *   -h               print usage and exit
 *
 * Returns 0 on success, 1 on a bad option or a failed allocation.
 */
int
main(int argc, char **argv)
{
    int i;
	int *target;
	int *source;
	int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS;
    char *pgm;

	shmem_init();
	me = shmem_my_pe();
	npes = shmem_n_pes();

    /* Program name without its directory part */
    if ((pgm=strrchr(argv[0],'/')))
        pgm++;
    else
        pgm = argv[0];

    /* lower-case switch enable only a specific test; otherwise run all tests */
    while ((i = getopt (argc, argv, "hvqe:l:abcmn")) != EOF) {
        switch (i)
        {
          case 'a':
              All2++;
              break;
          case 'b':
              Bcast++;
              break;
          case 'c':
              Collect++;
              break;
          case 'm':
              Many++;
              break;
          case 'n':
              Neighbor++;
              break;
          case 'q':
              Verbose=0;
              break;
          case 'v':
              Verbose++;
              break;
          case 'e':
              if ((elements = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad elements count %d\n",elements);
                  shmem_finalize();
                  return 1;
              }
              break;
          case 'l':
              if ((loops = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad loop count %d\n",loops);
                  shmem_finalize();
                  return 1;
              }
              break;
          case 'h':
              if (me == 0)
                  usage(pgm);
              shmem_finalize();
              return 0;
          default:
              if (me == 0) {
                  fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i);
                  usage(pgm);
              }
              shmem_finalize();
              return 1;
        }
    }

    /* No test selected explicitly: run all of them */
    if (All2==0 && Bcast==0 && Collect==0 && Many==0 && Neighbor==0)
        All2 = Bcast = Collect = Many = Neighbor = 1;

	source = (int *) shmem_malloc( elements * sizeof(*source) );
	target = (int *) shmem_malloc( elements * sizeof(*target) );

    /* The element count is user-controlled (-e), so the allocation can fail;
     * both buffers are written right below. */
    if (source == NULL || target == NULL) {
        fprintf(stderr,"ERR: [%d] failed to allocate %d elements\n",me,elements);
        shmem_finalize();
        return 1;
    }

	for (i = 0; i < elements; i += 1) {
	    source[i] = i + 1;
	    target[i] = -90;
	}

	shmem_barrier_all();

    if (Neighbor) {
        neighbor_put( target, source, elements, me, npes, loops );
        neighbor_get( target, source, elements, me, npes, loops );
    }

    if (All2) {
        all2all_put( target, source, elements, me, npes, loops );
        all2all_get( target, source, elements, me, npes, loops );
    }

    if (Many) {
        one2many_put( target, source, elements, me, npes, loops );
        many2one_get( target, source, elements, me, npes, loops );
    }

    if (Bcast) bcast( target, source, elements, me, npes, loops );

    /* The collect tests are called with a NULL target */
    if (Collect) {
        collect( NULL, source, elements, me, npes, loops );
        fcollect( NULL, source, elements, me, npes, loops );
    }

	shmem_barrier_all();

	shmem_free(target);
	shmem_free(source);

	shmem_finalize();

	return 0;
}
예제 #24
0
/*
 * Exchanges boundary slices of the field l with the neighbouring PEs.
 *
 * Only active when MPI is defined; otherwise the function is a no-op.
 * All transfers are one-sided shmem_double_put() calls bracketed by
 * shmem_barrier_all(), so every PE must call this function collectively.
 *
 *  l    field to exchange; slices read from l are written into l on the
 *       neighbouring PE at the offsets computed below.
 *  ieo  selects the z-direction index table: 1 uses g_field_z_ipt_even,
 *       any other value uses g_field_z_ipt_odd.
 *
 * NOTE(review): in the x and y loops the destination index k advances by
 * LZ*LY (resp. LZ) spinors per put while 12*LZ*LY (resp. 12*LZ) doubles are
 * transferred; the z loops transfer 24 doubles per single-spinor step.
 * Confirm that these strides match the intended buffer layout.
 */
void xchange_field(spinor * const l, const int ieo) {

#  ifdef MPI
  int x0, x1, k;

#ifdef _KOJAK_INST
#pragma pomp inst begin(xchangefield)
#endif

  shmem_barrier_all();

  /* t-direction: first time slice goes down, last time slice goes up */
  shmem_double_put((double*)(l+T*LX*LY*LZ/2), (double*)l,
                   (LX*LY*LZ*12), g_nb_t_dn);
  shmem_double_put((double*)(l+(T+1)*LX*LY*LZ/2), (double*)(l+(T-1)*LX*LY*LZ/2),
                   (LX*LY*LZ*12), g_nb_t_up);

#    if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
  /* x-direction: x = 0 slices go down ... */
  k = (T+2)*LX*LY*LZ/2;
  for(x0 = 0; x0 < T; x0++) {
    shmem_double_put((double*)(l + k),
                     (double*)(l + g_lexic2eo[g_ipt[x0][0][0][0]]),
                     12*LZ*LY, g_nb_x_dn);
    k+=LZ*LY;
  }
  /* ... and x = LX-1 slices go up */
  k = ((T+2)*LX*LY*LZ + T*LY*LZ)/2;
  for(x0 = 0; x0 < T; x0++) {
    shmem_double_put((double*)(l + k),
                     (double*)(l + g_lexic2eo[g_ipt[x0][LX-1][0][0]]),
                     12*LZ*LY, g_nb_x_up);
    k+=LZ*LY;
  }
#    endif

#    if (defined PARALLELXYT || defined PARALLELXYZT)
  /* y-direction: y = 0 lines go down ... */
  k = ((T+2)*LX*LY*LZ + 2*T*LY*LZ)/2;
  for(x0 = 0; x0 < T; x0++) {
    for(x1 = 0; x1 < LX; x1++) {
      shmem_double_put((double*)(l + k),
                       (double*)(l + g_lexic2eo[g_ipt[x0][x1][0][0]]),
                       12*LZ, g_nb_y_dn);
      k+=LZ;
    }
  }
  /* ... and y = LY-1 lines go up */
  k = ((T+2)*LX*LY*LZ + 2*T*LY*LZ + T*LX*LZ)/2;
  for(x0 = 0; x0 < T; x0++) {
    for(x1 = 0; x1 < LX; x1++) {
      shmem_double_put((double*)(l + k),
                       (double*)(l + g_lexic2eo[g_ipt[x0][x1][LY-1][0]]),
                       12*LZ, g_nb_y_up);
      k+=LZ;
    }
  }
#    endif

#    if (defined PARALLELXYZT)
  /* z-direction, sent down: first half of the even/odd index table */
  x0 = (VOLUME/2 + LX*LY*LZ + T*LY*LZ +T*LX*LZ);
  if(ieo == 1) {
    for(k = 0; k < T*LX*LY/2; k++) {
      shmem_double_put((double*)(l + x0),
                       (double*)(l + g_field_z_ipt_even[k]),
                       24, g_nb_z_dn);
      x0++;
    }
  }
  else {
    for(k = 0; k < T*LX*LY/2; k++) {
      shmem_double_put((double*)(l + x0),
                       (double*)(l + g_field_z_ipt_odd[k]),
                       24, g_nb_z_dn);
      x0++;
    }
  }
  /* z-direction, sent up: second half of the even/odd index table */
  x0 = (VOLUME/2 + LX*LY*LZ + T*LY*LZ + T*LX*LZ + T*LX*LY/2);
  if(ieo == 1) {
    for(k = T*LX*LY/2; k < T*LX*LY; k++) {
      shmem_double_put((double*)(l + x0),
                       (double*)(l + g_field_z_ipt_even[k]),
                       24, g_nb_z_up);
      x0++;
    }
  }
  else {
    /* Fixed: this branch previously indexed the even table, making both
     * branches identical; the odd field must use the odd index table,
     * mirroring the z-down exchange above. */
    for(k = T*LX*LY/2; k < T*LX*LY; k++) {
      shmem_double_put((double*)(l + x0),
                       (double*)(l + g_field_z_ipt_odd[k]),
                       24, g_nb_z_up);
      x0++;
    }
  }
#    endif

  shmem_barrier_all();
#  endif // MPI
  return;
  /* NOTE(review): the instrumentation end marker sits after the return and
   * outside the MPI guard that contains the begin marker - confirm intent. */
#ifdef _KOJAK_INST
#pragma pomp inst end(xchangefield)
#endif
}
예제 #25
0
/*
 * Strided transfer test against the next PE (my_proc + 1, wrapping).
 *
 * For COUNT_VALUE iterations the stride grows from 1 up to
 * MAX_ARRAY_SIZE/2-1. Each iteration zeroes the symmetric buffer, calls
 * FUNC_VALUE with equal target and source strides, waits via
 * wait_for_completion() and compares the symmetric buffer with the locally
 * computed expectation.
 *
 * Returns TC_PASS on success, TC_FAIL on a data mismatch (or whatever
 * wait_for_completion() stores in rc) and TC_SETUP_FAIL if any buffer could
 * not be allocated.
 */
static int test_item1(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE* local_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE* expect_value = NULL;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;
    int tst, sst;
    int max_stride = MAX_ARRAY_SIZE/2-1;
    int *wait_variable = NULL;

    wait_variable = shmalloc(sizeof(int));
    num_proc = _num_pes();
    my_proc = _my_pe();
    shmem_addr = shmalloc(sizeof(*shmem_addr)*MAX_ARRAY_SIZE);
    local_addr = malloc(sizeof(*local_addr)*MAX_ARRAY_SIZE);
    expect_value = malloc(sizeof(*expect_value)*MAX_ARRAY_SIZE);

    /* Every buffer is written to or handed to a helper below, so all four
     * allocations must have succeeded before the test body may run. */
    if (shmem_addr && local_addr && expect_value && wait_variable)
    {
        INT64_TYPE i = 0;
        INT64_TYPE j = 0;
        int num_to_get;
        size_t odd_pos;

        for (i = 0; (i < COUNT_VALUE) && (rc == TC_PASS); i++)
        {
            /* Stride grows with the iteration and saturates at max_stride */
            tst = (i < max_stride) ? i+1 : max_stride;
            sst = tst;
            num_to_get = MAX_ARRAY_SIZE/tst;

            /* Set my value */
            my_value = (TYPE_VALUE)(my_proc + 1);
            memset(shmem_addr,0,MAX_ARRAY_SIZE*SIZE_VALUE);
            memset(expect_value,0,MAX_ARRAY_SIZE*SIZE_VALUE);
            for (j = 0; j < MAX_ARRAY_SIZE; j++)
                local_addr[j] = my_value;

            /* Define the peer and the value expected to arrive here */
            peer_proc = (my_proc + 1) % num_proc;
            peer_value = (TYPE_VALUE)((my_proc == 0) ? num_proc : my_proc);

            /* Expected result: peer_value at every tst-th element, 0 elsewhere */
            for (j=0; j<num_to_get; j++)
                expect_value[j*tst] = peer_value;

            /* Barrier so that every PE has prepared its buffers */
            shmem_barrier_all();

            /* Perform the strided transfer with the peer */
            FUNC_VALUE(shmem_addr,local_addr,tst,sst,num_to_get,peer_proc);
            wait_for_completion(wait_variable,peer_proc,&rc);

            if (rc == TC_PASS)
            {
                rc = (compare_buffer((unsigned char*)shmem_addr, (unsigned char*)expect_value, MAX_ARRAY_SIZE, &odd_pos) ? TC_PASS : TC_FAIL);
            }
            /* "got" reports the buffer that was actually compared above */
            log_debug(OSH_TC, "my(#%d:%lld) peer(#%d:%lld) expected = %lld vs got = %lld\n",
                               my_proc, (INT64_TYPE)my_value, peer_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value[0], (INT64_TYPE)shmem_addr[0]);

            /* Barrier so that no PE resets its buffers while a peer is still checking */
            shmem_barrier_all();
        }
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (local_addr)
    {
        free(local_addr);
    }
    if (expect_value)
    {
        free(expect_value);
    }
    if (shmem_addr)
    {
        shfree(shmem_addr);
    }
    if (wait_variable)
    {
        shfree(wait_variable);
    }
    return rc;
}
/*
 * Non-blocking put size test.
 *
 * PEs are paired (even PE -> PE+1). For MAX_ITER iterations and each entry
 * of elements[], the even PE fills its source buffers, issues non-blocking
 * puts of int/long/float/double plus one blocking short put to its odd
 * neighbour, calls shmem_quiet() and then raises the neighbour's flag. The
 * odd PE waits for the flag and checks every received element, printing a
 * FAIL line on stderr for each mismatch.
 *
 * Requires an even number of PEs; exits with status 1 otherwise.
 * Returns 0 (mismatches are reported on stderr only).
 */
int main(int argc, char **argv)
{
  int i,j,iter;
  int my_pe,n_pes;
  int *flag,*one;
  size_t max_elements,max_elements_bytes;
  size_t elements[16] = {1,2,4,8,12,16,24,32,64,128,256,512,1024,2048,4096,8192};
  int num_elements = 16;

  short *srce_short,*targ_short;
  int *srce_int,*targ_int;
  long *srce_long,*targ_long;
  float *srce_float,*targ_float;
  double *srce_double,*targ_double;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
  flag = shmem_malloc(sizeof(int));
  one  = shmem_malloc(sizeof(int));
  /* both are dereferenced below, so check them like every other allocation */
  if((flag == NULL) || (one == NULL))
    shmalloc_error();
  *one  = 1;

/*  fail if trying to use odd number of processors  */
  if ( (n_pes % 2) != 0 ){
        fprintf(stderr, "FAIL - test requires even number of PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_both_put_nb_size(%s)\n", argv[0]);

/*  alloc arrays   */

  max_elements = (size_t) (MAX_SIZE / sizeof(int));
  max_elements_bytes = (size_t) (sizeof(int)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_int_put_nb        max_elements = %zu\n",max_elements);
  srce_int = shmem_malloc(max_elements_bytes);
  targ_int = shmem_malloc(max_elements_bytes);
  if((srce_int == NULL) || (targ_int == NULL))
    shmalloc_error();

  max_elements = (size_t) (MAX_SIZE / sizeof(short));
  max_elements_bytes = (size_t) (sizeof(short)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_short_put         max_elements = %zu\n",max_elements);
  srce_short = shmem_malloc(max_elements_bytes);
  targ_short = shmem_malloc(max_elements_bytes);
  if((srce_short == NULL) || (targ_short == NULL))
    shmalloc_error();

  max_elements = (size_t) (MAX_SIZE / sizeof(long));
  max_elements_bytes = (size_t) (sizeof(long)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_long_put_nb       max_elements = %zu\n",max_elements);
  srce_long = shmem_malloc(max_elements_bytes);
  targ_long = shmem_malloc(max_elements_bytes);
  if((srce_long == NULL) || (targ_long == NULL))
    shmalloc_error();

  max_elements = (size_t) (MAX_SIZE / sizeof(float));
  max_elements_bytes = (size_t) (sizeof(float)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_float_put_nb      max_elements = %zu\n",max_elements);
  srce_float = shmem_malloc(max_elements_bytes);
  targ_float = shmem_malloc(max_elements_bytes);
  if((srce_float == NULL) || (targ_float == NULL))
    shmalloc_error();

  max_elements = (size_t) (MAX_SIZE / sizeof(double));
  max_elements_bytes = (size_t) (sizeof(double)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_double_put_nb     max_elements = %zu\n",max_elements);
  srce_double = shmem_malloc(max_elements_bytes);
  targ_double = shmem_malloc(max_elements_bytes);
  if((srce_double == NULL) || (targ_double == NULL))
    shmalloc_error();

  /* max_elements now holds the limit computed for double, the last type */
  if(my_pe == 0)
    fprintf(stderr,"Actual value used for   max_elements = %zu\n",max_elements);
  /* try the different sizes MAX_ITER times */
  for (iter = 0; iter < MAX_ITER; iter++) {
   for (i = 0; i < num_elements; i++) {
    *flag = 0;
    if (elements[i] <= max_elements) {
     if ( (my_pe % 2) == 0 )
       for(j = 0; j < elements[i]; j++) {
         srce_short[j] = (short)(my_pe+j);
         srce_int[j] = (int)(iter*10000+elements[i]*100+my_pe+j);
         srce_long[j] = (long)(iter*10000+elements[i]*100+my_pe+j);
         srce_float[j] = (float)(iter*10000+elements[i]*100+my_pe+j);
         srce_double[j] = (double)(iter*10000+elements[i]*100+my_pe+j);
       }
     else
       for(j = 0; j < elements[i]; j++) {
         targ_short[j] = (short)(my_pe+j);
         targ_int[j] = (int)(iter*10000+elements[i]*100+my_pe+j);
         targ_long[j] = (long)(iter*10000+elements[i]*100+my_pe+j);
         targ_float[j] = (float)(iter*10000+elements[i]*100+my_pe+j);
         targ_double[j] = (double)(iter*10000+elements[i]*100+my_pe+j);
       }
     shmem_barrier_all();
     if ( (my_pe % 2) == 0 ) {
#ifndef OPENSHMEM
       shmemx_int_put_nb(targ_int,srce_int,elements[i],my_pe+1,NULL);
       shmemx_long_put_nb(targ_long,srce_long,elements[i],my_pe+1,NULL);
       shmemx_float_put_nb(targ_float,srce_float,elements[i],my_pe+1,NULL);
       shmemx_double_put_nb(targ_double,srce_double,elements[i],my_pe+1,NULL);
#else
       shmem_int_put_nbi(targ_int,srce_int,elements[i],my_pe+1);
       shmem_long_put_nbi(targ_long,srce_long,elements[i],my_pe+1);
       shmem_float_put_nbi(targ_float,srce_float,elements[i],my_pe+1);
       shmem_double_put_nbi(targ_double,srce_double,elements[i],my_pe+1);
#endif
       /* this one is blocking */
       shmem_short_put(targ_short,srce_short,elements[i],my_pe+1);
       /* complete all outstanding puts before signalling the neighbour */
       shmem_quiet();
       shmem_int_put(flag,one,(size_t)1,my_pe+1);
     } else {
       shmem_int_wait(flag,0);
       for(j = 0; j < elements[i]; j++) {
         /* value the even neighbour (my_pe-1) wrote at index j; size_t
          * because elements[i] is size_t, hence %zu in the messages */
         const size_t expect = iter*10000+elements[i]*100+my_pe+j-1;

         if ( targ_short[j] != (short)(my_pe+j-1) )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_short[%d]=%d not equal %d\n",
              my_pe,iter,i,j,targ_short[j],my_pe+j-1);
         if ( targ_int[j] != (int)expect )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_int[%d]=%d not equal %zu\n",
              my_pe,iter,i,j,targ_int[j],expect);
         if ( targ_long[j] != (long)expect )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_long[%d]=%ld not equal %zu\n",
              my_pe,iter,i,j,targ_long[j],expect);
         if ( targ_float[j] != (float)expect )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_float[%d]=%f not equal %zu\n",
              my_pe,iter,i,j,targ_float[j],expect);
         if ( targ_double[j] != (double)expect )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_double[%d]=%f not equal %zu\n",
              my_pe,iter,i,j,targ_double[j],expect);
         }
     }
    }
   }
  }
  shmem_free(srce_short);  shmem_free(targ_short);
  shmem_free(srce_int);  shmem_free(targ_int);
  shmem_free(srce_long);  shmem_free(targ_long);
  shmem_free(srce_float);  shmem_free(targ_float);
  shmem_free(srce_double);  shmem_free(targ_double);
  shmem_free(flag);  shmem_free(one);
#ifdef NEEDS_FINALIZE
  shmem_finalize(); 
#endif
  return 0;
}
예제 #27
0
/*
 * Tears down the OSHMEM layer in a fixed order.
 *
 * After a global barrier and lock finalisation, each subsystem is shut
 * down in turn. The first step that does not report OSHMEM_SUCCESS aborts
 * the sequence and its status is returned unchanged; the remaining steps
 * are then not executed.
 *
 * Returns OSHMEM_SUCCESS when every step succeeded.
 */
static int _shmem_finalize(void)
{
    int status;

    shmem_barrier_all();
    shmem_lock_finalize();

    /* preconnect framework */
    status = oshmem_shmem_preconnect_all_finalize();
    if (status != OSHMEM_SUCCESS) {
        return status;
    }

    /* outstanding requests */
    status = oshmem_request_finalize();
    if (status != OSHMEM_SUCCESS) {
        return status;
    }

    /* cached groups have to go before the collectives are torn down */
    status = oshmem_group_cache_list_free();
    if (status != OSHMEM_SUCCESS) {
        return status;
    }

    /* These two groups are not released by oshmem_group_cache_list_free(),
     * so their collectives are unselected explicitly here. */
    mca_scoll_base_group_unselect(oshmem_group_all);
    mca_scoll_base_group_unselect(oshmem_group_self);

    /* shut down the MCA frameworks: atomic, scoll, memheap, sshmem */
    status = mca_base_framework_close(&oshmem_atomic_base_framework);
    if (status != OSHMEM_SUCCESS) {
        return status;
    }

    status = mca_base_framework_close(&oshmem_scoll_base_framework);
    if (status != OSHMEM_SUCCESS) {
        return status;
    }

    status = mca_base_framework_close(&oshmem_memheap_base_framework);
    if (status != OSHMEM_SUCCESS) {
        return status;
    }

    status = mca_base_framework_close(&oshmem_sshmem_base_framework);
    if (status != OSHMEM_SUCCESS) {
        return status;
    }

    /* drop all procs from the SPML */
    status = MCA_SPML_CALL(del_procs(oshmem_group_all->proc_array, oshmem_group_all->proc_count));
    if (status != OSHMEM_SUCCESS) {
        return status;
    }

    oshmem_shmem_barrier();

    /* SPML resources, then the SPML framework itself */
    status = mca_spml_base_finalize();
    if (status != OSHMEM_SUCCESS) {
        return status;
    }

    status = mca_base_framework_close(&oshmem_spml_base_framework);
    if (status != OSHMEM_SUCCESS) {
        return status;
    }

    /* op resources */
    status = oshmem_op_finalize();
    if (status != OSHMEM_SUCCESS) {
        return status;
    }

    /* proc_group resources */
    status = oshmem_proc_group_finalize();
    if (status != OSHMEM_SUCCESS) {
        return status;
    }

    /* proc resources */
    status = oshmem_proc_finalize();
    if (status != OSHMEM_SUCCESS) {
        return status;
    }

    /* info resources: last step, its status is the overall result */
    return oshmem_info_finalize();
}
예제 #28
0
/*
 * Zero-length put test.
 *
 * Every PE issues puts with a length of 0 to the next PE (wrapping) for the
 * typed put routines, shmem_putmem and the sized put32/put64/put128
 * routines. PE 0 then checks that its destination buffers still hold the
 * sentinel value -9 and prints "Passed"/"Failed" per routine.
 *
 * Note: the successN flags are set to 1 when a destination element was
 * modified, i.e. 0 means the test passed.
 *
 * The test is skipped when run on a single PE. Returns 0.
 */
int
main (int argc, char **argv)
{
    int i;
    int nextpe;
    int me, npes;
    int success1, success2, success3, success4, success5, success6, success7,
        success8;

    short src1[N];
    int src2[N];
    long src3[N];
    long double src4[N];
    long long src5[N];
    double src6[N];
    float src7[N];
    char *src8;
    short src9;
    int src10;
    long src11;
    double src12;
    float src13;

    short *dest1;
    int *dest2;
    long *dest3;
    long double *dest4;
    long long *dest5;
    double *dest6;
    float *dest7;
    char *dest8;
    short *dest9;
    int *dest10;
    long *dest11;
    double *dest12;
    float *dest13;


    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    if (npes > 1) {

        success1 = 0;
        success2 = 0;
        success3 = 0;
        success4 = 0;
        success5 = 0;
        success6 = 0;
        success7 = 0;
        success8 = 0;
        src8 = (char *) malloc (N * sizeof (char));
        if (src8 == NULL) {
            /* src8 is filled right below; do not write through NULL */
            fprintf (stderr, "malloc of src8 failed\n");
            exit (1);
        }

        for (i = 0; i < N; i += 1) {
            src1[i] = (short) me;
            src2[i] = me;
            src3[i] = (long) me;
            src4[i] = (long double) me;
            src5[i] = (long long) me;
            src6[i] = (double) me;
            src7[i] = (float) me;
            src8[i] = (char) me;
        }
        src9 = (short) me;
        src10 = me;
        src11 = (long) me;
        src12 = (double) me;
        src13 = (float) me;


        dest1 = (short *) shmem_malloc (N * sizeof (*dest1));
        dest2 = (int *) shmem_malloc (N * sizeof (*dest2));
        dest3 = (long *) shmem_malloc (N * sizeof (*dest3));
        dest4 = (long double *) shmem_malloc (N * sizeof (*dest4));
        dest5 = (long long *) shmem_malloc (N * sizeof (*dest5));
        dest6 = (double *) shmem_malloc (N * sizeof (*dest6));
        dest7 = (float *) shmem_malloc (N * sizeof (*dest7));
        /* Fixed: was 4 elements, but dest8 is initialised and checked over
         * N elements below, overflowing the allocation whenever N > 4. */
        dest8 = (char *) shmem_malloc (N * sizeof (*dest8));
        dest9 = (short *) shmem_malloc (sizeof (*dest9));
        dest10 = (int *) shmem_malloc (sizeof (*dest10));
        dest11 = (long *) shmem_malloc (sizeof (*dest11));
        dest12 = (double *) shmem_malloc (sizeof (*dest12));
        dest13 = (float *) shmem_malloc (sizeof (*dest13));

        /* Fill every destination with the sentinel the checks look for */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest4[i] = -9;
            dest5[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9.0;
            dest8[i] = -9;
        }
        *dest9 = -9;
        *dest10 = -9;
        *dest11 = -9;
        *dest12 = -9;
        *dest13 = -9.0;

        nextpe = (me + 1) % npes;

        /* Testing shmem_short_put, shmem_int_put,
           shmem_long_put, shmem_longdouble_put, shmem_longlong_put,
           shmem_double_put, shmem_float_put, shmem_putmem */
        shmem_barrier_all ();

        shmem_short_put (dest1, src1, 0, nextpe);
        shmem_int_put (dest2, src2, 0, nextpe);
        shmem_long_put (dest3, src3, 0, nextpe);
        shmem_longdouble_put (dest4, src4, 0, nextpe);
        shmem_longlong_put (dest5, src5, 0, nextpe);
        shmem_double_put (dest6, src6, 0, nextpe);
        shmem_float_put (dest7, src7, 0, nextpe);
        shmem_putmem (dest8, src8, 0, nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            /* Any element that lost its sentinel marks the routine as failed */
            for (i = 0; i < N; i += 1) {
                if (dest1[i] != -9) {
                    success1 = 1;
                }
                if (dest2[i] != -9) {
                    success2 = 1;
                }
                if (dest3[i] != -9) {
                    success3 = 1;
                }
                if (dest4[i] != -9) {
                    success4 = 1;
                }
                if (dest5[i] != -9) {
                    success5 = 1;
                }
                if (dest6[i] != -9) {
                    success6 = 1;
                }
                if (dest7[i] != -9) {
                    success7 = 1;
                }
                if (dest8[i] != -9) {
                    success8 = 1;
                }
            }

            if (success1 == 0)
                printf ("Test shmem_short_put of zero length: Passed\n");
            else
                printf ("Test shmem_short_put of zero length: Failed\n");
            if (success2 == 0)
                printf ("Test shmem_int_put of zero length: Passed\n");
            else
                printf ("Test shmem_int_put of zero length: Failed\n");
            if (success3 == 0)
                printf ("Test shmem_long_put of zero length: Passed\n");
            else
                printf ("Test shmem_long_put of zero length: Failed\n");
            if (success4 == 0)
                printf ("Test shmem_longdouble_put of zero length: Passed\n");
            else
                printf ("Test shmem_longdouble_put of zero length: Failed\n");
            if (success5 == 0)
                printf ("Test shmem_longlong_put of zero length: Passed\n");
            else
                printf ("Test shmem_longlong_put of zero length: Failed\n");
            if (success6 == 0)
                printf ("Test shmem_double_put of zero length: Passed\n");
            else
                printf ("Test shmem_double_put of zero length: Failed\n");
            if (success7 == 0)
                printf ("Test shmem_float_put of zero length: Passed\n");
            else
                printf ("Test shmem_float_put of zero length: Failed\n");
            if (success8 == 0)
                printf ("Test shmem_putmem of zero length: Passed\n");
            else
                printf ("Test shmem_putmem of zero length: Failed\n");

        }
        shmem_barrier_all ();

        /* Testing shmem_put32, shmem_put64, shmem_put128 */
        if (sizeof (int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all ();

            shmem_put32 (dest2, src2, 0, nextpe);
            shmem_put64 (dest3, src3, 0, nextpe);
            shmem_put128 (dest4, src4, 0, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest2[i] != -9) {
                        success2 = 1;
                    }
                    if (dest3[i] != -9) {
                        success3 = 1;
                    }
                    if (dest4[i] != -9) {
                        success4 = 1;
                    }
                }
                if (success2 == 0)
                    printf ("Test shmem_put32 of zero length: Passed\n");
                else
                    printf ("Test shmem_put32 of zero length: Failed\n");

                if (success3 == 0)
                    printf ("Test shmem_put64 of zero length: Passed\n");
                else
                    printf ("Test shmem_put64 of zero length: Failed\n");

                if (success4 == 0)
                    printf ("Test shmem_put128 of zero length: Passed\n");
                else
                    printf ("Test shmem_put128 of zero length: Failed\n");
            }
        }
        else if (sizeof (int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all ();

            shmem_put32 (dest1, src1, 0, nextpe);
            shmem_put64 (dest2, src2, 0, nextpe);
            shmem_put128 (dest3, src3, 0, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest1[i] != -9) {
                        success1 = 1;
                    }
                    if (dest2[i] != -9) {
                        success2 = 1;
                    }
                    if (dest3[i] != -9) {
                        success3 = 1;
                    }

                }
                if (success1 == 0)
                    printf ("Test shmem_put32 of zero length: Passed\n");
                else
                    printf ("Test shmem_put32 of zero length: Failed\n");
                if (success2 == 0)
                    printf ("Test shmem_put64 of zero length: Passed\n");
                else
                    printf ("Test shmem_put64 of zero length: Failed\n");

                if (success3 == 0)
                    printf ("Test shmem_put128 of zero length: Passed\n");
                else
                    printf ("Test shmem_put128 of zero length: Failed\n");
            }
        }


        shmem_barrier_all ();

        shmem_free (dest1);
        shmem_free (dest2);
        shmem_free (dest3);
        shmem_free (dest4);
        shmem_free (dest5);
        shmem_free (dest6);
        shmem_free (dest7);
        shmem_free (dest8);
        shmem_free (dest9);
        shmem_free (dest10);
        shmem_free (dest11);
        shmem_free (dest12);
        shmem_free (dest13);

        /* Fixed: the private source buffer was never released */
        free (src8);

    }
    else {
        printf
        ("Number of PEs must be > 1 to test shmem put of zero length, test skipped\n");
    }

    shmem_finalize ();

    return 0;
}
예제 #29
0
/*
 * shmem_getmem latency benchmark for exactly two PEs.
 *
 * argv[1] selects the buffers: a value starting with "heap" uses buffers
 * obtained from shmalloc(), a value starting with "global" uses the
 * file-level s_buf_original / r_buf_original. For each message size from 1
 * up to MAX_MSG_SIZE (doubling), PE 0 times `loop` calls of shmem_getmem()
 * from PE 1 after `skip` warm-up calls and prints the average per call.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on wrong PE count, bad arguments or
 * allocation failure.
 */
int main(int argc, char *argv[])
{
    int myid, numprocs, i;
    int size;
    char *s_buf, *r_buf;
    char *s_buf_heap = NULL, *r_buf_heap = NULL;
    int align_size;
    int64_t t_start = 0, t_end = 0;
    int use_heap = 0;   //default uses global

    start_pes(0);
    myid = _my_pe();
    numprocs = _num_pes();

    if(numprocs != 2) {
        if(myid == 0) {
            fprintf(stderr, "This test requires exactly two processes\n");
        }

        return EXIT_FAILURE;
    }

    if(argc != 2) {
        usage(myid);

        return EXIT_FAILURE;
    }

    /* prefix match: only the leading characters of argv[1] are compared */
    if(0 == strncmp(argv[1], "heap", strlen("heap"))){
        use_heap = 1;
    } else if(0 == strncmp(argv[1], "global", strlen("global"))){
        use_heap = 0;
    } else {
        usage(myid);
        return EXIT_FAILURE;
    }

    align_size = MESSAGE_ALIGNMENT;

    /**************Allocating Memory*********************/

    if(use_heap){

        s_buf_heap = shmalloc(MYBUFSIZE);
        r_buf_heap = shmalloc(MYBUFSIZE);

        /* Fixed: the buffers are written in the loop below, so a failed
         * allocation must not be used. */
        if(s_buf_heap == NULL || r_buf_heap == NULL) {
            fprintf(stderr, "shmalloc failed\n");
            return EXIT_FAILURE;
        }

        /* round each start address up to a multiple of align_size */
        s_buf =
            (char *) (((unsigned long) s_buf_heap + (align_size - 1)) /
                    align_size * align_size);

        r_buf =
            (char *) (((unsigned long) r_buf_heap + (align_size - 1)) /
                    align_size * align_size);
    } else {

        s_buf =
            (char *) (((unsigned long) s_buf_original + (align_size - 1)) /
                    align_size * align_size);

        r_buf =
            (char *) (((unsigned long) r_buf_original + (align_size - 1)) /
                    align_size * align_size);
    }

    /**************Memory Allocation Done*********************/

    if(myid == 0) {
        fprintf(stdout, HEADER);
        fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)");
        fflush(stdout);
    }

    for(size = 1; size <= MAX_MSG_SIZE; size = (size ? size * 2 : 1)) {
        
        /* touch the data */
        for(i = 0; i < size; i++) {
            s_buf[i] = 'a';
            r_buf[i] = 'b';
        }

        /* large messages switch to the large-message iteration counts */
        if(size > large_message_size) {
            loop = loop_large = 100;
            skip = skip_large = 0;
        }

        shmem_barrier_all();

        if(myid == 0) 
        {
            /* the first `skip` iterations are warm-up and are not timed */
            for(i = 0; i < loop + skip; i++) {
                if(i == skip) t_start = TIME();

                shmem_getmem(r_buf, s_buf, size, 1);
            }

            t_end = TIME();
        }
        shmem_barrier_all();

        if(myid == 0) {
            double latency = (1.0 * (t_end-t_start)) / loop;

            fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH,
                    FLOAT_PRECISION, latency);
            fflush(stdout);
        }
    }

    shmem_barrier_all();

    if(use_heap){
        shfree(s_buf_heap);
        shfree(r_buf_heap);
    }

    shmem_barrier_all();
    return EXIT_SUCCESS;
}
예제 #30
0
파일: lfinc.c 프로젝트: caomw/SOS
/*
 * shmem_long_finc() test and timing.
 *
 * Each PE performs `loops` fetch-and-increment operations on data[1] of its
 * neighbour (my_pe + 1, wrapping) and checks that each fetched value equals
 * the loop index. After a barrier, every PE verifies that its own data[1]
 * equals `loops` and that the adjacent words data[0] and data[2] are still 0.
 * PE 0 reports the accumulated time.
 *
 * argv[1], if present, overrides the default loop count LOOPS.
 * Returns 0 on success, -1 if the final counter value is wrong; a wrong
 * fetched value terminates the job through shmem_global_exit(1).
 */
int main( int argc, char *argv[])
{
    int rc=0, my_pe, npes, neighbor;
    int loops=LOOPS;
    int j;
    size_t data_sz=sizeof(long) * 3;
    double start_time;
    long *data, lval=0;

    if (argc > 1)
        loops = atoi(argv[1]);

    shmem_init();

    my_pe = shmem_my_pe();
    npes = shmem_n_pes();

    data = shmem_malloc(data_sz);
    if (!data) {
        /* Fixed: data_sz is a size_t, so it needs %zu rather than %ld */
        fprintf(stderr,"[%d] shmem_malloc(%zu) failure? %d\n",
                my_pe,data_sz,errno);
        shmem_global_exit(1);
    }
    memset((void*)data,0,data_sz);

    shmem_barrier_all();

    neighbor = (my_pe + 1) % npes;
    /* only the finc call itself is timed; the check is excluded */
    for(j=0,elapsed=0.0; j < loops; j++) {
        start_time = shmemx_wtime();
        lval = shmem_long_finc( (void*)&data[1], neighbor );
        elapsed += shmemx_wtime() - start_time;
        if (lval != (long) j) {
            fprintf(stderr,"[%d] Test: FAIL previous val %ld != %d Exit.\n",
                    my_pe, lval, j);
            shmem_global_exit(1);
        }
    }
    shmem_barrier_all();

    rc = 0;
    if (data[1] != (long)loops) { 
        fprintf(stderr,"[%d] finc neighbor: FAIL data[1](%p) %ld != %d Exit.\n",
                    my_pe, (void*)&data[1], data[1], loops);
        rc--;
    }

    /* check that the adjacent memory locations were not disturbed */
    assert(data[0] == 0);
    assert(data[2] == 0);

    if (my_pe == 0 ) {
        if (rc == 0 && Verbose)
            fprintf(stderr,"[%d] finc neighbor: PASSED.\n",my_pe);
        /* Fixed: seconds -> microseconds is a factor of 1e6 (was 1e5) */
        fprintf(stderr,"[%d] %d loops of shmem_long_finc() in %6.4f secs\n"
                "  %2.6f usecs per shmem_long_finc()\n",
                    my_pe,loops,elapsed,((elapsed*1000000.0)/(double)loops));
    }
    shmem_free(data);

    shmem_finalize();

    return rc;
}