void
__shmem_barrier_linear (int PE_start, int logPE_stride, int PE_size,
			long *pSync)
{
  const int me = _my_pe ();
  const int step = 1 << logPE_stride;
  const long nreplies = _SHMEM_SYNC_VALUE + PE_size - 1;
  int i, round;
  int thatpe;

  for (round = 0; round < 2; round += 1)
    {

      for (thatpe = PE_start, i = 0; i < PE_size; thatpe += step, i += 1)
	{

	  if (thatpe != me)
	    {
	      shmem_long_inc (&pSync[round], thatpe);

	      __shmem_trace (SHMEM_LOG_BARRIER,
			     "round = %d, sent increment to PE %d",
			     round, thatpe);
	    }

	}
      shmem_long_wait_until (&pSync[round], _SHMEM_CMP_EQ, nreplies);

      pSync[round] = _SHMEM_SYNC_VALUE;

    }
}
Beispiel #2
0
int
main(int argc, char* argv[])
{
    int me, num_procs, l, j;
    int Verbose = 0;

    start_pes(0);
    me = _my_pe();
    num_procs = _num_pes();
    if ( num_procs < 2 ) {
        if (me ==0)
            printf("PE[0] requires 2 or more PEs?\n");
        return 1;
    }

    for (l = 0 ; l < loops ; ++l) {

        if ((src_int = shmalloc(2*num_procs*sizeof(int))) == NULL) {
            printf("PE-%d int shmalloc() failed?\n", me);
            exit(1);
        }
        dst_int = &src_int[num_procs];
        for(j=0; j < num_procs; j++) {
            src_int[j] = 4;
            dst_int[j] = 0;
        }

        if ((src_float = shmalloc(2*num_procs*sizeof(float))) == NULL) {
            printf("PE-%d float shmalloc() failed?\n", me);
            exit(1);
        }
        dst_float = &src_float[num_procs];
        for(j=0; j < num_procs; j++) {
            src_float[j] = 4.0;
            dst_float[j] = 0.0;
        }

        if ((src_double = shmalloc(2*num_procs*sizeof(double))) == NULL) {
            printf("PE-%d double shmalloc() failed?\n", me);
            exit(1);
        }
        dst_double = &src_double[num_procs];
        for(j=0; j < num_procs; j++) {
            src_double[j] = 8.0;
            dst_double[j] = 0.0;
        }

        if ((src_long = shmalloc(2*num_procs*sizeof(long))) == NULL) {
            printf("PE-%d long shmalloc() failed?\n", me);
            exit(1);
        }
        dst_long = &src_long[num_procs];
        for(j=0; j < num_procs; j++) {
            src_long[j] = 8;
            dst_long[j] = 0;
        }

        if ((src_llong = shmalloc(2*num_procs*sizeof(long long))) == NULL) {
            printf("PE-%d long shmalloc() failed?\n", me);
            exit(1);
        }
        dst_llong = &src_llong[num_procs];
        for(j=0; j < num_procs; j++) {
            src_llong[j] = 16;
            dst_llong[j] = 0;
        }

        shmem_barrier_all();

        if ( me != 0 ) {
            /* is 'src_*' accessible from PE0? should be. */
            if (!shmem_addr_accessible(src_int,0)) {
                printf("PE-%d local src_int %p not accessible from PE-%d?\n",
                       me, (void*)src_int, 0);
                exit(1);
            }
            if (!shmem_addr_accessible(src_float,0)) {
                printf("PE-%d local src_float %p not accessible from PE-%d?\n",
                       me, (void*)src_float, 0);
                exit(1);
            }
            if (!shmem_addr_accessible(src_double,0)) {
                printf("PE-%d local src_double %p not accessible from PE-%d?\n",
                       me, (void*)src_double, 0);
                exit(1);
            }
            if (!shmem_addr_accessible(src_long,0)) {
                printf("PE-%d local src_long %p not accessible from PE-%d?\n",
                       me, (void*)src_long, 0);
                exit(1);
            }
            if (!shmem_addr_accessible(src_llong,0)) {
                printf("PE-%d local src_llong %p not accessible from PE-%d?\n",
                       me, (void*)src_llong, 0);
                exit(1);
            }
        }
        shmem_barrier_all();

        if ( me == 0 ) {
            shmem_quiet();
            for(j=1; j < num_procs; j++) {
                dst_int[j] = shmem_int_swap(src_int+j,0,j);
                if (dst_int[j] != 4) {
                    printf("PE-%d dst_int[%d] %d != 4?\n",me,j,dst_int[j]);
                    exit(1);
                }
            }
            shmem_barrier_all();

            /* verify remote data */
            for(j=1; j < num_procs; j++) {
                itmp = shmem_int_g(src_int+j,j);
                if (itmp != 0) {
                    printf("PE-0 int PE[%d] rem(%d) != 0?\n",j,itmp);
                    exit(1);
                }

                /* swap back */
                dst_int[j] = shmem_int_swap(src_int+j,dst_int[j],j);
                if (dst_int[j] != 0) {
                    printf("PE-0 dst_int[%d] %d != 0?\n",j,dst_int[j]);
                    exit(1);
                }

                itmp = shmem_int_g(src_int+j,j);
                if (itmp != 4) {
                    printf("PE-0 PE[%d] rem %d != 4?\n",j,itmp);
                    exit(1);
                }
            }

            for(j=1; j < num_procs; j++) {
                dst_float[j] = shmem_float_swap(src_float+j,0.0,j);
                if (dst_float[j] != 4.0) {
                    printf("PE-0 dst_float[%d] %f != 4.0?\n",j,dst_float[j]);
                    exit(1);
                }

                /* verify remote data */
                ftmp = shmem_float_g(src_float+j,j);
                if (ftmp != 0.0) {
                    printf("PE-0 float rem(%f) != 0.0?\n",ftmp);
                    exit(1);
                }
                /* swap back */
                dst_float[j] = shmem_float_swap(src_float+j,dst_float[j],j);
                if (dst_float[j] != 0.0) {
                    printf("PE-0 dst_float[%d] %f != 0.0?\n",j,dst_float[j]);
                    exit(1);
                }
                ftmp = shmem_float_g(src_float+j,j);
                if (ftmp != 4.0) {
                    printf("PE-%d float rem(%f) != 4.0?\n",me,ftmp);
                    exit(1);
                }
            }

            for(j=1; j < num_procs; j++) {
                dst_double[j] = shmem_double_swap(src_double+j,0.0,j);
                if (dst_double[j] != 8.0) {
                    printf("PE-0 dst_double[%d] %f != 8.0?\n",j,dst_double[j]);
                    exit(1);
                }
                /* verify remote data */
                dtmp = shmem_double_g(src_double+j,j);
                if (dtmp != 0.0) {
                    printf("PE-0 float rem(%f) != 0.0?\n",dtmp);
                    exit(1);
                }
                dst_double[j] = shmem_double_swap(src_double+j,dst_double[j],j);
                if (dst_double[j] != 0.0) {
                    printf("PE-0 dst_double[%d] %f != 0.0?\n",j,dst_double[j]);
                    exit(1);
                }
                dtmp = shmem_double_g(src_double+j,j);
                if (dtmp != 8.0) {
                    printf("PE-0 double rem(%f) != 8.0?\n",dtmp);
                    exit(1);
                }
            }

            for(j=1; j < num_procs; j++) {
                dst_long[j] = shmem_long_swap(src_long+j,0,j);
                if (dst_long[j] != 8) {
                    printf("PE-0 dst_long[%d] %ld != 8?\n",j,dst_long[j]);
                    exit(1);
                }
            }
            shmem_barrier_all();

            /* verify remote data */
            for(j=1; j < num_procs; j++) {
                ltmp = shmem_long_g(src_long+j,j);
                if (ltmp != 0) {
                    printf("PE-0 PE[%d]long rem(%ld) != 0?\n",j,ltmp);
                    exit(1);
                }
                /* swap back */
                dst_long[j] = shmem_long_swap(src_long+j,dst_long[j],j);
                if (dst_long[j] != 0) {
                    printf("PE-%d dst_long[%d] %ld != 0?\n",me,j,dst_long[j]);
                    exit(1);
                }
                ltmp = shmem_long_g(src_long+j,j);
                if (ltmp != 8) {
                    printf("PE-%d long rem(%ld) != 8?\n",me,ltmp);
                    exit(1);
                }
            }

            for(j=1; j < num_procs; j++) {
                dst_llong[j] = shmem_longlong_swap(src_llong+j,0,j);
                if (dst_llong[j] != 16) {
                    printf("PE-%d dst_llong[%d] %lld != 16?\n",me,j,dst_llong[j]);
                    exit(1);
                }
            }
            shmem_barrier_all();

            /* verify remote data */
            for(j=1; j < num_procs; j++) {
                lltmp = shmem_longlong_g(src_llong+j,j);
                if (lltmp != 0) {
                    printf("PE-%d long long rem(%lld) != 0?\n",me,lltmp);
                    exit(1);
                }
                /* swap back */
                dst_llong[j] = shmem_longlong_swap(src_llong+j,dst_llong[j],j);
                if (dst_llong[j] != 0) {
                    printf("PE-%d  dst_llong[%d] %lld != 0?\n", me,j,dst_llong[j]);
                    exit(1);
                }
                lltmp = shmem_longlong_g(src_llong+j,j);
                if (lltmp != 16) {
                    printf("PE-%d longlong rem(%lld) != 16?\n",me,lltmp);
                    exit(1);
                }
            }
        }
        else {
            shmem_int_wait_until(&src_int[me],SHMEM_CMP_EQ,0);
            shmem_barrier_all();

            shmem_long_wait_until(&src_long[me],SHMEM_CMP_EQ,0);
            shmem_barrier_all();

            shmem_longlong_wait_until(&src_llong[me],SHMEM_CMP_EQ,0);
            shmem_barrier_all();
        }

        shmem_barrier_all();

        shfree(src_int);
        shfree(src_float);
        shfree(src_double);
        shfree(src_long);
        shfree(src_llong);
    }

    if (Verbose)
        fprintf(stderr,"[%d] exit\n",_my_pe());

    return 0;
}
Beispiel #3
0
void FORTRANIFY (shmem_wait_until) (long *ivar, int *cmp, long *cmp_value)
{
    shmem_long_wait_until (ivar, *cmp, *cmp_value);
}
Beispiel #4
0
void
shmemi_broadcast32_tree (void *target, const void *source,
                         size_t nlong,
                         int PE_root, int PE_start,
                         int logPE_stride, int PE_size, long *pSync)
{
    int child_l, child_r, parent;
    const int step = 1 << logPE_stride;
    int my_pe = GET_STATE (mype);
    int *target_ptr, *source_ptr;
    int no_children;
    long is_ready, lchild_ready, rchild_ready;


    is_ready = 1;
    lchild_ready = -1;
    rchild_ready = -1;

    shmem_long_wait_until (&pSync[0], SHMEM_CMP_EQ, SHMEM_SYNC_VALUE);
    shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, SHMEM_SYNC_VALUE);

    pSync[0] = 0;
    pSync[1] = 0;

    target_ptr = (int *) target;
    source_ptr = (int *) source;

    set_2tree (PE_start, step, PE_size, &parent, &child_l, &child_r, my_pe);
    no_children = 0;
    build_tree (PE_start, step, PE_root, PE_size,
                &parent, &child_l, &child_r, my_pe);
    shmemi_trace (SHMEM_LOG_BROADCAST,
                  "before broadcast, R_child = %d L_child = %d",
                  child_r, child_l);
    /* The actual broadcast */

    if (PE_size > 1) {
        if (my_pe == (PE_start + step * PE_root)) {
            pSync[0] = SHMEM_SYNC_VALUE;

            if (child_l != -1) {
                shmem_long_get (&lchild_ready, (const long *) &pSync[0],
                                1, child_l);
                while (lchild_ready != 0)
                    shmem_long_get (&lchild_ready, &pSync[0], 1, child_l);

                shmem_int_put (target_ptr, source_ptr, nlong, child_l);
                shmem_fence ();
                shmem_long_put (&pSync[0], &is_ready, 1, child_l);
                no_children = 1;
            }
            if (child_r != -1) {
                shmem_long_get (&rchild_ready, &pSync[0], 1, child_r);
                while (rchild_ready != 0)
                    shmem_long_get (&rchild_ready, &pSync[0], 1, child_r);

                shmem_int_put (target_ptr, source_ptr, nlong, child_r);
                shmem_fence ();
                shmem_long_put (&pSync[0], &is_ready, 1, child_r);
                no_children = 2;
            }

            shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ,
                                   (long) no_children);
            pSync[1] = SHMEM_SYNC_VALUE;

        }
        else {
            shmem_long_wait_until (&pSync[0], SHMEM_CMP_EQ, is_ready);
            pSync[0] = SHMEM_SYNC_VALUE;
            shmemi_trace (SHMEM_LOG_BROADCAST, "inside else");
            memcpy (source_ptr, target_ptr, nlong * sizeof (int));
            if (child_l != -1) {
                shmem_long_get (&lchild_ready, &pSync[0], 1, child_l);
                while (lchild_ready != 0)
                    shmem_long_get (&lchild_ready, &pSync[0], 1, child_l);

                shmem_int_put (target_ptr, source_ptr, nlong, child_l);
                shmem_fence ();
                shmem_long_put (&pSync[0], &is_ready, 1, child_l);
                no_children = 1;
            }
            if (child_r != -1) {
                shmem_long_get (&rchild_ready, &pSync[0], 1, child_r);
                while (rchild_ready != 0)
                    shmem_long_get (&rchild_ready, &pSync[0], 1, child_r);

                shmem_int_put (target_ptr, source_ptr, nlong, child_r);
                shmem_fence ();
                shmem_long_put (&pSync[0], &is_ready, 1, child_r);
                no_children = 2;
            }
            pSync[0] = SHMEM_SYNC_VALUE;

            if (no_children == 0) {
                pSync[1] = SHMEM_SYNC_VALUE;
                /* TO DO: Is check for parents pSync required? */
                shmem_long_inc (&pSync[1], parent);
            }
            else {
                shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ,
                                       (long) no_children);
                pSync[1] = SHMEM_SYNC_VALUE;
                /* printf("PE %d incrementing child count on PE
                   %d\n",my_pe,parent); */
                shmem_long_inc (&pSync[1], parent);
            }
        }
        shmemi_trace (SHMEM_LOG_BROADCAST, "at the end of bcast32");
        /* shmem_barrier(PE_start, logPE_stride, PE_size, pSync); */
    }
}
Beispiel #5
0
void
shmemi_barrier_tree (int PE_start, int logPE_stride, int PE_size, long *pSync)
{
    int child_l, child_r, parent;
    const int step = 1 << logPE_stride;
    int my_pe = GET_STATE (mype);
    int no_children;
    long is_ready, lchild_ready, rchild_ready;


    is_ready = 1;
    lchild_ready = -1;
    rchild_ready = -1;

    shmem_long_wait_until (&pSync[0], SHMEM_CMP_EQ, SHMEM_SYNC_VALUE);
    shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, SHMEM_SYNC_VALUE);

    /* printf("Tree barrier\n"); */
    set_2tree (PE_start, step, PE_size, &parent, &child_l, &child_r, my_pe);

    no_children = 0;

    shmemi_trace (SHMEM_LOG_BARRIER,
                  "before barrier, R_child = %d L_child = %d",
                  child_r, child_l);

    /* The actual barrier */

    if (PE_size > 1) {
        pSync[0] = 0;
        pSync[1] = 0;

        if (my_pe == PE_start) {
            pSync[0] = SHMEM_SYNC_VALUE;

            if (child_l != -1) {
                shmem_long_get (&lchild_ready, (const long *) &pSync[0],
                                1, child_l);
                while (lchild_ready != 0)
                    shmem_long_get (&lchild_ready, &pSync[0], 1, child_l);
                shmem_long_put (&pSync[0], &is_ready, 1, child_l);
                no_children = 1;
            }

            if (child_r != -1) {
                shmem_long_get (&rchild_ready, &pSync[0], 1, child_r);
                while (rchild_ready != 0)
                    shmem_long_get (&rchild_ready, &pSync[0], 1, child_r);
                shmem_long_put (&pSync[0], &is_ready, 1, child_r);
                no_children = 2;
            }

            shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ,
                                   (long) no_children);
            pSync[1] = SHMEM_SYNC_VALUE;

        }
        else {
            shmem_long_wait_until (&pSync[0], SHMEM_CMP_EQ, is_ready);

            shmemi_trace (SHMEM_LOG_BARRIER, "inside else");

            if (child_l != -1) {
                shmem_long_get (&lchild_ready, &pSync[0], 1, child_l);
                while (lchild_ready != 0)
                    shmem_long_get (&lchild_ready, &pSync[0], 1, child_l);
                shmem_long_put (&pSync[0], &is_ready, 1, child_l);
                no_children = 1;
            }

            if (child_r != -1) {
                shmem_long_get (&rchild_ready, &pSync[0], 1, child_r);
                while (rchild_ready != 0)
                    shmem_long_get (&rchild_ready, &pSync[0], 1, child_r);
                shmem_long_put (&pSync[0], &is_ready, 1, child_r);
                no_children = 2;
            }
            pSync[0] = SHMEM_SYNC_VALUE;

            if (no_children == 0) {
                pSync[1] = SHMEM_SYNC_VALUE;
                shmem_long_inc (&pSync[1], parent);
            }
            else {
                shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ,
                                       (long) no_children);
                pSync[1] = SHMEM_SYNC_VALUE;
                shmem_long_inc (&pSync[1], parent);
            }

        }
        shmemi_trace (SHMEM_LOG_BARRIER, "at the end of barrier");

    }
}
Beispiel #6
0
inline void
shmem_wait_until (long *ivar, int cmp, long cmp_value)
{
    shmem_long_wait_until (ivar, cmp, cmp_value);
}