/*
 * Generic barrier implementation built from a lock (barrierLock), a pair of
 * counters (c[0], c[1]) and a phase index selecting the counter in use.
 * To ensure that tests don't silently fail, this both prints an error
 * message and returns an error result on any lock/unlock failure.
 *
 * nt: number of threads expected at the barrier; a negative value selects
 *     the file-level nthreads.
 *
 * Returns 0 once the barrier has been passed, or the non-zero code from the
 * failing MTest_thread_lock/MTest_thread_unlock call.
 *
 * NOTE(review): the logic relies on the counter for the current phase being
 * negative on first entry; presumably c[] is initialized to negative values
 * elsewhere -- confirm against the barrier init routine.
 */
int MTest_thread_barrier(int nt)
{
    /* Counter for the phase this thread entered in; volatile so the spin
     * loop at the end re-reads it on every iteration. */
    volatile int *cntP;
    int err = 0;

    if (nt < 0)
        nt = nthreads;
    /* Force a write barrier by using lock/unlock */
    err = MTest_thread_lock(&barrierLock);
    if (err) {
        fprintf(stderr, "Lock failed in barrier!\n");
        return err;
    }
    /* Latch the counter of the current phase while holding the lock; the
     * phase may be flipped by the last arriving thread later on. */
    cntP = &c[phase];
    err = MTest_thread_unlock(&barrierLock);
    if (err) {
        fprintf(stderr, "Unlock failed in barrier!\n");
        return err;
    }

    /* printf("[%d] cnt = %d, phase = %d\n", pthread_self(), *cntP, phase); */
    err = MTest_thread_lock(&barrierLock);
    if (err) {
        fprintf(stderr, "Lock failed in barrier!\n");
        return err;
    }
    /* The first thread to enter will reset the counter */
    if (*cntP < 0)
        *cntP = nt;
    /* printf("phase = %d, cnt = %d\n", phase, *cntP); */
    /* The last thread to enter will force the counter to be negative:
     * it switches to the other phase, marks that phase's counter as unused
     * (-1), and zeroes the current counter so that the decrement below
     * leaves it at -1, which releases every thread spinning on it. */
    if (*cntP == 1) {
        /* printf("[%d] changing phase from %d\n", pthread_self(), phase); */
        phase = !phase;
        c[phase] = -1;
        *cntP = 0;
    }
    /* Really need a write barrier here */
    /* Record this thread's arrival. */
    *cntP = *cntP - 1;
    err = MTest_thread_unlock(&barrierLock);
    if (err) {
        fprintf(stderr, "Unlock failed in barrier!\n");
        return err;
    }
    /* Busy-wait (without holding the lock) until the last thread has
     * driven the counter to a non-positive value. */
    while (*cntP > 0);

    return err;
}
MTEST_THREAD_RETURN_TYPE run_test(void *arg) { int thread_id = (int)(long) arg; int i, j, peer; MPI_Status status[WINDOW]; MPI_Request req[WINDOW]; double start, end; int err; int local_num_threads = -1; if (tp[thread_id].use_proc_null) peer = MPI_PROC_NULL; else peer = (rank % 2) ? rank - 1 : rank + 1; err = MTest_thread_lock(&num_threads_lock); if (err) ABORT_MSG("unable to acquire lock, aborting\n"); local_num_threads = num_threads; err = MTest_thread_unlock(&num_threads_lock); if (err) ABORT_MSG("unable to release lock, aborting\n"); MTest_thread_barrier(num_threads); start = MPI_Wtime(); if (tp[thread_id].use_blocking_comm) { if ((rank % 2) == 0) { for (i = 0; i < LOOPS; i++) for (j = 0; j < WINDOW; j++) MPI_Send(sbuf, tp[thread_id].msg_size, MPI_CHAR, peer, 0, MPI_COMM_WORLD); } else { for (i = 0; i < LOOPS; i++) for (j = 0; j < WINDOW; j++) MPI_Recv(rbuf, tp[thread_id].msg_size, MPI_CHAR, peer, 0, MPI_COMM_WORLD, &status[0]); } } else { for (i = 0; i < LOOPS; i++) { if ((rank % 2) == 0) { for (j = 0; j < WINDOW; j++) MPI_Isend(sbuf, tp[thread_id].msg_size, MPI_CHAR, peer, 0, MPI_COMM_WORLD, &req[j]); } else { for (j = 0; j < WINDOW; j++) MPI_Irecv(rbuf, tp[thread_id].msg_size, MPI_CHAR, peer, 0, MPI_COMM_WORLD, &req[j]); } MPI_Waitall(WINDOW, req, status); } } end = MPI_Wtime(); tp[thread_id].latency = 1000000.0 * (end - start) / (LOOPS * WINDOW); MTest_thread_barrier(num_threads); return MTEST_THREAD_RETVAL_IGN; }
/* FIXME this barrier interface should be changed to more closely match the * pthread interface. Specifically, nt should not be a barrier-time * parameter but an init-time parameter. The double-checked locking below * isn't valid according to pthreads, and it isn't guaranteed to be robust * in the presence of aggressive CPU/compiler optimization. */ int MTest_thread_barrier(int nt) { int err; if (nt < 0) nt = nthreads; if (bcount != nt) { /* One thread needs to initialize the barrier */ MTest_thread_lock(&barrierLock); /* Test again in case another thread already fixed the problem */ if (bcount != nt) { if (bcount > 0) { err = pthread_barrier_destroy(&barrier); if (err) return err; } err = pthread_barrier_init(&barrier, NULL, nt); if (err) return err; bcount = nt; } err = MTest_thread_unlock(&barrierLock); if (err) return err; } return pthread_barrier_wait(&barrier); }
void loops(void) { int i, nt; double latency, mrate, avg_latency, agg_mrate; int err; err = MTest_thread_lock_create(&num_threads_lock); if (err) ABORT_MSG("unable to create lock, aborting\n"); for (nt = 1; nt <= MAX_THREADS; nt++) { err = MTest_thread_lock(&num_threads_lock); if (err) ABORT_MSG("unable to acquire lock, aborting\n"); num_threads = 1; MPI_Barrier(MPI_COMM_WORLD); MTest_thread_barrier_init(); for (i = 1; i < nt; i++) { err = MTest_Start_thread(run_test, (void *)(long)i); if (err) { /* attempt to continue with fewer threads, we may be on a * thread-constrained platform like BG/P in DUAL mode */ break; } ++num_threads; } err = MTest_thread_unlock(&num_threads_lock); if (err) ABORT_MSG("unable to release lock, aborting\n"); if (nt > 1 && num_threads <= 1) { ABORT_MSG("unable to create any additional threads, aborting\n"); } run_test((void *) 0); /* we are thread 0 */ err = MTest_Join_threads(); if (err) { printf("error joining threads, err=%d", err); MPI_Abort(MPI_COMM_WORLD, 1); } MTest_thread_barrier_free(); latency = 0; for (i = 0; i < num_threads; i++) latency += tp[i].latency; latency /= num_threads; /* Average latency */ mrate = num_threads / latency; /* Message rate */ /* Global latency and message rate */ MPI_Reduce(&latency, &avg_latency, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); avg_latency /= size; MPI_Reduce(&mrate, &agg_mrate, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (!rank && verbose) { printf("Threads: %d; Latency: %.3f; Mrate: %.3f\n", num_threads, latency, mrate); } } err = MTest_thread_lock_free(&num_threads_lock); if (err) ABORT_MSG("unable to free lock, aborting\n"); }