/* Atomically accumulate `val` into the double at `*p`.
 *
 * Three compile-time strategies:
 *   ATOMIC_FP_FE_EMUL     — emulate a per-word lock: CAS the slot to the
 *                           sentinel NAN_EMPTY (presumably an "in-update"
 *                           marker — TODO confirm its definition), do the
 *                           add privately, then store the result back.
 *   ATOMIC_FP_OPTIMISTIC  — classic optimistic CAS loop: read, add, try to
 *                           swap; on failure __atomic_compare_exchange
 *                           reloads `pv`, so the retry uses a fresh value.
 *   (default)             — rely on the compiler's `omp atomic` update.
 *
 * NOTE(review): the (int64_t*) casts type-pun a double through an integer
 * lvalue for the 8-byte atomic ops; this assumes sizeof(double) == 8 and
 * technically violates strict aliasing — it works with the GCC/Clang
 * __atomic builtins in practice, but worth confirming the build flags.
 */
static inline void
atomic_daccum (double *p, const double val)
{
#if defined(ATOMIC_FP_FE_EMUL)
  double pv, upd;
  int done = 0;
  do {
    /* Acquire-load the current value into pv. */
    __atomic_load ((int64_t*)p, (int64_t*)&pv, __ATOMIC_ACQUIRE);
    /* Claim the slot by swapping in the NAN_EMPTY sentinel (weak CAS:
       spurious failure just means another spin of the loop). */
    if (__atomic_compare_exchange ((int64_t*)p, (int64_t*)&pv,
                                   (int64_t*)&NAN_EMPTY, 1,
                                   __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) {
      /* We own the slot: add privately, then publish with release. */
      upd = pv + val;
      __atomic_store ((int64_t*)p, (int64_t*)&upd, __ATOMIC_RELEASE);
      done = 1;
    } else
      MM_PAUSE();  /* back off while another thread holds the slot */
  } while (!done);
#elif defined(ATOMIC_FP_OPTIMISTIC)
  double pv, upd;
  /* One acquire-load up front; on CAS failure `pv` is refreshed with the
     observed value, so no explicit reload is needed inside the loop. */
  __atomic_load ((int64_t*)p, (int64_t*)&pv, __ATOMIC_ACQUIRE);
  do {
    upd = pv + val;
    if (__atomic_compare_exchange ((int64_t*)p, (int64_t*)&pv,
                                   (int64_t*)&upd, 1,
                                   __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
      break;
    else
      MM_PAUSE();  /* contention: pause, then retry with refreshed pv */
  } while (1);
#else
  /* Fallback: let OpenMP serialize the read-modify-write. */
  OMP(omp atomic)
  *p += val;
#endif
}
/* Block the calling thread until every thread on its NUMA node has
 * reached this barrier.  Outside an OpenMP parallel region this is a
 * no-op.  When POSIX barriers are available the per-node pthread
 * barrier is used; otherwise we fall back to a (global) OpenMP barrier. */
void ULIBC_node_barrier(void) {
  if ( !omp_in_parallel() )
    return;  /* serial region: nothing to synchronize */
#if _POSIX_BARRIERS > 0
  const struct numainfo_t info = ULIBC_get_current_numainfo();
  struct NUMA_barrier_t *node_barrier = __barrier[info.node];
  assert( node_barrier );
  pthread_barrier_wait(&node_barrier->barrier);
#else
  OMP("omp barrier");
#endif // _POSIX_BARRIERS
}
/* Driver entry point (NOTE: this function continues beyond the visible
 * chunk — only the setup portion is documented here).
 *
 * Sequence: parse CLI options, initialize the stats subsystem, load the
 * initial graph plus the edge-action stream from disk, print summary
 * statistics, validate the batch size, and (under OpenMP) report the
 * thread count from a single master thread. */
int main (const int argc, char *argv[])
{
  /* Fills the file names and batch parameters from argv. */
  parse_args (argc, argv, &initial_graph_name, &action_stream_name,
              &batch_size, &nbatch);
  STATS_INIT();

  /* Loads CSR arrays (off/ind/weight) and the action stream into memory;
     graphmem/actionmem presumably own the backing allocations — TODO
     confirm who frees them. */
  load_graph_and_action_stream (initial_graph_name, &nv, &ne,
                                (int64_t**)&off, (int64_t**)&ind,
                                (int64_t**)&weight, (int64_t**)&graphmem,
                                action_stream_name, &naction,
                                (int64_t**)&action, (int64_t**)&actionmem);

  print_initial_graph_stats (nv, ne, batch_size, nbatch, naction);
  BATCH_SIZE_CHECK();

#if defined(_OPENMP)
  /* Report the OpenMP thread count once (master thread only). */
  OMP(omp parallel)
  {
    OMP(omp master)
    PRINT_STAT_INT64 ("num_threads", (long int) omp_get_num_threads());
  }
/* Workflow callback: produce the next batch of edge insertions from a
 * preloaded binary action stream (NOTE: the function's closing braces lie
 * beyond the visible chunk).
 *
 * On each call, up to batch_size (source,dest) pairs are copied out of
 * stream->action into *actions as weight-1 insertions timestamped with
 * the batch number.  *actions is grown via xrealloc when *count is too
 * small, and *count is updated to the number of actions produced.
 *
 * Returns STINGER_SUCCESS while more batches remain, STINGER_REMOVE when
 * the stream is exhausted, STINGER_ALLOC_FAILED if the grow fails. */
stinger_return_t
binary_stream_batch(stinger_t * S, stinger_workflow_t * wkflow,
                    void ** workspace, int64_t batch,
                    edge_action_t ** actions, int64_t * count)
{
  binary_stream_t * stream = binary_stream_from_void(S, workspace);

  if(stream->actno < stream->nbatch * stream->batch_size) {
    /* Clamp the batch end to the total number of available actions. */
    const int64_t endact = (stream->actno + stream->batch_size > stream->naction ?
                            stream->naction :
                            stream->actno + stream->batch_size);
    /* Actions are stored as flat (src,dst) int64 pairs. */
    int64_t *acts = &stream->action[2*stream->actno];
    int64_t numActions = endact - stream->actno;

    /* Grow the caller's buffer only when it is too small; the old
       capacity is tracked in *count. */
    if(*count < numActions) {
      *actions = xrealloc(*actions, sizeof(edge_action_t) * numActions);
      if(!(*actions))
        return STINGER_ALLOC_FAILED;
    }
    *count = numActions;

    /* NOTE(review): `k` is uint64_t but numActions is int64_t — a
       signed/unsigned comparison; harmless here since numActions >= 0,
       but worth normalizing the types. */
    MTA("mta assert parallel")
    MTA("mta block dynamic schedule")
    OMP("omp parallel for")
    for(uint64_t k = 0; k < numActions; k++) {
      const int64_t i = acts[2 * k];
      const int64_t j = acts[2 * k + 1];
      (*actions)[k].type = 0;       /* 0 = insertion (no deletes in stream) */
      (*actions)[k].source = i;
      (*actions)[k].dest = j;
      (*actions)[k].weight = 1;
      (*actions)[k].time = batch;   /* timestamp with the batch index */
    }

    /* Advance by batch_size (not numActions) — the final partial batch
       pushes actno past naction, which the exhaustion test below catches. */
    stream->actno += stream->batch_size;

    if(stream->actno >= stream->nbatch * stream->batch_size ||
       stream->actno >= stream->naction)
      return STINGER_REMOVE;  /* stream exhausted: deregister this source */
    else
      return STINGER_SUCCESS;
  } else {
    return STINGER_REMOVE;
/* Count how many of the given (s,t) source pairs are NOT connected within
 * numSteps BFS hops of each other.
 *
 * `sources` holds `num` pairs packed as [s0,t0,s1,t1,...].  For each pair,
 * a bounded BFS is run from both endpoints; if the two reachability mark
 * sets never intersect (or either endpoint has degree 0), the pair counts
 * as disconnected.  Returns the number of disconnected pairs.
 *
 * Parallelization: INC workers (MTA futures / OpenMP threads) each own a
 * private slice of the scratch arrays (Q, marks, queue heads, neighbor
 * buffer) and claim pairs dynamically via an atomic fetch-add on the
 * shared cursor `k` (stride 2, since pairs occupy two slots).  The result
 * counter `count` is likewise updated only with atomic fetch-adds. */
int64_t
st_conn_stinger (const struct stinger *G, const int64_t nv, const int64_t ne,
                 const int64_t * sources, const int64_t num,
                 const int64_t numSteps)
{
  int64_t k, x;

  /* One contiguous allocation per scratch array, INC worker slices each.
     (xmalloc presumably aborts on failure — TODO confirm.) */
  int64_t *Q_big = (int64_t *) xmalloc (INC * nv * sizeof (int64_t));
  int64_t *marks_s_big = (int64_t *) xmalloc (INC * nv * sizeof (int64_t));
  int64_t *marks_t_big = (int64_t *) xmalloc (INC * nv * sizeof (int64_t));
  int64_t *QHead_big = (int64_t *) xmalloc (INC * 2 * numSteps * sizeof (int64_t));
  int64_t *neighbors_big = (int64_t *) xmalloc (INC * ne * sizeof (int64_t));

  int64_t count = 0;  /* shared: # of disconnected pairs (atomic updates only) */

  k = 0;  /* shared work cursor over `sources`, advanced by fetch-add */
  x = 0;
  OMP ("omp parallel for")
  MTA ("mta assert parallel")
  MTA ("mta loop future")
  MTA ("mta assert nodep")
  MTA ("mta assert no alias")
  for (x = 0; x < INC; x++) {
    /* Worker-private views into the shared scratch allocations. */
    int64_t *Q = Q_big + x * nv;
    int64_t *marks_s = marks_s_big + x * nv;
    int64_t *marks_t = marks_t_big + x * nv;
    int64_t *QHead = QHead_big + x * 2 * numSteps;
    int64_t *neighbors = neighbors_big + x * ne;

    /* Dynamic work claiming: each fetch-add hands this worker the next
       unprocessed pair (claimedk is the index of the pair's `s` slot). */
    for (int64_t claimedk = stinger_int64_fetch_add (&k, 2); claimedk < 2 * num;
         claimedk = stinger_int64_fetch_add (&k, 2)) {
      int64_t s = sources[claimedk];
      int64_t deg_s = stinger_outdegree (G, s);
      int64_t t = sources[claimedk + 1];
      int64_t deg_t = stinger_outdegree (G, t);

      if (deg_s == 0 || deg_t == 0) {
        /* An isolated endpoint can never reach the other: disconnected. */
        stinger_int64_fetch_add (&count, 1);
      } else {
        /* Bounded BFS from each endpoint; marks_* record reachability.
           The same Q/QHead/neighbors scratch is reused for both runs. */
        bfs_stinger (G, nv, ne, s, marks_s, numSteps, Q, QHead, neighbors);
        bfs_stinger (G, nv, ne, t, marks_t, numSteps, Q, QHead, neighbors);

        /* Pairs are connected iff some vertex is reached from both sides.
           (fetch-add on the worker-local counter looks redundant, but is
           presumably needed for the MTA "nodep" inner loop — TODO confirm.) */
        int64_t local_count = 0;
        MTA ("mta assert nodep")
        for (int64_t j = 0; j < nv; j++) {
          if (marks_s[j] && marks_t[j])
            stinger_int64_fetch_add (&local_count, 1);
        }
        if (local_count == 0)
          stinger_int64_fetch_add (&count, 1);
      }
    }
  }

  free (neighbors_big);
  free (QHead_big);
  free (marks_t_big);
  free (marks_s_big);
  free (Q_big);
  return count;
}