/**
 * Atomically accumulate val into the double stored at *p.
 *
 * The strategy is chosen at compile time:
 *  - ATOMIC_FP_FE_EMUL: the 64-bit pattern NAN_EMPTY is swapped into *p as a
 *    "slot taken" marker, the sum is computed from the value that was there,
 *    and the sum is stored back with release ordering.
 *  - ATOMIC_FP_OPTIMISTIC: a compare-and-swap retry loop on the raw 64 bits.
 *  - otherwise: a plain += annotated with OMP(omp atomic) (presumably an
 *    OpenMP atomic pragma; the macro is defined outside this block).
 *
 * @param p    location to update; the __atomic builtins access it through
 *             an int64_t pointer, so it is treated as a 64-bit object.
 * @param val  amount to add to *p.
 */
static inline void
atomic_daccum (double *p, const double val)
{
#if defined(ATOMIC_FP_FE_EMUL)
  double pv, upd;
  int done = 0;
  do {
    __atomic_load ((int64_t*)p, (int64_t*)&pv, __ATOMIC_ACQUIRE);
    /* If the marker is already in place another thread owns the slot.
       Exchanging NAN_EMPTY for NAN_EMPTY would "succeed" and let two
       threads update at once (one of them storing NaN + val), so only
       attempt to claim the slot when it holds a real value. */
    if (__builtin_memcmp (&pv, &NAN_EMPTY, sizeof (pv)) != 0
        && __atomic_compare_exchange ((int64_t*)p, (int64_t*)&pv, (int64_t*)&NAN_EMPTY, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) {
      /* Slot claimed: pv is the value that was replaced by the marker. */
      upd = pv + val;
      __atomic_store ((int64_t*)p, (int64_t*)&upd, __ATOMIC_RELEASE);
      done = 1;
    } else
      MM_PAUSE();
  } while (!done);
#elif defined(ATOMIC_FP_OPTIMISTIC)
  double pv, upd;
  __atomic_load ((int64_t*)p, (int64_t*)&pv, __ATOMIC_ACQUIRE);
  do {
    upd = pv + val;
    /* On failure the builtin refreshes pv with the current contents of *p,
       so the next pass recomputes the sum from up-to-date data. */
    if (__atomic_compare_exchange ((int64_t*)p, (int64_t*)&pv, (int64_t*)&upd, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
      break;
    else
      MM_PAUSE();
  } while (1);
#else
  OMP(omp atomic) *p += val;
#endif
}
Example #2
0
/*
 * Barrier for the calling thread's NUMA node.
 *
 * Does nothing when called outside an OpenMP parallel region.  With POSIX
 * barriers available, the caller waits on the pthread barrier found in
 * __barrier[] at the index given by the node field of the current numainfo.
 * Otherwise it falls back to OMP("omp barrier") (presumably an OpenMP
 * barrier pragma; the macro is defined outside this block).
 */
void ULIBC_node_barrier(void) {
  /* Nothing to synchronise with outside a parallel region. */
  if ( !omp_in_parallel() )
    return;

#if _POSIX_BARRIERS > 0
  const struct numainfo_t self = ULIBC_get_current_numainfo();
  struct NUMA_barrier_t * const node_barrier = __barrier[self.node];

  /* The barrier for this node must have been set up beforehand. */
  assert( node_barrier );
  pthread_barrier_wait(&node_barrier->barrier);
#else
  OMP("omp barrier");
#endif // _POSIX_BARRIERS
}
Example #3
0
int
main (const int argc, char *argv[])
{
  parse_args (argc, argv, &initial_graph_name, &action_stream_name, &batch_size, &nbatch);
  STATS_INIT();

  load_graph_and_action_stream (initial_graph_name, &nv, &ne, (int64_t**)&off,
	      (int64_t**)&ind, (int64_t**)&weight, (int64_t**)&graphmem,
	      action_stream_name, &naction, (int64_t**)&action, (int64_t**)&actionmem);

  print_initial_graph_stats (nv, ne, batch_size, nbatch, naction);
  BATCH_SIZE_CHECK();

#if defined(_OPENMP)
  OMP(omp parallel)
  {
  OMP(omp master)
  PRINT_STAT_INT64 ("num_threads", (long int) omp_get_num_threads());
  }
stinger_return_t
binary_stream_batch(stinger_t * S, stinger_workflow_t * wkflow, void ** workspace, int64_t batch, edge_action_t ** actions, int64_t * count) {
  binary_stream_t * stream = binary_stream_from_void(S, workspace);

  if(stream->actno < stream->nbatch * stream->batch_size) {

    const int64_t endact = (stream->actno + stream->batch_size > stream->naction ? stream->naction : stream->actno + stream->batch_size);
    int64_t *acts = &stream->action[2*stream->actno];
    int64_t numActions = endact - stream->actno;

    if(*count < numActions) {
      *actions = xrealloc(*actions, sizeof(edge_action_t) * numActions);
      if(!(*actions))
	return STINGER_ALLOC_FAILED;
    }
    *count = numActions;

    MTA("mta assert parallel")
    MTA("mta block dynamic schedule")
    OMP("omp parallel for")
    for(uint64_t k = 0; k < numActions; k++) {
      const int64_t i = acts[2 * k];
      const int64_t j = acts[2 * k + 1];
      
      (*actions)[k].type	  = 0;
      (*actions)[k].source	  = i;
      (*actions)[k].dest	  = j;
      (*actions)[k].weight	  = 1;
      (*actions)[k].time	  = batch;
    }

    stream->actno += stream->batch_size;
    if(stream->actno >= stream->nbatch * stream->batch_size || stream->actno >= stream->naction)
      return STINGER_REMOVE;
    else
      return STINGER_SUCCESS;
  } else {
    return STINGER_REMOVE;
Example #5
0
/**
 * Count the (s, t) pairs in sources[] for which the two searches share no
 * marked vertex.
 *
 * sources[] is read as num consecutive pairs: sources[2*i] is s and
 * sources[2*i + 1] is t.  A pair is counted when either endpoint has
 * out-degree zero, or when, after running bfs_stinger from s and from t,
 * no vertex j in [0, nv) has both marks_s[j] and marks_t[j] non-zero.
 * (Presumably bfs_stinger marks the vertices reached within numSteps
 * steps -- confirm against its definition.)
 *
 * Work is spread over INC workers.  Each worker owns one slice of every
 * scratch buffer and claims pairs through a shared fetch-and-add cursor.
 *
 * @param G         graph handed to stinger_outdegree and bfs_stinger.
 * @param nv        vertex count; sizes the queue and mark slices.
 * @param ne        edge count; sizes the neighbor slice.
 * @param sources   2 * num vertex ids, stored as (s, t) pairs.
 * @param num       number of pairs in sources.
 * @param numSteps  step limit forwarded to bfs_stinger; also sizes QHead.
 * @return          number of pairs counted as described above.
 */
int64_t
st_conn_stinger (const struct stinger *G, const int64_t nv, const int64_t ne,
                 const int64_t * sources, const int64_t num,
                 const int64_t numSteps)
{
  int64_t k = 0;                /* shared cursor into sources[] */
  int64_t x = 0;                /* worker / slice index */
  int64_t count = 0;            /* shared result, updated by fetch-add */

  /* One allocation per scratch buffer, cut into INC slices below. */
  int64_t *queue_pool = (int64_t *) xmalloc (INC * nv * sizeof (int64_t));
  int64_t *marks_s_pool = (int64_t *) xmalloc (INC * nv * sizeof (int64_t));
  int64_t *marks_t_pool = (int64_t *) xmalloc (INC * nv * sizeof (int64_t));
  int64_t *qhead_pool =
    (int64_t *) xmalloc (INC * 2 * numSteps * sizeof (int64_t));
  int64_t *neighbor_pool = (int64_t *) xmalloc (INC * ne * sizeof (int64_t));

  OMP ("omp parallel for")
    MTA ("mta assert parallel")
    MTA ("mta loop future")
    MTA ("mta assert nodep")
    MTA ("mta assert no alias")
    for (x = 0; x < INC; x++) {
      /* Private scratch space for worker x. */
      int64_t *queue = queue_pool + x * nv;
      int64_t *marks_s = marks_s_pool + x * nv;
      int64_t *marks_t = marks_t_pool + x * nv;
      int64_t *qhead = qhead_pool + x * 2 * numSteps;
      int64_t *neighbors = neighbor_pool + x * ne;

      for (;;) {
        /* Claim the next unprocessed pair; stop once the list is used up. */
        const int64_t pair = stinger_int64_fetch_add (&k, 2);
        if (pair >= 2 * num)
          break;

        const int64_t s = sources[pair];
        const int64_t deg_s = stinger_outdegree (G, s);
        const int64_t t = sources[pair + 1];
        const int64_t deg_t = stinger_outdegree (G, t);

        /* An endpoint without out-edges is counted without searching. */
        if (deg_s == 0 || deg_t == 0) {
          stinger_int64_fetch_add (&count, 1);
          continue;
        }

        /* Both searches reuse the same queue, qhead and neighbor slices. */
        bfs_stinger (G, nv, ne, s, marks_s, numSteps, queue, qhead, neighbors);
        bfs_stinger (G, nv, ne, t, marks_t, numSteps, queue, qhead, neighbors);

        /* Number of vertices marked by both searches. */
        int64_t both_marked = 0;

        MTA ("mta assert nodep")
          for (int64_t v = 0; v < nv; v++) {
            if (marks_s[v] != 0 && marks_t[v] != 0)
              stinger_int64_fetch_add (&both_marked, 1);
          }

        if (!both_marked)
          stinger_int64_fetch_add (&count, 1);
      }
    }

  free (neighbor_pool);
  free (qhead_pool);
  free (marks_t_pool);
  free (marks_s_pool);
  free (queue_pool);

  return count;

}