Esempio n. 1
0
/*
 * Barrier built from two phases: a reduction (mp_reduce) followed by a
 * broadcast in which the core returned by get_sequentializer() calls
 * mp_send_ab() and every other core calls mp_receive_forward().
 *
 * measurement: optional out-parameter. When it is non-NULL AND the code is
 *              built with QRM_DBG_ENABLED, it receives bench_tsc() sampled
 *              between the reduction and the broadcast. Without
 *              QRM_DBG_ENABLED it is never written.
 *              NOTE(review): confirm that skipping the measurement in
 *              non-debug builds is intentional.
 *
 * With QRM_DBG_ENABLED the barrier counter _num_barrier is incremented on
 * entry and used for sanity checks on both phases.
 */
void mp_barrier(cycles_t *measurement)
{
    coreid_t tid = get_core_id();

#ifdef QRM_DBG_ENABLED
    // Debug builds advance the barrier counter and remember the value we
    // expect to see again after the broadcast.
    _num_barrier++;
    uint32_t round_seen = _num_barrier;
#endif

    debug_printfff(DBG__REDUCE, "barrier enter #%d\n", _num_barrier);

    // Reduction
    // --------------------------------------------------
#ifdef QRM_DBG_ENABLED
    uint32_t reduced = mp_reduce(_num_barrier);

    // Sanity check: on the sequentializer the reduced value must equal
    // the barrier counter multiplied by the number of threads.
    if (tid == get_sequentializer()) {
        assert (reduced == get_num_threads()*_num_barrier);
    }

    // Timestamp taken after the reduction, before the broadcast.
    if (measurement) {
        *measurement = bench_tsc();
    }
#else
    mp_reduce(_num_barrier);
#endif

    // Broadcast
    // --------------------------------------------------
    if (tid != get_sequentializer()) {
#ifdef QRM_DBG_ENABLED
        round_seen = mp_receive_forward(0);
#else
        mp_receive_forward(0);
#endif
    } else {
        mp_send_ab(_num_barrier);
    }

#ifdef QRM_DBG_ENABLED
    // The value received in the broadcast has to match our own counter;
    // print both values before the assertion fires.
    if (round_seen != _num_barrier) {
        debug_printf("ASSERTION fail %d != %d\n", round_seen, _num_barrier);
    }
    assert (round_seen == _num_barrier);

    // Add a shared memory barrier to absolutely make sure that
    // everybody finished the barrier before leaving - this simplifies
    // debugging, as the program will get stuck if barriers are
    // broken, rather than some threads (wrongly) continuing and
    // causing problems somewhere else.
    // NOTE(review): the disabled code below uses DBG_REDUCE while the rest
    // of this function uses DBG__REDUCE - check before enabling it.
#if 0 // Enable separately
    debug_printfff(DBG_REDUCE, "finished barrier .. waiting for others\n");
    shl_barrier_shm(get_num_threads());
#endif
#endif

    debug_printfff(DBG__REDUCE, "barrier complete #%d\n", _num_barrier);
}
Esempio n. 2
0
/*
 * mp_sqrt_to -- square root of `a`, with optional remainder.
 *
 * The root is found by repeatedly replacing the estimate c with
 * (a / c + c) / 2 until the estimate stops decreasing.
 *
 *   r   destination for the root; when null, a fresh amp obtained from
 *       new_amp() is used and returned instead.
 *   a   operand. It is passed to mp_reduce() first, so it may be modified.
 *   rp  optional destination for the remainder a - root*root; ignored
 *       when null.
 *
 * Returns the amp holding the root (r, or the newly created one), or a
 * null pointer when a->denom is non-zero after mp_reduce(); in that case
 * nothing is computed and neither r nor rp is written.
 * NOTE(review): a non-zero denom presumably means `a` is not an integer -
 * confirm against the amp definition.
 */
amp *
mp_sqrt_to(amp *r, amp *a, amp *rp)

{
  amp *c;   /* current estimate; ends up holding the result */
  amp *b;   /* scratch: next estimate, later root*root */
  int l;    /* half the digit count of a, rounded down */
  int i;
  int j;

  mp_reduce(a);
  if (a->denom)
    return (amp*)0;
  l = a->len;
  l /= 2;
  if (r)
    c = r;
  else
    c = new_amp();
  MP_NEED(c,l+1);
  if (l) {
    /* Initial estimate: the l most significant digits of a. */
    for(i = l-1, j = a->len-1; i >= 0; i--,j--)
      c->data[i] = a->data[j];
    c->len = l;
  } else {
    if (a->len == 1 && a->data[0] <= 1) {
      /* a is a single digit holding 0 or 1: the root is a itself. */
      mp_copy_to(c,a);
      if (rp) {
        /*
         * Fill in the remainder on this path as well, using the same
         * computation as the general path below, so the caller never
         * reads a stale rp.
         */
        b = new_amp();
        mp_mul_to(b,c,c);
        mp_sub_to(rp,a,b);
        mp_free(b);
      }
      return c;
    }
    /* Fewer than two digits: start the iteration from 1. */
    MP_ASSIGN_SMALL(c,1);
  }
  MP_TOUCH(c);

  /*
   * One step is taken before the loop so that the loop's "still
   * decreasing" test starts from the result of a full step rather than
   * from the rough initial estimate.
   */
  b = new_amp();
  mp_div_to(b,a,c,(amp*)0);
  mp_add_to(b,b,c);
  mp_div_x_to(b,b,2,(mp_long*)0);
  do {
    mp_copy_to(c,b);
    mp_div_to(b,a,c,(amp*)0);
    mp_add_to(b,b,c);
    mp_div_x_to(b,b,2,(mp_long*)0);
  } while (mp_cmp(c,b) > 0);   /* presumably > 0 means c > b */

  if (rp) {
    /* remainder = a - c*c */
    mp_mul_to(b,c,c);
    mp_sub_to(rp,a,b);
  }
  mp_free(b);
  return c;
}