/*
 * mp_barrier - barrier built from a reduction followed by a broadcast.
 *
 * Every caller first takes part in mp_reduce().  Afterwards the core whose
 * get_core_id() equals get_sequentializer() calls mp_send_ab(), while all
 * other cores call mp_receive_forward().
 *
 * measurement: optional output.  When non-NULL *and* QRM_DBG_ENABLED is
 *              defined, it receives bench_tsc() sampled between the
 *              reduction and the broadcast.
 *              NOTE(review): nothing is written to it when QRM_DBG_ENABLED
 *              is undefined -- confirm that this is intended.
 *
 * With QRM_DBG_ENABLED the counter _num_barrier is incremented on every
 * call and used to cross-check both phases:
 *   - on the sequentializer, the reduction result must equal
 *     get_num_threads() * _num_barrier;
 *   - the value obtained in the broadcast phase must equal _num_barrier.
 */
void mp_barrier(cycles_t *measurement)
{
    coreid_t self = get_core_id();

#ifdef QRM_DBG_ENABLED
    ++_num_barrier;
    /* Pre-set so that the check after the broadcast also holds on the
     * sequentializer, which sends rather than receives. */
    uint32_t round_received = _num_barrier;
#endif

    debug_printfff(DBG__REDUCE, "barrier enter #%d\n", _num_barrier);

    /* Phase 1: reduction */
#ifdef QRM_DBG_ENABLED
    uint32_t reduced = mp_reduce(_num_barrier);

    /* Sanity check of the reduction result on the sequentializer. */
    if (self == get_sequentializer()) {
        assert(reduced == get_num_threads() * _num_barrier);
    }

    if (measurement) {
        *measurement = bench_tsc();
    }
#else
    mp_reduce(_num_barrier);
#endif

    /* Phase 2: broadcast */
    if (self != get_sequentializer()) {
#ifdef QRM_DBG_ENABLED
        round_received = mp_receive_forward(0);
#else
        mp_receive_forward(0);
#endif
    } else {
        mp_send_ab(_num_barrier);
    }

#ifdef QRM_DBG_ENABLED
    if (round_received != _num_barrier) {
        debug_printf("ASSERTION fail %d != %d\n", round_received, _num_barrier);
    }
    assert(round_received == _num_barrier);

    /*
     * Optional shared-memory barrier (disabled by default): makes sure that
     * everybody has finished the barrier before anyone leaves.  This
     * simplifies debugging, because the program gets stuck right here if
     * barriers are broken, instead of some threads wrongly continuing and
     * causing problems somewhere else.
     */
#if 0 // Enable separately
    debug_printfff(DBG_REDUCE, "finished barrier .. waiting for others\n");
    shl_barrier_shm(get_num_threads());
#endif
#endif

    debug_printfff(DBG__REDUCE, "barrier complete #%d\n", _num_barrier);
}
/*
 * mp_sqrt_to - square root of `a` by Newton iteration, c <- (a/c + c) / 2.
 *
 * r  : destination object; when null, a fresh one is obtained from
 *      new_amp() and returned.
 * a  : operand.  It is passed through mp_reduce() first; if a->denom is
 *      non-zero afterwards, no root is computed.
 * rp : optional; when non-null it receives a - c*c, where c is the
 *      returned root.
 *
 * Returns the result object (r, or the newly created amp), or 0 when
 * a->denom is non-zero.
 *
 * NOTE(review): r, a and rp are presumably expected to be distinct objects;
 * aliasing is not handled here -- confirm against callers.
 */
amp *mp_sqrt_to(amp *r, amp *a, amp *rp)
{
    amp *c;     /* current estimate; becomes the result */
    amp *b;     /* next estimate; reused as scratch for c*c at the end */
    int l;
    int i;
    int j;

    mp_reduce(a);
    if (a->denom)
        return 0;

    l = a->len;
    l /= 2;

    if (r)
        c = r;
    else
        c = new_amp();
    MP_NEED(c, l + 1);

    if (l) {
        /* Seed the iteration with the l highest-indexed elements of a->data. */
        for (i = l - 1, j = a->len - 1; i >= 0; i--, j--)
            c->data[i] = a->data[j];
        c->len = l;
    } else {
        if (a->len == 1 && a->data[0] <= 1) {
            /* a is 0 or 1 and therefore its own root. */
            mp_copy_to(c, a);
            /*
             * The remainder must be delivered on this path as well.
             * Because c == a and a is 0 or 1, c*c == c, so a - c*c is
             * simply a - c (i.e. zero).
             */
            if (rp)
                mp_sub_to(rp, a, c);
            return c;
        }
        MP_ASSIGN_SMALL(c, 1);
    }
    MP_TOUCH(c);

    /*
     * One step is taken unconditionally before the loop: the seed may be
     * smaller than the root, and the loop below only terminates correctly
     * once the estimates are approaching from above.
     */
    b = new_amp();
    mp_div_to(b, a, c, (amp *)0);
    mp_add_to(b, b, c);
    mp_div_x_to(b, b, 2, (mp_long *)0);

    /* Iterate while the estimate keeps strictly decreasing. */
    do {
        mp_copy_to(c, b);
        mp_div_to(b, a, c, (amp *)0);
        mp_add_to(b, b, c);
        mp_div_x_to(b, b, 2, (mp_long *)0);
    } while (mp_cmp(c, b) > 0);

    if (rp) {
        /* Remainder: a - c*c, with b reused as scratch space. */
        mp_mul_to(b, c, c);
        mp_sub_to(rp, a, b);
    }

    mp_free(b);
    return c;
}