/*
 * Thread-pool task: run daxpby_work() over the index range [0, n) in chunks
 * of BLOCK elements, with work stealing between threads.
 *
 * arg  - pointer to a struct TaskXY; the fields read here are alpha, beta,
 *        number (element count n), x_beg, y_beg and iter.
 * pool - thread pool handle passed through to TPI_Rank / TPI_Lock /
 *        TPI_Trylock / TPI_Unlock.
 *
 * t->iter is used as an array of p_size block counters, one per thread rank,
 * where counter p is guarded by pool lock p.  A counter value c denotes the
 * block starting at element c * BLOCK; claiming a block advances the counter
 * by p_size.
 * NOTE(review): presumably the caller initializes iter[p] = p so that the
 * ranks start on disjoint, interleaved blocks -- confirm against the caller.
 *
 * If TPI_Rank() returns nonzero the task does nothing.
 */
static void task_axpby_work_steal( void * arg , TPI_ThreadPool pool )
{
  enum { BLOCK = UNROLL * 128 };

  int p_size ;
  int p_rank ;

  if ( ! TPI_Rank( pool , & p_rank , & p_size ) ) {

    struct TaskXY * const t = (struct TaskXY *) arg ;

    const double   a = t->alpha ;
    const double   b = t->beta ;
    const unsigned n = t->number ;

    const double * const x = t->x_beg ;
    double       * const y = t->y_beg ;

    unsigned * const all_iter = t->iter ;

    /* This thread's own counter is the one guarded by lock p_rank.
     * (Indexing with p_size instead would address one element past the
     * per-rank counters and make every thread share that slot.)
     */
    unsigned * const my_iter = all_iter + p_rank ;

    /* Phase 1: drain the blocks assigned to this thread. */
    {
      unsigned i ;

      for ( i = 0 ; i < n ; ) {

        /* Claim the next block under our own lock; a thief may be
         * advancing this same counter concurrently.
         */
        TPI_Lock( pool , p_rank );
        i = *my_iter * BLOCK ;
        *my_iter += p_size ;
        TPI_Unlock( pool , p_rank );

        if ( i < n ) {
          /* The last block may be shorter than BLOCK. */
          const unsigned len = BLOCK < n - i ? BLOCK : n - i ;
          daxpby_work( len, a, x + i, b, y + i );
        }
      }
    }

    /* Phase 2: finished my work, steal work from someone else. */
    {
      int working ;
      int p ;

      /* Keep sweeping over all ranks until a full sweep finds every
       * counter past the end of the range.
       */
      for ( working = 1 ; working ; ) {

        working = 0 ;

        for ( p = 0 ; p < p_size ; ++p ) {

          /* Unlocked peek: only a hint that rank p may still have work.
           * The authoritative check is repeated on the value read under
           * the lock below.
           */
          if ( all_iter[p] * BLOCK < n ) {

            /* Non-blocking attempt; a zero return is treated as
             * "lock acquired".  On failure just move on and revisit
             * this rank on the next sweep.
             */
            if ( ! TPI_Trylock( pool , p ) ) {

              const unsigned i = all_iter[p] * BLOCK ;
              all_iter[p] += p_size ;
              TPI_Unlock( pool , p );

              if ( i < n ) {
                const unsigned len = BLOCK < n - i ? BLOCK : n - i ;
                daxpby_work( len, a, x + i, b, y + i );
              }
            }

            /* Work was (or may still be) pending: sweep again. */
            working = 1 ;
          }
        }
      }
    }
  }
}
/*
 * Thin forwarding wrapper around TPI_Trylock().
 *
 * pool - thread pool handle, passed through unchanged.
 * n    - index of the pool lock to try, passed through unchanged.
 *
 * Returns the value of TPI_Trylock( pool , n ) unmodified.  Code elsewhere
 * in this file treats a zero return from TPI_Trylock as "lock acquired".
 */
inline int Trylock( ThreadPool pool , int n )
{
  const int status = TPI_Trylock( pool , n );
  return status ;
}