/*
 * Thread-pool task: applies daxpby_work() to the vectors described by a
 * struct TaskXY, BLOCK entries at a time, with work stealing.
 *
 *   arg  - pointer to the struct TaskXY for this call; the fields read are
 *          alpha, beta, number, x_beg, y_beg and iter.
 *   pool - thread pool handle used for the rank query and the locks.
 *
 * t->iter[p] holds the index of the next chunk belonging to thread p and is
 * only modified while lock p is held.  Each thread first drains its own
 * counter, advancing it by p_size per chunk, then scans every counter and
 * takes chunks from threads that still have work.
 *
 * NOTE(review): assumes t->iter has at least p_size entries and that the
 * caller seeds iter[p] so the strided chunks do not overlap - confirm
 * against the caller.
 *
 * Nothing is done when TPI_Rank() returns non-zero.
 */
static void task_axpby_work_steal( void * arg , TPI_ThreadPool pool )
{
  enum { BLOCK = UNROLL * 128 };

  int p_size ;
  int p_rank ;

  if ( ! TPI_Rank( pool , & p_rank , & p_size ) ) {

    struct TaskXY * const t = (struct TaskXY *) arg ;

    const double   a = t->alpha ;
    const double   b = t->beta ;
    const unsigned n = t->number ;

    const double * const x = t->x_beg ;
    double       * const y = t->y_beg ;

    unsigned * const all_iter = t->iter ;

    /* This thread's own counter is slot p_rank: it is guarded by lock
     * p_rank below and is the slot other threads steal from through
     * all_iter[p] under lock p.  (Using all_iter + p_size would make every
     * thread share one slot outside the range the steal loop scans, each
     * under a different lock.)
     */
    unsigned * const my_iter = all_iter + p_rank ;

    /* Phase 1: consume my own chunks. */
    {
      unsigned i ;

      for ( i = 0 ; i < n ; ) {

        /* Claim the next chunk under my lock so a stealing thread cannot
         * take the same one. */
        TPI_Lock( pool , p_rank );
        i = *my_iter * BLOCK ;
        *my_iter += p_size ;
        TPI_Unlock( pool , p_rank );

        if ( i < n ) {
          /* The final chunk may be shorter than BLOCK. */
          const unsigned len = BLOCK < n - i ? BLOCK : n - i ;
          daxpby_work( len, a, x + i, b, y + i );
        }
      }
    }

    /* Phase 2: finished my work, steal work from someone else. */
    {
      int working ;
      int p ;

      /* Keep sweeping until a full pass finds no counter with work left. */
      for ( working = 1 ; working ; ) {
        working = 0 ;

        for ( p = 0 ; p < p_size ; ++p ) {

          /* Unlocked peek; the value is re-read once the lock is held. */
          if ( all_iter[p] * BLOCK < n ) {

            /* A zero return from TPI_Trylock is treated as "lock acquired". */
            if ( ! TPI_Trylock( pool , p ) ) {
              const unsigned i = all_iter[p] * BLOCK ;
              all_iter[p] += p_size ;
              TPI_Unlock( pool , p );

              if ( i < n ) {
                const unsigned len = BLOCK < n - i ? BLOCK : n - i ;
                daxpby_work( len, a, x + i, b, y + i );
              }
            }

            /* Thread p appeared to have work, whether or not the lock was
             * obtained, so make another pass. */
            working = 1 ;
          }
        }
      }
    }
  }
}
/*
 * Thread-pool task: runs dot1_unroll() over this thread's contiguous slice
 * of t->x_beg and folds the two-entry partial result into t->x_sum while
 * holding lock 0.
 *
 *   arg  - pointer to the struct TaskX describing the vector.
 *   pool - thread pool handle used for the rank query and the lock.
 *
 * Nothing is done when TPI_Rank() returns non-zero.
 */
static void task_xddot_x_work( void * arg , TPI_ThreadPool pool )
{
  struct TaskX * const task = (struct TaskX *) arg ;
  double part[2] = { 0 , 0 };
  int p_size , p_rank ;

  if ( TPI_Rank( pool , & p_rank , & p_size ) ) { return ; }

  /* Slice for rank r is [ total*r/p_size , total*(r+1)/p_size ). */
  {
    const unsigned total     = task->number ;
    const unsigned rank_next = p_rank + 1 ;
    const unsigned first     = ( total * p_rank ) / p_size ;
    const unsigned count     = ( ( total * rank_next ) / p_size ) - first ;

    dot1_unroll( part , task->x_beg + first , count );
  }

  /* Accumulate into the shared result under lock 0. */
  {
    double * const sum = task->x_sum ;

    TPI_Lock( pool , 0 );
    SUM_ADD( sum , part[0] );
    SUM_ADD( sum , part[1] );
    TPI_Unlock( pool , 0 );
  }
}
/*
 * Thread-pool task: runs norm1() over this thread's contiguous slice of
 * t->x_beg and folds the two-entry partial result into t->x_sum while
 * holding lock 0.
 *
 *   arg  - pointer to the struct TaskX describing the vector.
 *   pool - thread pool handle used for the rank query and the lock.
 *
 * Nothing is done when TPI_Rank() returns non-zero.
 */
static void task_norm1_work( void * arg , TPI_ThreadPool pool )
{
  struct TaskX * const task = (struct TaskX *) arg ;
  double part[2] = { 0 , 0 };
  int p_size , p_rank ;

  if ( TPI_Rank( pool , & p_rank , & p_size ) ) { return ; }

  {
    const unsigned count     = task->number ;
    const unsigned rank_next = p_rank + 1 ;

    /* Slice for rank r is [ count*r/p_size , count*(r+1)/p_size ). */
    const double * const slice_beg = task->x_beg + ( count * p_rank ) / p_size ;
    const double * const slice_end = task->x_beg + ( count * rank_next ) / p_size ;

    double * const sum = task->x_sum ;

    norm1( part , slice_beg , slice_end );

    /* Accumulate into the shared result under lock 0. */
    TPI_Lock( pool , 0 );
    SUM_ADD( sum , part[0] );
    SUM_ADD( sum , part[1] );
    TPI_Unlock( pool , 0 );
  }
}
/*
 * Thread-pool task: runs add_array() over this thread's contiguous slice of
 * t->x_beg and merges the four-entry partial result into t->x_sum with
 * xdsum_add_dsum() while holding lock 0.
 *
 *   arg  - pointer to the struct TaskX describing the vector.
 *   pool - thread pool handle used for the rank query and the lock.
 *
 * Nothing is done when TPI_Rank() returns non-zero.
 */
static void task_sum_work( void * arg , TPI_ThreadPool pool )
{
  struct TaskX * const task = (struct TaskX *) arg ;
  double part[4] = { 0 , 0 , 0 , 0 };
  int p_size , p_rank ;

  if ( TPI_Rank( pool , & p_rank , & p_size ) ) { return ; }

  {
    const unsigned count     = task->number ;
    const unsigned rank_next = p_rank + 1 ;

    /* Slice for rank r is [ count*r/p_size , count*(r+1)/p_size ). */
    const double * const slice_beg = task->x_beg + ( count * p_rank ) / p_size ;
    const double * const slice_end = task->x_beg + ( count * rank_next ) / p_size ;

    double * const sum = task->x_sum ;

    add_array( part , slice_beg , slice_end );

    /* Merge into the shared result under lock 0. */
    TPI_Lock( pool , 0 );
    xdsum_add_dsum( sum , part );
    TPI_Unlock( pool , 0 );
  }
}
/*
 * Work routine: increments the int reached through work->info (which is
 * read as a pointer to an int pointer) while holding lock 0.
 *
 * A non-zero return from TPI_Lock(0) or TPI_Unlock(0) is reported on stderr
 * and the process is aborted.
 */
static void test_reduce_via_lock( TPI_Work * work )
{
  int * const counter = * ((int *const*) work->info );
  int status ;

  status = TPI_Lock(0);
  if ( status != 0 ) {
    fprintf(stderr,"TPI_Lock(0) = %d : FAILED\n", status);
    abort();
  }

  ++*counter ;

  status = TPI_Unlock(0);
  if ( status != 0 ) {
    fprintf(stderr,"TPI_Unlock(0) = %d : FAILED\n", status);
    abort();
  }
}
/* Calls TPI_Unlock on m_value when the guard is destroyed; the returned
 * status is discarded. */
~LockGuard()
{
  (void) TPI_Unlock( m_value );
}
/* Forwards to TPI_Unlock( n ) and returns its status unchanged. */
inline int Unlock( int n )
{
  const int status = TPI_Unlock( n );
  return status ;
}
/* Forwards to TPI_Unlock( pool , n ) and returns its status unchanged. */
inline int Unlock( ThreadPool pool , int n )
{
  const int status = TPI_Unlock( pool , n );
  return status ;
}