int search_GPU(int s) { int i; int find = -1; a = (float *) malloc(sizeof(float) * SIZE); c = (float)s-5; init(s); double start, finish, elapsed; start = (double) clock() / (CLOCKS_PER_SEC*1000); #pragma acc data copyin(a[0:s],c) copy(find) { #pragma acc kernels { #pragma acc loop independent { for (i = 0; i < s; ++i) { if(a[i] == c) { find = i; i=s; } } } } } acc_free(acc_deviceptr(a)); finish = (double) clock() / (CLOCKS_PER_SEC*1000); elapsed = finish - start; fprintf(out,"%.6lf,",elapsed); //print_result(s,find); free(a); return find; }
/*
 * Self-checking OpenACC test: with x mapped by an enclosing data region,
 * reads x inside a host_data use_device(x) region -- once from within a
 * nested (clause-less) data region and once directly -- and aborts unless
 * both values equal acc_deviceptr(x). Returns 0 when the check passes.
 */
int main (int argc, char* argv[])
{
  int x[N];
  int *seen_outside_nested;

  #pragma acc data copyin (x[0:N])
  {
    int *seen_in_nested;

    #pragma acc host_data use_device (x)
    {
      /* Value of x as observed inside the nested data region. */
      #pragma acc data
      {
        seen_in_nested = x;
      }
      /* Value of x as observed directly in the host_data region. */
      seen_outside_nested = x;
    }

    /* Both observations must match the device address of x. */
    int nested_matches_device = (seen_in_nested == acc_deviceptr (x));
    if (!nested_matches_device || seen_outside_nested != seen_in_nested)
      abort ();
  }

  return 0;
}
/*
 * Mechanism entry points built around the 16-state kinetic scheme
 * _kinetic_kstates_NMDA16_2. The definitions below are, in order:
 * nrn_init, _nrn_current, three launcher prototypes, nrn_cur, nrn_state,
 * terminal and _initlists, followed by the closing brace of namespace
 * coreneuron_lib.
 * NOTE(review): this looks machine-generated and relies on macros
 * (_threadargs_, _psize, _nd_area, state names, ...) defined outside this
 * view; confirm before hand-editing.
 *
 * Loop layout, shared by nrn_init / nrn_cur / nrn_state:
 *   LAYOUT == 1 : _p and _ppvar are recomputed per instance from
 *                 _ml->_data + _iml*_psize and _ml->_pdata + _iml*_ppsize.
 *   LAYOUT == 0 : _p and _ppvar are set once to _ml->_data / _ml->_pdata and
 *                 the loop is preceded by vector/accelerator loop pragma
 *                 macros.
 *   otherwise   : compilation stops with #error; the for (;;) that follows
 *                 only exists to keep braces balanced for formatting tools.
 *
 * nrn_init(_nt, _ml, _type)
 *   - Issues an OpenACC update of _mechtype to the device when
 *     _nt->compute_gpu is set.
 *   - If _thread[_spth1]._pvoid is null, creates it with
 *     nrn_cons_sparseobj(_kinetic_kstates_NMDA16_2, 16, ...). Under _OPENACC
 *     with compute_gpu set, the device address of that object is then copied
 *     into the device copy of the thread datum with acc_memcpy_to_device.
 *   - Calls _acc_globals_update().
 *   - Unless _nrn_skip_initmodel is non-zero, visits each of the
 *     _ml->_nodecount instances: sets _tsav to -1e20, loads the voltage from
 *     _nt->_actual_v at the instance's node index, assigns nao = _ion_nao and
 *     calls initmodel().
 *   - With NET_RECEIVE_BUFFERING: when OpenACC is enabled, waits on
 *     stream_id, updates _nsb->_cnt on the host and calls
 *     update_net_send_buffer_on_host(). It then passes every buffered entry
 *     to net_sem_from_gpu(), resets _nsb->_cnt to 0 and (OpenACC only)
 *     updates the device copy of the counter.
 *
 * _nrn_current(..., _v)
 *   - Sets v = _v, g = w * gmax * O, i = g * (v - Erev) and returns i.
 *
 * nrn_cur(_nt, _ml, _type)
 *   - When ENABLE_CUDA_INTERFACE and _OPENACC are defined and DISABLE_OPENACC
 *     is not, it only looks up device pointers for _nt and _ml, calls
 *     nrn_cur_launcher() and returns; the rest of the body is then
 *     unreachable.
 *   - Otherwise, per instance: evaluates _nrn_current at _v + .001 and at
 *     _v, forms _g = (_g - _rhs)/.001 (a finite-difference slope), and scales
 *     both _g and _rhs by 1.e2/_nd_area.
 *   - Under _OPENACC with compute_gpu set, _rhs is subtracted from
 *     _vec_rhs[_nd_idx] and _g added to _vec_d[_nd_idx] via atomic updates.
 *     In every other case the values are stored in the shadow arrays at
 *     index _iml and applied to _vec_rhs / _vec_d by a second loop over the
 *     instances.
 *   - The #ifdef _OPENACC / #else pair around that second loop opens a
 *     different number of braces in each branch; the braces after #endif
 *     close whichever branch was compiled.
 *
 * nrn_state(_nt, _ml, _type)
 *   - Same launcher short-circuit as nrn_cur, using nrn_state_launcher().
 *   - Otherwise, per instance: loads the voltage, assigns nao = _ion_nao and
 *     calls sparse_thread() on the object stored in _thread[_spth1]._pvoid
 *     with 16 states, _slist1/_dlist1, &t, dt and _linmat1.
 *   - _kinetic_kstates_NMDA16_2 is defined to 0 at that point if it is not
 *     already a macro.
 *
 * terminal()
 *   - Empty.
 *
 * _initlists()
 *   - Runs its body only on the first call (guarded by the static _first).
 *   - Allocates _slist1 and _dlist1 with 16 ints each; the malloc results
 *     are not checked. Fills entry k of each list with the pointer
 *     difference &(NAME) - _p, where _p points at a local double.
 *     NOTE(review): presumably the state names expand to _p-relative
 *     accesses so these differences are slot indices -- confirm against the
 *     macro definitions.
 *   - Copies both lists to the device with OpenACC enter data and clears
 *     _first.
 */
void nrn_init(NrnThread* _nt, Memb_list* _ml, int _type){ double* _p; Datum* _ppvar; ThreadDatum* _thread; double _v, v; int* _ni; int _iml, _cntml_padded, _cntml_actual; _ni = _ml->_nodeindices; _cntml_actual = _ml->_nodecount; _cntml_padded = _ml->_nodecount_padded; _thread = _ml->_thread; #pragma acc update device (_mechtype) if(_nt->compute_gpu) if (!_thread[_spth1]._pvoid) { _thread[_spth1]._pvoid = nrn_cons_sparseobj(_kinetic_kstates_NMDA16_2, 16, _ml, _threadargs_); #ifdef _OPENACC if (_nt->compute_gpu) { void* _d_so = (void*) acc_deviceptr(_thread[_spth1]._pvoid); ThreadDatum* _d_td = (ThreadDatum*)acc_deviceptr(_thread); acc_memcpy_to_device(&(_d_td[_spth1]._pvoid), &_d_so, sizeof(void*)); } #endif } _acc_globals_update(); double * _nt_data = _nt->_data; double * _vec_v = _nt->_actual_v; int stream_id = _nt->stream_id; if (_nrn_skip_initmodel == 0) { #if LAYOUT == 1 /*AoS*/ for (_iml = 0; _iml < _cntml_actual; ++_iml) { _p = _ml->_data + _iml*_psize; _ppvar = _ml->_pdata + _iml*_ppsize; #elif LAYOUT == 0 /*SoA*/ _p = _ml->_data; _ppvar = _ml->_pdata; /* insert compiler dependent ivdep like pragma */ _PRAGMA_FOR_VECTOR_LOOP_ _PRAGMA_FOR_INIT_ACC_LOOP_ for (_iml = 0; _iml < _cntml_actual; ++_iml) { #else /* LAYOUT > 1 */ /*AoSoA*/ #error AoSoA not implemented. 
for (;;) { /* help clang-format properly indent */ #endif int _nd_idx = _ni[_iml]; _tsav = -1e20; _v = _vec_v[_nd_idx]; _PRCELLSTATE_V v = _v; _PRCELLSTATE_V nao = _ion_nao; initmodel(_threadargs_); } } #if NET_RECEIVE_BUFFERING NetSendBuffer_t* _nsb = _ml->_net_send_buffer; #if defined(_OPENACC) && !defined(DISABLE_OPENACC) #pragma acc wait(stream_id) #pragma acc update self(_nsb->_cnt) if(_nt->compute_gpu) update_net_send_buffer_on_host(_nt, _nsb); #endif {int _i; for (_i=0; _i < _nsb->_cnt; ++_i) { net_sem_from_gpu(_nsb->_sendtype[_i], _nsb->_vdata_index[_i], _nsb->_weight_index[_i], _nt->_id, _nsb->_pnt_index[_i], _nsb->_nsb_t[_i], _nsb->_nsb_flag[_i]); }} _nsb->_cnt = 0; #if defined(_OPENACC) && !defined(DISABLE_OPENACC) #pragma acc update device(_nsb->_cnt) if(_nt->compute_gpu) #endif #endif } static double _nrn_current(_threadargsproto_, double _v){double _current=0.;v=_v;{ { g = w * gmax * O ; i = g * ( v - Erev ) ; } _current += i; } return _current; } #if defined(ENABLE_CUDA_INTERFACE) && defined(_OPENACC) void nrn_state_launcher(NrnThread*, Memb_list*, int, int); void nrn_jacob_launcher(NrnThread*, Memb_list*, int, int); void nrn_cur_launcher(NrnThread*, Memb_list*, int, int); #endif void nrn_cur(NrnThread* _nt, Memb_list* _ml, int _type) { double* _p; Datum* _ppvar; ThreadDatum* _thread; int* _ni; double _rhs, _g, _v, v; int _iml, _cntml_padded, _cntml_actual; _ni = _ml->_nodeindices; _cntml_actual = _ml->_nodecount; _cntml_padded = _ml->_nodecount_padded; _thread = _ml->_thread; double * _vec_rhs = _nt->_actual_rhs; double * _vec_d = _nt->_actual_d; double * _vec_shadow_rhs = _nt->_shadow_rhs; double * _vec_shadow_d = _nt->_shadow_d; #if defined(ENABLE_CUDA_INTERFACE) && defined(_OPENACC) && !defined(DISABLE_OPENACC) NrnThread* d_nt = acc_deviceptr(_nt); Memb_list* d_ml = acc_deviceptr(_ml); nrn_cur_launcher(d_nt, d_ml, _type, _cntml_actual); return; #endif double * _nt_data = _nt->_data; double * _vec_v = _nt->_actual_v; int stream_id = 
_nt->stream_id; #if LAYOUT == 1 /*AoS*/ for (_iml = 0; _iml < _cntml_actual; ++_iml) { _p = _ml->_data + _iml*_psize; _ppvar = _ml->_pdata + _iml*_ppsize; #elif LAYOUT == 0 /*SoA*/ _p = _ml->_data; _ppvar = _ml->_pdata; /* insert compiler dependent ivdep like pragma */ _PRAGMA_FOR_VECTOR_LOOP_ _PRAGMA_FOR_CUR_SYN_ACC_LOOP_ for (_iml = 0; _iml < _cntml_actual; ++_iml) { #else /* LAYOUT > 1 */ /*AoSoA*/ #error AoSoA not implemented. for (;;) { /* help clang-format properly indent */ #endif int _nd_idx = _ni[_iml]; _v = _vec_v[_nd_idx]; _PRCELLSTATE_V nao = _ion_nao; _g = _nrn_current(_threadargs_, _v + .001); { _rhs = _nrn_current(_threadargs_, _v); } _g = (_g - _rhs)/.001; double _mfact = 1.e2/(_nd_area); _g *= _mfact; _rhs *= _mfact; _PRCELLSTATE_G #ifdef _OPENACC if(_nt->compute_gpu) { #pragma acc atomic update _vec_rhs[_nd_idx] -= _rhs; #pragma acc atomic update _vec_d[_nd_idx] += _g; } else { _vec_shadow_rhs[_iml] = _rhs; _vec_shadow_d[_iml] = _g; } #else _vec_shadow_rhs[_iml] = _rhs; _vec_shadow_d[_iml] = _g; #endif } #ifdef _OPENACC if(!(_nt->compute_gpu)) { for (_iml = 0; _iml < _cntml_actual; ++_iml) { int _nd_idx = _ni[_iml]; _vec_rhs[_nd_idx] -= _vec_shadow_rhs[_iml]; _vec_d[_nd_idx] += _vec_shadow_d[_iml]; } #else for (_iml = 0; _iml < _cntml_actual; ++_iml) { int _nd_idx = _ni[_iml]; _vec_rhs[_nd_idx] -= _vec_shadow_rhs[_iml]; _vec_d[_nd_idx] += _vec_shadow_d[_iml]; #endif } } void nrn_state(NrnThread* _nt, Memb_list* _ml, int _type) { double* _p; Datum* _ppvar; ThreadDatum* _thread; double v, _v = 0.0; int* _ni; int _iml, _cntml_padded, _cntml_actual; _ni = _ml->_nodeindices; _cntml_actual = _ml->_nodecount; _cntml_padded = _ml->_nodecount_padded; _thread = _ml->_thread; #if defined(ENABLE_CUDA_INTERFACE) && defined(_OPENACC) && !defined(DISABLE_OPENACC) NrnThread* d_nt = acc_deviceptr(_nt); Memb_list* d_ml = acc_deviceptr(_ml); nrn_state_launcher(d_nt, d_ml, _type, _cntml_actual); return; #endif double * _nt_data = _nt->_data; double * _vec_v = 
_nt->_actual_v; int stream_id = _nt->stream_id; #if LAYOUT == 1 /*AoS*/ for (_iml = 0; _iml < _cntml_actual; ++_iml) { _p = _ml->_data + _iml*_psize; _ppvar = _ml->_pdata + _iml*_ppsize; #elif LAYOUT == 0 /*SoA*/ _p = _ml->_data; _ppvar = _ml->_pdata; /* insert compiler dependent ivdep like pragma */ _PRAGMA_FOR_VECTOR_LOOP_ _PRAGMA_FOR_STATE_ACC_LOOP_ for (_iml = 0; _iml < _cntml_actual; ++_iml) { #else /* LAYOUT > 1 */ /*AoSoA*/ #error AoSoA not implemented. for (;;) { /* help clang-format properly indent */ #endif int _nd_idx = _ni[_iml]; _v = _vec_v[_nd_idx]; _PRCELLSTATE_V v=_v; { nao = _ion_nao; { #if !defined(_kinetic_kstates_NMDA16_2) #define _kinetic_kstates_NMDA16_2 0 #endif sparse_thread((SparseObj*)_thread[_spth1]._pvoid, 16, _slist1, _dlist1, &t, dt, _kinetic_kstates_NMDA16_2, _linmat1, _threadargs_); }}} } static void terminal(){} static void _initlists(){ double _x; double* _p = &_x; int _i; static int _first = 1; int _cntml_actual=1; int _cntml_padded=1; int _iml=0; if (!_first) return; _slist1 = (int*)malloc(sizeof(int)*16); _dlist1 = (int*)malloc(sizeof(int)*16); _slist1[0] = &(RA2sMg) - _p; _dlist1[0] = &(DRA2sMg) - _p; _slist1[1] = &(OMg) - _p; _dlist1[1] = &(DOMg) - _p; _slist1[2] = &(O) - _p; _dlist1[2] = &(DO) - _p; _slist1[3] = &(RA2fMg) - _p; _dlist1[3] = &(DRA2fMg) - _p; _slist1[4] = &(RA2d2Mg) - _p; _dlist1[4] = &(DRA2d2Mg) - _p; _slist1[5] = &(RA2d1Mg) - _p; _dlist1[5] = &(DRA2d1Mg) - _p; _slist1[6] = &(RA2Mg) - _p; _dlist1[6] = &(DRA2Mg) - _p; _slist1[7] = &(RAMg) - _p; _dlist1[7] = &(DRAMg) - _p; _slist1[8] = &(RMg) - _p; _dlist1[8] = &(DRMg) - _p; _slist1[9] = &(RA2s) - _p; _dlist1[9] = &(DRA2s) - _p; _slist1[10] = &(RA2f) - _p; _dlist1[10] = &(DRA2f) - _p; _slist1[11] = &(RA2d2) - _p; _dlist1[11] = &(DRA2d2) - _p; _slist1[12] = &(RA2d1) - _p; _dlist1[12] = &(DRA2d1) - _p; _slist1[13] = &(RA2) - _p; _dlist1[13] = &(DRA2) - _p; _slist1[14] = &(RA) - _p; _dlist1[14] = &(DRA) - _p; _slist1[15] = &(R) - _p; _dlist1[15] = &(DR) - _p; 
#pragma acc enter data copyin(_slist1[0:16]) #pragma acc enter data copyin(_dlist1[0:16]) _first = 0; } } // namespace coreneuron_lib