static SkImageFilter* make_image_filter(bool canBeNull = true) { SkImageFilter* filter = 0; // Add a 1 in 3 chance to get a NULL input if (canBeNull && (R(3) == 1)) { return filter; } enum { ALPHA_THRESHOLD, MERGE, COLOR, BLUR, MAGNIFIER, DOWN_SAMPLE, XFERMODE, OFFSET, MATRIX, MATRIX_CONVOLUTION, COMPOSE, DISTANT_LIGHT, POINT_LIGHT, SPOT_LIGHT, NOISE, DROP_SHADOW, MORPHOLOGY, BITMAP, DISPLACE, TILE, PICTURE, NUM_FILTERS }; switch (R(NUM_FILTERS)) { case ALPHA_THRESHOLD: filter = SkAlphaThresholdFilter::Create(make_region(), make_scalar(), make_scalar()); break; case MERGE: filter = SkMergeImageFilter::Create(make_image_filter(), make_image_filter(), make_xfermode()); break; case COLOR: { SkAutoTUnref<SkColorFilter> cf((R(2) == 1) ? SkColorFilter::CreateModeFilter(make_color(), make_xfermode()) : SkColorFilter::CreateLightingFilter(make_color(), make_color())); filter = cf.get() ? SkColorFilterImageFilter::Create(cf, make_image_filter()) : 0; } break; case BLUR: filter = SkBlurImageFilter::Create(make_scalar(true), make_scalar(true), make_image_filter()); break; case MAGNIFIER: filter = SkMagnifierImageFilter::Create(make_rect(), make_scalar(true)); break; case DOWN_SAMPLE: filter = SkDownSampleImageFilter::Create(make_scalar()); break; case XFERMODE: { SkAutoTUnref<SkXfermode> mode(SkXfermode::Create(make_xfermode())); filter = SkXfermodeImageFilter::Create(mode, make_image_filter(), make_image_filter()); } break; case OFFSET: filter = SkOffsetImageFilter::Create(make_scalar(), make_scalar(), make_image_filter()); break; case MATRIX: filter = SkMatrixImageFilter::Create(make_matrix(), (SkPaint::FilterLevel)R(4), make_image_filter()); break; case MATRIX_CONVOLUTION: { SkImageFilter::CropRect cropR(SkRect::MakeWH(SkIntToScalar(kBitmapSize), SkIntToScalar(kBitmapSize))); SkISize size = SkISize::Make(R(10)+1, R(10)+1); int arraySize = size.width() * size.height(); SkTArray<SkScalar> kernel(arraySize); for (int i = 0; i < arraySize; ++i) { kernel.push_back() = make_scalar(); } SkIPoint kernelOffset = SkIPoint::Make(R(SkIntToScalar(size.width())), R(SkIntToScalar(size.height()))); filter = SkMatrixConvolutionImageFilter::Create(size, kernel.begin(), make_scalar(), make_scalar(), kernelOffset, (SkMatrixConvolutionImageFilter::TileMode)R(3), R(2) == 1, make_image_filter(), &cropR); } break; case COMPOSE: filter = SkComposeImageFilter::Create(make_image_filter(), make_image_filter()); break; case DISTANT_LIGHT: filter = (R(2) == 1) ? SkLightingImageFilter::CreateDistantLitDiffuse(make_point(), make_color(), make_scalar(), make_scalar(), make_image_filter()) : SkLightingImageFilter::CreateDistantLitSpecular(make_point(), make_color(), make_scalar(), make_scalar(), SkIntToScalar(R(10)), make_image_filter()); break; case POINT_LIGHT: filter = (R(2) == 1) ? SkLightingImageFilter::CreatePointLitDiffuse(make_point(), make_color(), make_scalar(), make_scalar(), make_image_filter()) : SkLightingImageFilter::CreatePointLitSpecular(make_point(), make_color(), make_scalar(), make_scalar(), SkIntToScalar(R(10)), make_image_filter()); break; case SPOT_LIGHT: filter = (R(2) == 1) ? SkLightingImageFilter::CreateSpotLitDiffuse(SkPoint3(0, 0, 0), make_point(), make_scalar(), make_scalar(), make_color(), make_scalar(), make_scalar(), make_image_filter()) : SkLightingImageFilter::CreateSpotLitSpecular(SkPoint3(0, 0, 0), make_point(), make_scalar(), make_scalar(), make_color(), make_scalar(), make_scalar(), SkIntToScalar(R(10)), make_image_filter()); break; case NOISE: { SkAutoTUnref<SkShader> shader((R(2) == 1) ? 
SkPerlinNoiseShader::CreateFractalNoise( make_scalar(true), make_scalar(true), R(10.0f), make_scalar()) : SkPerlinNoiseShader::CreateTurbulence( make_scalar(true), make_scalar(true), R(10.0f), make_scalar())); SkImageFilter::CropRect cropR(SkRect::MakeWH(SkIntToScalar(kBitmapSize), SkIntToScalar(kBitmapSize))); filter = SkRectShaderImageFilter::Create(shader, &cropR); } break; case DROP_SHADOW: filter = SkDropShadowImageFilter::Create(make_scalar(), make_scalar(), make_scalar(true), make_scalar(true), make_color(), make_image_filter()); break; case MORPHOLOGY: if (R(2) == 1) { filter = SkDilateImageFilter::Create(R(static_cast<float>(kBitmapSize)), R(static_cast<float>(kBitmapSize)), make_image_filter()); } else { filter = SkErodeImageFilter::Create(R(static_cast<float>(kBitmapSize)), R(static_cast<float>(kBitmapSize)), make_image_filter()); } break; case BITMAP: if (R(2) == 1) { filter = SkBitmapSource::Create(make_bitmap(), make_rect(), make_rect()); } else { filter = SkBitmapSource::Create(make_bitmap()); } break; case DISPLACE: filter = SkDisplacementMapEffect::Create(make_channel_selector_type(), make_channel_selector_type(), make_scalar(), make_image_filter(false), make_image_filter()); break; case TILE: filter = SkTileImageFilter::Create(make_rect(), make_rect(), make_image_filter(false)); break; case PICTURE: { SkRTreeFactory factory; SkPictureRecorder recorder; SkCanvas* recordingCanvas = recorder.beginRecording(SkIntToScalar(kBitmapSize), SkIntToScalar(kBitmapSize), &factory, 0); drawSomething(recordingCanvas); SkAutoTUnref<SkPicture> pict(recorder.endRecording()); filter = SkPictureImageFilter::Create(pict.get(), make_rect()); } break; default: break; } return (filter || canBeNull) ? filter : make_image_filter(canBeNull); }
inline void nest::ConnBuilder::single_connect_( index sgid, Node& target, thread target_thread, librandom::RngPtr& rng ) { if ( param_dicts_.empty() ) // indicates we have no synapse params { if ( default_weight_and_delay_ ) kernel().connection_manager.connect( sgid, &target, target_thread, synapse_model_ ); else if ( default_weight_ ) kernel().connection_manager.connect( sgid, &target, target_thread, synapse_model_, delay_->value_double( target_thread, rng ) ); else { double delay = delay_->value_double( target_thread, rng ); double weight = weight_->value_double( target_thread, rng ); kernel().connection_manager.connect( sgid, &target, target_thread, synapse_model_, delay, weight ); } } else { assert( kernel().vp_manager.get_num_threads() == param_dicts_.size() ); for ( ConnParameterMap::const_iterator it = synapse_params_.begin(); it != synapse_params_.end(); ++it ) { if ( it->first == names::receptor_type || it->first == names::music_channel || it->first == names::synapse_label ) { try { // change value of dictionary entry without allocating new datum IntegerDatum* id = static_cast< IntegerDatum* >( ( ( *param_dicts_[ target_thread ] )[ it->first ] ).datum() ); ( *id ) = it->second->value_int( target_thread, rng ); } catch ( KernelException& e ) { if ( it->first == names::receptor_type ) { throw BadProperty( "Receptor type must be of type integer." ); } else if ( it->first == names::music_channel ) { throw BadProperty( "Music channel type must be of type integer." ); } else if ( it->first == names::synapse_label ) { throw BadProperty( "Synapse label must be of type integer." ); } } } else { // change value of dictionary entry without allocating new datum DoubleDatum* dd = static_cast< DoubleDatum* >( ( ( *param_dicts_[ target_thread ] )[ it->first ] ).datum() ); ( *dd ) = it->second->value_double( target_thread, rng ); } } if ( default_weight_and_delay_ ) kernel().connection_manager.connect( sgid, &target, target_thread, synapse_model_, param_dicts_[ target_thread ] ); else if ( default_weight_ ) kernel().connection_manager.connect( sgid, &target, target_thread, synapse_model_, param_dicts_[ target_thread ], delay_->value_double( target_thread, rng ) ); else { double delay = delay_->value_double( target_thread, rng ); double weight = weight_->value_double( target_thread, rng ); kernel().connection_manager.connect( sgid, &target, target_thread, synapse_model_, param_dicts_[ target_thread ], delay, weight ); } } }
void
nest::OneToOneBuilder::connect_()
{
  // make sure that target and source population have the same size
  if ( sources_->size() != targets_->size() )
  {
    LOG( M_ERROR,
      "Connect",
      "Source and Target population must be of the same size." );
    throw DimensionMismatch();
  }

#pragma omp parallel
  {
    // get thread id
    const int tid = kernel().vp_manager.get_thread_id();

    try
    {
      // allocate pointer to thread specific random generator
      librandom::RngPtr rng = kernel().rng_manager.get_rng( tid );

      for ( GIDCollection::const_iterator tgid = targets_->begin(),
                                          sgid = sources_->begin();
            tgid != targets_->end();
            ++tgid, ++sgid )
      {
        assert( sgid != sources_->end() );

        if ( *sgid == *tgid and not autapses_ )
          continue;

        // check whether the target is on this mpi machine
        if ( not kernel().node_manager.is_local_gid( *tgid ) )
        {
          skip_conn_parameter_( tid );
          continue;
        }

        Node* const target = kernel().node_manager.get_node( *tgid );
        const thread target_thread = target->get_thread();

        // check whether the target is on our thread
        if ( tid != target_thread )
        {
          skip_conn_parameter_( tid );
          continue;
        }

        single_connect_( *sgid, *target, target_thread, rng );
      }
    }
    catch ( std::exception& err )
    {
      // We must create a new exception here, err's lifetime ends at
      // the end of the catch block.
      exceptions_raised_.at( tid ) =
        lockPTR< WrappedThreadException >( new WrappedThreadException( err ) );
    }
  }
}
// returns the done value bool EventDeliveryManager::deliver_events( thread t ) { // are we done? bool done = true; // deliver only at beginning of time slice if ( kernel().simulation_manager.get_from_step() > 0 ) return done; SpikeEvent se; std::vector< int > pos( displacements_ ); if ( !off_grid_spiking_ ) // on_grid_spiking { // prepare Time objects for every possible time stamp within min_delay_ std::vector< Time > prepared_timestamps( kernel().connection_manager.get_min_delay() ); for ( size_t lag = 0; lag < ( size_t ) kernel().connection_manager.get_min_delay(); lag++ ) { prepared_timestamps[ lag ] = kernel().simulation_manager.get_clock() - Time::step( lag ); } for ( size_t vp = 0; vp < ( size_t ) kernel().vp_manager.get_num_virtual_processes(); ++vp ) { size_t pid = kernel().mpi_manager.get_process_id( vp ); int pos_pid = pos[ pid ]; int lag = kernel().connection_manager.get_min_delay() - 1; while ( lag >= 0 ) { index nid = global_grid_spikes_[ pos_pid ]; if ( nid != static_cast< index >( comm_marker_ ) ) { // tell all local nodes about spikes on remote machines. se.set_stamp( prepared_timestamps[ lag ] ); se.set_sender_gid( nid ); kernel().connection_manager.send( t, nid, se ); } else { --lag; } ++pos_pid; } pos[ pid ] = pos_pid; } // here we are done with the spiking events // pos[pid] for each pid now points to the first entry of // the secondary events for ( size_t pid = 0; pid < ( size_t ) kernel().mpi_manager.get_num_processes(); ++pid ) { std::vector< unsigned int >::iterator readpos = global_grid_spikes_.begin() + pos[ pid ]; while ( true ) { // we must not use unsigned int for the type, otherwise // the encoding will be different on JUQUEEN for the // index written into the buffer and read out of it synindex synid; read_from_comm_buffer( synid, readpos ); if ( synid == invalid_synindex ) break; --readpos; kernel().model_manager.assert_valid_syn_id( synid ); kernel().model_manager.get_secondary_event_prototype( synid, t ) << readpos; kernel().connection_manager.send_secondary( t, kernel().model_manager.get_secondary_event_prototype( synid, t ) ); } // of while (true) // read the done value of the p-th num_process // must be a bool (same type as on the sending side) // otherwise the encoding will be inconsistent on JUQUEEN bool done_p; read_from_comm_buffer( done_p, readpos ); done = done && done_p; } } else // off grid spiking { // prepare Time objects for every possible time stamp within min_delay_ std::vector< Time > prepared_timestamps( kernel().connection_manager.get_min_delay() ); for ( size_t lag = 0; lag < ( size_t ) kernel().connection_manager.get_min_delay(); lag++ ) { prepared_timestamps[ lag ] = kernel().simulation_manager.get_clock() - Time::step( lag ); } for ( size_t vp = 0; vp < ( size_t ) kernel().vp_manager.get_num_virtual_processes(); ++vp ) { size_t pid = kernel().mpi_manager.get_process_id( vp ); int pos_pid = pos[ pid ]; int lag = kernel().connection_manager.get_min_delay() - 1; while ( lag >= 0 ) { index nid = global_offgrid_spikes_[ pos_pid ].get_gid(); if ( nid != static_cast< index >( comm_marker_ ) ) { // tell all local nodes about spikes on remote machines. se.set_stamp( prepared_timestamps[ lag ] ); se.set_sender_gid( nid ); se.set_offset( global_offgrid_spikes_[ pos_pid ].get_offset() ); kernel().connection_manager.send( t, nid, se ); } else { --lag; } ++pos_pid; } pos[ pid ] = pos_pid; } } return done; }
void
nest::FixedTotalNumberBuilder::connect_()
{
  const int_t M = kernel().vp_manager.get_num_virtual_processes();
  const long_t size_sources = sources_->size();
  const long_t size_targets = targets_->size();

  // drawing connection ids

  // Compute the distribution of targets over processes using the modulo
  // function
  std::vector< std::vector< size_t > > targets_on_vp( M );
  for ( size_t t = 0; t < targets_->size(); t++ )
  {
    targets_on_vp[ kernel().vp_manager.suggest_vp( ( *targets_ )[ t ] ) ]
      .push_back( ( *targets_ )[ t ] );
  }

  // We use the multinomial distribution to determine the number of
  // connections that will be made on one virtual process, i.e. we
  // partition the set of edges into n_vps subsets. The number of
  // edges on one virtual process is binomially distributed with
  // the boundary condition that the sum of all edges over virtual
  // processes is the total number of edges.
  // To obtain the num_conns_on_vp we adapt the gsl
  // implementation of the multinomial distribution.

  // K from gsl is equivalent to M = n_vps
  // N is already taken from stack
  // p[] is targets_on_vp

  std::vector< long_t > num_conns_on_vp( M, 0 ); // corresponds to n[]

  // calculate exact multinomial distribution
  // get global rng that is tested for synchronization for all threads
  librandom::RngPtr grng = kernel().rng_manager.get_grng();

  // HEP: instead of counting upwards, we might count remaining_targets and
  // remaining_partitions down. why?
  // begin code adapted from gsl 1.8 //
  double_t sum_dist = 0.0; // corresponds to sum_p
  // norm is equivalent to size_targets
  uint_t sum_partitions = 0; // corresponds to sum_n

// substituting gsl_ran call
#ifdef HAVE_GSL
  librandom::GSL_BinomialRandomDev bino( grng, 0, 0 );
#else
  librandom::BinomialRandomDev bino( grng, 0, 0 );
#endif

  for ( int k = 0; k < M; k++ )
  {
    if ( targets_on_vp[ k ].size() > 0 )
    {
      double_t num_local_targets =
        static_cast< double_t >( targets_on_vp[ k ].size() );
      double_t p_local = num_local_targets / ( size_targets - sum_dist );
      bino.set_p( p_local );
      bino.set_n( N_ - sum_partitions );
      num_conns_on_vp[ k ] = bino.ldev();
    }

    sum_dist += static_cast< double_t >( targets_on_vp[ k ].size() );
    sum_partitions += static_cast< uint_t >( num_conns_on_vp[ k ] );
  }
  // end code adapted from gsl 1.8

#pragma omp parallel
  {
    // get thread id
    const int tid = kernel().vp_manager.get_thread_id();

    try
    {
      // allocate pointer to thread specific random generator
      const int_t vp_id = kernel().vp_manager.thread_to_vp( tid );

      if ( kernel().vp_manager.is_local_vp( vp_id ) )
      {
        librandom::RngPtr rng = kernel().rng_manager.get_rng( tid );

        while ( num_conns_on_vp[ vp_id ] > 0 )
        {
          // draw random numbers for source node from all source neurons
          const long_t s_index = rng->ulrand( size_sources );
          // draw random numbers for target node from
          // targets_on_vp on this virtual process
          const long_t t_index = rng->ulrand( targets_on_vp[ vp_id ].size() );
          // map random number of source node to gid corresponding to
          // the source_adr vector
          const long_t sgid = ( *sources_ )[ s_index ];
          // map random number of target node to gid using the
          // targets_on_vp vector
          const long_t tgid = targets_on_vp[ vp_id ][ t_index ];

          Node* const target = kernel().node_manager.get_node( tgid );
          const thread target_thread = target->get_thread();

          if ( autapses_ or sgid != tgid )
          {
            single_connect_( sgid, *target, target_thread, rng );
            num_conns_on_vp[ vp_id ]--;
          }
        }
      }
    }
    catch ( std::exception& err )
    {
      // We must create a new exception here, err's lifetime ends at
      // the end of the catch block.
      exceptions_raised_.at( tid ) =
        lockPTR< WrappedThreadException >( new WrappedThreadException( err ) );
    }
  }
}
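The sequential-binomial scheme that the builder above adapts from GSL can be illustrated in isolation. The following is a minimal standalone sketch using only the C++ standard library (an illustration, not NEST or GSL code): each part k receives a binomial draw over the still-unassigned edges, with the conditional probability of part k among the remaining weight, so the draws partition the total.

// Standalone illustration (not NEST code): partition N edges over parts
// with weights w[k] by sequential binomial draws, the same scheme the
// builder above adapts from gsl_ran_multinomial.
#include <algorithm>
#include <iostream>
#include <numeric>
#include <random>
#include <vector>

std::vector< long >
partition_multinomial( long N, const std::vector< double >& w, std::mt19937& rng )
{
  std::vector< long > n( w.size(), 0 );
  const double norm = std::accumulate( w.begin(), w.end(), 0.0 );
  double sum_w = 0.0; // weight already consumed (sum_dist above)
  long sum_n = 0;     // edges already assigned (sum_partitions above)
  for ( std::size_t k = 0; k < w.size(); ++k )
  {
    if ( w[ k ] > 0.0 )
    {
      // conditional probability of part k among the remaining parts
      const double p = std::min( w[ k ] / ( norm - sum_w ), 1.0 );
      std::binomial_distribution< long > bino( N - sum_n, p );
      n[ k ] = bino( rng );
    }
    sum_w += w[ k ];
    sum_n += n[ k ];
  }
  return n; // sums to N, up to floating-point rounding in the final draw
}

int
main()
{
  std::mt19937 rng( 42 );
  const std::vector< double > weights = { 3, 1, 2 }; // e.g. targets per VP
  for ( long c : partition_multinomial( 1000, weights, rng ) )
    std::cout << c << '\n';
}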
inline kernel_call bluestein_mul_out( const backend::command_queue &queue, size_t batch, size_t p, size_t radix, size_t threads, size_t stride, const backend::device_vector<T2> &data, const backend::device_vector<T2> &exp, const backend::device_vector<T2> &out ) { backend::source_generator o; kernel_common<T>(o, queue); mul_code<T2>(o, false); o.function<T2>("scale").open("(") .template parameter<T2>("x") .template parameter<T >("a") .close(")").open("{"); o.new_line() << type_name<T2>() << " r = {x.x * a, x.y * a};"; o.new_line() << "return r;"; o.close("}"); o.kernel("bluestein_mul_out").open("(") .template parameter< global_ptr<const T2> >("data") .template parameter< global_ptr<const T2> >("exp") .template parameter< global_ptr< T2> >("output") .template parameter< T >("div") .template parameter< cl_uint >("p") .template parameter< cl_uint >("in_stride") .template parameter< cl_uint >("radix") .close(")").open("{"); o.new_line() << "const size_t i = " << o.global_id(0) << ";"; o.new_line() << "const size_t threads = " << o.global_size(0) << ";"; o.new_line() << "const size_t b = " << o.global_id(1) << ";"; o.new_line() << "const size_t l = " << o.global_id(2) << ";"; o.new_line() << "if(l < radix)"; o.open("{"); o.new_line() << "const size_t k = i % p;"; o.new_line() << "const size_t j = k + (i - k) * radix;"; o.new_line() << "const size_t in_off = i * in_stride + b * in_stride * threads + l;"; o.new_line() << "const size_t out_off = j + b * threads * radix + l * p;"; o.new_line() << "output[out_off] = mul(scale(data[in_off], div), exp[l]);"; o.close("}"); o.close("}"); backend::kernel kernel(queue, o.str(), "bluestein_mul_out"); kernel.push_arg(data); kernel.push_arg(exp); kernel.push_arg(out); kernel.push_arg(static_cast<T>(1.0 / stride)); kernel.push_arg(static_cast<cl_uint>(p)); kernel.push_arg(static_cast<cl_uint>(stride)); kernel.push_arg(static_cast<cl_uint>(radix)); const size_t wg = kernel.preferred_work_group_size_multiple(queue); const size_t radix_pad = (radix + wg - 1) / wg; kernel.config( backend::ndrange(threads, batch, radix_pad), backend::ndrange( 1, 1, wg) ); std::ostringstream desc; desc << "bluestein_mul_out{r=" << radix << "(" << radix_pad << "), wg=" << wg << ", batch=" << batch << ", p=" << p << ", thr=" << threads << ", stride=" << stride << "}"; return kernel_call(false, desc.str(), kernel); }
void
EventDeliveryManager::configure_spike_buffers()
{
  assert( kernel().connection_manager.get_min_delay() != 0 );

  spike_register_.clear();
  // the following line does not compile with gcc <= 3.3.5
  spike_register_.resize( kernel().vp_manager.get_num_threads(),
    std::vector< std::vector< unsigned int > >(
      kernel().connection_manager.get_min_delay() ) );
  for ( size_t j = 0; j < spike_register_.size(); ++j )
    for ( size_t k = 0; k < spike_register_[ j ].size(); ++k )
      spike_register_[ j ][ k ].clear();

  offgrid_spike_register_.clear();
  // the following line does not compile with gcc <= 3.3.5
  offgrid_spike_register_.resize( kernel().vp_manager.get_num_threads(),
    std::vector< std::vector< OffGridSpike > >(
      kernel().connection_manager.get_min_delay() ) );
  for ( size_t j = 0; j < offgrid_spike_register_.size(); ++j )
    for ( size_t k = 0; k < offgrid_spike_register_[ j ].size(); ++k )
      offgrid_spike_register_[ j ][ k ].clear();

  // this should also clear all contained elements
  // so no loop required
  secondary_events_buffer_.clear();
  secondary_events_buffer_.resize( kernel().vp_manager.get_num_threads() );

  // send_buffer must be >= 2 as the 'overflow' signal takes up 2 spaces
  // plus the final marker and the done flag for iterations
  // + 1 for the final markers of each thread (invalid_synindex) of secondary
  // events
  // + 1 for the done flag (true) of each process
  int send_buffer_size =
    kernel().vp_manager.get_num_threads()
          * kernel().connection_manager.get_min_delay()
        + 2
      > 4
    ? kernel().vp_manager.get_num_threads()
        * kernel().connection_manager.get_min_delay()
      + 2
    : 4;
  int recv_buffer_size =
    send_buffer_size * kernel().mpi_manager.get_num_processes();
  kernel().mpi_manager.set_buffer_sizes( send_buffer_size, recv_buffer_size );

  // DEC cxx required 0U literal, HEP 2007-03-26
  local_grid_spikes_.clear();
  local_grid_spikes_.resize( send_buffer_size, 0U );
  local_offgrid_spikes_.clear();
  local_offgrid_spikes_.resize( send_buffer_size, OffGridSpike( 0, 0.0 ) );

  global_grid_spikes_.clear();
  global_grid_spikes_.resize( recv_buffer_size, 0U );

  // insert the end marker for payload event (==invalid_synindex)
  // and insert the done flag (==true)
  // after min_delay 0's (== comm_marker)
  // use the template functions defined in event.h
  // this only needs to be done for one process, because displacements is set
  // to 0 so all processes initially read out the same positions in the global
  // spike buffer
  std::vector< unsigned int >::iterator pos = global_grid_spikes_.begin()
    + kernel().vp_manager.get_num_threads()
      * kernel().connection_manager.get_min_delay();
  write_to_comm_buffer( invalid_synindex, pos );
  write_to_comm_buffer( true, pos );

  global_offgrid_spikes_.clear();
  global_offgrid_spikes_.resize( recv_buffer_size, OffGridSpike( 0, 0.0 ) );

  displacements_.clear();
  displacements_.resize( kernel().mpi_manager.get_num_processes(), 0 );
}
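For reference, the buffer sizing above reduces to send_buffer_size = max(num_threads * min_delay + 2, 4) and recv_buffer_size = send_buffer_size * num_processes. A tiny standalone sketch with assumed example values (4 threads, min_delay 10, 8 processes; the numbers are illustrative, not taken from the source):

// Illustrative only: the buffer sizing used above, with made-up numbers.
#include <algorithm>
#include <cstdio>

int main()
{
  const int num_threads = 4, min_delay = 10, num_processes = 8; // assumed values
  const int send_buffer_size = std::max( num_threads * min_delay + 2, 4 ); // 42
  const int recv_buffer_size = send_buffer_size * num_processes;           // 336
  std::printf( "send=%d recv=%d\n", send_buffer_size, recv_buffer_size );
  return 0;
}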
/* ==============================================================
 Main MEX function - interface to Matlab.
============================================================== */
void mexFunction( int nlhs, mxArray *plhs[],
                  int nrhs, const mxArray *prhs[] )
{
  long i, j, k;
  long nsv, new_dim, num_data;
  double *Alpha;
  double *b;
  double *Y;
  double k_ij;

  ker_cnt = 0;

  /* Y = kernelproj_mex(X, Alpha, b, sv_X, ker, arg ) */
  /* ------------------------------------------- */
  if( nrhs == 6)
  {
    /* data matrix [dim x num_data] */
    if( !mxIsNumeric(prhs[0]) || !mxIsDouble(prhs[0]) ||
        mxIsEmpty(prhs[0])    || mxIsComplex(prhs[0]) )
      mexErrMsgTxt("Input data must be a real matrix.");

    /* multipliers Alpha [nsv x new_dim] */
    if( !mxIsNumeric(prhs[1]) || !mxIsDouble(prhs[1]) ||
        mxIsEmpty(prhs[1])    || mxIsComplex(prhs[1]) )
      mexErrMsgTxt("Input Alpha must be a real matrix.");

    /* vector b [new_dim x 1] */
    if( !mxIsNumeric(prhs[2]) || !mxIsDouble(prhs[2]) ||
        mxIsEmpty(prhs[2])    || mxIsComplex(prhs[2]) )
      mexErrMsgTxt("Input b must be a real vector.");

    /* kernel identifier */
    ker = kernel_id( prhs[4] );
    if( ker == -1 )
      mexErrMsgTxt("Improper kernel identifier.");

    /* get pointer to arguments */
    arg1 = mxGetPr(prhs[5]);

    /* get pointer at input vectors */
    dataA = mxGetPr(prhs[0]);
    Alpha = mxGetPr(prhs[1]);
    b = mxGetPr(prhs[2]);
    dataB = mxGetPr(prhs[3]);

    /* get data dimensions */
    dim = mxGetM(prhs[0]);
    num_data = mxGetN(prhs[0]);
    nsv = mxGetM(prhs[1]);
    new_dim = mxGetN(prhs[1]);

    if( mxGetM(prhs[2]) != new_dim )
      mexErrMsgTxt("Number of columns of Alpha must equal the length of vector b.");

    /* creates output matrix of projected data */
    plhs[0] = mxCreateDoubleMatrix(new_dim, num_data, mxREAL);
    Y = mxGetPr(plhs[0]);

    /* computes kernel projection: Y(:,i) = b + Alpha' * k(sv_X, X(:,i)) */
    for( i = 0; i < num_data; i++ )
    {
      for( k = 0; k < new_dim; k++ )
      {
        Y[k+i*new_dim] = b[k];
      }

      for( j = 0; j < nsv; j++ )
      {
        k_ij = kernel(i,j);

        for( k = 0; k < new_dim; k++ )
        {
          if( Alpha[j+k*nsv] != 0 )
            Y[k+i*new_dim] += k_ij*Alpha[j+k*nsv];
        }
      }
    }
  }
  else
  {
    mexErrMsgTxt("Wrong number of input arguments.");
  }

  return;
}
void nest::iaf_psc_alpha_presc::update( Time const& origin, const long_t from, const long_t to ) { assert( to >= 0 ); assert( static_cast< delay >( from ) < kernel().connection_manager.get_min_delay() ); assert( from < to ); /* Neurons may have been initialized to superthreshold potentials. We need to check for this here and issue spikes at the beginning of the interval. */ if ( S_.y3_ >= P_.U_th_ ) { S_.last_spike_step_ = origin.get_steps() + from + 1; S_.last_spike_offset_ = V_.h_ms_ * ( 1 - std::numeric_limits< double_t >::epsilon() ); // reset neuron and make it refractory S_.y3_ = P_.U_reset_; S_.r_ = V_.refractory_steps_; // send spike set_spiketime( Time::step( S_.last_spike_step_ ), S_.last_spike_offset_ ); SpikeEvent se; se.set_offset( S_.last_spike_offset_ ); kernel().event_delivery_manager.send( *this, se, from ); } for ( long_t lag = from; lag < to; ++lag ) { // time at start of update step const long_t T = origin.get_steps() + lag; // save state at beginning of interval for spike-time interpolation V_.y0_before_ = S_.y0_; V_.y1_before_ = S_.y1_; V_.y2_before_ = S_.y2_; V_.y3_before_ = S_.y3_; /* obtain input to y3_ We need to collect this value even while the neuron is refractory, since we need to clear any spikes that have come in from the ring buffer. */ const double_t dy3 = B_.spike_y3_.get_value( lag ); if ( S_.r_ == 0 ) { // neuron is not refractory S_.y3_ = V_.P30_ * ( P_.I_e_ + S_.y0_ ) + V_.P31_ * S_.y1_ + V_.P32_ * S_.y2_ + V_.expm1_tau_m_ * S_.y3_ + S_.y3_; S_.y3_ += dy3; // add input // enforce lower bound S_.y3_ = ( S_.y3_ < P_.U_min_ ? P_.U_min_ : S_.y3_ ); } else if ( S_.r_ == 1 ) { // neuron returns from refractoriness during interval S_.r_ = 0; // Iterate third component (membrane pot) from end of // refractory period to end of interval. As first-order // approximation, add a proportion of the effect of synaptic // input during the interval to membrane pot. The proportion // is given by the part of the interval after the end of the // refractory period. S_.y3_ = P_.U_reset_ + // try fix 070623, md update_y3_delta_() + dy3 - dy3 * ( 1 - S_.last_spike_offset_ / V_.h_ms_ ); // enforce lower bound S_.y3_ = ( S_.y3_ < P_.U_min_ ? P_.U_min_ : S_.y3_ ); } else { // neuron is refractory // y3_ remains unchanged at 0.0 --S_.r_; } // update synaptic currents S_.y2_ = V_.expm1_tau_syn_ * V_.h_ms_ * S_.y1_ + V_.expm1_tau_syn_ * S_.y2_ + V_.h_ms_ * S_.y1_ + S_.y2_; S_.y1_ = V_.expm1_tau_syn_ * S_.y1_ + S_.y1_; // add synaptic inputs from the ring buffer // this must happen BEFORE threshold-crossing interpolation, // since synaptic inputs occured during the interval S_.y1_ += B_.spike_y1_.get_value( lag ); S_.y2_ += B_.spike_y2_.get_value( lag ); // neuron spikes if ( S_.y3_ >= P_.U_th_ ) { // compute spike time S_.last_spike_step_ = T + 1; // The time for the threshpassing S_.last_spike_offset_ = V_.h_ms_ - thresh_find_( V_.h_ms_ ); // reset AFTER spike-time interpolation S_.y3_ = P_.U_reset_; S_.r_ = V_.refractory_steps_; // sent event set_spiketime( Time::step( S_.last_spike_step_ ), S_.last_spike_offset_ ); SpikeEvent se; se.set_offset( S_.last_spike_offset_ ); kernel().event_delivery_manager.send( *this, se, lag ); } // Set new input current. The current change occurs at the // end of the interval and thus must come AFTER the threshold- // crossing interpolation S_.y0_ = B_.currents_.get_value( lag ); // logging B_.logger_.record_data( origin.get_steps() + lag ); } // from lag = from ... }
int main(int argc, char* argv[])
{
  const int m = (1 < argc ? atoi(argv[1]) : 16);
  const int n = (2 < argc ? atoi(argv[2]) : m);
  const unsigned int ldi = LIBXSMM_MAX(3 < argc ? atoi(argv[3]) : 0, m);
  const unsigned int ldo = LIBXSMM_MAX(4 < argc ? atoi(argv[4]) : 0, m);
  const int unroll = (5 < argc ? atoi(argv[5]) : 1);
  const int prefetch = (6 < argc ? atoi(argv[6]) : 0);
  const int flags = ((7 < argc && 0 != atoi(argv[7])) ? LIBXSMM_MATCOPY_FLAG_ZERO_SOURCE : 0);
  const int iters = (8 < argc ? atoi(argv[8]) : 1);

  /* we should modify to test all data-types */
  const libxsmm_mcopy_descriptor* desc;
  libxsmm_xmcopyfunction kernel;
  libxsmm_descriptor_blob blob;
  libxsmm_timer_tickint l_start;
  libxsmm_timer_tickint l_end;
  int error = 0, i, j;
  ELEM_TYPE *a, *b;
  double copy_time;

  printf("This is a tester for JIT matcopy kernels!\n");
  desc = libxsmm_mcopy_descriptor_init(&blob, sizeof(ELEM_TYPE),
    m, n, ldo, ldi, flags, prefetch, &unroll);

  a = (ELEM_TYPE*)malloc(n * ldi * sizeof(ELEM_TYPE));
  b = (ELEM_TYPE*)malloc(n * ldo * sizeof(ELEM_TYPE));

  for (i = 0; i < n; i++) {
    for (j = 0; j < m; j++) {
      a[j+ldi*i] = (ELEM_TYPE)rand();
      if (0 != (LIBXSMM_MATCOPY_FLAG_ZERO_SOURCE & flags)) {
        b[j+ldo*i] = (ELEM_TYPE)rand();
      }
    }
  }

  /* test dispatch call */
  kernel = libxsmm_dispatch_mcopy(desc);
  if (kernel == 0) {
    printf("JIT error -> exit!!!!\n");
    exit(EXIT_FAILURE);
  }

  /* let's call */
  kernel(a, &ldi, b, &ldo, &a[128]);

  l_start = libxsmm_timer_tick();
  for (i = 0; i < iters; ++i) {
    kernel(a, &ldi, b, &ldo, &a[128]);
  }
  l_end = libxsmm_timer_tick();
  copy_time = libxsmm_timer_duration(l_start, l_end);

  for (i = 0; i < n; ++i) {
    for (j = 0; j < m; ++j) {
      if (0 != (LIBXSMM_MATCOPY_FLAG_ZERO_SOURCE & flags)) {
        if (LIBXSMM_NEQ(b[j+ldo*i], 0)) {
          printf("ERROR!!!\n");
          i = n;
          error = 1;
          break;
        }
      }
      else if (LIBXSMM_NEQ(a[j+ldi*i], b[j+ldo*i])) {
        printf("ERROR!!!\n");
        i = n;
        error = 1;
        break;
      }
    }
  }

  if (error == 0) {
    printf("CORRECT copy!!!!\n");
    printf("Time taken is\t%.5f seconds\n", copy_time);
  }

  free(a);
  free(b);

  /* report verification failures via the exit status */
  return (0 == error ? EXIT_SUCCESS : EXIT_FAILURE);
}
void Points::GenerateKernel(int L, int point_count, std::string title) { int g=point_count, m=0; LEGENDRE P_lm; LEGENDRE Y_P; LEGENDRE dP_lm; std::vector<std::vector<double> > d_kern(g); //kernel for derivative reconstruction std::vector<std::vector<double> > f_kern(g); //kernel for function (test) reconstruction std::vector<std::vector<double> > WT(g); std::complex<double> Y(0,0), Ylm(0,0), dYlm(0,0), Ymp1(0,0), ej(0,0), function(0,0), derivative(0,0); std::complex<double> im(0,1); double th1=0, ph1=0, sign=0; std::cout << title << std::endl; std::ofstream kernel(title); kernel.precision(15); for(int i=0; i<g; i++) { d_kern[i].resize(g); f_kern[i].resize(g); WT[i].resize(g); for(int j=0; j<g; j++) { for(double l=0; l<=L; l++) { for(double m_it=0; m_it<=(2*l); m_it++) { m=0; m = l-m_it; //std::cout << "m = " << m << ", l = " << l << std::endl; ej = m; sign=pow(-1.0,m_it); std::complex<double> exponential_prime(cos( Points::Phi[i]), (-1)*sin(Points::Phi[i])); std::complex<double> exponential(cos(m*Points::Phi[j]), sin(m*Points::Phi[j])); Ylm = P_lm.Yml(m, l, Points::Theta[i], Points::Phi[i]); Y = Y_P.Yml(m, l, Points::Theta[j], Points::Phi[j]); if( Theta[i] != 0 && ((m+1)<=l) ) { Ymp1 = m * (1.0/tan(Points::Theta[i])) * dP_lm.Yml(m, l, Points::Theta[i], Points::Phi[i]) + sqrt( (l-m)*(l+m+1) ) * exponential_prime * dP_lm.Yml(m+1, l, Points::Theta[i], Points::Phi[i]); } ///fill arrays with f=Y*Y for the function kernel and derivative kernel f_kern[i][j] += (conj(Y)*Ylm).real();//Y_real*Y_prime_real; d_kern[i][j] += (conj(Y)*Ymp1).real(); } } ///absorb weights into kernel WT[i][j] = Points::Weight[j]*4.0*PI; kernel << d_kern[i][j]*Points::Weight[j]*4.0*PI << " " << f_kern[i][j]*Points::Weight[j]*4.0*PI << " " << WT[i][j] << std::endl; } } kernel.close(); }
void
nest::iaf_cond_exp::update( Time const& origin, const long_t from, const long_t to )
{
  assert(
    to >= 0 && ( delay ) from < kernel().connection_manager.get_min_delay() );
  assert( from < to );

  for ( long_t lag = from; lag < to; ++lag )
  {
    double t = 0.0;

    // numerical integration with adaptive step size control:
    // ------------------------------------------------------
    // gsl_odeiv_evolve_apply performs only a single numerical
    // integration step, starting from t and bounded by step;
    // the while-loop ensures integration over the whole simulation
    // step (0, step] if more than one integration step is needed due
    // to a small integration step size;
    // note that (t+IntegrationStep > step) leads to integration over
    // (t, step] and afterwards setting t to step, but it does not
    // enforce setting IntegrationStep to step-t; this is of advantage
    // for a consistent and efficient integration across subsequent
    // simulation intervals
    while ( t < B_.step_ )
    {
      const int status = gsl_odeiv_evolve_apply( B_.e_,
        B_.c_,
        B_.s_,
        &B_.sys_,             // system of ODE
        &t,                   // from t
        B_.step_,             // to t <= step
        &B_.IntegrationStep_, // integration step size
        S_.y_ );              // neuronal state

      if ( status != GSL_SUCCESS )
        throw GSLSolverFailure( get_name(), status );
    }

    S_.y_[ State_::G_EXC ] += B_.spike_exc_.get_value( lag );
    S_.y_[ State_::G_INH ] += B_.spike_inh_.get_value( lag );

    // absolute refractory period
    if ( S_.r_ )
    { // neuron is absolute refractory
      --S_.r_;
      S_.y_[ State_::V_M ] = P_.V_reset_;
    }
    else if ( S_.y_[ State_::V_M ] >= P_.V_th_ )
    { // neuron is not absolute refractory and crosses threshold
      S_.r_ = V_.RefractoryCounts_;
      S_.y_[ State_::V_M ] = P_.V_reset_;

      set_spiketime( Time::step( origin.get_steps() + lag + 1 ) );

      SpikeEvent se;
      kernel().event_delivery_manager.send( *this, se, lag );
    }

    // set new input current
    B_.I_stim_ = B_.currents_.get_value( lag );

    // log state data
    B_.logger_.record_data( origin.get_steps() + lag );
  }
}
template < class T >
ocl::Kernel& ocl::Program::kernel(const std::string &name) const
{
    const utl::Type& t = utl::Type::type<T>();
    return kernel(name, t);
}
void ProcessingThread::run() { while(1) { // Check if paused pauseMutex.lock(); if (paused) { pauseMutex.unlock(); sleep(3); continue; } pauseMutex.unlock(); ///////////////////////////////// // Stop thread if stopped=TRUE // ///////////////////////////////// stoppedMutex.lock(); if (stopped) { stopped = false; stoppedMutex.unlock(); break; } stoppedMutex.unlock(); ///////////////////////////////// ///////////////////////////////// inputMutex.lock(); if (inputMode != INPUT_IMAGE) { inputMutex.unlock(); currentFrame = outputBuffer->getFrame(); } else { inputMutex.unlock(); if (outputBuffer->getSizeOfImageBuffer() > 0) { currentFrame = outputBuffer->getFrame(); } msleep(50); } inputMutex.unlock(); updM.lock(); //////////////////////////////////// // PERFORM IMAGE PROCESSING BELOW // //////////////////////////////////// cv::Mat outputIm = currentFrame.clone(); if (filters.flags[ImageProcessingFlags::ConvertColorspace]) { switch (settings.colorSpace) { case 0: { // Gray cv::cvtColor(currentFrame,outputIm, CV_RGB2GRAY); } break; case 1: { // HSV cv::cvtColor(currentFrame,outputIm, CV_RGB2HSV); } break; case 3: { // Lba cv::cvtColor(currentFrame,outputIm, CV_RGB2Lab); } break; } } if (filters.flags[ImageProcessingFlags::SaltPepperNoise]) { for (int i=0; i<settings.saltPepperNoiseDensity; i+=1) { // adding noise // generate randomly the col and row int m = qrand() % outputIm.rows; int n = qrand() % outputIm.cols; // generate randomly the value {black, white} int color_ = ((qrand() % 100) > 50) ? 255 : 0; if (outputIm.channels() == 1) { // gray-level image outputIm.at<uchar>(m, n)= color_; } else if (outputIm.channels() == 3) { // color image outputIm.at<cv::Vec3b>(m, n)[0]= color_; outputIm.at<cv::Vec3b>(m, n)[1]= color_; outputIm.at<cv::Vec3b>(m, n)[2]= color_; } } } if (filters.flags[ImageProcessingFlags::Dilate]) { cv::dilate(outputIm, outputIm, cv::Mat(), cv::Point(-1, -1), settings.dilateIterations); } if (filters.flags[ImageProcessingFlags::Erode]) { cv::erode(outputIm, outputIm, cv::Mat(), cv::Point(-1, -1), settings.erodeIterations); } if (filters.flags[ImageProcessingFlags::Open]) { cv::morphologyEx(outputIm, outputIm, cv::MORPH_OPEN, cv::Mat(), cv::Point(-1, -1), settings.openIterations); } if (filters.flags[ImageProcessingFlags::Close]) { cv::morphologyEx(outputIm, outputIm, cv::MORPH_CLOSE, cv::Mat(), cv::Point(-1, -1), settings.openIterations); } if (filters.flags[ImageProcessingFlags::Blur]) { cv::GaussianBlur(outputIm, outputIm, cv::Size(settings.blurSize, settings.blurSize), settings.blurSigma); } if (filters.flags[ImageProcessingFlags::Sobel]) { int scale = 1; int delta = 0; int ddepth = CV_16S; // check the direction switch (settings.sobelDirection) { case 0: { // horizontal cv::Mat grad_x; cv::Sobel( outputIm, grad_x, ddepth, 1, 0, settings.sobelKernelSize, scale, delta, BORDER_DEFAULT ); cv::convertScaleAbs( grad_x, outputIm ); } break; case 1: { // vertical cv::Mat grad_y; cv::Sobel( outputIm, grad_y, ddepth, 0, 1, settings.sobelKernelSize, scale, delta, BORDER_DEFAULT ); cv::convertScaleAbs( grad_y, outputIm ); } break; case 2: { // both directions cv::Mat grad_x; cv::Mat grad_y; cv::Mat abs_grad_x; cv::Mat abs_grad_y; cv::Sobel( outputIm, grad_x, ddepth, 1, 0, settings.sobelKernelSize, scale, delta, BORDER_DEFAULT ); cv::Sobel( outputIm, grad_y, ddepth, 0, 1, settings.sobelKernelSize, scale, delta, BORDER_DEFAULT ); cv::convertScaleAbs( grad_x, abs_grad_x ); cv::convertScaleAbs( grad_y, abs_grad_y ); cv::addWeighted( abs_grad_x, 0.5, abs_grad_y, 0.5, 0, outputIm ); } break; 
} } if (filters.flags[ImageProcessingFlags::Laplacian]) { int scale = 1; int delta = 0; int ddepth = CV_16S; cv::Laplacian( outputIm, outputIm, ddepth, settings.laplacianKernelSize, scale, delta, BORDER_DEFAULT ); cv::convertScaleAbs( outputIm, outputIm ); } if (filters.flags[ImageProcessingFlags::SharpByKernel]) { cv::Mat kernel(3,3,CV_32F,cv::Scalar(0));// init the kernel with zeros // assigns kernel values kernel.at<float>(1,1)= settings.sharpKernelCenter; kernel.at<float>(0,1)= -1.0; kernel.at<float>(2,1)= -1.0; kernel.at<float>(1,0)= -1.0; kernel.at<float>(1,2)= -1.0; //filter the image cv::filter2D(outputIm,outputIm,outputIm.depth(),kernel); } if (filters.flags[ImageProcessingFlags::EdgeDetection]) { // with canny cv::Canny(outputIm, outputIm, settings.cannyLowThres, settings.cannyHighThres); } if (filters.flags[ImageProcessingFlags::LinesHough]) { // Apply Canny algorithm cv::Mat contours; cv::Canny(outputIm,contours,125,350); // Hough tranform for line detection vector<cv::Vec2f> lines; cv::HoughLines(contours,lines, 1, PI/180, settings.linesHoughVotes); vector<cv::Vec2f>::const_iterator it= lines.begin(); while (it!=lines.end()) { float rho = (*it)[0]; // first element is distance rho float theta = (*it)[1]; // second element is angle theta if (theta < PI/4. || theta > 3.*PI/4.) {// ~vertical line // point of intersection of the line with first row cv::Point pt1(rho/cos(theta),0); // point of intersection of the line with last row cv::Point pt2((rho-contours.rows*sin(theta))/cos(theta),contours.rows); // draw a white line cv::line( outputIm, pt1, pt2, cv::Scalar(255), 1); } else { // ~horizontal line // point of intersection of the line with first column cv::Point pt1(0,rho/sin(theta)); // point of intersection of the line with last column cv::Point pt2(contours.cols, (rho-contours.cols*cos(theta))/sin(theta)); // draw a white line cv::line(outputIm, pt1, pt2, cv::Scalar(255), 1); } ++it; } } if (filters.flags[ImageProcessingFlags::CirclesHough]) { cv::Mat temp; if (outputIm.channels() > 1) { cv::cvtColor(outputIm, temp, CV_RGB2GRAY); } else { temp = outputIm; } cv::GaussianBlur(temp, temp, cv::Size(5,5), 1.5); vector<cv::Vec3f> circles; cv::HoughCircles(temp, circles, CV_HOUGH_GRADIENT, 2, // accumulator resolution (size of the image / 2) 50, // minimum distance between two circles 200, // Canny high threshold 60, // minimum number of votes settings.circlesHoughMin, settings.circlesHoughMax); std::vector<cv::Vec3f>::const_iterator itc= circles.begin(); while (itc!=circles.end()) { cv::circle(outputIm, cv::Point((*itc)[0], (*itc)[1]), // circle centre (*itc)[2], // circle radius cv::Scalar(255), // color 2); // thickness ++itc; } } if (filters.flags[ImageProcessingFlags::Countours]) { cv::Mat temp; if (outputIm.channels() > 1) { cv::cvtColor(outputIm, temp, CV_RGB2GRAY); } else { temp = outputIm; } cv::blur(temp, temp, Size(3,3)); cv::Canny(temp, temp, settings.contoursThres, settings.contoursThres+30); vector< vector<cv::Point> > contours; cv::findContours(temp, contours, // a vector of contours CV_RETR_TREE, // retrieve all contours, reconstructs a full hierarchy CV_CHAIN_APPROX_NONE); // all pixels of each contours cv::drawContours(outputIm,contours, -1, // draw all contours cv::Scalar(255, 255, 255), // in white 1); // with a thickness of 1 } if (filters.flags[ImageProcessingFlags::BoundingBox]) { cv::Mat temp; if (outputIm.channels() > 1) { cv::cvtColor(outputIm, temp, CV_RGB2GRAY); } else { temp = outputIm; } cv::blur(temp, temp, Size(3,3)); cv::Canny(temp, temp, 
settings.boundingBoxThres, settings.boundingBoxThres*2); vector< vector<cv::Point> > contours; cv::findContours(temp, contours, // a vector of contours CV_RETR_TREE, // retrieve all contours, reconstructs a full hierarchy CV_CHAIN_APPROX_NONE); // all pixels of each contours vector< vector<cv::Point> >::iterator itc = contours.begin(); while (itc != contours.end()) { cv::Rect r0 = cv::boundingRect(cv::Mat(*itc)); cv::rectangle(outputIm,r0,cv::Scalar(255, 0, 0), 2); ++itc; } } if (filters.flags[ImageProcessingFlags::enclosingCircle]) { cv::Mat temp; if (outputIm.channels() > 1) { cv::cvtColor(outputIm, temp, CV_RGB2GRAY); } else { temp = outputIm; } cv::blur(temp, temp, Size(3,3)); cv::Canny(temp, temp, settings.enclosingCircleThres, settings.enclosingCircleThres*2); vector< vector<cv::Point> > contours; cv::findContours(temp, contours, // a vector of contours CV_RETR_TREE, // retrieve all contours, reconstructs a full hierarchy CV_CHAIN_APPROX_NONE); // all pixels of each contours vector< vector<cv::Point> >::iterator itc = contours.begin(); while (itc != contours.end()) { float radius; cv::Point2f center; cv::minEnclosingCircle(cv::Mat(*itc),center,radius); cv::circle(outputIm, center, static_cast<int>(radius), cv::Scalar(0, 255, 0), 2); ++itc; } } if (filters.flags[ImageProcessingFlags::harris]) { cv::Mat temp; if (outputIm.channels() > 1) { cv::cvtColor(outputIm, temp, CV_RGB2GRAY); } else { temp = outputIm; } // Detector parameters int blockSize = 2; int apertureSize = 3; double k = 0.04; // Detecting corners cv::cornerHarris(temp, temp, blockSize, apertureSize, k, BORDER_DEFAULT); // Normalizing normalize(temp,temp, 0, 255, NORM_MINMAX, CV_32FC1, Mat()); // Drawing a circle around corners for( int j = 0; j < temp.rows ; j++ ) { for( int i = 0; i < temp.cols; i++ ) { if( (int) temp.at<float>(j,i) > settings.harrisCornerThres) { circle(outputIm, Point( i, j ), 5, Scalar(0, 0 , 255), 2, 8, 0); } } } } if (filters.flags[ImageProcessingFlags::FAST]) { // vector of keypoints vector<cv::KeyPoint> keypoints; // Construction of the Fast feature detector object cv::FastFeatureDetector fast(settings.fastThreshold); // threshold for detection // feature point detection fast.detect(outputIm,keypoints); cv::drawKeypoints(outputIm, keypoints, outputIm, cv::Scalar(255,255,255), cv::DrawMatchesFlags::DRAW_OVER_OUTIMG); } if (filters.flags[ImageProcessingFlags::SURF]) { // vector of keypoints vector<cv::KeyPoint> keypoints; // Construct the SURF feature detector object cv::SurfFeatureDetector surf((double) settings.surfThreshold); // threshold // Detect the SURF features surf.detect(outputIm,keypoints); // Draw the keypoints with scale and orientation information cv::drawKeypoints(outputIm, keypoints, outputIm, cv::Scalar(255,255,255),cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); } if (filters.flags[ImageProcessingFlags::SIFT]) { vector<cv::KeyPoint> keypoints; // Construct the SURF feature detector object cv::SiftFeatureDetector sift( settings.siftContrastThres, // feature threshold (double) settings.siftEdgeThres); // threshold to reduce sens. 
to lines sift.detect(outputIm,keypoints); // Draw the keypoints with scale and orientation information cv::drawKeypoints(outputIm, keypoints, outputIm, cv::Scalar(255,255,255),cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); } if (filters.flags[ImageProcessingFlags::EqualizeHistogram]) { // converting the image to gray if (outputIm.channels() == 3) { vector<Mat> bgr_planes; split( outputIm, bgr_planes ); equalizeHist( bgr_planes[0], bgr_planes[0] ); equalizeHist( bgr_planes[1], bgr_planes[1] ); equalizeHist( bgr_planes[2], bgr_planes[2] ); merge( bgr_planes, outputIm ); } else { equalizeHist( outputIm, outputIm ); } } // Computing histogram if (filters.flags[ImageProcessingFlags::ComputeHistogram]) { cv::Mat grayIm; cv::Mat hist; // converting the image to gray if (outputIm.channels() == 3) { cv::cvtColor(outputIm,grayIm, CV_RGB2GRAY); } else { grayIm = outputIm; } int histSize = 256; // number of bins float range [] = {0, 256}; // ranges const float* histRange = { range }; bool uniform = true, accumulate = false; // compute histogram cv::calcHist(&grayIm, 1, // using just one image 0, // using just one layer cv::Mat(), hist, 1, &histSize, &histRange, uniform, accumulate); int hist_w = 691; int hist_h =161; Mat result( hist_h, hist_w, CV_8UC3, Scalar( 255,255,255) ); int bin_w = cvRound( (double) hist_w/histSize ); normalize(hist, hist, 0, result.rows, NORM_MINMAX, -1, Mat()); /// Draw for each channel for( int i = 1; i < histSize; i++ ) { line(result, Point( bin_w*(i-1), hist_h - cvRound(hist.at<float>(i-1)) ), Point( bin_w*(i), hist_h - cvRound(hist.at<float>(i)) ), Scalar( 0, 0, 0), 2, 8, 0 ); } // emit signal emit newProcessedHistogram(MatToQImage(result)); } updM.unlock(); processedFrame = outputIm; // Inform GUI thread of new frame (QImage) emit newProcessedFrame(MatToQImage(outputIm)); } }
inline kernel_call transpose_kernel( const backend::command_queue &queue, size_t width, size_t height, const backend::device_vector<T2> &in, const backend::device_vector<T2> &out ) { backend::source_generator o; kernel_common<T>(o, queue); // determine max block size to fit into local memory/workgroup size_t block_size = 128; { #ifndef VEXCL_BACKEND_CUDA cl_device_id dev = backend::get_device_id(queue); cl_ulong local_size; size_t workgroup; clGetDeviceInfo(dev, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &local_size, NULL); clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &workgroup, NULL); #else const auto local_size = queue.device().max_shared_memory_per_block(); const auto workgroup = queue.device().max_threads_per_block(); #endif while(block_size * block_size * sizeof(T) * 2 > local_size) block_size /= 2; while(block_size * block_size > workgroup) block_size /= 2; } // from NVIDIA SDK. o.kernel("transpose").open("(") .template parameter< global_ptr<const T2> >("input") .template parameter< global_ptr< T2> >("output") .template parameter< cl_uint >("width") .template parameter< cl_uint >("height") .close(")").open("{"); o.new_line() << "const size_t global_x = " << o.global_id(0) << ";"; o.new_line() << "const size_t global_y = " << o.global_id(1) << ";"; o.new_line() << "const size_t local_x = " << o.local_id(0) << ";"; o.new_line() << "const size_t local_y = " << o.local_id(1) << ";"; o.new_line() << "const size_t group_x = " << o.group_id(0) << ";"; o.new_line() << "const size_t group_y = " << o.group_id(1) << ";"; o.new_line() << "const size_t target_x = local_y + group_y * " << block_size << ";"; o.new_line() << "const size_t target_y = local_x + group_x * " << block_size << ";"; o.new_line() << "const bool range = global_x < width && global_y < height;"; // local memory { std::ostringstream s; s << "block[" << block_size * block_size << "]"; o.smem_static_var(type_name<T2>(), s.str()); } // copy from input to local memory o.new_line() << "if(range) " << "block[local_x + local_y * " << block_size << "] = input[global_x + global_y * width];"; // wait until the whole block is filled o.new_line().barrier(); // transpose local block to target o.new_line() << "if(range) " << "output[target_x + target_y * height] = block[local_x + local_y * " << block_size << "];"; o.close("}"); backend::kernel kernel(queue, o.str(), "transpose"); kernel.push_arg(in); kernel.push_arg(out); kernel.push_arg(static_cast<cl_uint>(width)); kernel.push_arg(static_cast<cl_uint>(height)); // range multiple of wg size, last block maybe not completely filled. size_t r_w = (width + block_size - 1) / block_size; size_t r_h = (height + block_size - 1) / block_size; kernel.config(backend::ndrange(r_w, r_h), backend::ndrange(block_size, block_size)); std::ostringstream desc; desc << "transpose{" << "w=" << width << "(" << r_w << "), " << "h=" << height << "(" << r_h << "), " << "bs=" << block_size << "}"; return kernel_call(false, desc.str(), kernel); }
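The kernel above stages a block_size x block_size tile in local memory and writes it back with the x/y roles of the work-group swapped, so both the read and the write touch contiguous global memory. Below is a minimal host-side C++ sketch of the same tile index mapping (an illustration only, not VexCL code; BS stands in for block_size and the bounds checks play the role of the kernel's "range" test).

// Host-side illustration of the tiled transpose mapping used above.
// 'in' is width x height (row-major, element (x, y) at x + y * width);
// 'out' must already hold width * height elements and receives the transpose.
#include <cstddef>
#include <vector>

template < typename T >
void transpose_tiled( const std::vector<T>& in, std::vector<T>& out,
    std::size_t width, std::size_t height, std::size_t BS = 16 )
{
    std::vector<T> block( BS * BS ); // plays the role of the shared-memory tile
    for ( std::size_t gy = 0; gy < height; gy += BS )
    {
        for ( std::size_t gx = 0; gx < width; gx += BS )
        {
            // load one tile in row order (bounds-checked for partial tiles)
            for ( std::size_t ly = 0; ly < BS && gy + ly < height; ++ly )
                for ( std::size_t lx = 0; lx < BS && gx + lx < width; ++lx )
                    block[ lx + ly * BS ] = in[ ( gx + lx ) + ( gy + ly ) * width ];

            // store the tile transposed: the output row index is the input
            // column index and vice versa (target_x/target_y in the kernel)
            for ( std::size_t ly = 0; ly < BS && gy + ly < height; ++ly )
                for ( std::size_t lx = 0; lx < BS && gx + lx < width; ++lx )
                    out[ ( gy + ly ) + ( gx + lx ) * height ] = block[ lx + ly * BS ];
        }
    }
}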
index NodeManager::add_node( index mod, long n ) // no_p { assert( current_ != 0 ); assert( root_ != 0 ); if ( mod >= kernel().model_manager.get_num_node_models() ) { throw UnknownModelID( mod ); } if ( n < 1 ) { throw BadProperty(); } const thread n_threads = kernel().vp_manager.get_num_threads(); assert( n_threads > 0 ); const index min_gid = local_nodes_.get_max_gid() + 1; const index max_gid = min_gid + n; Model* model = kernel().model_manager.get_model( mod ); assert( model != 0 ); model->deprecation_warning( "Create" ); /* current_ points to the instance of the current subnet on thread 0. The following code makes subnet a pointer to the wrapper container containing the instances of the current subnet on all threads. */ const index subnet_gid = current_->get_gid(); Node* subnet_node = local_nodes_.get_node_by_gid( subnet_gid ); assert( subnet_node != 0 ); SiblingContainer* subnet_container = dynamic_cast< SiblingContainer* >( subnet_node ); assert( subnet_container != 0 ); assert( subnet_container->num_thread_siblings() == static_cast< size_t >( n_threads ) ); assert( subnet_container->get_thread_sibling( 0 ) == current_ ); if ( max_gid > local_nodes_.max_size() || max_gid < min_gid ) { LOG( M_ERROR, "NodeManager::add:node", "Requested number of nodes will overflow the memory." ); LOG( M_ERROR, "NodeManager::add:node", "No nodes were created." ); throw KernelException( "OutOfMemory" ); } kernel().modelrange_manager.add_range( mod, min_gid, max_gid - 1 ); if ( model->potential_global_receiver() and kernel().mpi_manager.get_num_rec_processes() > 0 ) { // In this branch we create nodes for global receivers const int n_per_process = n / kernel().mpi_manager.get_num_rec_processes(); const int n_per_thread = n_per_process / n_threads + 1; // We only need to reserve memory on the ranks on which we // actually create nodes. In this if-branch ---> Only on recording // processes if ( kernel().mpi_manager.get_rank() >= kernel().mpi_manager.get_num_sim_processes() ) { local_nodes_.reserve( std::ceil( static_cast< double >( max_gid ) / kernel().mpi_manager.get_num_sim_processes() ) ); for ( thread t = 0; t < n_threads; ++t ) { // Model::reserve() reserves memory for n ADDITIONAL nodes on thread t model->reserve_additional( t, n_per_thread ); } } for ( size_t gid = min_gid; gid < max_gid; ++gid ) { const thread vp = kernel().vp_manager.suggest_rec_vp( get_n_gsd() ); const thread t = kernel().vp_manager.vp_to_thread( vp ); if ( kernel().vp_manager.is_local_vp( vp ) ) { Node* newnode = model->allocate( t ); newnode->set_gid_( gid ); newnode->set_model_id( mod ); newnode->set_thread( t ); newnode->set_vp( vp ); newnode->set_has_proxies( true ); newnode->set_local_receiver( false ); local_nodes_.add_local_node( *newnode ); // put into local nodes list current_->add_node( newnode ); // and into current subnet, thread 0. } else { local_nodes_.add_remote_node( gid ); // ensures max_gid is correct current_->add_remote_node( gid, mod ); } increment_n_gsd(); } } else if ( model->has_proxies() ) { // In this branch we create nodes for all GIDs which are on a local thread const int n_per_process = n / kernel().mpi_manager.get_num_sim_processes(); const int n_per_thread = n_per_process / n_threads + 1; // We only need to reserve memory on the ranks on which we // actually create nodes. In this if-branch ---> Only on // simulation processes if ( kernel().mpi_manager.get_rank() < kernel().mpi_manager.get_num_sim_processes() ) { // TODO: This will work reasonably for round-robin. 
The extra 50 entries // are for subnets and devices. local_nodes_.reserve( std::ceil( static_cast< double >( max_gid ) / kernel().mpi_manager.get_num_sim_processes() ) + 50 ); for ( thread t = 0; t < n_threads; ++t ) { // Model::reserve() reserves memory for n ADDITIONAL nodes on thread t // reserves at least one entry on each thread, nobody knows why model->reserve_additional( t, n_per_thread ); } } size_t gid; if ( kernel().vp_manager.is_local_vp( kernel().vp_manager.suggest_vp( min_gid ) ) ) { gid = min_gid; } else { gid = next_local_gid_( min_gid ); } size_t next_lid = current_->global_size() + gid - min_gid; // The next loop will not visit every node, if more than one rank is // present. // Since we already know what range of gids will be created, we can tell the // current subnet the range and subsequent calls to // `current_->add_remote_node()` // become irrelevant. current_->add_gid_range( min_gid, max_gid - 1 ); // min_gid is first valid gid i should create, hence ask for the first local // gid after min_gid-1 while ( gid < max_gid ) { const thread vp = kernel().vp_manager.suggest_vp( gid ); const thread t = kernel().vp_manager.vp_to_thread( vp ); if ( kernel().vp_manager.is_local_vp( vp ) ) { Node* newnode = model->allocate( t ); newnode->set_gid_( gid ); newnode->set_model_id( mod ); newnode->set_thread( t ); newnode->set_vp( vp ); local_nodes_.add_local_node( *newnode ); // put into local nodes list current_->add_node( newnode ); // and into current subnet, thread 0. // lid setting is wrong, if a range is set, as the subnet already // assumes, // the nodes are available. newnode->set_lid_( next_lid ); const size_t next_gid = next_local_gid_( gid ); next_lid += next_gid - gid; gid = next_gid; } else { ++gid; // brutal fix, next_lid has been set in if-branch } } // if last gid is not on this process, we need to add it as a remote node if ( not kernel().vp_manager.is_local_vp( kernel().vp_manager.suggest_vp( max_gid - 1 ) ) ) { local_nodes_.add_remote_node( max_gid - 1 ); // ensures max_gid is correct current_->add_remote_node( max_gid - 1, mod ); } } else if ( not model->one_node_per_process() ) { // We allocate space for n containers which will hold the threads // sorted. We use SiblingContainers to store the instances for // each thread to exploit the very efficient memory allocation for // nodes. // // These containers are registered in the global nodes_ array to // provide access to the instances both for manipulation by SLI // functions and so that NodeManager::calibrate() can discover the // instances and register them for updating. // // The instances are also registered with the instance of the // current subnet for the thread to which the created instance // belongs. This is mainly important so that the subnet structure // is preserved on all VPs. Node enumeration is done on by the // registration with the per-thread instances. // // The wrapper container can be addressed under the GID assigned // to no-proxy node created. If this no-proxy node is NOT a // container (e.g. a device), then each instance can be retrieved // by giving the respective thread-id to get_node(). Instances of // SiblingContainers cannot be addressed individually. // // The allocation of the wrapper containers is spread over threads // to balance memory load. size_t container_per_thread = n / n_threads + 1; // since we create the n nodes on each thread, we reserve the full load. 
for ( thread t = 0; t < n_threads; ++t ) { model->reserve_additional( t, n ); siblingcontainer_model_->reserve_additional( t, container_per_thread ); static_cast< Subnet* >( subnet_container->get_thread_sibling( t ) ) ->reserve( n ); } // The following loop creates n nodes. For each node, a wrapper is created // and filled with one instance per thread, in total n * n_thread nodes in // n wrappers. local_nodes_.reserve( std::ceil( static_cast< double >( max_gid ) / kernel().mpi_manager.get_num_sim_processes() ) + 50 ); for ( index gid = min_gid; gid < max_gid; ++gid ) { thread thread_id = kernel().vp_manager.vp_to_thread( kernel().vp_manager.suggest_vp( gid ) ); // Create wrapper and register with nodes_ array. SiblingContainer* container = static_cast< SiblingContainer* >( siblingcontainer_model_->allocate( thread_id ) ); container->set_model_id( -1 ); // mark as pseudo-container wrapping replicas, see reset_network() container->reserve( n_threads ); // space for one instance per thread container->set_gid_( gid ); local_nodes_.add_local_node( *container ); // Generate one instance of desired model per thread for ( thread t = 0; t < n_threads; ++t ) { Node* newnode = model->allocate( t ); newnode->set_gid_( gid ); // all instances get the same global id. newnode->set_model_id( mod ); newnode->set_thread( t ); newnode->set_vp( kernel().vp_manager.thread_to_vp( t ) ); // Register instance with wrapper // container has one entry for each thread container->push_back( newnode ); // Register instance with per-thread instance of enclosing subnet. static_cast< Subnet* >( subnet_container->get_thread_sibling( t ) ) ->add_node( newnode ); } } } else { // no proxies and one node per process // this is used by MUSIC proxies // Per r9700, this case is only relevant for music_*_proxy models, // which have a single instance per MPI process. for ( index gid = min_gid; gid < max_gid; ++gid ) { Node* newnode = model->allocate( 0 ); newnode->set_gid_( gid ); newnode->set_model_id( mod ); newnode->set_thread( 0 ); newnode->set_vp( kernel().vp_manager.thread_to_vp( 0 ) ); // Register instance local_nodes_.add_local_node( *newnode ); // and into current subnet, thread 0. current_->add_node( newnode ); } } // set off-grid spike communication if necessary if ( model->is_off_grid() ) { kernel().event_delivery_manager.set_off_grid_communication( true ); LOG( M_INFO, "NodeManager::add_node", "Neuron models emitting precisely timed spikes exist: " "the kernel property off_grid_spiking has been set to true.\n\n" "NOTE: Mixing precise-spiking and normal neuron models may " "lead to inconsistent results." ); } return max_gid - 1; }
inline kernel_call bluestein_mul_in( const backend::command_queue &queue, bool inverse, size_t batch, size_t radix, size_t p, size_t threads, size_t stride, const backend::device_vector<T2> &data, const backend::device_vector<T2> &exp, const backend::device_vector<T2> &out ) { backend::source_generator o; kernel_common<T>(o, queue); mul_code<T2>(o, false); twiddle_code<T, T2>(o); o.kernel("bluestein_mul_in").open("(") .template parameter< global_ptr<const T2> >("data") .template parameter< global_ptr<const T2> >("exp") .template parameter< global_ptr< T2> >("output") .template parameter< cl_uint >("radix") .template parameter< cl_uint >("p") .template parameter< cl_uint >("out_stride") .close(")").open("{"); o.new_line() << "const size_t thread = " << o.global_id(0) << ";"; o.new_line() << "const size_t threads = " << o.global_size(0) << ";"; o.new_line() << "const size_t batch = " << o.global_id(1) << ";"; o.new_line() << "const size_t element = " << o.global_id(2) << ";"; o.new_line() << "if(element < out_stride)"; o.open("{"); o.new_line() << "const size_t in_off = thread + batch * radix * threads + element * threads;"; o.new_line() << "const size_t out_off = thread * out_stride + batch * out_stride * threads + element;"; o.new_line() << "if(element < radix)"; o.open("{"); o.new_line() << type_name<T2>() << " w = exp[element];"; o.new_line() << "if(p != 1)"; o.open("{"); o.new_line() << "ulong a = (ulong)element * (thread % p);"; o.new_line() << "ulong b = (ulong)radix * p;"; o.new_line() << type_name<T2>() << " t = twiddle(" << std::setprecision(16) << (inverse ? 1 : -1) * boost::math::constants::two_pi<T>() << " * (a % (2 * b)) / b);"; o.new_line() << "w = mul(w, t);"; o.close("}"); o.new_line() << "output[out_off] = mul(data[in_off], w);"; o.close("}"); o.new_line() << "else"; o.open("{"); o.new_line() << type_name<T2>() << " r = {0,0};"; o.new_line() << "output[out_off] = r;"; o.close("}"); o.close("}"); o.close("}"); backend::kernel kernel(queue, o.str(), "bluestein_mul_in"); kernel.push_arg(data); kernel.push_arg(exp); kernel.push_arg(out); kernel.push_arg(static_cast<cl_uint>(radix)); kernel.push_arg(static_cast<cl_uint>(p)); kernel.push_arg(static_cast<cl_uint>(stride)); const size_t wg = kernel.preferred_work_group_size_multiple(queue); const size_t stride_pad = (stride + wg - 1) / wg; kernel.config( backend::ndrange(threads, batch, stride_pad), backend::ndrange( 1, 1, wg) ); std::ostringstream desc; desc << "bluestein_mul_in{batch=" << batch << ", radix=" << radix << ", p=" << p << ", threads=" << threads << ", stride=" << stride << "(" << stride_pad << "), wg=" << wg << "}"; return kernel_call(false, desc.str(), kernel); }
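// For context, the identity behind Bluestein's algorithm, which the pre-multiplication
// above implements (the exp buffer presumably holds the precomputed chirp). Using
// nk = (n^2 + k^2 - (k - n)^2) / 2, the length-N DFT becomes a convolution:
//
//   X_k = \sum_{n=0}^{N-1} x_n e^{-2\pi i n k / N}
//       = e^{-\pi i k^2 / N} \sum_{n=0}^{N-1} \big( x_n e^{-\pi i n^2 / N} \big) e^{+\pi i (k - n)^2 / N},
//
// i.e. a chirp pre-multiplication (this kernel, which also folds in the ordinary radix/p
// twiddle when p != 1), a linear convolution with the conjugate chirp evaluated via
// zero-padded FFTs of a convenient size, and a chirp post-multiplication.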
bool NodeManager::is_local_node( Node* n ) const { return kernel().vp_manager.is_local_vp( n->get_vp() ); }
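// A minimal sketch (illustrative only, not NEST code) of the companion gid-based
// locality test in the spirit of kernel().node_manager.is_local_gid() used by the
// connection builders below. It assumes round-robin assignment of gids to virtual
// processes and of virtual processes to MPI ranks; the actual suggest_vp() /
// is_local_vp() policies may differ.
#include <cstddef>

bool
is_local_gid_sketch( std::size_t gid, std::size_t num_vps, std::size_t num_ranks, std::size_t my_rank )
{
  const std::size_t vp = gid % num_vps; // hypothetical gid -> virtual process mapping
  return vp % num_ranks == my_rank;     // hypothetical virtual process -> rank mapping
}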
void TopologyModule::init( SLIInterpreter* i ) { // Register the topology functions as SLI commands. i->createcommand( "CreateLayer_D", &createlayer_Dfunction ); i->createcommand( "GetPosition_i", &getposition_ifunction ); i->createcommand( "Displacement_a_i", &displacement_a_ifunction ); i->createcommand( "Distance_a_i", &distance_a_ifunction ); i->createcommand( "CreateMask_D", &createmask_Dfunction ); i->createcommand( "Inside_a_M", &inside_a_Mfunction ); i->createcommand( "and_M_M", &and_M_Mfunction ); i->createcommand( "or_M_M", &or_M_Mfunction ); i->createcommand( "sub_M_M", &sub_M_Mfunction ); i->createcommand( "mul_P_P", &mul_P_Pfunction ); i->createcommand( "div_P_P", &div_P_Pfunction ); i->createcommand( "add_P_P", &add_P_Pfunction ); i->createcommand( "sub_P_P", &sub_P_Pfunction ); i->createcommand( "GetGlobalChildren_i_M_a", &getglobalchildren_i_M_afunction ); i->createcommand( "ConnectLayers_i_i_D", &connectlayers_i_i_Dfunction ); i->createcommand( "CreateParameter_D", &createparameter_Dfunction ); i->createcommand( "GetValue_a_P", &getvalue_a_Pfunction ); i->createcommand( "DumpLayerNodes_os_i", &dumplayernodes_os_ifunction ); i->createcommand( "DumpLayerConnections_os_i_l", &dumplayerconnections_os_i_lfunction ); i->createcommand( "GetElement_i_ia", &getelement_i_iafunction ); i->createcommand( "cvdict_M", &cvdict_Mfunction ); i->createcommand( "SelectNodesByMask_L_a_M", &selectnodesbymask_L_a_Mfunction ); kernel().model_manager.register_node_model< FreeLayer< 2 > >( "topology_layer_free" ); kernel().model_manager.register_node_model< FreeLayer< 3 > >( "topology_layer_free_3d" ); kernel().model_manager.register_node_model< GridLayer< 2 > >( "topology_layer_grid" ); kernel().model_manager.register_node_model< GridLayer< 3 > >( "topology_layer_grid_3d" ); // Register mask types register_mask< BallMask< 2 > >(); register_mask< BallMask< 3 > >(); register_mask< EllipseMask< 2 > >(); register_mask< EllipseMask< 3 > >(); register_mask< BoxMask< 2 > >(); register_mask< BoxMask< 3 > >(); register_mask< BoxMask< 3 > >( "volume" ); // For compatibility with topo 2.0 register_mask( "doughnut", create_doughnut ); register_mask< GridMask< 2 > >(); // Register parameter types register_parameter< ConstantParameter >( "constant" ); register_parameter< LinearParameter >( "linear" ); register_parameter< ExponentialParameter >( "exponential" ); register_parameter< GaussianParameter >( "gaussian" ); register_parameter< Gaussian2DParameter >( "gaussian2D" ); register_parameter< GammaParameter >( "gamma" ); register_parameter< UniformParameter >( "uniform" ); register_parameter< NormalParameter >( "normal" ); register_parameter< LognormalParameter >( "lognormal" ); }
void NodeManager::ensure_valid_thread_local_ids() { // Check if the network size changed, in order to not enter // the critical region if it is not necessary. Note that this // test also covers the case that nodes have been deleted // by reset. if ( size() == nodes_vec_network_size_ ) { return; } #ifdef _OPENMP #pragma omp critical( update_nodes_vec ) { // This code may be called from a thread-parallel context, when it is // invoked by TargetIdentifierIndex::set_target() during parallel // wiring. Nested OpenMP parallelism is problematic; therefore, we // enforce single threading here. This should be unproblematic wrt // performance, because the nodes_vec_ is rebuilt only once after // changes in network size. #endif // Check again if the network size changed, since a previous thread // can have updated nodes_vec_ before. if ( size() != nodes_vec_network_size_ ) { /* We clear the existing nodes_vec_ and then rebuild it. */ nodes_vec_.clear(); nodes_vec_.resize( kernel().vp_manager.get_num_threads() ); wfr_nodes_vec_.clear(); wfr_nodes_vec_.resize( kernel().vp_manager.get_num_threads() ); for ( index t = 0; t < kernel().vp_manager.get_num_threads(); ++t ) { nodes_vec_[ t ].clear(); wfr_nodes_vec_[ t ].clear(); // Loops below run from index 1, because index 0 is always the root // network, which is never updated. size_t num_thread_local_nodes = 0; size_t num_thread_local_wfr_nodes = 0; for ( size_t idx = 1; idx < local_nodes_.size(); ++idx ) { Node* node = local_nodes_.get_node_by_index( idx ); if ( not node->is_subnet() && ( static_cast< index >( node->get_thread() ) == t || node->num_thread_siblings() > 0 ) ) { num_thread_local_nodes++; if ( node->node_uses_wfr() ) { num_thread_local_wfr_nodes++; } } } nodes_vec_[ t ].reserve( num_thread_local_nodes ); wfr_nodes_vec_[ t ].reserve( num_thread_local_wfr_nodes ); for ( size_t idx = 1; idx < local_nodes_.size(); ++idx ) { Node* node = local_nodes_.get_node_by_index( idx ); // Subnets are never updated and therefore not included. if ( node->is_subnet() ) { continue; } // If a node has thread siblings, it is a sibling container, and we // need to add the replica for the current thread. Otherwise, we have // a normal node, which is added only on the thread it belongs to. if ( node->num_thread_siblings() > 0 ) { node->get_thread_sibling( t )->set_thread_lid( nodes_vec_[ t ].size() ); nodes_vec_[ t ].push_back( node->get_thread_sibling( t ) ); } else if ( static_cast< index >( node->get_thread() ) == t ) { // these nodes cannot be subnets node->set_thread_lid( nodes_vec_[ t ].size() ); nodes_vec_[ t ].push_back( node ); if ( node->node_uses_wfr() ) { wfr_nodes_vec_[ t ].push_back( node ); } } } } // end of for threads nodes_vec_network_size_ = size(); wfr_is_used_ = false; // wfr_is_used_ indicates whether at least one // of the threads has a neuron that uses waveform relaxation; // all threads then need to perform a wfr_update // step, because gather_events() has to be done in an // OpenMP single section for ( index t = 0; t < kernel().vp_manager.get_num_threads(); ++t ) { if ( wfr_nodes_vec_[ t ].size() > 0 ) { wfr_is_used_ = true; } } } #ifdef _OPENMP } // end of omp critical region #endif }
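// The function above relies on a check / critical-section / re-check idiom so that
// only one thread rebuilds nodes_vec_ when several threads race into it. A minimal,
// self-contained sketch of the same pattern (all names are illustrative only):
#include <cstddef>
#include <vector>

static std::vector< int > cache_;        // hypothetical shared cache
static std::size_t cached_for_size_ = 0; // size the cache was last built for

void
ensure_cache_valid( std::size_t current_size )
{
  if ( current_size == cached_for_size_ )
  {
    return; // cheap check outside the critical region
  }
#pragma omp critical( rebuild_cache )
  {
    // re-check: another thread may have rebuilt the cache while we waited for the lock
    if ( current_size != cached_for_size_ )
    {
      cache_.assign( current_size, 0 );
      cached_for_size_ = current_size;
    }
  }
}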
void EventDeliveryManager::collocate_buffers_( bool done ) { // count number of spikes in registers int num_spikes = 0; int num_grid_spikes = 0; int num_offgrid_spikes = 0; int uintsize_secondary_events = 0; std::vector< std::vector< std::vector< unsigned int > > >::iterator i; std::vector< std::vector< unsigned int > >::iterator j; for ( i = spike_register_.begin(); i != spike_register_.end(); ++i ) for ( j = i->begin(); j != i->end(); ++j ) num_grid_spikes += j->size(); std::vector< std::vector< std::vector< OffGridSpike > > >::iterator it; std::vector< std::vector< OffGridSpike > >::iterator jt; for ( it = offgrid_spike_register_.begin(); it != offgrid_spike_register_.end(); ++it ) for ( jt = it->begin(); jt != it->end(); ++jt ) num_offgrid_spikes += jt->size(); // accumulate number of generated spikes in the local spike counter local_spike_counter_ += num_grid_spikes + num_offgrid_spikes; // here we need to count the secondary events and take them // into account in the size of the buffers // assume that we already serialized all secondary // events into the secondary_events_buffer_ // and that secondary_events_buffer_.size() contains the correct size // of this buffer in units of unsigned int for ( j = secondary_events_buffer_.begin(); j != secondary_events_buffer_.end(); ++j ) uintsize_secondary_events += j->size(); // +1 because we need one end marker invalid_synindex // +1 for bool-value done num_spikes = num_grid_spikes + num_offgrid_spikes + uintsize_secondary_events + 2; if ( !off_grid_spiking_ ) // on grid spiking { // make sure buffers are correctly sized if ( global_grid_spikes_.size() != static_cast< unsigned int >( kernel().mpi_manager.get_recv_buffer_size() ) ) global_grid_spikes_.resize( kernel().mpi_manager.get_recv_buffer_size(), 0 ); if ( num_spikes + ( kernel().vp_manager.get_num_threads() * kernel().connection_manager.get_min_delay() ) > static_cast< unsigned int >( kernel().mpi_manager.get_send_buffer_size() ) ) local_grid_spikes_.resize( ( num_spikes + ( kernel().connection_manager.get_min_delay() * kernel().vp_manager.get_num_threads() ) ), 0 ); else if ( local_grid_spikes_.size() < static_cast< unsigned int >( kernel().mpi_manager.get_send_buffer_size() ) ) local_grid_spikes_.resize( kernel().mpi_manager.get_send_buffer_size(), 0 ); // collocate the entries of spike_registers into local_grid_spikes__ std::vector< unsigned int >::iterator pos = local_grid_spikes_.begin(); if ( num_offgrid_spikes == 0 ) { for ( i = spike_register_.begin(); i != spike_register_.end(); ++i ) for ( j = i->begin(); j != i->end(); ++j ) { pos = std::copy( j->begin(), j->end(), pos ); *pos = comm_marker_; ++pos; } } else { std::vector< OffGridSpike >::iterator n; it = offgrid_spike_register_.begin(); for ( i = spike_register_.begin(); i != spike_register_.end(); ++i ) { jt = it->begin(); for ( j = i->begin(); j != i->end(); ++j ) { pos = std::copy( j->begin(), j->end(), pos ); for ( n = jt->begin(); n != jt->end(); ++n ) { *pos = n->get_gid(); ++pos; } *pos = comm_marker_; ++pos; ++jt; } ++it; } for ( it = offgrid_spike_register_.begin(); it != offgrid_spike_register_.end(); ++it ) for ( jt = it->begin(); jt != it->end(); ++jt ) jt->clear(); } // remove old spikes from the spike_register_ for ( i = spike_register_.begin(); i != spike_register_.end(); ++i ) for ( j = i->begin(); j != i->end(); ++j ) j->clear(); // here all spikes have been written to the local_grid_spikes buffer // pos points to next position in this outgoing communication buffer for ( j = 
secondary_events_buffer_.begin(); j != secondary_events_buffer_.end(); ++j ) { pos = std::copy( j->begin(), j->end(), pos ); j->clear(); } // end marker after last secondary event // made sure in resize that this position is still allocated write_to_comm_buffer( invalid_synindex, pos ); // append the boolean value indicating whether we are done here write_to_comm_buffer( done, pos ); } else // off_grid_spiking { // make sure buffers are correctly sized if ( global_offgrid_spikes_.size() != static_cast< unsigned int >( kernel().mpi_manager.get_recv_buffer_size() ) ) global_offgrid_spikes_.resize( kernel().mpi_manager.get_recv_buffer_size(), OffGridSpike( 0, 0.0 ) ); if ( num_spikes + ( kernel().vp_manager.get_num_threads() * kernel().connection_manager.get_min_delay() ) > static_cast< unsigned int >( kernel().mpi_manager.get_send_buffer_size() ) ) local_offgrid_spikes_.resize( ( num_spikes + ( kernel().connection_manager.get_min_delay() * kernel().vp_manager.get_num_threads() ) ), OffGridSpike( 0, 0.0 ) ); else if ( local_offgrid_spikes_.size() < static_cast< unsigned int >( kernel().mpi_manager.get_send_buffer_size() ) ) local_offgrid_spikes_.resize( kernel().mpi_manager.get_send_buffer_size(), OffGridSpike( 0, 0.0 ) ); // collocate the entries of spike_registers into local_offgrid_spikes__ std::vector< OffGridSpike >::iterator pos = local_offgrid_spikes_.begin(); if ( num_grid_spikes == 0 ) for ( it = offgrid_spike_register_.begin(); it != offgrid_spike_register_.end(); ++it ) for ( jt = it->begin(); jt != it->end(); ++jt ) { pos = std::copy( jt->begin(), jt->end(), pos ); pos->set_gid( comm_marker_ ); ++pos; } else { std::vector< unsigned int >::iterator n; i = spike_register_.begin(); for ( it = offgrid_spike_register_.begin(); it != offgrid_spike_register_.end(); ++it ) { j = i->begin(); for ( jt = it->begin(); jt != it->end(); ++jt ) { pos = std::copy( jt->begin(), jt->end(), pos ); for ( n = j->begin(); n != j->end(); ++n ) { *pos = OffGridSpike( *n, 0 ); ++pos; } pos->set_gid( comm_marker_ ); ++pos; ++j; } ++i; } for ( i = spike_register_.begin(); i != spike_register_.end(); ++i ) for ( j = i->begin(); j != i->end(); ++j ) j->clear(); } // empty offgrid_spike_register_ for ( it = offgrid_spike_register_.begin(); it != offgrid_spike_register_.end(); ++it ) for ( jt = it->begin(); jt != it->end(); ++jt ) jt->clear(); } }
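// A minimal sketch (illustrative names, not NEST code) of the collocation idea used by
// collocate_buffers_() above: flatten the per-thread registers into one flat send buffer,
// writing a marker after each register so the receiving side can recover the boundaries.
#include <algorithm>
#include <cstddef>
#include <vector>

std::vector< unsigned int >
collocate_sketch( std::vector< std::vector< unsigned int > >& registers, unsigned int marker )
{
  std::size_t total = 0;
  for ( std::size_t r = 0; r < registers.size(); ++r )
  {
    total += registers[ r ].size() + 1; // +1 for the marker terminating each register
  }
  std::vector< unsigned int > buffer( total );
  std::vector< unsigned int >::iterator pos = buffer.begin();
  for ( std::size_t r = 0; r < registers.size(); ++r )
  {
    pos = std::copy( registers[ r ].begin(), registers[ r ].end(), pos );
    *pos = marker; // boundary marker, playing the role of comm_marker_ above
    ++pos;
    registers[ r ].clear(); // registers are emptied once their content is collocated
  }
  return buffer;
}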
void NodeManager::prepare_nodes() { assert( kernel().is_initialized() ); /* We initialize the buffers of each node and calibrate it. */ size_t num_active_nodes = 0; // counts nodes that will be updated size_t num_active_wfr_nodes = 0; // counts nodes that use waveform relaxation std::vector< lockPTR< WrappedThreadException > > exceptions_raised( kernel().vp_manager.get_num_threads() ); #ifdef _OPENMP #pragma omp parallel reduction( + : num_active_nodes, num_active_wfr_nodes ) { size_t t = kernel().vp_manager.get_thread_id(); #else for ( index t = 0; t < kernel().vp_manager.get_num_threads(); ++t ) { #endif // We prepare nodes in a parallel region. Therefore, we need to catch // exceptions here and then handle them after the parallel region. try { for ( std::vector< Node* >::iterator it = nodes_vec_[ t ].begin(); it != nodes_vec_[ t ].end(); ++it ) { prepare_node_( *it ); if ( not( *it )->is_frozen() ) { ++num_active_nodes; if ( ( *it )->node_uses_wfr() ) { ++num_active_wfr_nodes; } } } } catch ( std::exception& e ) { // so throw the exception after parallel region exceptions_raised.at( t ) = lockPTR< WrappedThreadException >( new WrappedThreadException( e ) ); } } // end of parallel section / end of for threads // check if any exceptions have been raised for ( index thr = 0; thr < kernel().vp_manager.get_num_threads(); ++thr ) { if ( exceptions_raised.at( thr ).valid() ) { throw WrappedThreadException( *( exceptions_raised.at( thr ) ) ); } } std::ostringstream os; std::string tmp_str = num_active_nodes == 1 ? " node" : " nodes"; os << "Preparing " << num_active_nodes << tmp_str << " for simulation."; if ( num_active_wfr_nodes != 0 ) { tmp_str = num_active_wfr_nodes == 1 ? " uses " : " use "; os << " " << num_active_wfr_nodes << " of them" << tmp_str << "iterative solution techniques."; } num_active_nodes_ = num_active_nodes; LOG( M_INFO, "NodeManager::prepare_nodes", os.str() ); } void NodeManager::post_run_cleanup() { #ifdef _OPENMP #pragma omp parallel { index t = kernel().vp_manager.get_thread_id(); #else // clang-format off for ( index t = 0; t < kernel().vp_manager.get_num_threads(); ++t ) { #endif // clang-format on for ( size_t idx = 0; idx < local_nodes_.size(); ++idx ) { Node* node = local_nodes_.get_node_by_index( idx ); if ( node != 0 ) { if ( node->num_thread_siblings() > 0 ) { node->get_thread_sibling( t )->post_run_cleanup(); } else { if ( static_cast< index >( node->get_thread() ) == t ) { node->post_run_cleanup(); } } } } } }
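// Exceptions must not escape an OpenMP parallel region, so prepare_nodes() (and the
// connection builders below) record one wrapped exception per thread and rethrow after
// the region ends. A minimal sketch of the same pattern using standard library types
// instead of lockPTR< WrappedThreadException > (names are illustrative only):
#include <cstddef>
#include <exception>
#include <vector>
#ifdef _OPENMP
#include <omp.h>
#endif

void
run_threads_sketch( std::size_t num_threads )
{
  std::vector< std::exception_ptr > raised( num_threads );
#ifdef _OPENMP
#pragma omp parallel num_threads( num_threads )
  {
    const std::size_t t = static_cast< std::size_t >( omp_get_thread_num() );
#else
  for ( std::size_t t = 0; t < num_threads; ++t )
  {
#endif
    try
    {
      // ... thread-local work that may throw ...
    }
    catch ( ... )
    {
      raised[ t ] = std::current_exception(); // keep a copy; the caught object dies with the catch block
    }
  }
  for ( std::size_t t = 0; t < raised.size(); ++t )
  {
    if ( raised[ t ] )
    {
      std::rethrow_exception( raised[ t ] );
    }
  }
}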
void nest::FixedOutDegreeBuilder::connect_() { librandom::RngPtr grng = kernel().rng_manager.get_grng(); for ( GIDCollection::const_iterator sgid = sources_->begin(); sgid != sources_->end(); ++sgid ) { std::set< long > ch_ids; std::vector< index > tgt_ids_; const long n_rnd = targets_->size(); for ( long j = 0; j < outdegree_; ++j ) { unsigned long t_id; index tgid; do { t_id = grng->ulrand( n_rnd ); tgid = ( *targets_ )[ t_id ]; } while ( ( not autapses_ and tgid == *sgid ) || ( not multapses_ and ch_ids.find( t_id ) != ch_ids.end() ) ); if ( not multapses_ ) ch_ids.insert( t_id ); tgt_ids_.push_back( tgid ); } #pragma omp parallel { // get thread id const int tid = kernel().vp_manager.get_thread_id(); try { // allocate pointer to thread specific random generator librandom::RngPtr rng = kernel().rng_manager.get_rng( tid ); for ( std::vector< index >::const_iterator tgid = tgt_ids_.begin(); tgid != tgt_ids_.end(); ++tgid ) { // check whether the target is on this mpi machine if ( not kernel().node_manager.is_local_gid( *tgid ) ) continue; Node* const target = kernel().node_manager.get_node( *tgid ); const thread target_thread = target->get_thread(); // check whether the target is on our thread if ( tid != target_thread ) continue; single_connect_( *sgid, *target, target_thread, rng ); } } catch ( std::exception& err ) { // We must create a new exception here, err's lifetime ends at // the end of the catch block. exceptions_raised_.at( tid ) = lockPTR< WrappedThreadException >( new WrappedThreadException( err ) ); } } } }
/** * Compute a distance substitution kernel * @param x first string * @param y second string * @return distance substitution kernel */ float kern_distance_compare(hstring_t x, hstring_t y) { float k = kernel(x, y); return knorm(norm, k, x, y, kernel); }
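// For reference: a distance substitution kernel plugs a string distance d(x, y) into a
// kernel form (the exact substitution depends on the configured type, e.g. the negative
// squared distance or an RBF-style expression), and knorm() then applies the selected
// normalization, typically
//
//   \hat{k}(x, y) = \frac{k(x, y)}{\sqrt{k(x, x) \, k(y, y)}},
//
// which maps self-similarities to 1 while keeping the kernel symmetric.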
inline void nest::ConnBuilder::check_synapse_params_( std::string syn_name, const DictionaryDatum& syn_spec ) { // throw error if weight is specified with static_synapse_hom_w if ( syn_name == "static_synapse_hom_w" ) { if ( syn_spec->known( names::weight ) ) throw BadProperty( "Weight cannot be specified since it needs to be equal " "for all connections when static_synapse_hom_w is used." ); return; } // throw error if n or a are set in quantal_stp_synapse; Connect cannot handle // them since they are integer-valued if ( syn_name == "quantal_stp_synapse" ) { if ( syn_spec->known( names::n ) ) throw NotImplemented( "Connect doesn't support the setting of parameter " "n in quantal_stp_synapse. Use SetDefaults() or CopyModel()." ); if ( syn_spec->known( names::a ) ) throw NotImplemented( "Connect doesn't support the setting of parameter " "a in quantal_stp_synapse. Use SetDefaults() or CopyModel()." ); return; } // print warning if delay is specified in the Connect call for cont_delay_synapse if ( syn_name == "cont_delay_synapse" ) { if ( syn_spec->known( names::delay ) ) LOG( M_WARNING, "Connect", "The delay will be rounded to the next multiple of the time step. " "To use a more precise time delay it needs to be defined within " "the synapse, e.g. with CopyModel()." ); return; } // throw error if no volume transmitter is defined or parameters are specified // that need to be introduced via CopyModel or SetDefaults if ( syn_name == "stdp_dopamine_synapse" ) { if ( syn_spec->known( "vt" ) ) throw NotImplemented( "Connect doesn't support the direct specification of the " "volume transmitter of stdp_dopamine_synapse in syn_spec. " "Use SetDefaults() or CopyModel()." ); // setting of parameters c and n is not thread-safe if ( kernel().vp_manager.get_num_threads() > 1 ) { if ( syn_spec->known( names::c ) ) throw NotImplemented( "For multi-threading Connect doesn't support the setting " "of parameter c in stdp_dopamine_synapse. " "Use SetDefaults() or CopyModel()." ); if ( syn_spec->known( names::n ) ) throw NotImplemented( "For multi-threading Connect doesn't support the setting " "of parameter n in stdp_dopamine_synapse. " "Use SetDefaults() or CopyModel()." ); } std::string param_arr[] = { "A_minus", "A_plus", "Wmax", "Wmin", "b", "tau_c", "tau_n", "tau_plus" }; std::vector< std::string > param_vec( param_arr, param_arr + 8 ); for ( std::vector< std::string >::iterator it = param_vec.begin(); it != param_vec.end(); it++ ) { if ( syn_spec->known( *it ) ) throw NotImplemented( "Connect doesn't support the setting of parameter " + *it + " in stdp_dopamine_synapse. Use SetDefaults() or CopyModel()." ); } return; } }
void nest::iaf_psc_alpha_canon::update( Time const& origin, const long from, const long to ) { assert( to >= 0 ); assert( static_cast< delay >( from ) < kernel().connection_manager.get_min_delay() ); assert( from < to ); // at start of slice, tell input queue to prepare for delivery if ( from == 0 ) { B_.events_.prepare_delivery(); } /* Neurons may have been initialized to superthreshold potentials. We need to check for this here and issue spikes at the beginning of the interval. */ if ( S_.y3_ >= P_.U_th_ ) { emit_instant_spike_( origin, from, V_.h_ms_ * ( 1 - std::numeric_limits< double >::epsilon() ) ); } for ( long lag = from; lag < to; ++lag ) { // time at start of update step const long T = origin.get_steps() + lag; // if neuron returns from refractoriness during this step, place // pseudo-event in queue to mark end of refractory period if ( S_.is_refractory_ && ( T + 1 - S_.last_spike_step_ == V_.refractory_steps_ ) ) { B_.events_.add_refractory( T, S_.last_spike_offset_ ); } // save state at beginning of interval for spike-time interpolation V_.y0_before_ = S_.y0_; V_.y2_before_ = S_.y2_; V_.y3_before_ = S_.y3_; // get first event double ev_offset; double ev_weight; bool end_of_refract; if ( not B_.events_.get_next_spike( T, true, ev_offset, ev_weight, end_of_refract ) ) { // No incoming spikes, handle with fixed propagator matrix. // Handling this case separately improves performance significantly // if there are many steps without input spikes. // update membrane potential if ( not S_.is_refractory_ ) { S_.y3_ = V_.P30_ * ( P_.I_e_ + S_.y0_ ) + V_.P31_ * S_.y1_ + V_.P32_ * S_.y2_ + V_.expm1_tau_m_ * S_.y3_ + S_.y3_; // lower bound of membrane potential S_.y3_ = ( S_.y3_ < P_.U_min_ ? P_.U_min_ : S_.y3_ ); } // update synaptic currents S_.y2_ = V_.expm1_tau_syn_ * V_.h_ms_ * S_.y1_ + V_.expm1_tau_syn_ * S_.y2_ + V_.h_ms_ * S_.y1_ + S_.y2_; S_.y1_ = V_.expm1_tau_syn_ * S_.y1_ + S_.y1_; /* The following must not be moved before the y1_, y2_ update, since the spike-time interpolation within emit_spike_ depends on all state variables having their values at the end of the interval. */ if ( S_.y3_ >= P_.U_th_ ) { emit_spike_( origin, lag, 0, V_.h_ms_ ); } } else { // We only get here if there is at least on event, // which has been read above. We can therefore use // a do-while loop. // Time within step is measured by offsets, which are h at the beginning // and 0 at the end of the step. double last_offset = V_.h_ms_; // start of step do { // time is measured backward: inverse order in difference const double ministep = last_offset - ev_offset; propagate_( ministep ); // check for threshold crossing during ministep // this must be done before adding the input, since // interpolation requires continuity if ( S_.y3_ >= P_.U_th_ ) { emit_spike_( origin, lag, V_.h_ms_ - last_offset, ministep ); } // handle event if ( end_of_refract ) { S_.is_refractory_ = false; } // return from refractoriness else { S_.y1_ += V_.PSCInitialValue_ * ev_weight; } // spike input // store state V_.y2_before_ = S_.y2_; V_.y3_before_ = S_.y3_; last_offset = ev_offset; } while ( B_.events_.get_next_spike( T, true, ev_offset, ev_weight, end_of_refract ) ); // no events remaining, plain update step across remainder // of interval if ( last_offset > 0 ) // not at end of step, do remainder { propagate_( last_offset ); if ( S_.y3_ >= P_.U_th_ ) { emit_spike_( origin, lag, V_.h_ms_ - last_offset, last_offset ); } } } // else // Set new input current. 
The current change occurs at the // end of the interval and thus must come AFTER the // threshold-crossing interpolation. S_.y0_ = B_.currents_.get_value( lag ); // logging B_.logger_.record_data( origin.get_steps() + lag ); } // from lag = from ... }
nest::ConnBuilder::ConnBuilder( const GIDCollection& sources, const GIDCollection& targets, const DictionaryDatum& conn_spec, const DictionaryDatum& syn_spec ) : sources_( &sources ) , targets_( &targets ) , autapses_( true ) , multapses_( true ) , symmetric_( false ) , exceptions_raised_( kernel().vp_manager.get_num_threads() ) , synapse_model_( kernel().model_manager.get_synapsedict()->lookup( "static_synapse" ) ) , weight_( 0 ) , delay_( 0 ) , param_dicts_() , parameters_requiring_skipping_() { // read out rule-related parameters ------------------------- // - /rule has been taken care of above // - rule-specific params are handled by subclass c'tor updateValue< bool >( conn_spec, names::autapses, autapses_ ); updateValue< bool >( conn_spec, names::multapses, multapses_ ); updateValue< bool >( conn_spec, names::symmetric, symmetric_ ); // read out synapse-related parameters ---------------------- if ( !syn_spec->known( names::model ) ) throw BadProperty( "Synapse spec must contain synapse model." ); const std::string syn_name = ( *syn_spec )[ names::model ]; if ( not kernel().model_manager.get_synapsedict()->known( syn_name ) ) throw UnknownSynapseType( syn_name ); // if another synapse than static_synapse is defined we need to make // sure that Connect can process all parameter specified if ( syn_name != "static_synapse" ) check_synapse_params_( syn_name, syn_spec ); synapse_model_ = kernel().model_manager.get_synapsedict()->lookup( syn_name ); DictionaryDatum syn_defaults = kernel().model_manager.get_connector_defaults( synapse_model_ ); // All synapse models have the possibility to set the delay (see // SynIdDelay), but some have homogeneous weights, hence it should // be possible to set the delay without the weight. default_weight_ = !syn_spec->known( names::weight ); default_delay_ = !syn_spec->known( names::delay ); // If neither weight nor delay are given in the dict, we handle this // separately. Important for hom_w synapses, on which weight cannot // be set. However, we use default weight and delay for _all_ types // of synapses. default_weight_and_delay_ = ( default_weight_ && default_delay_ ); #ifdef HAVE_MUSIC // We allow music_channel as alias for receptor_type during // connection setup ( *syn_defaults )[ names::music_channel ] = 0; #endif if ( !default_weight_and_delay_ ) { weight_ = syn_spec->known( names::weight ) ? ConnParameter::create( ( *syn_spec )[ names::weight ], kernel().vp_manager.get_num_threads() ) : ConnParameter::create( ( *syn_defaults )[ names::weight ], kernel().vp_manager.get_num_threads() ); register_parameters_requiring_skipping_( *weight_ ); delay_ = syn_spec->known( names::delay ) ? ConnParameter::create( ( *syn_spec )[ names::delay ], kernel().vp_manager.get_num_threads() ) : ConnParameter::create( ( *syn_defaults )[ names::delay ], kernel().vp_manager.get_num_threads() ); } else if ( default_weight_ ) { delay_ = syn_spec->known( names::delay ) ? 
ConnParameter::create( ( *syn_spec )[ names::delay ], kernel().vp_manager.get_num_threads() ) : ConnParameter::create( ( *syn_defaults )[ names::delay ], kernel().vp_manager.get_num_threads() ); } register_parameters_requiring_skipping_( *delay_ ); // Structural plasticity parameters // Check if both pre and post synaptic element are provided if ( syn_spec->known( names::pre_synaptic_element ) && syn_spec->known( names::post_synaptic_element ) ) { pre_synaptic_element_name = getValue< std::string >( syn_spec, names::pre_synaptic_element ); post_synaptic_element_name = getValue< std::string >( syn_spec, names::post_synaptic_element ); } else { if ( syn_spec->known( names::pre_synaptic_element ) || syn_spec->known( names::post_synaptic_element ) ) { throw BadProperty( "In order to use structural plasticity, both a pre and post synaptic " "element must be specified" ); } pre_synaptic_element_name = ""; post_synaptic_element_name = ""; } // synapse-specific parameters // TODO: Can we create this set once and for all? // Should not be done as static initialization, since // that might conflict with static initialization of // Name system. std::set< Name > skip_set; skip_set.insert( names::weight ); skip_set.insert( names::delay ); skip_set.insert( Name( "min_delay" ) ); skip_set.insert( Name( "max_delay" ) ); skip_set.insert( Name( "num_connections" ) ); skip_set.insert( Name( "num_connectors" ) ); skip_set.insert( Name( "property_object" ) ); skip_set.insert( Name( "synapsemodel" ) ); for ( Dictionary::const_iterator default_it = syn_defaults->begin(); default_it != syn_defaults->end(); ++default_it ) { const Name param_name = default_it->first; if ( skip_set.find( param_name ) != skip_set.end() ) continue; // weight, delay or not-settable parameter if ( syn_spec->known( param_name ) ) { synapse_params_[ param_name ] = ConnParameter::create( ( *syn_spec )[ param_name ], kernel().vp_manager.get_num_threads() ); register_parameters_requiring_skipping_( *synapse_params_[ param_name ] ); } } // Now create dictionary with dummy values that we will use // to pass settings to the synapses created. We create it here // once to avoid re-creating the object over and over again. if ( synapse_params_.size() > 0 ) { for ( index t = 0; t < kernel().vp_manager.get_num_threads(); ++t ) { param_dicts_.push_back( new Dictionary() ); for ( ConnParameterMap::const_iterator it = synapse_params_.begin(); it != synapse_params_.end(); ++it ) { if ( it->first == names::receptor_type || it->first == names::music_channel || it->first == names::synapse_label ) ( *param_dicts_[ t ] )[ it->first ] = Token( new IntegerDatum( 0 ) ); else ( *param_dicts_[ t ] )[ it->first ] = Token( new DoubleDatum( 0.0 ) ); } } } // If symmetric_ is requested call reset on all parameters in order // to check if all parameters support symmetric connections if ( symmetric_ ) { if ( weight_ ) { weight_->reset(); } if ( delay_ ) { delay_->reset(); } for ( ConnParameterMap::const_iterator it = synapse_params_.begin(); it != synapse_params_.end(); ++it ) { it->second->reset(); } } }
int main(int argc, char *argv[]) { if (argc != 4) { cout << "Usage: " << argv[0] << " cpu|gpu out_func out_prefix" << endl; return 1; } ImageParam input(UInt(8), 3, "input"); Func clamped("clamped"), grayscale("grayscale"); Func g_x("g_x"), g_y("g_y"), g_mag("g_mag"); Func sobel("sobel"); Var c("c"), x("x"), y("y"); // Algorithm clamped(x, y, c) = input( clamp(x, 0, input.width()-1), clamp(y, 0, input.height()-1), c) / 255.f; grayscale(x, y) = clamped(x, y, 0)*0.299f + clamped(x, y, 1)*0.587f + clamped(x, y, 2)*0.114f; Image<int16_t> kernel(3, 3); kernel(0, 0) = -1; kernel(0, 1) = -2; kernel(0, 2) = -1; kernel(1, 0) = 0; kernel(1, 1) = 0; kernel(1, 2) = 0; kernel(2, 0) = 1; kernel(2, 1) = 2; kernel(2, 2) = 1; RDom r(kernel); g_x(x, y) += kernel(r.x, r.y) * grayscale(x + r.x - 1, y + r.y - 1); g_y(x, y) += kernel(r.y, r.x) * grayscale(x + r.x - 1, y + r.y - 1); g_mag(x, y) = sqrt(g_x(x, y)*g_x(x, y) + g_y(x, y)*g_y(x, y)); sobel(x, y, c) = select(c==3, 255, u8(clamp(g_mag(x, y), 0, 1)*255)); // Channel order input.set_stride(0, 4); input.set_extent(2, 4); sobel.reorder_storage(c, x, y); sobel.output_buffer().set_stride(0, 4); sobel.output_buffer().set_extent(2, 4); // Schedules if (!strcmp(argv[1], "cpu")) { sobel.parallel(y).vectorize(c, 4); } else if (!strcmp(argv[1], "gpu")) { sobel.cuda_tile(x, y, 16, 4); } else { cout << "Invalid schedule type '" << argv[1] << "'" << endl; return 1; } compile(sobel, input, argv[2], argv[3]); return 0; }
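// The algorithm section above is the standard Sobel operator. With the 3x3 kernel
// written out (columns indexed by the x offset, rows by the y offset),
//
//   G_x = \begin{pmatrix} -1 & 0 & +1 \\ -2 & 0 & +2 \\ -1 & 0 & +1 \end{pmatrix} * I,
//   G_y uses the transposed kernel, and
//   |G| = \sqrt{G_x^2 + G_y^2},
//
// where * denotes 2-D correlation over the grayscale image I; the RDom reductions build
// exactly these sums, and g_mag is clamped to [0, 1] before scaling to 8 bits, with
// channel c == 3 set to fully opaque alpha.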
void nest::AllToAllBuilder::connect_() { #pragma omp parallel { // get thread id const int tid = kernel().vp_manager.get_thread_id(); try { // allocate pointer to thread specific random generator librandom::RngPtr rng = kernel().rng_manager.get_rng( tid ); for ( GIDCollection::const_iterator tgid = targets_->begin(); tgid != targets_->end(); ++tgid ) { // check whether the target is on this mpi machine if ( not kernel().node_manager.is_local_gid( *tgid ) ) { for ( GIDCollection::const_iterator sgid = sources_->begin(); sgid != sources_->end(); ++sgid ) skip_conn_parameter_( tid ); continue; } Node* const target = kernel().node_manager.get_node( *tgid ); const thread target_thread = target->get_thread(); // check whether the target is on our thread if ( tid != target_thread ) { for ( GIDCollection::const_iterator sgid = sources_->begin(); sgid != sources_->end(); ++sgid ) skip_conn_parameter_( tid ); continue; } for ( GIDCollection::const_iterator sgid = sources_->begin(); sgid != sources_->end(); ++sgid ) { if ( not autapses_ and *sgid == *tgid ) { skip_conn_parameter_( target_thread ); continue; } single_connect_( *sgid, *target, target_thread, rng ); } } } catch ( std::exception& err ) { // We must create a new exception here, err's lifetime ends at // the end of the catch block. exceptions_raised_.at( tid ) = lockPTR< WrappedThreadException >( new WrappedThreadException( err ) ); } } }
int main(int argc, char**argv){ namespace po = boost::program_options; std::string spectra_filename; std::string orig_filename; std::string output_omega_filename; std::string output_diff_filename; std::string output_tau_filename; std::string kernel_name; int n_matsubara, n_tau; kernel_type k_type=standard; double beta; bool multiply_m1divpi=false; po::options_description desc("Allowed options"); desc.add_options() ("help", "show this help") ("beta", po::value<double>(&beta), "inverse temperature") ("n_matsubara", po::value<int>(&n_matsubara)->default_value(-1), "number of matsubara frequencies") ("n_tau", po::value<int>(&n_tau)->default_value(20000), "number of imaginary time points") ("imag_freq_file", po::value<std::string>(&orig_filename)->default_value("G_omega_av.dat"), "input G(i omega_n) to maxent") ("real_freq_file", po::value<std::string>(&spectra_filename)->default_value("spectra.dat"), "output A=-1/pi*ImG(omega) from maxent") ("output_freq_file", po::value<std::string>(&output_omega_filename)->default_value("G_omega_back.dat"), "backcontinued output G(omega) with errors") ("diff_freq_file", po::value<std::string>(&output_diff_filename)->default_value("G_omega_diff.dat"), "difference to input file") ("output_tau_file", po::value<std::string>(&output_tau_filename)->default_value("G_tau_back.dat"), "backcontinued output G(tau) with errors") ("kernel", po::value<std::string>(&kernel_name)->default_value("standard"), "kernel type: standard, anomalous, ...") ("multiply_m1divpi", "if not specified: scales results by -pi, as required if converting ImG to A. If specified: standard Kramers Kronig (required for Sigma/Anomalous/etc backcont)") ; po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); po::notify(vm); if (vm.count("help")) { std::cout<<desc; return 1; } //toggle between continuation of G to A and continuation of any other quantity with standard Kramers Kronig relation if (vm.count("multiply_m1divpi")) { multiply_m1divpi=true; } std::ifstream orig_file(orig_filename.c_str()); if(!orig_file.good()) throw std::invalid_argument("imag freq file: "+orig_filename+" could not be opened. specify with --imag_freq_file"); std::ifstream spectra_file(spectra_filename.c_str()); if(!spectra_file.good()) throw std::invalid_argument("real freq file: "+spectra_filename+" could not be opened. 
specify with --real_freq_file"); if(!vm.count("beta")) throw std::runtime_error("you need to specify the inverse temperature with --beta."); if(vm.count("kernel")){ if(kernel_name==std::string("standard")){ k_type=standard; std::cout<<"using standard kernel."<<std::endl; }else if(kernel_name==std::string("anomalous")){ k_type=anomalous; std::cout<<"using anomalous kernel."<<std::endl; }else if(kernel_name==std::string("bosonic")){ k_type=bosonic; std::cout<<"using bosonic kernel."<<std::endl; }else if(kernel_name==std::string("me_bosonic")){ k_type=me_bosonic; std::cout<<"using maxent's bosonic kernel."<<std::endl; }else if(kernel_name==std::string("me_anomalous")){ k_type=me_anomalous; std::cout<<"using maxent's anomalous kernel."<<std::endl; }else{ throw std::runtime_error("kernel type not recognized."); } } std::vector<std::complex<double> > imag_freq_data; std::vector<std::complex<double> > imag_freq_error; std::vector<double > real_freq_data; std::vector<double > real_freq_freq; do{ double dummy, imag_freq_data_real, imag_freq_data_imag, imag_freq_sigma_real, imag_freq_sigma_imag; orig_file>>dummy>>imag_freq_data_real>>imag_freq_data_imag>>imag_freq_sigma_real>>imag_freq_sigma_imag>>std::ws; imag_freq_data.push_back(std::complex<double>(imag_freq_data_real, imag_freq_data_imag)); imag_freq_error.push_back(std::complex<double>(imag_freq_sigma_real, imag_freq_sigma_imag)); }while(orig_file.good()); do{ double frequency, value, defaultm; spectra_file>>frequency>>value>>defaultm>>std::ws; real_freq_data.push_back(value); real_freq_freq.push_back(frequency); }while(spectra_file.good()); std::cout<<"read in files: "<<imag_freq_data.size()<<" matsubara freqs and "<<real_freq_data.size()<<" real frequency points."<<std::endl; if(n_matsubara ==-1) n_matsubara=imag_freq_data.size(); if(real_freq_data[0]+real_freq_data.back() > 1.e-4) std::cerr<<"problem with spectra: does not go to zero at boundary?\n"; std::cout<<real_freq_data[0]<<" "<<real_freq_data.back()<<std::endl; //back-continue to the imaginary axis if(k_type==standard){ std::vector<double > imag_time_back(n_tau,0.); std::ofstream gtau_file(output_tau_filename.c_str()); gtau_file.precision(14); for(int i=0;i<n_tau;++i){ double tau=i/(double)n_tau*beta; imag_time_back[i]=0.; for(int w=1;w<real_freq_freq.size()-1;++w){ double freq =real_freq_freq[w]; double value=real_freq_data[w]; double delta=(real_freq_freq[w+1]-real_freq_freq[w-1])/2.; double kernel=-std::exp(-freq*tau)/(std::exp(-freq*beta)+1); if(!std::isnan(kernel)) imag_time_back[i]+=kernel*value*delta; //std::cout<<freq<<" "<<value<<" "<<delta<<" "<<std::exp(-freq*tau)/(std::exp(-freq*beta)+1)*value*delta<<" "<<imag_time_back[i]<<std::endl; } double kernel1=-std::exp(-real_freq_freq[0]*tau )/(std::exp(-real_freq_freq[0] *beta)+1); double kernel2=-std::exp(-real_freq_freq.back()*tau)/(std::exp(-real_freq_freq.back()*beta)+1); if(!std::isnan(kernel1)) imag_time_back[i]+=kernel1*real_freq_data[0]*(real_freq_freq[1]-real_freq_freq[0])/2.; if(!std::isnan(kernel2)) imag_time_back[i]+=kernel2*real_freq_data.back()*(real_freq_freq.back()-real_freq_freq[real_freq_freq.size()-2])/2.; if(multiply_m1divpi) imag_time_back[i]*=-1./M_PI; gtau_file<<tau<<" "<<imag_time_back[i]<<std::endl; } } std::vector<std::complex<double> > imag_freq_data_back(n_matsubara); std::ofstream gomega_file(output_omega_filename.c_str()); gomega_file.precision(14); int n=0; //don't compute bosonic singular value @ n=0 if(k_type==bosonic || k_type==anomalous){ n++; std::cout<< "Warning: kernel is singular at 
iomega_n=0. Skipping..." <<std::endl; } for(;n<n_matsubara;++n){ double omega_n; if(k_type==standard) omega_n=(2.*n+1)*M_PI/beta; else omega_n=(2.*n)*M_PI/beta; imag_freq_data_back[n]=0.; for(int w=1;w<real_freq_freq.size()-1;++w){ double freq =real_freq_freq[w]; double value=real_freq_data[w]; double delta=(real_freq_freq[w+1]-real_freq_freq[w-1])/2.; std::complex<double> kernel_val=kernel(omega_n, freq,k_type); imag_freq_data_back[n]+=kernel_val*value*delta; } std::complex<double> kernel1=kernel(omega_n, real_freq_freq[0],k_type); std::complex<double> kernel2=kernel(omega_n, real_freq_freq.back(),k_type); imag_freq_data_back[n]+=kernel1*real_freq_data[0]*(real_freq_freq[1]-real_freq_freq[0])/2.; imag_freq_data_back[n]+=kernel2*real_freq_data.back()*(real_freq_freq.back()-real_freq_freq[real_freq_freq.size()-2])/2.; if(multiply_m1divpi) imag_freq_data_back[n]*=-1./M_PI; gomega_file<<omega_n<<" "<<imag_freq_data_back[n].real()<<" "<<imag_freq_data_back[n].imag()<<std::endl; } std::ofstream gomega_diff_file(output_diff_filename.c_str()); for(int n=0;n<n_matsubara;++n){ double diff_real=imag_freq_data[n].real()-imag_freq_data_back[n].real(); double diff_imag=imag_freq_data[n].imag()-imag_freq_data_back[n].imag(); gomega_diff_file<<(2.*n+1)*M_PI/beta<<" "<<diff_real<<" "<<diff_imag<<" "<<imag_freq_error[n].real()<<" "<<imag_freq_error[n].imag()<<std::endl; } }
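// For reference, the continuation integrals that the trapezoidal sums above approximate
// (standard fermionic case, with the real-frequency data A(omega) read from
// --real_freq_file):
//
//   G(\tau)      = -\int d\omega \, \frac{e^{-\omega\tau}}{1 + e^{-\omega\beta}} \, A(\omega),
//   G(i\omega_n) =  \int d\omega \, K(i\omega_n, \omega) \, A(\omega),
//
// with \omega_n = (2n + 1)\pi/\beta for the standard kernel and \omega_n = 2n\pi/\beta for
// the bosonic/anomalous variants; K is supplied by kernel(omega_n, freq, k_type). When
// --multiply_m1divpi is given, both results are additionally scaled by -1/\pi, matching
// the loops above.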