Example #1
static SkImageFilter* make_image_filter(bool canBeNull = true) {
    SkImageFilter* filter = 0;

    // Add a 1 in 3 chance to get a NULL input
    if (canBeNull && (R(3) == 1)) { return filter; }

    enum { ALPHA_THRESHOLD, MERGE, COLOR, BLUR, MAGNIFIER,
           DOWN_SAMPLE, XFERMODE, OFFSET, MATRIX, MATRIX_CONVOLUTION, COMPOSE,
           DISTANT_LIGHT, POINT_LIGHT, SPOT_LIGHT, NOISE, DROP_SHADOW,
           MORPHOLOGY, BITMAP, DISPLACE, TILE, PICTURE, NUM_FILTERS };

    switch (R(NUM_FILTERS)) {
    case ALPHA_THRESHOLD:
        filter = SkAlphaThresholdFilter::Create(make_region(), make_scalar(), make_scalar());
        break;
    case MERGE:
        filter = SkMergeImageFilter::Create(make_image_filter(), make_image_filter(), make_xfermode());
        break;
    case COLOR:
    {
        SkAutoTUnref<SkColorFilter> cf((R(2) == 1) ?
                 SkColorFilter::CreateModeFilter(make_color(), make_xfermode()) :
                 SkColorFilter::CreateLightingFilter(make_color(), make_color()));
        filter = cf.get() ? SkColorFilterImageFilter::Create(cf, make_image_filter()) : 0;
    }
        break;
    case BLUR:
        filter = SkBlurImageFilter::Create(make_scalar(true), make_scalar(true), make_image_filter());
        break;
    case MAGNIFIER:
        filter = SkMagnifierImageFilter::Create(make_rect(), make_scalar(true));
        break;
    case DOWN_SAMPLE:
        filter = SkDownSampleImageFilter::Create(make_scalar());
        break;
    case XFERMODE:
    {
        SkAutoTUnref<SkXfermode> mode(SkXfermode::Create(make_xfermode()));
        filter = SkXfermodeImageFilter::Create(mode, make_image_filter(), make_image_filter());
    }
        break;
    case OFFSET:
        filter = SkOffsetImageFilter::Create(make_scalar(), make_scalar(), make_image_filter());
        break;
    case MATRIX:
        filter = SkMatrixImageFilter::Create(make_matrix(),
                                             (SkPaint::FilterLevel)R(4),
                                             make_image_filter());
        break;
    case MATRIX_CONVOLUTION:
    {
        SkImageFilter::CropRect cropR(SkRect::MakeWH(SkIntToScalar(kBitmapSize),
                                                     SkIntToScalar(kBitmapSize)));
        SkISize size = SkISize::Make(R(10)+1, R(10)+1);
        int arraySize = size.width() * size.height();
        SkTArray<SkScalar> kernel(arraySize);
        for (int i = 0; i < arraySize; ++i) {
            kernel.push_back() = make_scalar();
        }
        SkIPoint kernelOffset = SkIPoint::Make(R(SkIntToScalar(size.width())),
                                               R(SkIntToScalar(size.height())));
        filter = SkMatrixConvolutionImageFilter::Create(size,
                                                        kernel.begin(),
                                                        make_scalar(),
                                                        make_scalar(),
                                                        kernelOffset,
                                                        (SkMatrixConvolutionImageFilter::TileMode)R(3),
                                                        R(2) == 1,
                                                        make_image_filter(),
                                                        &cropR);
    }
        break;
    case COMPOSE:
        filter = SkComposeImageFilter::Create(make_image_filter(), make_image_filter());
        break;
    case DISTANT_LIGHT:
        filter = (R(2) == 1) ?
                 SkLightingImageFilter::CreateDistantLitDiffuse(make_point(),
                 make_color(), make_scalar(), make_scalar(), make_image_filter()) :
                 SkLightingImageFilter::CreateDistantLitSpecular(make_point(),
                 make_color(), make_scalar(), make_scalar(), SkIntToScalar(R(10)),
                 make_image_filter());
        break;
    case POINT_LIGHT:
        filter = (R(2) == 1) ?
                 SkLightingImageFilter::CreatePointLitDiffuse(make_point(),
                 make_color(), make_scalar(), make_scalar(), make_image_filter()) :
                 SkLightingImageFilter::CreatePointLitSpecular(make_point(),
                 make_color(), make_scalar(), make_scalar(), SkIntToScalar(R(10)),
                 make_image_filter());
        break;
    case SPOT_LIGHT:
        filter = (R(2) == 1) ?
                 SkLightingImageFilter::CreateSpotLitDiffuse(SkPoint3(0, 0, 0),
                 make_point(), make_scalar(), make_scalar(), make_color(),
                 make_scalar(), make_scalar(), make_image_filter()) :
                 SkLightingImageFilter::CreateSpotLitSpecular(SkPoint3(0, 0, 0),
                 make_point(), make_scalar(), make_scalar(), make_color(),
                 make_scalar(), make_scalar(), SkIntToScalar(R(10)), make_image_filter());
        break;
    case NOISE:
    {
        SkAutoTUnref<SkShader> shader((R(2) == 1) ?
            SkPerlinNoiseShader::CreateFractalNoise(
                make_scalar(true), make_scalar(true), R(10.0f), make_scalar()) :
            SkPerlinNoiseShader::CreateTurbulence(
                make_scalar(true), make_scalar(true), R(10.0f), make_scalar()));
        SkImageFilter::CropRect cropR(SkRect::MakeWH(SkIntToScalar(kBitmapSize),
                                                     SkIntToScalar(kBitmapSize)));
        filter = SkRectShaderImageFilter::Create(shader, &cropR);
    }
        break;
    case DROP_SHADOW:
        filter = SkDropShadowImageFilter::Create(make_scalar(), make_scalar(),
                     make_scalar(true), make_scalar(true), make_color(), make_image_filter());
        break;
    case MORPHOLOGY:
        if (R(2) == 1) {
            filter = SkDilateImageFilter::Create(R(static_cast<float>(kBitmapSize)),
                R(static_cast<float>(kBitmapSize)), make_image_filter());
        } else {
            filter = SkErodeImageFilter::Create(R(static_cast<float>(kBitmapSize)),
                R(static_cast<float>(kBitmapSize)), make_image_filter());
        }
        break;
    case BITMAP:
        if (R(2) == 1) {
            filter = SkBitmapSource::Create(make_bitmap(), make_rect(), make_rect());
        } else {
            filter = SkBitmapSource::Create(make_bitmap());
        }
        break;
    case DISPLACE:
        filter = SkDisplacementMapEffect::Create(make_channel_selector_type(),
                                                 make_channel_selector_type(), make_scalar(),
                                                 make_image_filter(false), make_image_filter());
        break;
    case TILE:
        filter = SkTileImageFilter::Create(make_rect(), make_rect(), make_image_filter(false));
        break;
    case PICTURE:
    {
        SkRTreeFactory factory;
        SkPictureRecorder recorder;
        SkCanvas* recordingCanvas = recorder.beginRecording(SkIntToScalar(kBitmapSize), 
                                                            SkIntToScalar(kBitmapSize), 
                                                            &factory, 0);
        drawSomething(recordingCanvas);
        SkAutoTUnref<SkPicture> pict(recorder.endRecording());
        filter = SkPictureImageFilter::Create(pict.get(), make_rect());
    }
        break;
    default:
        break;
    }
    return (filter || canBeNull) ? filter : make_image_filter(canBeNull);
}
inline void
nest::ConnBuilder::single_connect_( index sgid,
  Node& target,
  thread target_thread,
  librandom::RngPtr& rng )
{
  if ( param_dicts_.empty() ) // indicates we have no synapse params
  {
    if ( default_weight_and_delay_ )
      kernel().connection_manager.connect(
        sgid, &target, target_thread, synapse_model_ );
    else if ( default_weight_ )
      kernel().connection_manager.connect( sgid,
        &target,
        target_thread,
        synapse_model_,
        delay_->value_double( target_thread, rng ) );
    else
    {
      double delay = delay_->value_double( target_thread, rng );
      double weight = weight_->value_double( target_thread, rng );
      kernel().connection_manager.connect(
        sgid, &target, target_thread, synapse_model_, delay, weight );
    }
  }
  else
  {
    assert( kernel().vp_manager.get_num_threads() == param_dicts_.size() );

    for ( ConnParameterMap::const_iterator it = synapse_params_.begin();
          it != synapse_params_.end();
          ++it )
    {
      if ( it->first == names::receptor_type
        || it->first == names::music_channel
        || it->first == names::synapse_label )
      {
        try
        {
          // change value of dictionary entry without allocating new datum
          IntegerDatum* id = static_cast< IntegerDatum* >(
            ( ( *param_dicts_[ target_thread ] )[ it->first ] ).datum() );
          ( *id ) = it->second->value_int( target_thread, rng );
        }
        catch ( KernelException& e )
        {
          if ( it->first == names::receptor_type )
          {
            throw BadProperty( "Receptor type must be of type integer." );
          }
          else if ( it->first == names::music_channel )
          {
            throw BadProperty( "Music channel type must be of type integer." );
          }
          else if ( it->first == names::synapse_label )
          {
            throw BadProperty( "Synapse label must be of type integer." );
          }
        }
      }
      else
      {
        // change value of dictionary entry without allocating new datum
        DoubleDatum* dd = static_cast< DoubleDatum* >(
          ( ( *param_dicts_[ target_thread ] )[ it->first ] ).datum() );
        ( *dd ) = it->second->value_double( target_thread, rng );
      }
    }

    if ( default_weight_and_delay_ )
      kernel().connection_manager.connect( sgid,
        &target,
        target_thread,
        synapse_model_,
        param_dicts_[ target_thread ] );
    else if ( default_weight_ )
      kernel().connection_manager.connect( sgid,
        &target,
        target_thread,
        synapse_model_,
        param_dicts_[ target_thread ],
        delay_->value_double( target_thread, rng ) );
    else
    {
      double delay = delay_->value_double( target_thread, rng );
      double weight = weight_->value_double( target_thread, rng );
      kernel().connection_manager.connect( sgid,
        &target,
        target_thread,
        synapse_model_,
        param_dicts_[ target_thread ],
        delay,
        weight );
    }
  }
}
void
nest::OneToOneBuilder::connect_()
{
  // make sure that target and source population have the same size
  if ( sources_->size() != targets_->size() )
  {
    LOG( M_ERROR,
      "Connect",
      "Source and Target population must be of the same size." );
    throw DimensionMismatch();
  }

#pragma omp parallel
  {
    // get thread id
    const int tid = kernel().vp_manager.get_thread_id();

    try
    {
      // allocate pointer to thread specific random generator
      librandom::RngPtr rng = kernel().rng_manager.get_rng( tid );

      for ( GIDCollection::const_iterator tgid = targets_->begin(),
                                          sgid = sources_->begin();
            tgid != targets_->end();
            ++tgid, ++sgid )
      {
        assert( sgid != sources_->end() );

        if ( *sgid == *tgid and not autapses_ )
          continue;

        // check whether the target is on this mpi machine
        if ( not kernel().node_manager.is_local_gid( *tgid ) )
        {
          skip_conn_parameter_( tid );
          continue;
        }

        Node* const target = kernel().node_manager.get_node( *tgid );
        const thread target_thread = target->get_thread();

        // check whether the target is on our thread
        if ( tid != target_thread )
        {
          skip_conn_parameter_( tid );
          continue;
        }

        single_connect_( *sgid, *target, target_thread, rng );
      }
    }
    catch ( std::exception& err )
    {
      // We must create a new exception here, err's lifetime ends at
      // the end of the catch block.
      exceptions_raised_.at( tid ) =
        lockPTR< WrappedThreadException >( new WrappedThreadException( err ) );
    }
  }
}
// returns the done value
bool
EventDeliveryManager::deliver_events( thread t )
{
  // are we done?
  bool done = true;

  // deliver only at beginning of time slice
  if ( kernel().simulation_manager.get_from_step() > 0 )
    return done;

  SpikeEvent se;

  std::vector< int > pos( displacements_ );

  if ( !off_grid_spiking_ ) // on_grid_spiking
  {
    // prepare Time objects for every possible time stamp within min_delay_
    std::vector< Time > prepared_timestamps(
      kernel().connection_manager.get_min_delay() );
    for ( size_t lag = 0;
          lag < ( size_t ) kernel().connection_manager.get_min_delay();
          lag++ )
    {
      prepared_timestamps[ lag ] =
        kernel().simulation_manager.get_clock() - Time::step( lag );
    }

    for ( size_t vp = 0;
          vp < ( size_t ) kernel().vp_manager.get_num_virtual_processes();
          ++vp )
    {
      size_t pid = kernel().mpi_manager.get_process_id( vp );
      int pos_pid = pos[ pid ];
      int lag = kernel().connection_manager.get_min_delay() - 1;
      while ( lag >= 0 )
      {
        index nid = global_grid_spikes_[ pos_pid ];
        if ( nid != static_cast< index >( comm_marker_ ) )
        {
          // tell all local nodes about spikes on remote machines.
          se.set_stamp( prepared_timestamps[ lag ] );
          se.set_sender_gid( nid );
          kernel().connection_manager.send( t, nid, se );
        }
        else
        {
          --lag;
        }
        ++pos_pid;
      }
      pos[ pid ] = pos_pid;
    }

    // here we are done with the spiking events
    // pos[pid] for each pid now points to the first entry of
    // the secondary events

    for ( size_t pid = 0;
          pid < ( size_t ) kernel().mpi_manager.get_num_processes();
          ++pid )
    {
      std::vector< unsigned int >::iterator readpos =
        global_grid_spikes_.begin() + pos[ pid ];

      while ( true )
      {
        // we must not use unsigned int for the type, otherwise
        // the encoding will be different on JUQUEEN for the
        // index written into the buffer and read out of it
        synindex synid;
        read_from_comm_buffer( synid, readpos );

        if ( synid == invalid_synindex )
          break;
        --readpos;

        kernel().model_manager.assert_valid_syn_id( synid );

        kernel().model_manager.get_secondary_event_prototype( synid, t )
          << readpos;

        kernel().connection_manager.send_secondary(
          t, kernel().model_manager.get_secondary_event_prototype( synid, t ) );
      } // of while (true)

      // read the done value of the p-th process

      // must be a bool (same type as on the sending side)
      // otherwise the encoding will be inconsistent on JUQUEEN
      bool done_p;
      read_from_comm_buffer( done_p, readpos );
      done = done && done_p;
    }
  }
  else // off grid spiking
  {
    // prepare Time objects for every possible time stamp within min_delay_
    std::vector< Time > prepared_timestamps(
      kernel().connection_manager.get_min_delay() );
    for ( size_t lag = 0;
          lag < ( size_t ) kernel().connection_manager.get_min_delay();
          lag++ )
    {
      prepared_timestamps[ lag ] =
        kernel().simulation_manager.get_clock() - Time::step( lag );
    }

    for ( size_t vp = 0;
          vp < ( size_t ) kernel().vp_manager.get_num_virtual_processes();
          ++vp )
    {
      size_t pid = kernel().mpi_manager.get_process_id( vp );
      int pos_pid = pos[ pid ];
      int lag = kernel().connection_manager.get_min_delay() - 1;
      while ( lag >= 0 )
      {
        index nid = global_offgrid_spikes_[ pos_pid ].get_gid();
        if ( nid != static_cast< index >( comm_marker_ ) )
        {
          // tell all local nodes about spikes on remote machines.
          se.set_stamp( prepared_timestamps[ lag ] );
          se.set_sender_gid( nid );
          se.set_offset( global_offgrid_spikes_[ pos_pid ].get_offset() );
          kernel().connection_manager.send( t, nid, se );
        }
        else
        {
          --lag;
        }
        ++pos_pid;
      }
      pos[ pid ] = pos_pid;
    }
  }

  return done;
}
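The two decoding loops above share one buffer convention; the following standalone sketch (illustrative only, not NEST source; the function and its arguments are hypothetical) shows how one process' slice of the flat grid-spike buffer is read: node ids for lag = min_delay-1 down to 0 are stored back to back, and a comm-marker entry advances to the next lag.

#include <cstddef>
#include <utility>
#include <vector>

// Returns (node id, lag) pairs decoded from one process' slice of the flat
// spike buffer, using the same marker convention as deliver_events().
std::vector< std::pair< unsigned int, int > >
decode_grid_spikes( const std::vector< unsigned int >& buf,
  std::size_t pos,
  int min_delay,
  unsigned int comm_marker )
{
  std::vector< std::pair< unsigned int, int > > spikes;
  int lag = min_delay - 1;
  while ( lag >= 0 )
  {
    const unsigned int nid = buf[ pos++ ];
    if ( nid == comm_marker )
      --lag; // marker: advance to the next lag
    else
      spikes.push_back( std::make_pair( nid, lag ) );
  }
  return spikes;
}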
void
nest::FixedTotalNumberBuilder::connect_()
{
  const int_t M = kernel().vp_manager.get_num_virtual_processes();
  const long_t size_sources = sources_->size();
  const long_t size_targets = targets_->size();

  // drawing connection ids

  // Compute the distribution of targets over processes using the modulo
  // function
  std::vector< std::vector< size_t > > targets_on_vp( M );
  for ( size_t t = 0; t < targets_->size(); t++ )
  {
    targets_on_vp[ kernel().vp_manager.suggest_vp( ( *targets_ )[ t ] ) ]
      .push_back( ( *targets_ )[ t ] );
  }

  // We use the multinomial distribution to determine the number of
  // connections that will be made on one virtual process, i.e. we
  // partition the set of edges into n_vps subsets. The number of
  // edges on one virtual process is binomially distributed with
  // the boundary condition that the sum of all edges over virtual
  // processes is the total number of edges.
  // To obtain the num_conns_on_vp we adapt the gsl
  // implementation of the multinomial distribution.

  // K from gsl is equivalent to M = n_vps
  // N is already taken from stack
  // p[] is targets_on_vp
  std::vector< long_t > num_conns_on_vp( M, 0 ); // corresponds to n[]

  // calculate exact multinomial distribution
  // get global rng that is tested for synchronization for all threads
  librandom::RngPtr grng = kernel().rng_manager.get_grng();

  // HEP: instead of counting upwards, we might count remaining_targets and
  // remaining_partitions down. why?
  // begin code adapted from gsl 1.8 //
  double_t sum_dist = 0.0; // corresponds to sum_p
  // norm is equivalent to size_targets
  uint_t sum_partitions = 0; // corresponds to sum_n
// substituting gsl_ran call
#ifdef HAVE_GSL
  librandom::GSL_BinomialRandomDev bino( grng, 0, 0 );
#else
  librandom::BinomialRandomDev bino( grng, 0, 0 );
#endif

  for ( int k = 0; k < M; k++ )
  {
    if ( targets_on_vp[ k ].size() > 0 )
    {
      double_t num_local_targets =
        static_cast< double_t >( targets_on_vp[ k ].size() );
      double_t p_local = num_local_targets / ( size_targets - sum_dist );
      bino.set_p( p_local );
      bino.set_n( N_ - sum_partitions );
      num_conns_on_vp[ k ] = bino.ldev();
    }

    sum_dist += static_cast< double_t >( targets_on_vp[ k ].size() );
    sum_partitions += static_cast< uint_t >( num_conns_on_vp[ k ] );
  }

// end code adapted from gsl 1.8

#pragma omp parallel
  {
    // get thread id
    const int tid = kernel().vp_manager.get_thread_id();

    try
    {
      // allocate pointer to thread specific random generator
      const int_t vp_id = kernel().vp_manager.thread_to_vp( tid );

      if ( kernel().vp_manager.is_local_vp( vp_id ) )
      {
        librandom::RngPtr rng = kernel().rng_manager.get_rng( tid );

        while ( num_conns_on_vp[ vp_id ] > 0 )
        {

          // draw random numbers for source node from all source neurons
          const long_t s_index = rng->ulrand( size_sources );
          // draw random numbers for target node from
          // targets_on_vp on this virtual process
          const long_t t_index = rng->ulrand( targets_on_vp[ vp_id ].size() );
          // map random number of source node to gid corresponding to
          // the source_adr vector
          const long_t sgid = ( *sources_ )[ s_index ];
          // map random number of target node to gid using the
          // targets_on_vp vector
          const long_t tgid = targets_on_vp[ vp_id ][ t_index ];

          Node* const target = kernel().node_manager.get_node( tgid );
          const thread target_thread = target->get_thread();

          if ( autapses_ or sgid != tgid )
          {
            single_connect_( sgid, *target, target_thread, rng );
            num_conns_on_vp[ vp_id ]--;
          }
        }
      }
    }
    catch ( std::exception& err )
    {
      // We must create a new exception here, err's lifetime ends at
      // the end of the catch block.
      exceptions_raised_.at( tid ) =
        lockPTR< WrappedThreadException >( new WrappedThreadException( err ) );
    }
  }
}
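The gsl-adapted loop above draws a multinomial partition by sequential binomial draws: part k receives Binomial(N - sum_partitions, p_k / remaining mass) connections. A standalone sketch of the same construction using only the C++ standard library (the function name and the std::mt19937 engine are my own choices, not the NEST random devs):

#include <algorithm>
#include <random>
#include <vector>

// Draw (n_0, ..., n_{M-1}) ~ Multinomial(N, w / sum(w)) by sequential
// binomial draws, mirroring the loop adapted from gsl 1.8 above.
std::vector< long > multinomial_partition( long N,
  const std::vector< double >& weights,
  std::mt19937& rng )
{
  std::vector< long > counts( weights.size(), 0 );
  double remaining_mass = 0.0;
  for ( double w : weights )
    remaining_mass += w;

  long remaining = N;
  for ( std::size_t k = 0; k < weights.size() && remaining > 0; ++k )
  {
    if ( weights[ k ] <= 0.0 )
      continue;
    // clamp against floating-point round-off so p stays in [0, 1]
    const double p = std::min( weights[ k ] / remaining_mass, 1.0 );
    std::binomial_distribution< long > bino( remaining, p );
    counts[ k ] = bino( rng );
    remaining -= counts[ k ];
    remaining_mass -= weights[ k ];
  }
  return counts;
}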
Example #6
inline kernel_call bluestein_mul_out(
        const backend::command_queue &queue, size_t batch, size_t p,
        size_t radix, size_t threads, size_t stride,
        const backend::device_vector<T2> &data,
        const backend::device_vector<T2> &exp,
        const backend::device_vector<T2> &out
        )
{
    backend::source_generator o;
    kernel_common<T>(o, queue);
    mul_code<T2>(o, false);

    o.function<T2>("scale").open("(")
        .template parameter<T2>("x")
        .template parameter<T >("a")
    .close(")").open("{");

    o.new_line() << type_name<T2>() << " r = {x.x * a, x.y * a};";
    o.new_line() << "return r;";
    o.close("}");

    o.kernel("bluestein_mul_out").open("(")
        .template parameter< global_ptr<const T2> >("data")
        .template parameter< global_ptr<const T2> >("exp")
        .template parameter< global_ptr<      T2> >("output")
        .template parameter< T                    >("div")
        .template parameter< cl_uint              >("p")
        .template parameter< cl_uint              >("in_stride")
        .template parameter< cl_uint              >("radix")
    .close(")").open("{");

    o.new_line() << "const size_t i = " << o.global_id(0) << ";";
    o.new_line() << "const size_t threads = " << o.global_size(0) << ";";
    o.new_line() << "const size_t b = " << o.global_id(1) << ";";
    o.new_line() << "const size_t l = " << o.global_id(2) << ";";

    o.new_line() << "if(l < radix)";
    o.open("{");

    o.new_line() << "const size_t k = i % p;";
    o.new_line() << "const size_t j = k + (i - k) * radix;";
    o.new_line() << "const size_t in_off = i * in_stride + b * in_stride * threads + l;";
    o.new_line() << "const size_t out_off = j + b * threads * radix + l * p;";

    o.new_line() << "output[out_off] = mul(scale(data[in_off], div), exp[l]);";

    o.close("}");
    o.close("}");

    backend::kernel kernel(queue, o.str(), "bluestein_mul_out");
    kernel.push_arg(data);
    kernel.push_arg(exp);
    kernel.push_arg(out);
    kernel.push_arg(static_cast<T>(1.0 / stride));
    kernel.push_arg(static_cast<cl_uint>(p));
    kernel.push_arg(static_cast<cl_uint>(stride));
    kernel.push_arg(static_cast<cl_uint>(radix));

    const size_t wg = kernel.preferred_work_group_size_multiple(queue);
    const size_t radix_pad = (radix + wg - 1) / wg;

    kernel.config(
            backend::ndrange(threads, batch, radix_pad),
            backend::ndrange(      1,     1,        wg)
            );

    std::ostringstream desc;
    desc << "bluestein_mul_out{r=" << radix << "(" << radix_pad << "), wg=" << wg << ", batch=" << batch << ", p=" << p << ", thr=" << threads << ", stride=" << stride << "}";
    return kernel_call(false, desc.str(), kernel);
}
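For reference, the index arithmetic emitted into the generated kernel body can be reproduced host-side; this is a standalone sketch (not part of VexCL) of the two offset formulas, useful when checking the scatter pattern with small p/radix values.

#include <cstddef>

// Mirrors the offsets computed inside the generated "bluestein_mul_out" kernel.
inline std::size_t bluestein_in_offset(std::size_t i, std::size_t l,
        std::size_t b, std::size_t in_stride, std::size_t threads)
{
    return i * in_stride + b * in_stride * threads + l;
}

inline std::size_t bluestein_out_offset(std::size_t i, std::size_t l,
        std::size_t b, std::size_t p, std::size_t radix, std::size_t threads)
{
    const std::size_t k = i % p;              // position inside the stride
    const std::size_t j = k + (i - k) * radix;
    return j + b * threads * radix + l * p;
}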
void
EventDeliveryManager::configure_spike_buffers()
{
  assert( kernel().connection_manager.get_min_delay() != 0 );

  spike_register_.clear();
  // the following line does not compile with gcc <= 3.3.5
  spike_register_.resize( kernel().vp_manager.get_num_threads(),
    std::vector< std::vector< unsigned int > >(
                            kernel().connection_manager.get_min_delay() ) );
  for ( size_t j = 0; j < spike_register_.size(); ++j )
    for ( size_t k = 0; k < spike_register_[ j ].size(); ++k )
      spike_register_[ j ][ k ].clear();

  offgrid_spike_register_.clear();
  // the following line does not compile with gcc <= 3.3.5
  offgrid_spike_register_.resize(
    kernel().vp_manager.get_num_threads(),
    std::vector< std::vector< OffGridSpike > >(
      kernel().connection_manager.get_min_delay() ) );
  for ( size_t j = 0; j < offgrid_spike_register_.size(); ++j )
    for ( size_t k = 0; k < offgrid_spike_register_[ j ].size(); ++k )
      offgrid_spike_register_[ j ][ k ].clear();


  // this should also clear all contained elements
  // so no loop required
  secondary_events_buffer_.clear();
  secondary_events_buffer_.resize( kernel().vp_manager.get_num_threads() );


  // send_buffer must be >= 2 as the 'overflow' signal takes up 2 spaces
  // plus the final marker and the done flag for iterations
  // + 1 for the final markers of each thread (invalid_synindex) of secondary
  // events
  // + 1 for the done flag (true) of each process
  int send_buffer_size = kernel().vp_manager.get_num_threads()
          * kernel().connection_manager.get_min_delay()
        + 2
      > 4
    ? kernel().vp_manager.get_num_threads()
        * kernel().connection_manager.get_min_delay()
      + 2
    : 4;
  int recv_buffer_size =
    send_buffer_size * kernel().mpi_manager.get_num_processes();
  kernel().mpi_manager.set_buffer_sizes( send_buffer_size, recv_buffer_size );

  // DEC cxx required 0U literal, HEP 2007-03-26
  local_grid_spikes_.clear();
  local_grid_spikes_.resize( send_buffer_size, 0U );
  local_offgrid_spikes_.clear();
  local_offgrid_spikes_.resize( send_buffer_size, OffGridSpike( 0, 0.0 ) );

  global_grid_spikes_.clear();
  global_grid_spikes_.resize( recv_buffer_size, 0U );

  // insert the end marker for payload event (==invalid_synindex)
  // and insert the done flag (==true)
  // after min_delay 0's (== comm_marker)
  // use the template functions defined in event.h
  // this only needs to be done for one process, because displacements is set to
  // 0 so all processes initially read out the same positions in the global
  // spike buffer
  std::vector< unsigned int >::iterator pos = global_grid_spikes_.begin()
    + kernel().vp_manager.get_num_threads()
      * kernel().connection_manager.get_min_delay();
  write_to_comm_buffer( invalid_synindex, pos );
  write_to_comm_buffer( true, pos );

  global_offgrid_spikes_.clear();
  global_offgrid_spikes_.resize( recv_buffer_size, OffGridSpike( 0, 0.0 ) );

  displacements_.clear();
  displacements_.resize( kernel().mpi_manager.get_num_processes(), 0 );
}
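The ternary that sizes send_buffer_size above reduces to a clamped product; as a reading aid, a sketch with hypothetical helper names (not NEST source):

#include <algorithm>

// one slot per (thread, lag) pair, plus the invalid_synindex end marker and
// the done flag, but never fewer than 4 entries
inline int send_buffer_size( int num_threads, int min_delay )
{
  return std::max( num_threads * min_delay + 2, 4 );
}

inline int recv_buffer_size( int send_size, int num_processes )
{
  return send_size * num_processes;
}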
Example #8
/* ==============================================================
 Main MEX function - interface to Matlab.
============================================================== */
void mexFunction( int nlhs, mxArray *plhs[],
		  int nrhs, const mxArray *prhs[] )
{
   long i, j, k, m;
   long nsv, new_dim, num_data;
   double *Alpha;
   double *b;
   double *Y;
   double k_ij;

  
   ker_cnt = 0;

   /* Y = kernelproj_mex(X, Alpha, b, sv_X, ker, arg ) */
   /* ------------------------------------------- */
   if( nrhs == 6) 
   {
      /* data matrix [dim x num_data] */
      if( !mxIsNumeric(prhs[0]) || !mxIsDouble(prhs[0]) ||
        mxIsEmpty(prhs[0])    || mxIsComplex(prhs[0]) )
        mexErrMsgTxt("Input data must be a real matrix.");

      /* multipliers Alpha [nsv  x new_dim] */
      if( !mxIsNumeric(prhs[1]) || !mxIsDouble(prhs[1]) ||
        mxIsEmpty(prhs[1])    || mxIsComplex(prhs[1]) )
        mexErrMsgTxt("Input Alpha must be a real matrix.");

      /* vector b [new_dim x 1] */
      if( !mxIsNumeric(prhs[2]) || !mxIsDouble(prhs[2]) ||
        mxIsEmpty(prhs[2])    || mxIsComplex(prhs[2]) )
        mexErrMsgTxt("Input b must be a real vector.");

      /* kernel identifier */
      ker = kernel_id( prhs[4] );
      if( ker == -1 ) 
        mexErrMsgTxt("Improper kernel identifier.");
      
     /*  get pointer to arguments  */
     arg1 = mxGetPr(prhs[5]);

     /* get pointer at input vectors */
     dataA = mxGetPr(prhs[0]);   
     Alpha = mxGetPr(prhs[1]);
     b = mxGetPr(prhs[2]);
     dataB = mxGetPr(prhs[3]);  

     /* get data dimensions */ 
     dim = mxGetM(prhs[0]);      
     num_data = mxGetN(prhs[0]);       
     nsv = mxGetM(prhs[1]);
     new_dim = mxGetN(prhs[1]);

     if( mxGetM(prhs[2]) != new_dim)
        mexErrMsgTxt("Number of rows of Alpha must equal to size of vector b.");

     /* creates output kernel matrix. */
     plhs[0] = mxCreateDoubleMatrix(new_dim,num_data,mxREAL);
     Y = mxGetPr(plhs[0]);

     /* computes kernel projection */
     for( i = 0; i < num_data; i++ ) {

       for( k = 0; k < new_dim; k++) { 
         Y[k+i*new_dim] = b[k]; 
       }

       for( j = 0; j < nsv; j++ ) {
         k_ij = kernel(i,j);

         for( k = 0; k < new_dim; k++) { 
           if(Alpha[j+k*nsv] != 0 )
              Y[k+i*new_dim] += k_ij*Alpha[j+k*nsv]; 
         }
       }
     }
   } 
   else
   {
      mexErrMsgTxt("Wrong number of input arguments.");
   }

   return;
}
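In matrix terms the double loop above computes Y(:,i) = b + Alpha' * k_i, where k_i(j) = kernel(x_i, sv_j). A standalone sketch of one output column (illustrative only, not part of the MEX file; k_col stands for a hypothetical precomputed kernel column, whereas the MEX code evaluates kernel(i,j) on the fly):

/* y has length new_dim; Alpha is stored column-major, as in the MEX code */
static void project_one_column( const double *k_col, const double *Alpha,
                                const double *b, long nsv, long new_dim,
                                double *y )
{
   long j, k;
   for( k = 0; k < new_dim; k++ ) {
      y[k] = b[k];
      for( j = 0; j < nsv; j++ ) {
         y[k] += k_col[j] * Alpha[j + k*nsv];
      }
   }
}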
void
nest::iaf_psc_alpha_presc::update( Time const& origin,
  const long_t from,
  const long_t to )
{
  assert( to >= 0 );
  assert( static_cast< delay >( from )
    < kernel().connection_manager.get_min_delay() );
  assert( from < to );

  /* Neurons may have been initialized to superthreshold potentials.
     We need to check for this here and issue spikes at the beginning of
     the interval.
  */
  if ( S_.y3_ >= P_.U_th_ )
  {
    S_.last_spike_step_ = origin.get_steps() + from + 1;
    S_.last_spike_offset_ =
      V_.h_ms_ * ( 1 - std::numeric_limits< double_t >::epsilon() );

    // reset neuron and make it refractory
    S_.y3_ = P_.U_reset_;
    S_.r_ = V_.refractory_steps_;

    // send spike
    set_spiketime( Time::step( S_.last_spike_step_ ), S_.last_spike_offset_ );

    SpikeEvent se;
    se.set_offset( S_.last_spike_offset_ );
    kernel().event_delivery_manager.send( *this, se, from );
  }

  for ( long_t lag = from; lag < to; ++lag )
  {
    // time at start of update step
    const long_t T = origin.get_steps() + lag;

    // save state at beginning of interval for spike-time interpolation
    V_.y0_before_ = S_.y0_;
    V_.y1_before_ = S_.y1_;
    V_.y2_before_ = S_.y2_;
    V_.y3_before_ = S_.y3_;

    /* obtain input to y3_
       We need to collect this value even while the neuron is refractory,
       since we need to clear any spikes that have come in from the
       ring buffer.
    */
    const double_t dy3 = B_.spike_y3_.get_value( lag );

    if ( S_.r_ == 0 )
    {
      // neuron is not refractory
      S_.y3_ = V_.P30_ * ( P_.I_e_ + S_.y0_ ) + V_.P31_ * S_.y1_
        + V_.P32_ * S_.y2_ + V_.expm1_tau_m_ * S_.y3_ + S_.y3_;

      S_.y3_ += dy3; // add input
      // enforce lower bound
      S_.y3_ = ( S_.y3_ < P_.U_min_ ? P_.U_min_ : S_.y3_ );
    }
    else if ( S_.r_ == 1 )
    {
      // neuron returns from refractoriness during interval
      S_.r_ = 0;

      // Iterate third component (membrane pot) from end of
      // refractory period to end of interval.  As first-order
      // approximation, add a proportion of the effect of synaptic
      // input during the interval to membrane pot.  The proportion
      // is given by the part of the interval after the end of the
      // refractory period.
      S_.y3_ = P_.U_reset_ + // try fix 070623, md
        update_y3_delta_() + dy3
        - dy3 * ( 1 - S_.last_spike_offset_ / V_.h_ms_ );

      // enforce lower bound
      S_.y3_ = ( S_.y3_ < P_.U_min_ ? P_.U_min_ : S_.y3_ );
    }
    else
    {
      // neuron is refractory
      // y3_ remains unchanged at 0.0
      --S_.r_;
    }

    // update synaptic currents
    S_.y2_ = V_.expm1_tau_syn_ * V_.h_ms_ * S_.y1_ + V_.expm1_tau_syn_ * S_.y2_
      + V_.h_ms_ * S_.y1_ + S_.y2_;
    S_.y1_ = V_.expm1_tau_syn_ * S_.y1_ + S_.y1_;

    // add synaptic inputs from the ring buffer
    // this must happen BEFORE threshold-crossing interpolation,
    // since synaptic inputs occurred during the interval
    S_.y1_ += B_.spike_y1_.get_value( lag );
    S_.y2_ += B_.spike_y2_.get_value( lag );


    // neuron spikes
    if ( S_.y3_ >= P_.U_th_ )
    {
      // compute spike time
      S_.last_spike_step_ = T + 1;

      // the time of the threshold crossing
      S_.last_spike_offset_ = V_.h_ms_ - thresh_find_( V_.h_ms_ );

      // reset AFTER spike-time interpolation
      S_.y3_ = P_.U_reset_;
      S_.r_ = V_.refractory_steps_;

      // send event
      set_spiketime( Time::step( S_.last_spike_step_ ), S_.last_spike_offset_ );

      SpikeEvent se;
      se.set_offset( S_.last_spike_offset_ );
      kernel().event_delivery_manager.send( *this, se, lag );
    }

    // Set new input current. The current change occurs at the
    // end of the interval and thus must come AFTER the threshold-
    // crossing interpolation
    S_.y0_ = B_.currents_.get_value( lag );

    // logging
    B_.logger_.record_data( origin.get_steps() + lag );

  } // from lag = from ...
}
Example #10
int main(int argc, char* argv[])
{
  const int m = (1 < argc ? atoi(argv[1]) : 16);
  const int n = (2 < argc ? atoi(argv[2]) : m);
  const unsigned int ldi = LIBXSMM_MAX(3 < argc ? atoi(argv[3]) : 0, m);
  const unsigned int ldo = LIBXSMM_MAX(4 < argc ? atoi(argv[4]) : 0, m);
  const int unroll = (5 < argc ? atoi(argv[5]) : 1);
  const int prefetch = (6 < argc ? atoi(argv[6]) : 0);
  const int flags = ((7 < argc && 0 != atoi(argv[7])) ? LIBXSMM_MATCOPY_FLAG_ZERO_SOURCE : 0);
  const int iters = (8 < argc ? atoi(argv[8]) : 1);

  /* we should modify to test all data-types */
  const libxsmm_mcopy_descriptor* desc;
  libxsmm_xmcopyfunction kernel;
  libxsmm_descriptor_blob blob;
  libxsmm_timer_tickint l_start;
  libxsmm_timer_tickint l_end;
  int error = 0, i, j;
  ELEM_TYPE *a, *b;
  double copy_time;

  printf("This is a tester for JIT matcopy kernels!\n");
  desc = libxsmm_mcopy_descriptor_init(&blob, sizeof(ELEM_TYPE),
    m, n, ldo, ldi, flags, prefetch, &unroll);

  a = (ELEM_TYPE*)malloc(n * ldi * sizeof(ELEM_TYPE));
  b = (ELEM_TYPE*)malloc(n * ldo * sizeof(ELEM_TYPE));

  for (i = 0; i < n; i++) {
    for (j = 0; j < m; j++) {
      a[j+ldi*i] = (ELEM_TYPE)rand();
      if (0 != (LIBXSMM_MATCOPY_FLAG_ZERO_SOURCE & flags)) {
        b[j+ldo*i] = (ELEM_TYPE)rand();
      }
    }
  }

  /* test dispatch call */
  kernel = libxsmm_dispatch_mcopy(desc);
  if (kernel == 0) {
    printf("JIT error -> exit!!!!\n");
    exit(EXIT_FAILURE);
  }

  /* let's call */
  kernel(a, &ldi, b, &ldo, &a[128]);

  l_start = libxsmm_timer_tick();
  for (i = 0; i < iters; ++i) {
    kernel(a, &ldi, b, &ldo, &a[128]);
  }
  l_end = libxsmm_timer_tick();
  copy_time = libxsmm_timer_duration(l_start, l_end);

  for (i = 0; i < n; ++i) {
    for (j = 0; j < m; ++j) {
      if (0 != (LIBXSMM_MATCOPY_FLAG_ZERO_SOURCE & flags)) {
        if (LIBXSMM_NEQ(b[j+ldo*i], 0)) {
          printf("ERROR!!!\n");
          i = n;
          error = 1;
          break;
        }
      }
      else if (LIBXSMM_NEQ(a[j+ldi*i], b[j+ldo*i])) {
        printf("ERROR!!!\n");
        i = n;
        error = 1;
        break;
      }
    }
  }

  free(a);
  free(b);

  if (error == 0) {
    printf("CORRECT copy!!!!\n");
    printf("Time taken is\t%.5f seconds\n", copy_time);
  }

  return (0 == error) ? EXIT_SUCCESS : EXIT_FAILURE;
}
Example #11
void Points::GenerateKernel(int L, int point_count, std::string title)
{

    int g=point_count, m=0;

    LEGENDRE P_lm;
    LEGENDRE Y_P;
    LEGENDRE dP_lm;

    std::vector<std::vector<double> > d_kern(g); //kernel for derivative reconstruction
    std::vector<std::vector<double> > f_kern(g); //kernel for function (test) reconstruction
    std::vector<std::vector<double> > WT(g);

    std::complex<double> Y(0,0), Ylm(0,0), dYlm(0,0), Ymp1(0,0), ej(0,0), function(0,0), derivative(0,0);
    std::complex<double> im(0,1);

    double th1=0, ph1=0, sign=0;



    std::cout << title << std::endl;
    std::ofstream kernel(title);
    kernel.precision(15);




    for(int i=0; i<g; i++)
    {
        d_kern[i].resize(g);
        f_kern[i].resize(g);
        WT[i].resize(g);

        for(int j=0; j<g; j++)
        {
            for(double l=0; l<=L; l++)
            {
                for(double m_it=0; m_it<=(2*l); m_it++)
                {

                    m=0;

                    m = l-m_it;

                    //std::cout << "m = " <<  m << ", l = " << l <<  std::endl;

                    ej = m;
                    sign=pow(-1.0,m_it);

                    std::complex<double> exponential_prime(cos( Points::Phi[i]), (-1)*sin(Points::Phi[i]));
                    std::complex<double> exponential(cos(m*Points::Phi[j]), sin(m*Points::Phi[j]));


                    Ylm = P_lm.Yml(m, l, Points::Theta[i], Points::Phi[i]);
                    Y = Y_P.Yml(m, l, Points::Theta[j], Points::Phi[j]);

                    if( Theta[i] != 0 && ((m+1)<=l) )
                    {
                        Ymp1 = m * (1.0/tan(Points::Theta[i])) * dP_lm.Yml(m, l, Points::Theta[i], Points::Phi[i]) + sqrt( (l-m)*(l+m+1) ) * exponential_prime * dP_lm.Yml(m+1, l, Points::Theta[i], Points::Phi[i]);

                    }

                    ///fill arrays with f=Y*Y for the function kernel and derivative kernel

                    f_kern[i][j] += (conj(Y)*Ylm).real();//Y_real*Y_prime_real;
                    d_kern[i][j] += (conj(Y)*Ymp1).real();

                }

            }

            ///absorb weights into kernel

            WT[i][j] = Points::Weight[j]*4.0*PI;
            kernel << d_kern[i][j]*Points::Weight[j]*4.0*PI  << "      " << f_kern[i][j]*Points::Weight[j]*4.0*PI << "      " << WT[i][j] << std::endl;

        }
    }

    kernel.close();

}
void
nest::iaf_cond_exp::update( Time const& origin,
  const long_t from,
  const long_t to )
{

  assert(
    to >= 0 && ( delay ) from < kernel().connection_manager.get_min_delay() );
  assert( from < to );

  for ( long_t lag = from; lag < to; ++lag )
  {

    double t = 0.0;

    // numerical integration with adaptive step size control:
    // ------------------------------------------------------
    // gsl_odeiv_evolve_apply performs only a single numerical
    // integration step, starting from t and bounded by step;
    // the while-loop ensures integration over the whole simulation
    // step (0, step] if more than one integration step is needed due
    // to a small integration step size;
    // note that (t+IntegrationStep > step) leads to integration over
    // (t, step] and afterwards setting t to step, but it does not
    // enforce setting IntegrationStep to step-t; this is of advantage
    // for a consistent and efficient integration across subsequent
    // simulation intervals
    while ( t < B_.step_ )
    {
      const int status = gsl_odeiv_evolve_apply( B_.e_,
        B_.c_,
        B_.s_,
        &B_.sys_,             // system of ODE
        &t,                   // from t
        B_.step_,             // to t <= step
        &B_.IntegrationStep_, // integration step size
        S_.y_ );              // neuronal state

      if ( status != GSL_SUCCESS )
        throw GSLSolverFailure( get_name(), status );
    }

    S_.y_[ State_::G_EXC ] += B_.spike_exc_.get_value( lag );
    S_.y_[ State_::G_INH ] += B_.spike_inh_.get_value( lag );

    // absolute refractory period
    if ( S_.r_ )
    { // neuron is absolute refractory
      --S_.r_;
      S_.y_[ State_::V_M ] = P_.V_reset_;
    }
    else
      // neuron is not absolute refractory
      if ( S_.y_[ State_::V_M ] >= P_.V_th_ )
    {
      S_.r_ = V_.RefractoryCounts_;
      S_.y_[ State_::V_M ] = P_.V_reset_;

      set_spiketime( Time::step( origin.get_steps() + lag + 1 ) );

      SpikeEvent se;
      kernel().event_delivery_manager.send( *this, se, lag );
    }

    // set new input current
    B_.I_stim_ = B_.currents_.get_value( lag );

    // log state data
    B_.logger_.record_data( origin.get_steps() + lag );
  }
}
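For context, the adaptive-step loop above follows the standard gsl_odeiv pattern. A minimal standalone sketch using plain GSL 1.x (the toy decay ODE, tolerances and step sizes are my own choices, not NEST's):

#include <cstdio>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_odeiv.h>

// dy/dt = -y
static int decay_rhs( double, const double y[], double f[], void* )
{
  f[ 0 ] = -y[ 0 ];
  return GSL_SUCCESS;
}

int main()
{
  gsl_odeiv_step* s = gsl_odeiv_step_alloc( gsl_odeiv_step_rkf45, 1 );
  gsl_odeiv_control* c = gsl_odeiv_control_y_new( 1e-6, 0.0 );
  gsl_odeiv_evolve* e = gsl_odeiv_evolve_alloc( 1 );
  gsl_odeiv_system sys = { decay_rhs, 0, 1, 0 };

  double y[ 1 ] = { 1.0 };
  double t = 0.0;
  double h = 1e-3;         // integration step size, adapted by the control
  const double step = 0.1; // one "simulation step", as B_.step_ above

  // keep applying single integration steps until the interval is covered,
  // exactly as in the while-loop of update()
  while ( t < step )
  {
    const int status = gsl_odeiv_evolve_apply( e, c, s, &sys, &t, step, &h, y );
    if ( status != GSL_SUCCESS )
      return status;
  }

  std::printf( "y(%g) = %.6f\n", step, y[ 0 ] );

  gsl_odeiv_evolve_free( e );
  gsl_odeiv_control_free( c );
  gsl_odeiv_step_free( s );
  return 0;
}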
Example #13
ocl::Kernel& ocl::Program::kernel(const std::string &name) const
{
    const utl::Type& t = utl::Type::type<T>();
    return kernel(name, t);
}
void ProcessingThread::run()
{
    while(1)
    {
        // Check if paused
        pauseMutex.lock();
        if (paused)
        {
            pauseMutex.unlock();
            sleep(3);
            continue;
        }
        pauseMutex.unlock();

        /////////////////////////////////
        // Stop thread if stopped=TRUE //
        /////////////////////////////////
        stoppedMutex.lock();
        if (stopped)
        {
            stopped = false;
            stoppedMutex.unlock();
            break;
        }
        stoppedMutex.unlock();
        /////////////////////////////////
        /////////////////////////////////

        inputMutex.lock();
        if (inputMode != INPUT_IMAGE)
        {
            inputMutex.unlock();
            currentFrame = outputBuffer->getFrame();
        }
        else
        {
            inputMutex.unlock();
            if (outputBuffer->getSizeOfImageBuffer() > 0)
            {
                currentFrame = outputBuffer->getFrame();
            }
            msleep(50);
        }

        updM.lock();
        ////////////////////////////////////
        // PERFORM IMAGE PROCESSING BELOW //
        ////////////////////////////////////

        cv::Mat outputIm = currentFrame.clone();

        if (filters.flags[ImageProcessingFlags::ConvertColorspace])
        {
            switch (settings.colorSpace)
            {
            case 0:
            { // Gray
                cv::cvtColor(currentFrame,outputIm, CV_RGB2GRAY);
            } break;
            case 1:
            { // HSV
                cv::cvtColor(currentFrame,outputIm, CV_RGB2HSV);
            } break;
            case 3:
            { // Lab
                cv::cvtColor(currentFrame,outputIm, CV_RGB2Lab);
            } break;
            }
        }

        if (filters.flags[ImageProcessingFlags::SaltPepperNoise])
        {
            for (int i=0; i<settings.saltPepperNoiseDensity; i+=1)
            { // adding noise
                // generate randomly the col and row
                int m = qrand() % outputIm.rows;
                int n = qrand() % outputIm.cols;

                // generate randomly the value {black, white}
                int color_ = ((qrand() % 100) > 50) ? 255 : 0;

                if (outputIm.channels() == 1)
                { // gray-level image
                    outputIm.at<uchar>(m, n)= color_;
                }
                else if (outputIm.channels() == 3)
                { // color image
                    outputIm.at<cv::Vec3b>(m, n)[0]= color_;
                    outputIm.at<cv::Vec3b>(m, n)[1]= color_;
                    outputIm.at<cv::Vec3b>(m, n)[2]= color_;
                }
            }
        }

        if (filters.flags[ImageProcessingFlags::Dilate])
        {
            cv::dilate(outputIm,
                       outputIm,
                       cv::Mat(),
                       cv::Point(-1, -1),
                       settings.dilateIterations);
        }

        if (filters.flags[ImageProcessingFlags::Erode])
        {
            cv::erode(outputIm,
                      outputIm,
                      cv::Mat(),
                      cv::Point(-1, -1),
                      settings.erodeIterations);
        }

        if (filters.flags[ImageProcessingFlags::Open])
        {
            cv::morphologyEx(outputIm,
                             outputIm,
                             cv::MORPH_OPEN,
                             cv::Mat(),
                             cv::Point(-1, -1),
                             settings.openIterations);
        }

        if (filters.flags[ImageProcessingFlags::Close])
        {
            cv::morphologyEx(outputIm,
                             outputIm,
                             cv::MORPH_CLOSE,
                             cv::Mat(),
                             cv::Point(-1, -1),
                             settings.openIterations);
        }

        if (filters.flags[ImageProcessingFlags::Blur])
        {
            cv::GaussianBlur(outputIm,
                             outputIm,
                             cv::Size(settings.blurSize, settings.blurSize),
                             settings.blurSigma);
        }

        if (filters.flags[ImageProcessingFlags::Sobel])
        {
            int scale = 1;
            int delta = 0;
            int ddepth = CV_16S;

            // check the direction
            switch (settings.sobelDirection)
            {
            case 0:
            { // horizontal
                cv::Mat grad_x;
                cv::Sobel( outputIm, grad_x, ddepth, 1, 0, settings.sobelKernelSize, scale, delta, BORDER_DEFAULT );
                cv::convertScaleAbs( grad_x, outputIm );
            } break;
            case 1:
            { // vertical
                cv::Mat grad_y;
                cv::Sobel( outputIm, grad_y, ddepth, 0, 1, settings.sobelKernelSize, scale, delta, BORDER_DEFAULT );
                cv::convertScaleAbs( grad_y, outputIm );
            } break;
            case 2:
            { // both directions
                cv::Mat grad_x;
                cv::Mat grad_y;
                cv::Mat abs_grad_x;
                cv::Mat abs_grad_y;
                cv::Sobel( outputIm, grad_x, ddepth, 1, 0, settings.sobelKernelSize, scale, delta, BORDER_DEFAULT );
                cv::Sobel( outputIm, grad_y, ddepth, 0, 1, settings.sobelKernelSize, scale, delta, BORDER_DEFAULT );
                cv::convertScaleAbs( grad_x, abs_grad_x );
                cv::convertScaleAbs( grad_y, abs_grad_y );

                cv::addWeighted( abs_grad_x, 0.5, abs_grad_y, 0.5, 0, outputIm );
            } break;
            }
        }

        if (filters.flags[ImageProcessingFlags::Laplacian])
        {
            int scale = 1;
            int delta = 0;
            int ddepth = CV_16S;

            cv::Laplacian( outputIm, outputIm, ddepth, settings.laplacianKernelSize, scale, delta, BORDER_DEFAULT );
            cv::convertScaleAbs( outputIm, outputIm );
        }

        if (filters.flags[ImageProcessingFlags::SharpByKernel])
        {
            cv::Mat kernel(3,3,CV_32F,cv::Scalar(0));// init the kernel with zeros
            // assigns kernel values
            kernel.at<float>(1,1)= settings.sharpKernelCenter;
            kernel.at<float>(0,1)= -1.0;
            kernel.at<float>(2,1)= -1.0;
            kernel.at<float>(1,0)= -1.0;
            kernel.at<float>(1,2)= -1.0;
            //filter the image
            cv::filter2D(outputIm,outputIm,outputIm.depth(),kernel);
        }

        if (filters.flags[ImageProcessingFlags::EdgeDetection])
        { // with canny
            cv::Canny(outputIm, outputIm, settings.cannyLowThres, settings.cannyHighThres);
        }

        if (filters.flags[ImageProcessingFlags::LinesHough])
        {
            // Apply Canny algorithm
            cv::Mat contours;
            cv::Canny(outputIm,contours,125,350);

            // Hough tranform for line detection
            vector<cv::Vec2f> lines;
            cv::HoughLines(contours,lines, 1, PI/180, settings.linesHoughVotes);

            vector<cv::Vec2f>::const_iterator it= lines.begin();

            while (it!=lines.end())
            {
                float rho = (*it)[0]; // first element is distance rho
                float theta = (*it)[1]; // second element is angle theta
                if (theta < PI/4. || theta > 3.*PI/4.)
                {// ~vertical line
                    // point of intersection of the line with first row
                    cv::Point pt1(rho/cos(theta),0);
                    // point of intersection of the line with last row
                    cv::Point pt2((rho-contours.rows*sin(theta))/cos(theta),contours.rows);
                    // draw a white line
                    cv::line( outputIm, pt1, pt2, cv::Scalar(255), 1);
                }
                else
                { // ~horizontal line
                    // point of intersection of the line with first column
                    cv::Point pt1(0,rho/sin(theta));
                    // point of intersection of the line with last column
                    cv::Point pt2(contours.cols, (rho-contours.cols*cos(theta))/sin(theta));
                    // draw a white line
                    cv::line(outputIm, pt1, pt2, cv::Scalar(255), 1);
                }
                ++it;
            }
        }

        if (filters.flags[ImageProcessingFlags::CirclesHough])
        {
            cv::Mat temp;
            if (outputIm.channels() > 1)
            {
                cv::cvtColor(outputIm, temp, CV_RGB2GRAY);
            }
            else
            {
                temp = outputIm;
            }

            cv::GaussianBlur(temp, temp, cv::Size(5,5), 1.5);
            vector<cv::Vec3f> circles;

            cv::HoughCircles(temp, circles, CV_HOUGH_GRADIENT,
                            2,    // accumulator resolution (size of the image / 2)
                            50,   // minimum distance between two circles
                            200,  // Canny high threshold
                            60,   // minimum number of votes
                            settings.circlesHoughMin,
                            settings.circlesHoughMax);

            std::vector<cv::Vec3f>::const_iterator itc= circles.begin();
            while (itc!=circles.end())
            {
                cv::circle(outputIm,
                        cv::Point((*itc)[0], (*itc)[1]), // circle centre
                                (*itc)[2],               // circle radius
                                cv::Scalar(255),         // color
                                2);                      // thickness
                ++itc;
            }
        }

        if (filters.flags[ImageProcessingFlags::Countours])
        {
            cv::Mat temp;
            if (outputIm.channels() > 1)
            {
                cv::cvtColor(outputIm, temp, CV_RGB2GRAY);
            }
            else
            {
                temp = outputIm;
            }

            cv::blur(temp, temp, Size(3,3));
            cv::Canny(temp, temp, settings.contoursThres, settings.contoursThres+30);

            vector< vector<cv::Point> > contours;
            cv::findContours(temp,
                            contours,              // a vector of contours
                            CV_RETR_TREE,          // retrieve all contours, reconstructs a full hierarchy
                            CV_CHAIN_APPROX_NONE); // all pixels of each contours

            cv::drawContours(outputIm,contours,
                            -1,                        // draw all contours
                            cv::Scalar(255, 255, 255), // in white
                            1);                        // with a thickness of 1
        }

        if (filters.flags[ImageProcessingFlags::BoundingBox])
        {
            cv::Mat temp;
            if (outputIm.channels() > 1)
            {
                cv::cvtColor(outputIm, temp, CV_RGB2GRAY);
            }
            else
            {
                temp = outputIm;
            }

            cv::blur(temp, temp, Size(3,3));
            cv::Canny(temp, temp, settings.boundingBoxThres, settings.boundingBoxThres*2);

            vector< vector<cv::Point> > contours;
            cv::findContours(temp,
                            contours,              // a vector of contours
                            CV_RETR_TREE,          // retrieve all contours, reconstructs a full hierarchy
                            CV_CHAIN_APPROX_NONE); // all pixels of each contours

            vector< vector<cv::Point> >::iterator itc = contours.begin();
            while (itc != contours.end())
            {
                cv::Rect r0 = cv::boundingRect(cv::Mat(*itc));
                cv::rectangle(outputIm,r0,cv::Scalar(255, 0, 0), 2);
                ++itc;
            }
        }

        if (filters.flags[ImageProcessingFlags::enclosingCircle])
        {
            cv::Mat temp;
            if (outputIm.channels() > 1)
            {
                cv::cvtColor(outputIm, temp, CV_RGB2GRAY);
            }
            else
            {
                temp = outputIm;
            }

            cv::blur(temp, temp, Size(3,3));
            cv::Canny(temp, temp, settings.enclosingCircleThres, settings.enclosingCircleThres*2);

            vector< vector<cv::Point> > contours;
            cv::findContours(temp,
                            contours,              // a vector of contours
                            CV_RETR_TREE,          // retrieve all contours, reconstructs a full hierarchy
                            CV_CHAIN_APPROX_NONE); // all pixels of each contours

            vector< vector<cv::Point> >::iterator itc = contours.begin();
            while (itc != contours.end())
            {
                float radius;
                cv::Point2f center;
                cv::minEnclosingCircle(cv::Mat(*itc),center,radius);
                cv::circle(outputIm, center,
                        static_cast<int>(radius),
                        cv::Scalar(0, 255, 0),
                        2);
                ++itc;
            }
        }

        if (filters.flags[ImageProcessingFlags::harris])
        {
            cv::Mat temp;
            if (outputIm.channels() > 1)
            {
                cv::cvtColor(outputIm, temp, CV_RGB2GRAY);
            }
            else
            {
                temp = outputIm;
            }

            // Detector parameters
            int blockSize = 2;
            int apertureSize = 3;
            double k = 0.04;

            // Detecting corners
            cv::cornerHarris(temp, temp, blockSize, apertureSize, k, BORDER_DEFAULT);

            // Normalizing
            normalize(temp,temp, 0, 255, NORM_MINMAX, CV_32FC1, Mat());

            // Drawing a circle around corners
            for( int j = 0; j < temp.rows ; j++ )
            {
                for( int i = 0; i < temp.cols; i++ )
                {
                    if( (int) temp.at<float>(j,i) > settings.harrisCornerThres)
                    {
                        circle(outputIm, Point( i, j ), 5,  Scalar(0, 0 , 255), 2, 8, 0);
                    }
                }
            }
        }

        if (filters.flags[ImageProcessingFlags::FAST])
        {
            // vector of keypoints
            vector<cv::KeyPoint> keypoints;
            // Construction of the Fast feature detector object
            cv::FastFeatureDetector fast(settings.fastThreshold); // threshold for detection
            // feature point detection
            fast.detect(outputIm,keypoints);

            cv::drawKeypoints(outputIm, keypoints, outputIm, cv::Scalar(255,255,255), cv::DrawMatchesFlags::DRAW_OVER_OUTIMG);
        }

        if (filters.flags[ImageProcessingFlags::SURF])
        {
            // vector of keypoints
            vector<cv::KeyPoint> keypoints;
            // Construct the SURF feature detector object
            cv::SurfFeatureDetector surf((double) settings.surfThreshold); // threshold
            // Detect the SURF features
            surf.detect(outputIm,keypoints);

            // Draw the keypoints with scale and orientation information
            cv::drawKeypoints(outputIm, keypoints, outputIm, cv::Scalar(255,255,255),cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
        }

        if (filters.flags[ImageProcessingFlags::SIFT])
        {

            vector<cv::KeyPoint> keypoints;
            // Construct the SURF feature detector object
            cv::SiftFeatureDetector sift( settings.siftContrastThres,        // feature threshold
                                          (double) settings.siftEdgeThres); // threshold to reduce sens. to lines

            sift.detect(outputIm,keypoints);
            // Draw the keypoints with scale and orientation information
            cv::drawKeypoints(outputIm, keypoints, outputIm, cv::Scalar(255,255,255),cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
        }

        if (filters.flags[ImageProcessingFlags::EqualizeHistogram])
        {
            // converting the image to gray
            if (outputIm.channels() == 3)
            {
                vector<Mat> bgr_planes;
                split( outputIm, bgr_planes );
                equalizeHist( bgr_planes[0], bgr_planes[0] );
                equalizeHist( bgr_planes[1], bgr_planes[1] );
                equalizeHist( bgr_planes[2], bgr_planes[2] );
                merge( bgr_planes, outputIm );
            }
            else
            {
                equalizeHist( outputIm, outputIm );
            }
        }

        // Computing histogram
        if (filters.flags[ImageProcessingFlags::ComputeHistogram])
        {
            cv::Mat grayIm;
            cv::Mat hist;
            // converting the image to gray
            if (outputIm.channels() == 3)
            {
                 cv::cvtColor(outputIm,grayIm, CV_RGB2GRAY);
            }
            else
            {
                grayIm = outputIm;
            }

            int histSize = 256;           // number of bins
            float range [] = {0, 256};    // ranges
            const float* histRange = { range };
            bool uniform = true, accumulate = false;

            // compute histogram
            cv::calcHist(&grayIm,
                         1,  // using just one image
                         0,  // channels = 0: use channel 0 of grayIm
                         cv::Mat(),
                         hist,
                         1,
                         &histSize,
                         &histRange,
                         uniform,
                         accumulate);


            int hist_w = 691; int hist_h = 161;
            Mat result( hist_h, hist_w, CV_8UC3, Scalar( 255,255,255) );
            int bin_w = cvRound( (double) hist_w/histSize );
            normalize(hist, hist, 0, result.rows, NORM_MINMAX, -1, Mat());

            /// Draw for each channel
            for( int i = 1; i < histSize; i++ )
            {
                line(result,
                     Point( bin_w*(i-1), hist_h - cvRound(hist.at<float>(i-1)) ),
                     Point( bin_w*(i), hist_h - cvRound(hist.at<float>(i)) ),
                     Scalar( 0, 0, 0), 2, 8, 0  );
            }
            // emit signal
            emit newProcessedHistogram(MatToQImage(result));
        }

        updM.unlock();

        processedFrame =  outputIm;
        // Inform GUI thread of new frame (QImage)
        emit newProcessedFrame(MatToQImage(outputIm));
    }
}
Example #15
0
inline kernel_call transpose_kernel(
        const backend::command_queue &queue, size_t width, size_t height,
        const backend::device_vector<T2> &in,
        const backend::device_vector<T2> &out
        )
{
    backend::source_generator o;
    kernel_common<T>(o, queue);

    // determine max block size to fit into local memory/workgroup
    size_t block_size = 128;
    {
#ifndef VEXCL_BACKEND_CUDA
        cl_device_id dev = backend::get_device_id(queue);
        cl_ulong local_size;
        size_t workgroup;
        clGetDeviceInfo(dev, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &local_size, NULL);
        clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &workgroup, NULL);
#else
        const auto local_size = queue.device().max_shared_memory_per_block();
        const auto workgroup = queue.device().max_threads_per_block();
#endif
        while(block_size * block_size * sizeof(T) * 2 > local_size) block_size /= 2;
        while(block_size * block_size > workgroup) block_size /= 2;
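        // block_size is now the largest power-of-two tile edge (starting from
        // 128) for which a block_size x block_size tile of T2 elements (two T
        // components each) fits into local memory and into one work-group.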
    }

    // from NVIDIA SDK.
    o.kernel("transpose").open("(")
        .template parameter< global_ptr<const T2> >("input")
        .template parameter< global_ptr<      T2> >("output")
        .template parameter< cl_uint              >("width")
        .template parameter< cl_uint              >("height")
    .close(")").open("{");

    o.new_line() << "const size_t global_x = " << o.global_id(0) << ";";
    o.new_line() << "const size_t global_y = " << o.global_id(1) << ";";
    o.new_line() << "const size_t local_x  = " << o.local_id(0)  << ";";
    o.new_line() << "const size_t local_y  = " << o.local_id(1)  << ";";
    o.new_line() << "const size_t group_x  = " << o.group_id(0)  << ";";
    o.new_line() << "const size_t group_y  = " << o.group_id(1)  << ";";
    o.new_line() << "const size_t target_x = local_y + group_y * " << block_size << ";";
    o.new_line() << "const size_t target_y = local_x + group_x * " << block_size << ";";
    o.new_line() << "const bool range = global_x < width && global_y < height;";

    // local memory
    {
        std::ostringstream s;
        s << "block[" << block_size * block_size << "]";
        o.smem_static_var(type_name<T2>(), s.str());
    }

    // copy from input to local memory
    o.new_line() << "if(range) "
        << "block[local_x + local_y * " << block_size << "] = input[global_x + global_y * width];";

    // wait until the whole block is filled
    o.new_line().barrier();

    // transpose local block to target
    o.new_line() << "if(range) "
      << "output[target_x + target_y * height] = block[local_x + local_y * " << block_size << "];";

    o.close("}");

    backend::kernel kernel(queue, o.str(), "transpose");

    kernel.push_arg(in);
    kernel.push_arg(out);
    kernel.push_arg(static_cast<cl_uint>(width));
    kernel.push_arg(static_cast<cl_uint>(height));

    // number of blocks needed to cover the range; the last block may not be completely filled.
    size_t r_w = (width  + block_size - 1) / block_size;
    size_t r_h = (height + block_size - 1) / block_size;

    kernel.config(backend::ndrange(r_w, r_h), backend::ndrange(block_size, block_size));

    std::ostringstream desc;
    desc << "transpose{"
         << "w=" << width << "(" << r_w << "), "
         << "h=" << height << "(" << r_h << "), "
         << "bs=" << block_size << "}";

    return kernel_call(false, desc.str(), kernel);
}
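
The index arithmetic in the generated kernel above is easiest to check outside the source generator. The sketch below is only an illustration (it is not part of VexCL, and the tile size BS is an arbitrary assumption); it performs the same tiled transpose of a row-major width x height matrix on the host, using the same swap of local and group coordinates for target_x / target_y:

#include <cstddef>
#include <vector>

// Illustrative host-side analogue of the generated "transpose" kernel above.
// `in` is a row-major (height x width) matrix; `out` receives its transpose.
template <typename T>
void tiled_transpose(const std::vector<T> &in, std::vector<T> &out,
                     std::size_t width, std::size_t height, std::size_t BS = 16)
{
    out.assign(width * height, T());
    const std::size_t groups_x = (width  + BS - 1) / BS;
    const std::size_t groups_y = (height + BS - 1) / BS;
    for (std::size_t gy = 0; gy < groups_y; ++gy)
        for (std::size_t gx = 0; gx < groups_x; ++gx)
            for (std::size_t ly = 0; ly < BS; ++ly)
                for (std::size_t lx = 0; lx < BS; ++lx) {
                    const std::size_t global_x = gx * BS + lx;
                    const std::size_t global_y = gy * BS + ly;
                    // target coordinates swap local and group indices,
                    // exactly as in the generated kernel
                    const std::size_t target_x = ly + gy * BS;
                    const std::size_t target_y = lx + gx * BS;
                    if (global_x < width && global_y < height)
                        out[target_x + target_y * height] =
                            in[global_x + global_y * width];
                }
}
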
index NodeManager::add_node( index mod, long n ) // no_p
{
  assert( current_ != 0 );
  assert( root_ != 0 );

  if ( mod >= kernel().model_manager.get_num_node_models() )
  {
    throw UnknownModelID( mod );
  }

  if ( n < 1 )
  {
    throw BadProperty();
  }

  const thread n_threads = kernel().vp_manager.get_num_threads();
  assert( n_threads > 0 );

  const index min_gid = local_nodes_.get_max_gid() + 1;
  const index max_gid = min_gid + n;
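  // [min_gid, max_gid) is the GID range of the n nodes to be created;
  // max_gid - 1 (the last new GID) is returned at the end of this function.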

  Model* model = kernel().model_manager.get_model( mod );
  assert( model != 0 );

  model->deprecation_warning( "Create" );

  /* current_ points to the instance of the current subnet on thread 0.
     The following code makes subnet a pointer to the wrapper container
     containing the instances of the current subnet on all threads.
   */
  const index subnet_gid = current_->get_gid();
  Node* subnet_node = local_nodes_.get_node_by_gid( subnet_gid );
  assert( subnet_node != 0 );

  SiblingContainer* subnet_container =
    dynamic_cast< SiblingContainer* >( subnet_node );
  assert( subnet_container != 0 );
  assert( subnet_container->num_thread_siblings()
    == static_cast< size_t >( n_threads ) );
  assert( subnet_container->get_thread_sibling( 0 ) == current_ );

  if ( max_gid > local_nodes_.max_size() || max_gid < min_gid )
  {
    LOG( M_ERROR,
      "NodeManager::add_node",
      "Requested number of nodes will overflow the memory." );
    LOG( M_ERROR, "NodeManager::add_node", "No nodes were created." );
    throw KernelException( "OutOfMemory" );
  }
  kernel().modelrange_manager.add_range( mod, min_gid, max_gid - 1 );

  if ( model->potential_global_receiver()
    and kernel().mpi_manager.get_num_rec_processes() > 0 )
  {
    // In this branch we create nodes for global receivers
    const int n_per_process = n / kernel().mpi_manager.get_num_rec_processes();
    const int n_per_thread = n_per_process / n_threads + 1;

    // We only need to reserve memory on the ranks on which we
    // actually create nodes. In this if-branch ---> Only on recording
    // processes
    if ( kernel().mpi_manager.get_rank()
      >= kernel().mpi_manager.get_num_sim_processes() )
    {
      local_nodes_.reserve( std::ceil( static_cast< double >( max_gid )
        / kernel().mpi_manager.get_num_sim_processes() ) );
      for ( thread t = 0; t < n_threads; ++t )
      {
        // Model::reserve() reserves memory for n ADDITIONAL nodes on thread t
        model->reserve_additional( t, n_per_thread );
      }
    }

    for ( size_t gid = min_gid; gid < max_gid; ++gid )
    {
      const thread vp = kernel().vp_manager.suggest_rec_vp( get_n_gsd() );
      const thread t = kernel().vp_manager.vp_to_thread( vp );

      if ( kernel().vp_manager.is_local_vp( vp ) )
      {
        Node* newnode = model->allocate( t );
        newnode->set_gid_( gid );
        newnode->set_model_id( mod );
        newnode->set_thread( t );
        newnode->set_vp( vp );
        newnode->set_has_proxies( true );
        newnode->set_local_receiver( false );

        local_nodes_.add_local_node( *newnode ); // put into local nodes list

        current_->add_node( newnode ); // and into current subnet, thread 0.
      }
      else
      {
        local_nodes_.add_remote_node( gid ); // ensures max_gid is correct
        current_->add_remote_node( gid, mod );
      }
      increment_n_gsd();
    }
  }

  else if ( model->has_proxies() )
  {
    // In this branch we create nodes for all GIDs which are on a local thread
    const int n_per_process = n / kernel().mpi_manager.get_num_sim_processes();
    const int n_per_thread = n_per_process / n_threads + 1;

    // We only need to reserve memory on the ranks on which we
    // actually create nodes. In this if-branch ---> Only on
    // simulation processes
    if ( kernel().mpi_manager.get_rank()
      < kernel().mpi_manager.get_num_sim_processes() )
    {
      // TODO: This will work reasonably for round-robin. The extra 50 entries
      //       are for subnets and devices.
      local_nodes_.reserve(
        std::ceil( static_cast< double >( max_gid )
          / kernel().mpi_manager.get_num_sim_processes() ) + 50 );
      for ( thread t = 0; t < n_threads; ++t )
      {
        // Model::reserve() reserves memory for n ADDITIONAL nodes on thread t
        // reserves at least one entry on each thread, nobody knows why
        model->reserve_additional( t, n_per_thread );
      }
    }

    size_t gid;
    if ( kernel().vp_manager.is_local_vp(
           kernel().vp_manager.suggest_vp( min_gid ) ) )
    {
      gid = min_gid;
    }
    else
    {
      gid = next_local_gid_( min_gid );
    }
    size_t next_lid = current_->global_size() + gid - min_gid;
    // The next loop will not visit every node, if more than one rank is
    // present.
    // Since we already know what range of gids will be created, we can tell the
    // current subnet the range and subsequent calls to
    // `current_->add_remote_node()`
    // become irrelevant.
    current_->add_gid_range( min_gid, max_gid - 1 );

    // min_gid is the first valid gid I should create, hence ask for the first
    // local gid after min_gid-1
    while ( gid < max_gid )
    {
      const thread vp = kernel().vp_manager.suggest_vp( gid );
      const thread t = kernel().vp_manager.vp_to_thread( vp );

      if ( kernel().vp_manager.is_local_vp( vp ) )
      {
        Node* newnode = model->allocate( t );
        newnode->set_gid_( gid );
        newnode->set_model_id( mod );
        newnode->set_thread( t );
        newnode->set_vp( vp );

        local_nodes_.add_local_node( *newnode ); // put into local nodes list
        current_->add_node( newnode ); // and into current subnet, thread 0.

        // The lid setting would be wrong if a range is set, since the subnet
        // already assumes the nodes are available.
        newnode->set_lid_( next_lid );
        const size_t next_gid = next_local_gid_( gid );
        next_lid += next_gid - gid;
        gid = next_gid;
      }
      else
      {
        ++gid; // brutal fix, next_lid has been set in if-branch
      }
    }
    // if last gid is not on this process, we need to add it as a remote node
    if ( not kernel().vp_manager.is_local_vp(
           kernel().vp_manager.suggest_vp( max_gid - 1 ) ) )
    {
      local_nodes_.add_remote_node( max_gid - 1 ); // ensures max_gid is correct
      current_->add_remote_node( max_gid - 1, mod );
    }
  }
  else if ( not model->one_node_per_process() )
  {
    // We allocate space for n containers which will hold the threads
    // sorted. We use SiblingContainers to store the instances for
    // each thread to exploit the very efficient memory allocation for
    // nodes.
    //
    // These containers are registered in the global nodes_ array to
    // provide access to the instances both for manipulation by SLI
    // functions and so that NodeManager::calibrate() can discover the
    // instances and register them for updating.
    //
    // The instances are also registered with the instance of the
    // current subnet for the thread to which the created instance
    // belongs. This is mainly important so that the subnet structure
    // is preserved on all VPs. Node enumeration is done by the
    // registration with the per-thread instances.
    //
    // The wrapper container can be addressed under the GID assigned
    // to the no-proxy node created. If this no-proxy node is NOT a
    // container (e.g. a device), then each instance can be retrieved
    // by giving the respective thread-id to get_node(). Instances of
    // SiblingContainers cannot be addressed individually.
    //
    // The allocation of the wrapper containers is spread over threads
    // to balance memory load.
    size_t container_per_thread = n / n_threads + 1;

    // since we create the n nodes on each thread, we reserve the full load.
    for ( thread t = 0; t < n_threads; ++t )
    {
      model->reserve_additional( t, n );
      siblingcontainer_model_->reserve_additional( t, container_per_thread );
      static_cast< Subnet* >( subnet_container->get_thread_sibling( t ) )
        ->reserve( n );
    }

    // The following loop creates n nodes. For each node, a wrapper is created
    // and filled with one instance per thread, in total n * n_threads nodes in
    // n wrappers.
    local_nodes_.reserve(
      std::ceil( static_cast< double >( max_gid )
        / kernel().mpi_manager.get_num_sim_processes() ) + 50 );
    for ( index gid = min_gid; gid < max_gid; ++gid )
    {
      thread thread_id = kernel().vp_manager.vp_to_thread(
        kernel().vp_manager.suggest_vp( gid ) );

      // Create wrapper and register with nodes_ array.
      SiblingContainer* container = static_cast< SiblingContainer* >(
        siblingcontainer_model_->allocate( thread_id ) );
      container->set_model_id(
        -1 ); // mark as pseudo-container wrapping replicas, see reset_network()
      container->reserve( n_threads ); // space for one instance per thread
      container->set_gid_( gid );
      local_nodes_.add_local_node( *container );

      // Generate one instance of desired model per thread
      for ( thread t = 0; t < n_threads; ++t )
      {
        Node* newnode = model->allocate( t );
        newnode->set_gid_( gid ); // all instances get the same global id.
        newnode->set_model_id( mod );
        newnode->set_thread( t );
        newnode->set_vp( kernel().vp_manager.thread_to_vp( t ) );

        // Register instance with wrapper
        // container has one entry for each thread
        container->push_back( newnode );

        // Register instance with per-thread instance of enclosing subnet.
        static_cast< Subnet* >( subnet_container->get_thread_sibling( t ) )
          ->add_node( newnode );
      }
    }
  }
  else
  {
    // no proxies and one node per process
    // this is used by MUSIC proxies
    // Per r9700, this case is only relevant for music_*_proxy models,
    // which have a single instance per MPI process.
    for ( index gid = min_gid; gid < max_gid; ++gid )
    {
      Node* newnode = model->allocate( 0 );
      newnode->set_gid_( gid );
      newnode->set_model_id( mod );
      newnode->set_thread( 0 );
      newnode->set_vp( kernel().vp_manager.thread_to_vp( 0 ) );

      // Register instance
      local_nodes_.add_local_node( *newnode );

      // and into current subnet, thread 0.
      current_->add_node( newnode );
    }
  }

  // set off-grid spike communication if necessary
  if ( model->is_off_grid() )
  {
    kernel().event_delivery_manager.set_off_grid_communication( true );
    LOG( M_INFO,
      "NodeManager::add_node",
      "Neuron models emitting precisely timed spikes exist: "
      "the kernel property off_grid_spiking has been set to true.\n\n"
      "NOTE: Mixing precise-spiking and normal neuron models may "
      "lead to inconsistent results." );
  }

  return max_gid - 1;
}
Example #17
0
inline kernel_call bluestein_mul_in(
        const backend::command_queue &queue, bool inverse, size_t batch,
        size_t radix, size_t p, size_t threads, size_t stride,
        const backend::device_vector<T2> &data,
        const backend::device_vector<T2> &exp,
        const backend::device_vector<T2> &out
        )
{
    backend::source_generator o;
    kernel_common<T>(o, queue);
    mul_code<T2>(o, false);
    twiddle_code<T, T2>(o);

    o.kernel("bluestein_mul_in").open("(")
        .template parameter< global_ptr<const T2> >("data")
        .template parameter< global_ptr<const T2> >("exp")
        .template parameter< global_ptr<      T2> >("output")
        .template parameter< cl_uint              >("radix")
        .template parameter< cl_uint              >("p")
        .template parameter< cl_uint              >("out_stride")
    .close(")").open("{");

    o.new_line() << "const size_t thread  = " << o.global_id(0)   << ";";
    o.new_line() << "const size_t threads = " << o.global_size(0) << ";";
    o.new_line() << "const size_t batch   = " << o.global_id(1)   << ";";
    o.new_line() << "const size_t element = " << o.global_id(2)   << ";";

    o.new_line() << "if(element < out_stride)";
    o.open("{");

    o.new_line() << "const size_t in_off  = thread + batch * radix * threads + element * threads;";
    o.new_line() << "const size_t out_off = thread * out_stride + batch * out_stride * threads + element;";

    o.new_line() << "if(element < radix)";
    o.open("{");

    o.new_line() << type_name<T2>() << " w = exp[element];";

    o.new_line() << "if(p != 1)";
    o.open("{");

    o.new_line() << "ulong a = (ulong)element * (thread % p);";
    o.new_line() << "ulong b = (ulong)radix * p;";
    o.new_line() << type_name<T2>() << " t = twiddle(" << std::setprecision(16)
        << (inverse ? 1 : -1) * boost::math::constants::two_pi<T>()
        << " * (a % (2 * b)) / b);";
    o.new_line() << "w = mul(w, t);";
    o.close("}");

    o.new_line() << "output[out_off] = mul(data[in_off], w);";

    o.close("}");
    o.new_line() << "else";
    o.open("{");

    o.new_line() << type_name<T2>() << " r = {0,0};";
    o.new_line() << "output[out_off] = r;";

    o.close("}");
    o.close("}");
    o.close("}");

    backend::kernel kernel(queue, o.str(), "bluestein_mul_in");
    kernel.push_arg(data);
    kernel.push_arg(exp);
    kernel.push_arg(out);
    kernel.push_arg(static_cast<cl_uint>(radix));
    kernel.push_arg(static_cast<cl_uint>(p));
    kernel.push_arg(static_cast<cl_uint>(stride));

    const size_t wg = kernel.preferred_work_group_size_multiple(queue);
    const size_t stride_pad = (stride + wg - 1) / wg;
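    // stride_pad work-groups of size wg cover the out_stride range, so the
    // total work in the third dimension is stride_pad * wg >= stride; the
    // kernel guards out-of-range elements with "if(element < out_stride)".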

    kernel.config(
            backend::ndrange(threads, batch, stride_pad),
            backend::ndrange(      1,     1,         wg)
            );

    std::ostringstream desc;
    desc << "bluestein_mul_in{batch=" << batch << ", radix=" << radix << ", p=" << p << ", threads=" << threads << ", stride=" << stride << "(" << stride_pad << "), wg=" << wg << "}";
    return kernel_call(false, desc.str(), kernel);
}
bool
NodeManager::is_local_node( Node* n ) const
{
  return kernel().vp_manager.is_local_vp( n->get_vp() );
}
void
TopologyModule::init( SLIInterpreter* i )
{
  // Register the topology functions as SLI commands.

  i->createcommand( "CreateLayer_D", &createlayer_Dfunction );

  i->createcommand( "GetPosition_i", &getposition_ifunction );

  i->createcommand( "Displacement_a_i", &displacement_a_ifunction );

  i->createcommand( "Distance_a_i", &distance_a_ifunction );

  i->createcommand( "CreateMask_D", &createmask_Dfunction );

  i->createcommand( "Inside_a_M", &inside_a_Mfunction );

  i->createcommand( "and_M_M", &and_M_Mfunction );

  i->createcommand( "or_M_M", &or_M_Mfunction );

  i->createcommand( "sub_M_M", &sub_M_Mfunction );

  i->createcommand( "mul_P_P", &mul_P_Pfunction );

  i->createcommand( "div_P_P", &div_P_Pfunction );

  i->createcommand( "add_P_P", &add_P_Pfunction );

  i->createcommand( "sub_P_P", &sub_P_Pfunction );

  i->createcommand(
    "GetGlobalChildren_i_M_a", &getglobalchildren_i_M_afunction );

  i->createcommand( "ConnectLayers_i_i_D", &connectlayers_i_i_Dfunction );

  i->createcommand( "CreateParameter_D", &createparameter_Dfunction );

  i->createcommand( "GetValue_a_P", &getvalue_a_Pfunction );

  i->createcommand( "DumpLayerNodes_os_i", &dumplayernodes_os_ifunction );

  i->createcommand(
    "DumpLayerConnections_os_i_l", &dumplayerconnections_os_i_lfunction );

  i->createcommand( "GetElement_i_ia", &getelement_i_iafunction );

  i->createcommand( "cvdict_M", &cvdict_Mfunction );

  i->createcommand(
    "SelectNodesByMask_L_a_M", &selectnodesbymask_L_a_Mfunction );

  kernel().model_manager.register_node_model< FreeLayer< 2 > >(
    "topology_layer_free" );
  kernel().model_manager.register_node_model< FreeLayer< 3 > >(
    "topology_layer_free_3d" );
  kernel().model_manager.register_node_model< GridLayer< 2 > >(
    "topology_layer_grid" );
  kernel().model_manager.register_node_model< GridLayer< 3 > >(
    "topology_layer_grid_3d" );

  // Register mask types
  register_mask< BallMask< 2 > >();
  register_mask< BallMask< 3 > >();
  register_mask< EllipseMask< 2 > >();
  register_mask< EllipseMask< 3 > >();
  register_mask< BoxMask< 2 > >();
  register_mask< BoxMask< 3 > >();
  register_mask< BoxMask< 3 > >( "volume" ); // For compatibility with topo 2.0
  register_mask( "doughnut", create_doughnut );
  register_mask< GridMask< 2 > >();

  // Register parameter types
  register_parameter< ConstantParameter >( "constant" );
  register_parameter< LinearParameter >( "linear" );
  register_parameter< ExponentialParameter >( "exponential" );
  register_parameter< GaussianParameter >( "gaussian" );
  register_parameter< Gaussian2DParameter >( "gaussian2D" );
  register_parameter< GammaParameter >( "gamma" );
  register_parameter< UniformParameter >( "uniform" );
  register_parameter< NormalParameter >( "normal" );
  register_parameter< LognormalParameter >( "lognormal" );
}
void
NodeManager::ensure_valid_thread_local_ids()
{
  // Check if the network size changed, in order to not enter
  // the critical region if it is not necessary. Note that this
  // test also covers the case that nodes have been deleted
  // by reset.
  if ( size() == nodes_vec_network_size_ )
  {
    return;
  }

#ifdef _OPENMP
#pragma omp critical( update_nodes_vec )
  {
// This code may be called from a thread-parallel context, when it is
// invoked by TargetIdentifierIndex::set_target() during parallel
// wiring. Nested OpenMP parallelism is problematic, therefore, we
// enforce single threading here. This should be unproblematic wrt
// performance, because the nodes_vec_ is rebuilt only once after
// changes in network size.
#endif

    // Check again, if the network size changed, since a previous thread
    // can have updated nodes_vec_ before.
    if ( size() != nodes_vec_network_size_ )
    {

      /* We clear the existing nodes_vec_ and then rebuild it. */
      nodes_vec_.clear();
      nodes_vec_.resize( kernel().vp_manager.get_num_threads() );
      wfr_nodes_vec_.clear();
      wfr_nodes_vec_.resize( kernel().vp_manager.get_num_threads() );

      for ( index t = 0; t < kernel().vp_manager.get_num_threads(); ++t )
      {
        nodes_vec_[ t ].clear();
        wfr_nodes_vec_[ t ].clear();

        // Loops below run from index 1, because index 0 is always the root
        // network, which is never updated.
        size_t num_thread_local_nodes = 0;
        size_t num_thread_local_wfr_nodes = 0;
        for ( size_t idx = 1; idx < local_nodes_.size(); ++idx )
        {
          Node* node = local_nodes_.get_node_by_index( idx );
          if ( not node->is_subnet()
            && ( static_cast< index >( node->get_thread() ) == t
                 || node->num_thread_siblings() > 0 ) )
          {
            num_thread_local_nodes++;
            if ( node->node_uses_wfr() )
            {
              num_thread_local_wfr_nodes++;
            }
          }
        }
        nodes_vec_[ t ].reserve( num_thread_local_nodes );
        wfr_nodes_vec_[ t ].reserve( num_thread_local_wfr_nodes );

        for ( size_t idx = 1; idx < local_nodes_.size(); ++idx )
        {
          Node* node = local_nodes_.get_node_by_index( idx );

          // Subnets are never updated and therefore not included.
          if ( node->is_subnet() )
          {
            continue;
          }

          // If a node has thread siblings, it is a sibling container, and we
          // need to add the replica for the current thread. Otherwise, we have
          // a normal node, which is added only on the thread it belongs to.
          if ( node->num_thread_siblings() > 0 )
          {
            node->get_thread_sibling( t )->set_thread_lid(
              nodes_vec_[ t ].size() );
            nodes_vec_[ t ].push_back( node->get_thread_sibling( t ) );
          }
          else if ( static_cast< index >( node->get_thread() ) == t )
          {
            // these nodes cannot be subnets
            node->set_thread_lid( nodes_vec_[ t ].size() );
            nodes_vec_[ t ].push_back( node );

            if ( node->node_uses_wfr() )
            {
              wfr_nodes_vec_[ t ].push_back( node );
            }
          }
        }
      } // end of for threads

      nodes_vec_network_size_ = size();

      wfr_is_used_ = false;
      // wfr_is_used_ indicates whether at least one of the threads has a
      // neuron that uses waveform relaxation. All threads then need to
      // perform a wfr_update step, because gather_events() has to be done
      // in an OpenMP single section.
      for ( index t = 0; t < kernel().vp_manager.get_num_threads(); ++t )
      {
        if ( wfr_nodes_vec_[ t ].size() > 0 )
        {
          wfr_is_used_ = true;
        }
      }
    }
#ifdef _OPENMP
  } // end of omp critical region
#endif
}
void
EventDeliveryManager::collocate_buffers_( bool done )
{
  // count number of spikes in registers
  int num_spikes = 0;
  int num_grid_spikes = 0;
  int num_offgrid_spikes = 0;
  int uintsize_secondary_events = 0;

  std::vector< std::vector< std::vector< unsigned int > > >::iterator i;
  std::vector< std::vector< unsigned int > >::iterator j;
  for ( i = spike_register_.begin(); i != spike_register_.end(); ++i )
    for ( j = i->begin(); j != i->end(); ++j )
      num_grid_spikes += j->size();

  std::vector< std::vector< std::vector< OffGridSpike > > >::iterator it;
  std::vector< std::vector< OffGridSpike > >::iterator jt;
  for ( it = offgrid_spike_register_.begin();
        it != offgrid_spike_register_.end();
        ++it )
    for ( jt = it->begin(); jt != it->end(); ++jt )
      num_offgrid_spikes += jt->size();

  // accumulate number of generated spikes in the local spike counter
  local_spike_counter_ += num_grid_spikes + num_offgrid_spikes;

  // here we need to count the secondary events and take them
  // into account in the size of the buffers
  // assume that we already serialized all secondary
  // events into the secondary_events_buffer_
  // and that secondary_events_buffer_.size() contains the correct size
  // of this buffer in units of unsigned int

  for ( j = secondary_events_buffer_.begin();
        j != secondary_events_buffer_.end();
        ++j )
    uintsize_secondary_events += j->size();

  // +1 because we need one end marker invalid_synindex
  // +1 for bool-value done
  num_spikes =
    num_grid_spikes + num_offgrid_spikes + uintsize_secondary_events + 2;

  if ( !off_grid_spiking_ ) // on grid spiking
  {
    // make sure buffers are correctly sized
    if ( global_grid_spikes_.size()
      != static_cast< unsigned int >(
           kernel().mpi_manager.get_recv_buffer_size() ) )
      global_grid_spikes_.resize(
        kernel().mpi_manager.get_recv_buffer_size(), 0 );

    if ( num_spikes + ( kernel().vp_manager.get_num_threads()
                        * kernel().connection_manager.get_min_delay() )
      > static_cast< unsigned int >(
           kernel().mpi_manager.get_send_buffer_size() ) )
      local_grid_spikes_.resize(
        ( num_spikes + ( kernel().connection_manager.get_min_delay()
                         * kernel().vp_manager.get_num_threads() ) ),
        0 );
    else if ( local_grid_spikes_.size()
      < static_cast< unsigned int >(
                kernel().mpi_manager.get_send_buffer_size() ) )
      local_grid_spikes_.resize(
        kernel().mpi_manager.get_send_buffer_size(), 0 );

    // collocate the entries of spike_register_ into local_grid_spikes_
    std::vector< unsigned int >::iterator pos = local_grid_spikes_.begin();
    if ( num_offgrid_spikes == 0 )
    {
      for ( i = spike_register_.begin(); i != spike_register_.end(); ++i )
        for ( j = i->begin(); j != i->end(); ++j )
        {
          pos = std::copy( j->begin(), j->end(), pos );
          *pos = comm_marker_;
          ++pos;
        }
    }
    else
    {
      std::vector< OffGridSpike >::iterator n;
      it = offgrid_spike_register_.begin();
      for ( i = spike_register_.begin(); i != spike_register_.end(); ++i )
      {
        jt = it->begin();
        for ( j = i->begin(); j != i->end(); ++j )
        {
          pos = std::copy( j->begin(), j->end(), pos );
          for ( n = jt->begin(); n != jt->end(); ++n )
          {
            *pos = n->get_gid();
            ++pos;
          }
          *pos = comm_marker_;
          ++pos;
          ++jt;
        }
        ++it;
      }
      for ( it = offgrid_spike_register_.begin();
            it != offgrid_spike_register_.end();
            ++it )
        for ( jt = it->begin(); jt != it->end(); ++jt )
          jt->clear();
    }

    // remove old spikes from the spike_register_
    for ( i = spike_register_.begin(); i != spike_register_.end(); ++i )
      for ( j = i->begin(); j != i->end(); ++j )
        j->clear();

    // here all spikes have been written to the local_grid_spikes buffer
    // pos points to next position in this outgoing communication buffer
    for ( j = secondary_events_buffer_.begin();
          j != secondary_events_buffer_.end();
          ++j )
    {
      pos = std::copy( j->begin(), j->end(), pos );
      j->clear();
    }

    // end marker after last secondary event
    // made sure in resize that this position is still allocated
    write_to_comm_buffer( invalid_synindex, pos );
    // append the boolean value indicating whether we are done here
    write_to_comm_buffer( done, pos );
  }
  else // off_grid_spiking
  {
    // make sure buffers are correctly sized
    if ( global_offgrid_spikes_.size()
      != static_cast< unsigned int >(
           kernel().mpi_manager.get_recv_buffer_size() ) )
      global_offgrid_spikes_.resize(
        kernel().mpi_manager.get_recv_buffer_size(), OffGridSpike( 0, 0.0 ) );

    if ( num_spikes + ( kernel().vp_manager.get_num_threads()
                        * kernel().connection_manager.get_min_delay() )
      > static_cast< unsigned int >(
           kernel().mpi_manager.get_send_buffer_size() ) )
      local_offgrid_spikes_.resize(
        ( num_spikes + ( kernel().connection_manager.get_min_delay()
                         * kernel().vp_manager.get_num_threads() ) ),
        OffGridSpike( 0, 0.0 ) );
    else if ( local_offgrid_spikes_.size()
      < static_cast< unsigned int >(
                kernel().mpi_manager.get_send_buffer_size() ) )
      local_offgrid_spikes_.resize(
        kernel().mpi_manager.get_send_buffer_size(), OffGridSpike( 0, 0.0 ) );

    // collocate the entries of the spike registers into local_offgrid_spikes_
    std::vector< OffGridSpike >::iterator pos = local_offgrid_spikes_.begin();
    if ( num_grid_spikes == 0 )
      for ( it = offgrid_spike_register_.begin();
            it != offgrid_spike_register_.end();
            ++it )
        for ( jt = it->begin(); jt != it->end(); ++jt )
        {
          pos = std::copy( jt->begin(), jt->end(), pos );
          pos->set_gid( comm_marker_ );
          ++pos;
        }
    else
    {
      std::vector< unsigned int >::iterator n;
      i = spike_register_.begin();
      for ( it = offgrid_spike_register_.begin();
            it != offgrid_spike_register_.end();
            ++it )
      {
        j = i->begin();
        for ( jt = it->begin(); jt != it->end(); ++jt )
        {
          pos = std::copy( jt->begin(), jt->end(), pos );
          for ( n = j->begin(); n != j->end(); ++n )
          {
            *pos = OffGridSpike( *n, 0 );
            ++pos;
          }
          pos->set_gid( comm_marker_ );
          ++pos;
          ++j;
        }
        ++i;
      }
      for ( i = spike_register_.begin(); i != spike_register_.end(); ++i )
        for ( j = i->begin(); j != i->end(); ++j )
          j->clear();
    }

    // empty offgrid_spike_register_
    for ( it = offgrid_spike_register_.begin();
          it != offgrid_spike_register_.end();
          ++it )
      for ( jt = it->begin(); jt != it->end(); ++jt )
        jt->clear();
  }
}
void
NodeManager::prepare_nodes()
{
  assert( kernel().is_initialized() );

  /* We initialize the buffers of each node and calibrate it. */

  size_t num_active_nodes = 0;     // counts nodes that will be updated
  size_t num_active_wfr_nodes = 0; // counts nodes that use waveform relaxation

  std::vector< lockPTR< WrappedThreadException > > exceptions_raised(
    kernel().vp_manager.get_num_threads() );

#ifdef _OPENMP
#pragma omp parallel reduction( + : num_active_nodes, num_active_wfr_nodes )
  {
    size_t t = kernel().vp_manager.get_thread_id();
#else
    for ( index t = 0; t < kernel().vp_manager.get_num_threads(); ++t )
    {
#endif

    // We prepare nodes in a parallel region. Therefore, we need to catch
    // exceptions here and then handle them after the parallel region.
    try
    {
      for ( std::vector< Node* >::iterator it = nodes_vec_[ t ].begin();
            it != nodes_vec_[ t ].end();
            ++it )
      {
        prepare_node_( *it );
        if ( not( *it )->is_frozen() )
        {
          ++num_active_nodes;
          if ( ( *it )->node_uses_wfr() )
          {
            ++num_active_wfr_nodes;
          }
        }
      }
    }
    catch ( std::exception& e )
    {
      // so throw the exception after parallel region
      exceptions_raised.at( t ) =
        lockPTR< WrappedThreadException >( new WrappedThreadException( e ) );
    }

  } // end of parallel section / end of for threads

  // check if any exceptions have been raised
  for ( index thr = 0; thr < kernel().vp_manager.get_num_threads(); ++thr )
  {
    if ( exceptions_raised.at( thr ).valid() )
    {
      throw WrappedThreadException( *( exceptions_raised.at( thr ) ) );
    }
  }

  std::ostringstream os;
  std::string tmp_str = num_active_nodes == 1 ? " node" : " nodes";
  os << "Preparing " << num_active_nodes << tmp_str << " for simulation.";

  if ( num_active_wfr_nodes != 0 )
  {
    tmp_str = num_active_wfr_nodes == 1 ? " uses " : " use ";
    os << " " << num_active_wfr_nodes << " of them" << tmp_str
       << "iterative solution techniques.";
  }

  num_active_nodes_ = num_active_nodes;
  LOG( M_INFO, "NodeManager::prepare_nodes", os.str() );
}

void
NodeManager::post_run_cleanup()
{
#ifdef _OPENMP
#pragma omp parallel
  {
    index t = kernel().vp_manager.get_thread_id();
#else // clang-format off
  for ( index t = 0; t < kernel().vp_manager.get_num_threads(); ++t )
  {
#endif // clang-format on
    for ( size_t idx = 0; idx < local_nodes_.size(); ++idx )
    {
      Node* node = local_nodes_.get_node_by_index( idx );
      if ( node != 0 )
      {
        if ( node->num_thread_siblings() > 0 )
        {
          node->get_thread_sibling( t )->post_run_cleanup();
        }
        else
        {
          if ( static_cast< index >( node->get_thread() ) == t )
          {
            node->post_run_cleanup();
          }
        }
      }
    }
  }
}
void
nest::FixedOutDegreeBuilder::connect_()
{
  librandom::RngPtr grng = kernel().rng_manager.get_grng();

  for ( GIDCollection::const_iterator sgid = sources_->begin();
        sgid != sources_->end();
        ++sgid )
  {
    std::set< long > ch_ids;
    std::vector< index > tgt_ids_;
    const long n_rnd = targets_->size();

    for ( long j = 0; j < outdegree_; ++j )
    {
      unsigned long t_id;
      index tgid;
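      // Rejection sampling: redraw the target while it would create a
      // forbidden autapse (tgid == *sgid) or multapse (t_id already drawn).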

      do
      {
        t_id = grng->ulrand( n_rnd );
        tgid = ( *targets_ )[ t_id ];
      } while ( ( not autapses_ and tgid == *sgid )
        || ( not multapses_ and ch_ids.find( t_id ) != ch_ids.end() ) );

      if ( not multapses_ )
        ch_ids.insert( t_id );

      tgt_ids_.push_back( tgid );
    }

#pragma omp parallel
    {
      // get thread id
      const int tid = kernel().vp_manager.get_thread_id();

      try
      {
        // allocate pointer to thread specific random generator
        librandom::RngPtr rng = kernel().rng_manager.get_rng( tid );

        for ( std::vector< index >::const_iterator tgid = tgt_ids_.begin();
              tgid != tgt_ids_.end();
              ++tgid )
        {
          // check whether the target is on this mpi machine
          if ( not kernel().node_manager.is_local_gid( *tgid ) )
            continue;

          Node* const target = kernel().node_manager.get_node( *tgid );
          const thread target_thread = target->get_thread();

          // check whether the target is on our thread
          if ( tid != target_thread )
            continue;

          single_connect_( *sgid, *target, target_thread, rng );
        }
      }
      catch ( std::exception& err )
      {
        // We must create a new exception here, err's lifetime ends at
        // the end of the catch block.
        exceptions_raised_.at( tid ) = lockPTR< WrappedThreadException >(
          new WrappedThreadException( err ) );
      }
    }
  }
}
Example #24
0
/**
 * Compute a distance substitution kernel
 * @param x first string 
 * @param y second string
 * @return distance substitution kernel
 */
float kern_distance_compare(hstring_t x, hstring_t y)
{
    float k = kernel(x, y);
    return knorm(norm, k, x, y, kernel);
}
inline void
nest::ConnBuilder::check_synapse_params_( std::string syn_name,
  const DictionaryDatum& syn_spec )
{
  // throw error if weight is specified with static_synapse_hom_w
  if ( syn_name == "static_synapse_hom_w" )
  {
    if ( syn_spec->known( names::weight ) )
      throw BadProperty(
        "Weight cannot be specified since it needs to be equal "
        "for all connections when static_synapse_hom_w is used." );
    return;
  }


  // throw error if n or a are set in quantal_stp_synapse, Connect cannot handle
  // them since they are integer
  if ( syn_name == "quantal_stp_synapse" )
  {
    if ( syn_spec->known( names::n ) )
      throw NotImplemented(
        "Connect doesn't support the setting of parameter "
        "n in quantal_stp_synapse. Use SetDefaults() or CopyModel()." );
    if ( syn_spec->known( names::a ) )
      throw NotImplemented(
        "Connect doesn't support the setting of parameter "
        "a in quantal_stp_synapse. Use SetDefaults() or CopyModel()." );
    return;
  }

  // print warning if delay is specified outside cont_delay_synapse
  if ( syn_name == "cont_delay_synapse" )
  {
    if ( syn_spec->known( names::delay ) )
      LOG( M_WARNING,
        "Connect",
        "The delay will be rounded to the next multiple of the time step. "
        "To use a more precise time delay it needs to be defined within "
        "the synapse, e.g. with CopyModel()." );
    return;
  }

  // throw error if no volume transmitter is defined or parameters are specified
  // that need to be introduced via CopyModel or SetDefaults
  if ( syn_name == "stdp_dopamine_synapse" )
  {
    if ( syn_spec->known( "vt" ) )
      throw NotImplemented(
        "Connect doesn't support the direct specification of the "
        "volume transmitter of stdp_dopamine_synapse in syn_spec."
        "Use SetDefaults() or CopyModel()." );
    // setting of parameters c and n is not thread-safe
    if ( kernel().vp_manager.get_num_threads() > 1 )
    {
      if ( syn_spec->known( names::c ) )
        throw NotImplemented(
          "For multi-threading Connect doesn't support the setting "
          "of parameter c in stdp_dopamine_synapse. "
          "Use SetDefaults() or CopyModel()." );
      if ( syn_spec->known( names::n ) )
        throw NotImplemented(
          "For multi-threading Connect doesn't support the setting "
          "of parameter n in stdp_dopamine_synapse. "
          "Use SetDefaults() or CopyModel()." );
    }
    std::string param_arr[] = {
      "A_minus", "A_plus", "Wmax", "Wmin", "b", "tau_c", "tau_n", "tau_plus"
    };
    std::vector< std::string > param_vec( param_arr, param_arr + 8 );
    for ( std::vector< std::string >::iterator it = param_vec.begin();
          it != param_vec.end();
          it++ )
    {
      if ( syn_spec->known( *it ) )
        throw NotImplemented(
          "Connect doesn't support the setting of parameter " + *it
          + " in stdp_dopamine_synapse. Use SetDefaults() or CopyModel()." );
    }
    return;
  }
}
void
nest::iaf_psc_alpha_canon::update( Time const& origin,
  const long from,
  const long to )
{
  assert( to >= 0 );
  assert( static_cast< delay >( from )
    < kernel().connection_manager.get_min_delay() );
  assert( from < to );

  // at start of slice, tell input queue to prepare for delivery
  if ( from == 0 )
  {
    B_.events_.prepare_delivery();
  }

  /* Neurons may have been initialized to superthreshold potentials.
     We need to check for this here and issue spikes at the beginning of
     the interval.
  */
  if ( S_.y3_ >= P_.U_th_ )
  {
    emit_instant_spike_( origin,
      from,
      V_.h_ms_ * ( 1 - std::numeric_limits< double >::epsilon() ) );
  }

  for ( long lag = from; lag < to; ++lag )
  {
    // time at start of update step
    const long T = origin.get_steps() + lag;
    // if neuron returns from refractoriness during this step, place
    // pseudo-event in queue to mark end of refractory period
    if ( S_.is_refractory_
      && ( T + 1 - S_.last_spike_step_ == V_.refractory_steps_ ) )
    {
      B_.events_.add_refractory( T, S_.last_spike_offset_ );
    }

    // save state at beginning of interval for spike-time interpolation
    V_.y0_before_ = S_.y0_;
    V_.y2_before_ = S_.y2_;
    V_.y3_before_ = S_.y3_;

    // get first event
    double ev_offset;
    double ev_weight;
    bool end_of_refract;

    if ( not B_.events_.get_next_spike(
           T, true, ev_offset, ev_weight, end_of_refract ) )
    { // No incoming spikes, handle with fixed propagator matrix.
      // Handling this case separately improves performance significantly
      // if there are many steps without input spikes.

      // update membrane potential
      if ( not S_.is_refractory_ )
      {
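        // Propagate the membrane potential over one full step h_ms_ with the
        // precomputed propagator entries; writing exp(x)*y as y + expm1(x)*y
        // reduces round-off for small x.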
        S_.y3_ = V_.P30_ * ( P_.I_e_ + S_.y0_ ) + V_.P31_ * S_.y1_
          + V_.P32_ * S_.y2_ + V_.expm1_tau_m_ * S_.y3_ + S_.y3_;

        // lower bound of membrane potential
        S_.y3_ = ( S_.y3_ < P_.U_min_ ? P_.U_min_ : S_.y3_ );
      }

      // update synaptic currents
      S_.y2_ = V_.expm1_tau_syn_ * V_.h_ms_ * S_.y1_
        + V_.expm1_tau_syn_ * S_.y2_ + V_.h_ms_ * S_.y1_ + S_.y2_;
      S_.y1_ = V_.expm1_tau_syn_ * S_.y1_ + S_.y1_;

      /* The following must not be moved before the y1_, y2_ update,
         since the spike-time interpolation within emit_spike_ depends
         on all state variables having their values at the end of the
         interval.
      */
      if ( S_.y3_ >= P_.U_th_ )
      {
        emit_spike_( origin, lag, 0, V_.h_ms_ );
      }
    }
    else
    {
      // We only get here if there is at least one event,
      // which has been read above.  We can therefore use
      // a do-while loop.

      // Time within step is measured by offsets, which are h at the beginning
      // and 0 at the end of the step.
      double last_offset = V_.h_ms_; // start of step

      do
      {
        // time is measured backward: inverse order in difference
        const double ministep = last_offset - ev_offset;

        propagate_( ministep );

        // check for threshold crossing during ministep
        // this must be done before adding the input, since
        // interpolation requires continuity
        if ( S_.y3_ >= P_.U_th_ )
        {
          emit_spike_( origin, lag, V_.h_ms_ - last_offset, ministep );
        }

        // handle event
        if ( end_of_refract )
        {
          S_.is_refractory_ = false;
        } // return from refractoriness
        else
        {
          S_.y1_ += V_.PSCInitialValue_ * ev_weight;
        } // spike input

        // store state
        V_.y2_before_ = S_.y2_;
        V_.y3_before_ = S_.y3_;
        last_offset = ev_offset;

      } while ( B_.events_.get_next_spike(
        T, true, ev_offset, ev_weight, end_of_refract ) );

      // no events remaining, plain update step across remainder
      // of interval
      if ( last_offset > 0 ) // not at end of step, do remainder
      {
        propagate_( last_offset );
        if ( S_.y3_ >= P_.U_th_ )
        {
          emit_spike_( origin, lag, V_.h_ms_ - last_offset, last_offset );
        }
      }
    } // else

    // Set new input current. The current change occurs at the
    // end of the interval and thus must come AFTER the threshold-
    // crossing interpolation
    S_.y0_ = B_.currents_.get_value( lag );


    // logging
    B_.logger_.record_data( origin.get_steps() + lag );
  } // from lag = from ...
}
nest::ConnBuilder::ConnBuilder( const GIDCollection& sources,
  const GIDCollection& targets,
  const DictionaryDatum& conn_spec,
  const DictionaryDatum& syn_spec )
  : sources_( &sources )
  , targets_( &targets )
  , autapses_( true )
  , multapses_( true )
  , symmetric_( false )
  , exceptions_raised_( kernel().vp_manager.get_num_threads() )
  , synapse_model_( kernel().model_manager.get_synapsedict()->lookup(
      "static_synapse" ) )
  , weight_( 0 )
  , delay_( 0 )
  , param_dicts_()
  , parameters_requiring_skipping_()
{
  // read out rule-related parameters -------------------------
  //  - /rule has been taken care of above
  //  - rule-specific params are handled by subclass c'tor
  updateValue< bool >( conn_spec, names::autapses, autapses_ );
  updateValue< bool >( conn_spec, names::multapses, multapses_ );
  updateValue< bool >( conn_spec, names::symmetric, symmetric_ );

  // read out synapse-related parameters ----------------------
  if ( !syn_spec->known( names::model ) )
    throw BadProperty( "Synapse spec must contain synapse model." );
  const std::string syn_name = ( *syn_spec )[ names::model ];
  if ( not kernel().model_manager.get_synapsedict()->known( syn_name ) )
    throw UnknownSynapseType( syn_name );

  // If a synapse model other than static_synapse is given, we need to make
  // sure that Connect can process all parameters specified.
  if ( syn_name != "static_synapse" )
    check_synapse_params_( syn_name, syn_spec );

  synapse_model_ = kernel().model_manager.get_synapsedict()->lookup( syn_name );

  DictionaryDatum syn_defaults =
    kernel().model_manager.get_connector_defaults( synapse_model_ );

  // All synapse models have the possibility to set the delay (see
  // SynIdDelay), but some have homogeneous weights, hence it should
  // be possible to set the delay without the weight.
  default_weight_ = !syn_spec->known( names::weight );

  default_delay_ = !syn_spec->known( names::delay );

  // If neither weight nor delay are given in the dict, we handle this
  // separately. Important for hom_w synapses, on which weight cannot
  // be set. However, we use default weight and delay for _all_ types
  // of synapses.
  default_weight_and_delay_ = ( default_weight_ && default_delay_ );

#ifdef HAVE_MUSIC
  // We allow music_channel as alias for receptor_type during
  // connection setup
  ( *syn_defaults )[ names::music_channel ] = 0;
#endif

  if ( !default_weight_and_delay_ )
  {
    weight_ = syn_spec->known( names::weight )
      ? ConnParameter::create( ( *syn_spec )[ names::weight ],
          kernel().vp_manager.get_num_threads() )
      : ConnParameter::create( ( *syn_defaults )[ names::weight ],
          kernel().vp_manager.get_num_threads() );
    register_parameters_requiring_skipping_( *weight_ );
    delay_ = syn_spec->known( names::delay )
      ? ConnParameter::create(
          ( *syn_spec )[ names::delay ], kernel().vp_manager.get_num_threads() )
      : ConnParameter::create( ( *syn_defaults )[ names::delay ],
          kernel().vp_manager.get_num_threads() );
  }
  else if ( default_weight_ )
  {
    delay_ = syn_spec->known( names::delay )
      ? ConnParameter::create(
          ( *syn_spec )[ names::delay ], kernel().vp_manager.get_num_threads() )
      : ConnParameter::create( ( *syn_defaults )[ names::delay ],
          kernel().vp_manager.get_num_threads() );
  }
  register_parameters_requiring_skipping_( *delay_ );
  // Structural plasticity parameters
  // Check if both pre and post synaptic element are provided
  if ( syn_spec->known( names::pre_synaptic_element )
    && syn_spec->known( names::post_synaptic_element ) )
  {
    pre_synaptic_element_name =
      getValue< std::string >( syn_spec, names::pre_synaptic_element );
    post_synaptic_element_name =
      getValue< std::string >( syn_spec, names::post_synaptic_element );
  }
  else
  {
    if ( syn_spec->known( names::pre_synaptic_element )
      || syn_spec->known( names::post_synaptic_element ) )
    {
      throw BadProperty(
        "In order to use structural plasticity, both a pre and post synaptic "
        "element must be specified" );
    }
    pre_synaptic_element_name = "";
    post_synaptic_element_name = "";
  }

  // synapse-specific parameters
  // TODO: Can we create this set once and for all?
  //       Should not be done as static initialization, since
  //       that might conflict with static initialization of
  //       Name system.
  std::set< Name > skip_set;
  skip_set.insert( names::weight );
  skip_set.insert( names::delay );
  skip_set.insert( Name( "min_delay" ) );
  skip_set.insert( Name( "max_delay" ) );
  skip_set.insert( Name( "num_connections" ) );
  skip_set.insert( Name( "num_connectors" ) );
  skip_set.insert( Name( "property_object" ) );
  skip_set.insert( Name( "synapsemodel" ) );

  for ( Dictionary::const_iterator default_it = syn_defaults->begin();
        default_it != syn_defaults->end();
        ++default_it )
  {
    const Name param_name = default_it->first;
    if ( skip_set.find( param_name ) != skip_set.end() )
      continue; // weight, delay or not-settable parameter

    if ( syn_spec->known( param_name ) )
    {
      synapse_params_[ param_name ] = ConnParameter::create(
        ( *syn_spec )[ param_name ], kernel().vp_manager.get_num_threads() );
      register_parameters_requiring_skipping_( *synapse_params_[ param_name ] );
    }
  }

  // Now create dictionary with dummy values that we will use
  // to pass settings to the synapses created. We create it here
  // once to avoid re-creating the object over and over again.
  if ( synapse_params_.size() > 0 )
  {
    for ( index t = 0; t < kernel().vp_manager.get_num_threads(); ++t )
    {
      param_dicts_.push_back( new Dictionary() );

      for ( ConnParameterMap::const_iterator it = synapse_params_.begin();
            it != synapse_params_.end();
            ++it )
      {
        if ( it->first == names::receptor_type
          || it->first == names::music_channel
          || it->first == names::synapse_label )
          ( *param_dicts_[ t ] )[ it->first ] = Token( new IntegerDatum( 0 ) );
        else
          ( *param_dicts_[ t ] )[ it->first ] = Token( new DoubleDatum( 0.0 ) );
      }
    }
  }

  // If symmetric_ is requested call reset on all parameters in order
  // to check if all parameters support symmetric connections
  if ( symmetric_ )
  {
    if ( weight_ )
    {
      weight_->reset();
    }
    if ( delay_ )
    {
      delay_->reset();
    }
    for ( ConnParameterMap::const_iterator it = synapse_params_.begin();
          it != synapse_params_.end();
          ++it )
    {
      it->second->reset();
    }
  }
}
Example #28
0
int main(int argc, char *argv[])
{
  if (argc != 4)
  {
    cout << "Usage: " << argv[0] << " cpu|gpu out_func out_prefix" << endl;
    return 1;
  }

  ImageParam input(UInt(8), 3, "input");
  Func clamped("clamped"), grayscale("grayscale");
  Func g_x("g_x"), g_y("g_y"), g_mag("g_mag");
  Func sobel("sobel");
  Var c("c"), x("x"), y("y");

  // Algorithm
  clamped(x, y, c) = input(
    clamp(x, 0, input.width()-1),
    clamp(y, 0, input.height()-1),
    c) / 255.f;
  grayscale(x, y) =
    clamped(x, y, 0)*0.299f +
    clamped(x, y, 1)*0.587f +
    clamped(x, y, 2)*0.114f;

  Image<int16_t> kernel(3, 3);
  kernel(0, 0) = -1;
  kernel(0, 1) = -2;
  kernel(0, 2) = -1;
  kernel(1, 0) = 0;
  kernel(1, 1) = 0;
  kernel(1, 2) = 0;
  kernel(2, 0) = 1;
  kernel(2, 1) = 2;
  kernel(2, 2) = 1;
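
  // Reduction over the 3x3 kernel: g_x accumulates the horizontal gradient,
  // g_y uses the transposed kernel (r.y, r.x) for the vertical gradient.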

  RDom r(kernel);
  g_x(x, y) += kernel(r.x, r.y) * grayscale(x + r.x - 1, y + r.y - 1);
  g_y(x, y) += kernel(r.y, r.x) * grayscale(x + r.x - 1, y + r.y - 1);
  g_mag(x, y) = sqrt(g_x(x, y)*g_x(x, y) + g_y(x, y)*g_y(x, y));
  sobel(x, y, c) = select(c==3, 255, u8(clamp(g_mag(x, y), 0, 1)*255));

  // Channel order
  input.set_stride(0, 4);
  input.set_extent(2, 4);
  sobel.reorder_storage(c, x, y);
  sobel.output_buffer().set_stride(0, 4);
  sobel.output_buffer().set_extent(2, 4);

  // Schedules
  if (!strcmp(argv[1], "cpu"))
  {
    sobel.parallel(y).vectorize(c, 4);
  }
  else if (!strcmp(argv[1], "gpu"))
  {
    sobel.cuda_tile(x, y, 16, 4);
  }
  else
  {
    cout << "Invalid schedule type '" << argv[1] << "'" << endl;
    return 1;
  }

  compile(sobel, input, argv[2], argv[3]);

  return 0;
}
void
nest::AllToAllBuilder::connect_()
{

#pragma omp parallel
  {
    // get thread id
    const int tid = kernel().vp_manager.get_thread_id();

    try
    {
      // allocate pointer to thread specific random generator
      librandom::RngPtr rng = kernel().rng_manager.get_rng( tid );

      for ( GIDCollection::const_iterator tgid = targets_->begin();
            tgid != targets_->end();
            ++tgid )
      {
        // check whether the target is on this mpi machine
        if ( not kernel().node_manager.is_local_gid( *tgid ) )
        {
          for ( GIDCollection::const_iterator sgid = sources_->begin();
                sgid != sources_->end();
                ++sgid )
            skip_conn_parameter_( tid );
          continue;
        }

        Node* const target = kernel().node_manager.get_node( *tgid );
        const thread target_thread = target->get_thread();

        // check whether the target is on our thread
        if ( tid != target_thread )
        {
          for ( GIDCollection::const_iterator sgid = sources_->begin();
                sgid != sources_->end();
                ++sgid )
            skip_conn_parameter_( tid );
          continue;
        }

        for ( GIDCollection::const_iterator sgid = sources_->begin();
              sgid != sources_->end();
              ++sgid )
        {
          if ( not autapses_ and *sgid == *tgid )
          {
            skip_conn_parameter_( target_thread );
            continue;
          }

          single_connect_( *sgid, *target, target_thread, rng );
        }
      }
    }
    catch ( std::exception& err )
    {
      // We must create a new exception here, err's lifetime ends at
      // the end of the catch block.
      exceptions_raised_.at( tid ) =
        lockPTR< WrappedThreadException >( new WrappedThreadException( err ) );
    }
  }
}
Example #30
0
int main(int argc, char**argv){
  namespace po = boost::program_options;
  std::string spectra_filename;
  std::string orig_filename;
  std::string output_omega_filename;
  std::string output_diff_filename;
  std::string output_tau_filename;
  std::string kernel_name;
  int n_matsubara, n_tau;
  kernel_type k_type=standard;
  double beta;
  bool multiply_m1divpi=false;
  
  po::options_description desc("Allowed options");
  desc.add_options()
  ("help", "show this help")
  ("beta", po::value<double>(&beta), "inverse temperature")
  ("n_matsubara", po::value<int>(&n_matsubara)->default_value(-1), "number of matsubara frequencies")
  ("n_tau", po::value<int>(&n_tau)->default_value(20000), "number of imaginary time points")
  ("imag_freq_file", po::value<std::string>(&orig_filename)->default_value("G_omega_av.dat"), "input G(i omega_n) to maxent")
  ("real_freq_file", po::value<std::string>(&spectra_filename)->default_value("spectra.dat"), "output A=-1/pi*ImG(omega) from maxent")
  ("output_freq_file", po::value<std::string>(&output_omega_filename)->default_value("G_omega_back.dat"), "backcontinued output G(omega) with errors")
  ("diff_freq_file", po::value<std::string>(&output_diff_filename)->default_value("G_omega_diff.dat"), "difference to input file")
  ("output_tau_file", po::value<std::string>(&output_tau_filename)->default_value("G_tau_back.dat"), "backcontinued output G(tau) with errors")
  ("kernel", po::value<std::string>(&kernel_name)->default_value("standard"), "kernel type: standard, anomalous, ...")
  ("multiply_m1divpi", "if not specified: scales results by -pi, as required if converting ImG to A. If specified: standard Kramers Kronig (required for Sigma/Anomalous/etc backcont)")
  ;
  po::variables_map vm;
  po::store(po::parse_command_line(argc, argv, desc), vm);
  po::notify(vm);
  if (vm.count("help")) {
    std::cout<<desc;
    return 1;
  }

  //toggle between continuation of G to A and continuation of any other quantity with the standard Kramers-Kronig relation
  if (vm.count("multiply_m1divpi")) {
    multiply_m1divpi=true;
  }

  std::ifstream orig_file(orig_filename.c_str());
  if(!orig_file.good()) throw std::invalid_argument("imag freq file: "+orig_filename+" could not be opened. specify with --imag_freq_file");
  std::ifstream spectra_file(spectra_filename.c_str());
  if(!spectra_file.good()) throw std::invalid_argument("real freq file: "+spectra_filename+" could not be opened. specify with --real_freq_file");
  if(!vm.count("beta")) throw std::runtime_error("you need to specify the inverse temperature with --beta.");
  if(vm.count("kernel")){
    if(kernel_name==std::string("standard")){
      k_type=standard;
      std::cout<<"using standard kernel."<<std::endl;
    }else if(kernel_name==std::string("anomalous")){
      k_type=anomalous;
      std::cout<<"using anomalous kernel."<<std::endl;
    }else if(kernel_name==std::string("bosonic")){
      k_type=bosonic;
      std::cout<<"using bosonic kernel."<<std::endl;
    }else if(kernel_name==std::string("me_bosonic")){
      k_type=me_bosonic;
      std::cout<<"using maxent's bosonic kernel."<<std::endl;
    }else if(kernel_name==std::string("me_anomalous")){
      k_type=me_anomalous;
      std::cout<<"using maxent's anomalous kernel."<<std::endl;
    }else{
      throw std::runtime_error("kernel type not recognized.");
    }
  }
  
  std::vector<std::complex<double> > imag_freq_data;
  std::vector<std::complex<double> > imag_freq_error;
  std::vector<double > real_freq_data;
  std::vector<double > real_freq_freq;
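  //orig_file columns: omega_n, Re G, Im G, Re sigma, Im sigma;
  //spectra_file columns: omega, A(omega), default model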
  do{
    double dummy, imag_freq_data_real, imag_freq_data_imag, imag_freq_sigma_real, imag_freq_sigma_imag;
    orig_file>>dummy>>imag_freq_data_real>>imag_freq_data_imag>>imag_freq_sigma_real>>imag_freq_sigma_imag>>std::ws;
    imag_freq_data.push_back(std::complex<double>(imag_freq_data_real, imag_freq_data_imag));
    imag_freq_error.push_back(std::complex<double>(imag_freq_sigma_real, imag_freq_sigma_imag));
  }while(orig_file.good());
  do{
    double frequency, value, defaultm;
    spectra_file>>frequency>>value>>defaultm>>std::ws;
    real_freq_data.push_back(value);
    real_freq_freq.push_back(frequency);
  }while(spectra_file.good());
  
  std::cout<<"read in files: "<<imag_freq_data.size()<<" matsubara freqs and "<<real_freq_data.size()<<" real frequency points."<<std::endl;
  if(n_matsubara ==-1) n_matsubara=imag_freq_data.size();
  if(real_freq_data[0]+real_freq_data.back() > 1.e-4) std::cerr<<"problem with spectra: does not go to zero at boundary?\n";
  std::cout<<real_freq_data[0]<<" "<<real_freq_data.back()<<std::endl;
  
  //back-continue to the imaginary axis
  if(k_type==standard){
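    //G(tau) = -\int d omega A(omega) e^{-omega*tau}/(1+e^{-omega*beta}),
    //accumulated with midpoint weights over the real-frequency grid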
    std::vector<double > imag_time_back(n_tau,0.);
    std::ofstream gtau_file(output_tau_filename.c_str());
    gtau_file.precision(14);
    for(int i=0;i<n_tau;++i){
      double tau=i/(double)n_tau*beta;
      imag_time_back[i]=0.;
      for(int w=1;w<real_freq_freq.size()-1;++w){
        double freq =real_freq_freq[w];
        double value=real_freq_data[w];
        double delta=(real_freq_freq[w+1]-real_freq_freq[w-1])/2.;
        double kernel=-std::exp(-freq*tau)/(std::exp(-freq*beta)+1);
        if(!std::isnan(kernel))
          imag_time_back[i]+=kernel*value*delta;
        //std::cout<<freq<<" "<<value<<" "<<delta<<" "<<std::exp(-freq*tau)/(std::exp(-freq*beta)+1)*value*delta<<" "<<imag_time_back[i]<<std::endl;
      }
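      //endpoints enter with half the spacing to their single neighbor (trapezoid-style end correction)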
      double kernel1=-std::exp(-real_freq_freq[0]*tau    )/(std::exp(-real_freq_freq[0]    *beta)+1);
      double kernel2=-std::exp(-real_freq_freq.back()*tau)/(std::exp(-real_freq_freq.back()*beta)+1);
      if(!std::isnan(kernel1))
        imag_time_back[i]+=kernel1*real_freq_data[0]*(real_freq_freq[1]-real_freq_freq[0])/2.;
      if(!std::isnan(kernel2))
        imag_time_back[i]+=kernel2*real_freq_data.back()*(real_freq_freq.back()-real_freq_freq[real_freq_freq.size()-2])/2.;
      if(multiply_m1divpi) imag_time_back[i]*=-1./M_PI;
      gtau_file<<tau<<" "<<imag_time_back[i]<<std::endl;
    }
  }
  
  //back-continue to the Matsubara axis with the selected kernel
  std::vector<std::complex<double> > imag_freq_data_back(n_matsubara);
  std::ofstream gomega_file(output_omega_filename.c_str());
  gomega_file.precision(14);
  int n=0;
  //skip n=0, where the bosonic/anomalous kernel is singular
  if(k_type==bosonic || k_type==anomalous){
    n++;
    std::cout<< "Warning: kernel is singular at iomega_n=0. Skipping..." <<std::endl;
  }
  for(;n<n_matsubara;++n){
    double omega_n;
    if(k_type==standard) omega_n=(2.*n+1)*M_PI/beta; //fermionic Matsubara frequency
    else omega_n=(2.*n)*M_PI/beta; //bosonic Matsubara frequency
    imag_freq_data_back[n]=0.;
    for(int w=1;w<real_freq_freq.size()-1;++w){
      double freq =real_freq_freq[w];
      double value=real_freq_data[w];
      double delta=(real_freq_freq[w+1]-real_freq_freq[w-1])/2.;
      std::complex<double> kernel_val=kernel(omega_n, freq,k_type);
      imag_freq_data_back[n]+=kernel_val*value*delta;
    }
    std::complex<double> kernel1=kernel(omega_n, real_freq_freq[0],k_type); 
    std::complex<double> kernel2=kernel(omega_n, real_freq_freq.back(),k_type);
    imag_freq_data_back[n]+=kernel1*real_freq_data[0]*(real_freq_freq[1]-real_freq_freq[0])/2.;
    imag_freq_data_back[n]+=kernel2*real_freq_data.back()*(real_freq_freq.back()-real_freq_freq[real_freq_freq.size()-2])/2.;
    if(multiply_m1divpi) imag_freq_data_back[n]*=-1./M_PI;
    gomega_file<<omega_n<<" "<<imag_freq_data_back[n].real()<<" "<<imag_freq_data_back[n].imag()<<std::endl;
  }
  
  std::ofstream gomega_diff_file(output_diff_filename.c_str());
  for(int n=0;n<n_matsubara;++n){
    double diff_real=imag_freq_data[n].real()-imag_freq_data_back[n].real();
    double diff_imag=imag_freq_data[n].imag()-imag_freq_data_back[n].imag();
    gomega_diff_file<<(2.*n+1)*M_PI/beta<<" "<<diff_real<<" "<<diff_imag<<" "<<imag_freq_error[n].real()<<" "<<imag_freq_error[n].imag()<<std::endl;
  }
}
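
The kernel() function used on the Matsubara axis above is provided by the surrounding maxent codebase and is not shown here. Below is a minimal sketch of a compatible helper that spells out only the standard fermionic kernel K(i omega_n, omega) = 1/(i omega_n - omega); the remaining kernel types are placeholders, not the original implementations.

// Hypothetical, standalone sketch of a kernel() helper compatible with the
// back-continuation loop above; only the standard fermionic case is filled in.
#include <complex>
#include <stdexcept>

enum kernel_type { standard, anomalous, bosonic, me_bosonic, me_anomalous };

std::complex<double> kernel(double omega_n, double freq, kernel_type k_type) {
  const std::complex<double> i_omega_n(0., omega_n);
  switch (k_type) {
    case standard:
      // G(i omega_n) = \int d omega A(omega) / (i omega_n - omega)
      return 1. / (i_omega_n - freq);
    default:
      // anomalous/bosonic variants are intentionally left out of this sketch
      throw std::runtime_error("kernel type not implemented in this sketch.");
  }
}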