// Range body: checks that the range holds exactly one element, enqueues an
// empty root task, then waits on my_barrier.
// NOTE(review): the task is allocated with the default allocate_root(); if this
// body runs inside an algorithm whose task group context is destroyed when the
// algorithm finishes, an enqueued task may outlive that context -- confirm
// whether a longer-lived context should be passed here.
void operator()(const tbb::blocked_range<int>& r) const
 {
     ASSERT(r.size() == 1, NULL);
     // The empty task exists only to ensure no wake-up is missed.
     tbb::task& wakeup_task = *new(tbb::task::allocate_root()) tbb::empty_task();
     tbb::task::enqueue(wakeup_task);
     my_barrier.timed_wait(10, "Attention: poorly reproducible event, if seen stress testing required" );
 }
  // Applies the kernel m_kernel to the samples of m_data that start at
  // r.begin() and writes the filtered values to m_output at the same offsets.
  //
  // The loop body is manually unrolled and hard-codes tap indices 0..6, so it
  // is only valid when n == 7.
  // NOTE(review): the loop stops at r.size()-n-1, so outputs past the last full
  // group of n are not written here -- confirm the caller accounts for that.
  void operator()(const tbb::blocked_range<size_t>& r) const
  {
    const float* __restrict p = &m_data[0] + r.begin();
    float* __restrict d = &m_output[0]+r.begin();

    // k: local copy of the kernel taps; c: rotating history of input samples.
    float k[n];
    float c[n];
    k[0] = m_kernel[0];
    for (int i = 1; i < n; ++i)
    {
      c[i] = p[i-1];
      k[i] = m_kernel[i];
    }

    // Each statement stores the newest sample into the oldest history slot
    // (slots rotate 0, 6, 5, ..., 1) and then pairs the samples with taps
    // k[0]..k[6] from newest to oldest. The sample one step back must use
    // k[1]; the previous code used k[2] twice and never applied k[1].
    for (int i = 0, e = r.size()-n-1; i < e ; i += n) {
      d[i+0] = (c[0] = p[i+0]) * k[0] + c[1]*k[1]+c[2]*k[2]+c[3]*k[3]+c[4]*k[4]+c[5]*k[5]+c[6]*k[6];
      d[i+1] = (c[6] = p[i+1]) * k[0] + c[0]*k[1]+c[1]*k[2]+c[2]*k[3]+c[3]*k[4]+c[4]*k[5]+c[5]*k[6];
      d[i+2] = (c[5] = p[i+2]) * k[0] + c[6]*k[1]+c[0]*k[2]+c[1]*k[3]+c[2]*k[4]+c[3]*k[5]+c[4]*k[6];
      d[i+3] = (c[4] = p[i+3]) * k[0] + c[5]*k[1]+c[6]*k[2]+c[0]*k[3]+c[1]*k[4]+c[2]*k[5]+c[3]*k[6];
      d[i+4] = (c[3] = p[i+4]) * k[0] + c[4]*k[1]+c[5]*k[2]+c[6]*k[3]+c[0]*k[4]+c[1]*k[5]+c[2]*k[6];
      d[i+5] = (c[2] = p[i+5]) * k[0] + c[3]*k[1]+c[4]*k[2]+c[5]*k[3]+c[6]*k[4]+c[0]*k[5]+c[1]*k[6];
      d[i+6] = (c[1] = p[i+6]) * k[0] + c[2]*k[1]+c[3]*k[2]+c[4]*k[3]+c[5]*k[4]+c[6]*k[5]+c[0]*k[6];
    }
  }
예제 #3
0
 // Range body: checks that the range holds exactly one element, enqueues an
 // empty root task bound to persistent_context, then waits on my_barrier.
 void operator()(const tbb::blocked_range<int>& r) const
 {
     ASSERT(r.size() == 1, NULL);
     // A plain allocate_root() would bind the task to the current context of
     // parallel_for, which is destroyed once parallel_for finishes. Enqueued
     // tasks can outlive parallel_for, so a persistent context is used instead.
     tbb::task& wakeup_task = *new(tbb::task::allocate_root(persistent_context)) tbb::empty_task();
     // The empty task exists only to ensure no wake-up is missed.
     tbb::task::enqueue(wakeup_task);
     my_barrier.timed_wait(10, "Attention: poorly reproducible event, if seen stress testing required" );
 }
예제 #4
0
  // Reduces one interval of the key/value input with reduce_by_key_with_carry.
  // The range must contain exactly one interval index. The trailing partial
  // result goes to carry_result when the interval has a carry, otherwise it is
  // written to the key/value outputs.
  void operator()(const ::tbb::blocked_range<size_type> &r) const
  {
    assert(r.size() == 1);

    typedef typename thrust::iterator_value<Iterator1>::type key_type;
    typedef typename partial_sum_type<Iterator2,BinaryFunction>::type value_type;

    const size_type which_interval = r.begin();

    // input offsets covered by this interval, with the end clamped to n
    const size_type begin_offset = interval_size * which_interval;
    const size_type end_offset = thrust::min(n, begin_offset + interval_size);

    // input sub-range for this interval
    Iterator1 interval_keys_begin   = keys_first   + begin_offset;
    Iterator1 interval_keys_end     = keys_first   + end_offset;
    Iterator2 interval_values_begin = values_first + begin_offset;

    // output positions for this interval
    Iterator3 interval_offset = result_offset + which_interval;
    Iterator4 keys_out        = keys_result   + *interval_offset;
    Iterator5 values_out      = values_result + *interval_offset;
    Iterator6 carry_out       = carry_result  + which_interval;

    // XXX is there a way to pose this so that we don't require default construction of carry?
    thrust::pair<key_type, value_type> carry;

    // reduce the interval; the output iterators are advanced and the last
    // (key, partial sum) pair comes back in carry
    thrust::tie(keys_out, values_out, carry.first, carry.second) =
      reduce_by_key_with_carry(interval_keys_begin,
                               interval_keys_end,
                               interval_values_begin,
                               keys_out,
                               values_out,
                               binary_pred,
                               binary_op);

    // tail flags over the whole key range tell us whether this interval carries
    tail_flags<Iterator1,BinaryPredicate> flags = make_tail_flags(keys_first, keys_first + n, binary_pred);

    if(!interval_has_carry(which_interval, interval_size, num_intervals, flags.begin()))
    {
      // no carry: the pair is a regular result
      *keys_out = carry.first;
      *values_out = carry.second;
    }
    else
    {
      // carry: only the value is kept, the key can be ignored
      // XXX because the carry result is uninitialized, we should copy construct
      *carry_out = carry.second;
    }
  }
예제 #5
0
    // Grows my_vector with one element per index of `range`, using several
    // different growth calls, and checks that elements appended in the "even"
    // branch carry the expected bar() value.
    //
    // With TBB_DEPRECATED set, every index of the range goes through the
    // per-element loop body below. Otherwise the first half of the range (h
    // elements) is appended with two bulk grow_by calls and only the remaining
    // indices go through the per-element loop body.
    void operator()( const tbb::blocked_range<int>& range ) const {
        // The range must be non-empty.
        ASSERT( range.begin() < range.end(), NULL );
#if TBB_DEPRECATED
        for( int i=range.begin(); i!=range.end(); ++i )
#else
        // h elements are added in bulk: left_part through the iterator-range
        // overload, right_part through the count overload.
        const int h = range.size() / 2 ;
        {
            const int begin = range.begin();
            const int left_part = h/2 ;
            const int right_part = h - left_part;
            //elements added by this call are counted by copy_inits_by_grow_by_range variable below
            my_vector.grow_by(FooIterator(begin),FooIterator(begin+left_part));
            //these ones are counted by def_inits_by_grow_by_bunch
            typename MyVector::iterator const s = my_vector.grow_by(right_part);
            // Assign the values that continue after the left part.
            for( int k = 0; k < right_part; ++k )
                s[k].bar() = begin + left_part + k;
        }

        // The per-element loop handles the indices after the bulk-added half.
        for(int i=range.begin() + h; i!=range.end(); ++i )
#endif
        {
            if( i&1 ) {//counted by def_inits_by_grow_by_single_odd
#if TBB_DEPRECATED
                // grow_by(1) is used as an index into my_vector here.
                my_vector[my_vector.grow_by(1)].bar() = i;
#else
                // grow_by(1) is used as an iterator to the new element here.
                my_vector.grow_by(1)->bar() = i;
#endif
            } else {// "Even" part
                // Build the element first, then append a copy of it.
                typename MyVector::value_type f;
                f.bar() = i;
#if TBB_DEPRECATED
                size_t r;
#else
                typename MyVector::iterator r;
#endif
                if( i&2 ){
                    //counted by copy_inits_by_push_back_single_second_bit_set
                    r = my_vector.push_back( f );
                }else{
                    //counted by copy_inits_by_grow_by_single_second_bit_cleared_part
                    r = my_vector.grow_by(1, f);
                }
                // The appended element must hold the value it was built with.
#if TBB_DEPRECATED
                ASSERT( my_vector[r].bar()==i, NULL );
#else
                ASSERT( r->bar()==i, NULL );
#endif
            }
        }
    }
    // Folds the elements of input_vector_ in [r.begin(), r.end()) into the
    // running min_ and max_ members.
    //
    // Elements are consumed in pairs: the two values of a pair are compared
    // with each other first, so only the smaller one is tested against min_
    // and only the larger one against max_. When the range length is odd, the
    // first element is handled on its own so that the rest is an even count.
    void operator()(const tbb::blocked_range< size_t >& r)
    {
        const InputType* curr_value = input_vector_ + r.begin();
        const InputType* end_value = input_vector_ + r.end();

        if ( r.size() % 2 ) // odd
        {
            InputType sample = *curr_value;

            if (sample < min_) min_ = sample;
            // A single sample raises the maximum only when it is larger; the
            // previous '<' test here could lower max_ instead.
            if (sample > max_) max_ = sample;

            ++curr_value;
        }

        // The remaining count is even, so stepping by two lands exactly on
        // end_value and curr_value + 1 is always inside the range.
        while ( curr_value != end_value )
        {
            InputType sample1 = *curr_value;
            InputType sample2 = *(curr_value + 1);

            if (sample1 < sample2)
            {
                if (sample1 < min_) min_ = sample1;
                if (sample2 > max_) max_ = sample2;
            } else {
                if (sample2 < min_) min_ = sample2;
                if (sample1 > max_) max_ = sample1;
            }

            curr_value += 2;
        }
    }
예제 #7
0
파일: universe.cpp 프로젝트: ElaraFX/tbb
 // Calls u_.UpdateVelocity on a rectangle built from column 1, the start of
 // y_range, u_.UniverseWidth-1 and the size of y_range.
 void operator()( const tbb::blocked_range<int>& y_range ) const {
     const Universe::Rectangle area(1, y_range.begin(), u_.UniverseWidth-1, y_range.size());
     u_.UpdateVelocity(area);
 }
예제 #8
0
파일: universe.cpp 프로젝트: ElaraFX/tbb
 // Calls u_.UpdateStress on a rectangle built from column 0, the start of
 // range, u_.UniverseWidth-1 and the size of range.
 void operator()( const tbb::blocked_range<int>& range ) const {
     u_.UpdateStress(Universe::Rectangle(0, range.begin(), u_.UniverseWidth-1, range.size()));
 }
예제 #9
0
// Runs func on the slice of dest/source that corresponds to the range r.
// Both the start position and the length passed to func are the range's begin
// and size scaled by STRIDE; the channel arguments are forwarded unchanged.
void DoShuffleParallel(const tbb::blocked_range<size_t>& r, const std::tr1::function<void(void*, const void*, size_t, const u8, const u8, const u8, const u8)>& func, void* dest, const void* source, const u8 red, const u8 green, const u8 blue, const u8 alpha)
{
	const size_t slice_start = r.begin()*STRIDE;
	const size_t slice_length = r.size()*STRIDE;

	// Advance both buffers to the start of this range's slice.
	s8* const dest_slice = reinterpret_cast<s8*>(dest) + slice_start;
	const s8* const source_slice = reinterpret_cast<const s8*>(source) + slice_start;

	func(dest_slice, source_slice, slice_length, red, green, blue, alpha);
}
예제 #10
0
// Runs func on the slice of dest/source1/source2 that corresponds to the
// range r. Both the start position and the length passed to func are the
// range's begin and size scaled by STRIDE; alpha is forwarded unchanged.
void DoLerpParallel(const tbb::blocked_range<size_t>& r, const std::tr1::function<void(void*, const void*, const void*, float, size_t)>& func, void* dest, const void* source1, const void* source2, float alpha)
{
	const size_t slice_start = r.begin()*STRIDE;
	const size_t slice_length = r.size()*STRIDE;

	// Advance all three buffers to the start of this range's slice.
	s8* const dest_slice = reinterpret_cast<s8*>(dest) + slice_start;
	const s8* const first_slice = reinterpret_cast<const s8*>(source1) + slice_start;
	const s8* const second_slice = reinterpret_cast<const s8*>(source2) + slice_start;

	func(dest_slice, first_slice, second_slice, alpha, slice_length);
}