Exemplo n.º 1
0
std::vector<continuous_result> continuous::split_input(size_t num_threads) {
  flexible_type current_min = m_transformer.min;
  flexible_type current_max = m_transformer.max;
  double current_scale_min = m_transformer.scale_min;
  double current_scale_max = m_transformer.scale_max;
  std::vector<continuous_result> thread_results(num_threads);
  for (auto& thread_result : thread_results) {
    thread_result.init(current_min, current_max, current_scale_min, current_scale_max);
  }
  return thread_results;
}
Exemplo n.º 2
0
// Runs the search on several CRPSThread workers and returns the combined
// result set.
//
// Steps, in order:
//   1. normalise the volume paths of m_rps_databases;
//   2. settle the worker count (m_num_of_threads is updated in place);
//   3. start one CRPSThread per entry of m_rps_databases and join them all;
//   4. copy each worker's CRef<CSearchResultSet> and merge them with
//      s_CombineSearchSets().
CRef<CSearchResultSet> CLocalRPSBlast::RunThreadedSearch(void)
{
    s_ModifyVolumePaths(m_rps_databases);

    // "Auto" mode, or a request for more threads than there are databases,
    // both collapse to exactly one thread per database.
    const bool one_thread_per_db =
        (kAutoThreadedSearch == m_num_of_threads) ||
        (m_num_of_threads > m_rps_databases.size());

    if (one_thread_per_db)
    {
        m_num_of_threads = m_rps_databases.size();
    }
    else if (m_num_of_threads < m_rps_databases.size())
    {
        // Fewer threads than databases: combine databases so that the list
        // ends up with one entry per thread (this resizes m_rps_databases).
        s_MapDbToThread(m_rps_databases, m_num_of_threads);
    }

    vector<CRef<CSearchResultSet>*>  worker_output(m_num_of_threads, NULL);
    vector<CRPSThread*>              workers(m_num_of_threads, NULL);
    vector<CRef<CSearchResultSet> >  results;

    // Launch every worker before waiting on any of them.
    for (unsigned int t = 0; t < m_num_of_threads; ++t)
    {
        // CThread destructor is protected; all threads destroy themselves
        // when terminated, so the raw pointer is never deleted here.
        workers[t] = new CRPSThread(m_query_vector,
                                    m_rps_databases[t],
                                    m_opt_handle->SetOptions().Clone());
        workers[t]->Run();
    }

    // Wait for completion; Join() stores each worker's exit value into the
    // matching worker_output slot.
    for (unsigned int t = 0; t < m_num_of_threads; ++t)
    {
        void** exit_slot = reinterpret_cast<void**>(&worker_output[t]);
        workers[t]->Join(exit_slot);
    }

    // Copy the CRef behind each returned pointer, preserving worker order.
    for (vector<CRef<CSearchResultSet>*>::const_iterator out = worker_output.begin();
         out != worker_output.end(); ++out)
    {
        results.push_back(**out);
    }

    // NOTE(review): the returned object is not used in this function; the
    // call is kept because it may have side effects on the options - confirm.
    CRef<CBlastOptions> options_ref(&(m_opt_handle->SetOptions()));
    CRef<CBlastRPSInfo> rps_info =
        CSetupFactory::CreateRpsStructures(m_db_name, options_ref);

    return s_CombineSearchSets(results, m_num_of_threads);
}
  /** @brief Computes the inclusive or exclusive prefix sum (scan) of vec1 and writes it to vec2.
   *
   * Entry i of vec1 is read at raw index (i * stride + start) and the result is written to vec2
   * with vec2's own start/stride. The number of entries processed is the size of vec1; the size
   * of vec2 is not checked here, so it must provide at least that many entries.
   *
   * With VIENNACL_WITH_OPENMP defined and more than VIENNACL_OPENMP_VECTOR_MIN_SIZE entries, the
   * scan runs in three phases: (1) per-segment sums in parallel, (2) a serial exclusive scan over
   * the segment sums, (3) per-segment scans in parallel, each seeded with its segment offset.
   * Segments are distributed with a work-sharing loop, so the result stays correct even when the
   * OpenMP runtime provides fewer threads than omp_get_max_threads() reported.
   *
   * @param vec1          Input vector
   * @param vec2          Output vector receiving the scan
   * @param is_inclusive  true: vec2[i] = vec1[0] + ... + vec1[i]; false: vec2[i] = vec1[0] + ... + vec1[i-1] (vec2[0] = 0)
   */
  void vector_scan_impl(vector_base<NumericT> const & vec1,
                        vector_base<NumericT>       & vec2,
                        bool is_inclusive)
  {
    NumericT const * data_vec1 = detail::extract_raw_pointer<NumericT>(vec1);
    NumericT       * data_vec2 = detail::extract_raw_pointer<NumericT>(vec2);

    vcl_size_t start1 = viennacl::traits::start(vec1);
    vcl_size_t inc1   = viennacl::traits::stride(vec1);
    vcl_size_t size1  = viennacl::traits::size(vec1);
    if (size1 < 1) // nothing to scan; also keeps (size1 - 1) below from wrapping around
      return;

    vcl_size_t start2 = viennacl::traits::start(vec2);
    vcl_size_t inc2   = viennacl::traits::stride(vec2);

#ifdef VIENNACL_WITH_OPENMP
    if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
    {
      // One segment per thread the runtime may provide. The segments are iterated with a
      // work-sharing loop rather than indexed by omp_get_thread_num(): a parallel region is
      // allowed to run with fewer threads than omp_get_max_threads(), and in that case
      // thread-number indexing would silently skip the trailing segments.
      std::vector<NumericT> thread_results(omp_get_max_threads());
      long const       num_segments    = static_cast<long>(thread_results.size());
      vcl_size_t const work_per_thread = (size1 - 1) / thread_results.size() + 1; // ceil(size1 / #segments)

      // phase 1: sum of each segment
      #pragma omp parallel for
      for (long segment = 0; segment < num_segments; ++segment)
      {
        vcl_size_t thread_start = work_per_thread * static_cast<vcl_size_t>(segment);
        // trailing segments may be empty (thread_start >= size1); the loop below then does nothing
        vcl_size_t thread_stop  = std::min<vcl_size_t>(thread_start + work_per_thread, size1);

        NumericT thread_sum = 0;
        for (vcl_size_t i = thread_start; i < thread_stop; i++)
          thread_sum += data_vec1[i * inc1 + start1];

        thread_results[static_cast<vcl_size_t>(segment)] = thread_sum;
      }

      // phase 2: exclusive scan of the segment sums turns them into per-segment start offsets
      NumericT current_offset = 0;
      for (vcl_size_t i=0; i<thread_results.size(); ++i)
      {
        NumericT tmp = thread_results[i];
        thread_results[i] = current_offset;
        current_offset += tmp;
      }

      // phase 3: exclusive/inclusive scan of each segment, seeded with its offset
      #pragma omp parallel for
      for (long segment = 0; segment < num_segments; ++segment)
      {
        vcl_size_t thread_start = work_per_thread * static_cast<vcl_size_t>(segment);
        vcl_size_t thread_stop  = std::min<vcl_size_t>(thread_start + work_per_thread, size1);

        NumericT thread_sum = thread_results[static_cast<vcl_size_t>(segment)];
        if (is_inclusive)
        {
          for (vcl_size_t i = thread_start; i < thread_stop; i++)
          {
            thread_sum += data_vec1[i * inc1 + start1];
            data_vec2[i * inc2 + start2] = thread_sum;
          }
        }
        else
        {
          for (vcl_size_t i = thread_start; i < thread_stop; i++)
          {
            // read the input before writing the output (presumably so that vec1 and vec2 may alias)
            NumericT tmp = data_vec1[i * inc1 + start1];
            data_vec2[i * inc2 + start2] = thread_sum;
            thread_sum += tmp;
          }
        }
      }
    } else
#endif
    {
      // serial scan over the whole vector
      NumericT sum = 0;
      if (is_inclusive)
      {
        for (vcl_size_t i = 0; i < size1; i++)
        {
          sum += data_vec1[i * inc1 + start1];
          data_vec2[i * inc2 + start2] = sum;
        }
      }
      else
      {
        for (vcl_size_t i = 0; i < size1; i++)
        {
          // read the input before writing the output (presumably so that vec1 and vec2 may alias)
          NumericT tmp = data_vec1[i * inc1 + start1];
          data_vec2[i * inc2 + start2] = sum;
          sum += tmp;
        }
      }
    }

  }