std::vector<continuous_result> continuous::split_input(size_t num_threads) { flexible_type current_min = m_transformer.min; flexible_type current_max = m_transformer.max; double current_scale_min = m_transformer.scale_min; double current_scale_max = m_transformer.scale_max; std::vector<continuous_result> thread_results(num_threads); for (auto& thread_result : thread_results) { thread_result.init(current_min, current_max, current_scale_min, current_scale_max); } return thread_results; }
/// Runs the search with one CRPSThread per entry of m_rps_databases and
/// returns the per-thread result sets combined via s_CombineSearchSets().
///
/// Side effects visible here: m_rps_databases is passed to
/// s_ModifyVolumePaths() (and to s_MapDbToThread() when fewer threads than
/// databases were requested), and m_num_of_threads may be reset to the
/// number of databases.
CRef<CSearchResultSet> CLocalRPSBlast::RunThreadedSearch(void)
{
    s_ModifyVolumePaths(m_rps_databases);

    if ((m_num_of_threads == kAutoThreadedSearch) ||
        (m_num_of_threads > m_rps_databases.size()))
    {
        // Automatic mode, or more threads requested than databases:
        // default to one thread per database.
        m_num_of_threads = m_rps_databases.size();
    }
    else if (m_num_of_threads < m_rps_databases.size())
    {
        // Fewer threads than databases: combine databases
        // (this modifies the size of m_rps_databases).
        s_MapDbToThread(m_rps_databases, m_num_of_threads);
    }

    vector<CRPSThread*>                  workers(m_num_of_threads, NULL);
    vector<CRef<CSearchResultSet>*>      worker_output(m_num_of_threads, NULL);
    vector<CRef<CSearchResultSet> >      collected;

    // Launch every worker before waiting on any of them.
    for (unsigned int idx = 0; idx < m_num_of_threads; ++idx)
    {
        // CThread destructor is protected; all threads destroy themselves
        // when terminated, so there is no matching delete here.
        workers[idx] = new CRPSThread(m_query_vector,
                                      m_rps_databases[idx],
                                      m_opt_handle->SetOptions().Clone());
        workers[idx]->Run();
    }

    // Wait for each worker; Join() stores the thread's exit data pointer.
    for (unsigned int idx = 0; idx < m_num_of_threads; ++idx)
    {
        void** exit_slot = reinterpret_cast<void**>(&worker_output[idx]);
        workers[idx]->Join(exit_slot);
    }

    // Copy the result handles out of the per-thread slots.
    for (unsigned int idx = 0; idx < m_num_of_threads; ++idx)
    {
        CRef<CSearchResultSet>* slot = worker_output[idx];
        collected.push_back(*slot);
    }

    // NOTE(review): the returned object is not used below; presumably the
    // call is kept for its effect on the options object - confirm before
    // removing.
    CRef<CBlastOptions> options_ref(&(m_opt_handle->SetOptions()));
    CRef<CBlastRPSInfo> rps_info =
        CSetupFactory::CreateRpsStructures(m_db_name, options_ref);

    return s_CombineSearchSets(collected, m_num_of_threads);
}
void vector_scan_impl(vector_base<NumericT> const & vec1, vector_base<NumericT> & vec2, bool is_inclusive) { NumericT const * data_vec1 = detail::extract_raw_pointer<NumericT>(vec1); NumericT * data_vec2 = detail::extract_raw_pointer<NumericT>(vec2); vcl_size_t start1 = viennacl::traits::start(vec1); vcl_size_t inc1 = viennacl::traits::stride(vec1); vcl_size_t size1 = viennacl::traits::size(vec1); if (size1 < 1) return; vcl_size_t start2 = viennacl::traits::start(vec2); vcl_size_t inc2 = viennacl::traits::stride(vec2); #ifdef VIENNACL_WITH_OPENMP if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) { std::vector<NumericT> thread_results(omp_get_max_threads()); // inclusive scan each thread segment: #pragma omp parallel { vcl_size_t work_per_thread = (size1 - 1) / thread_results.size() + 1; vcl_size_t thread_start = work_per_thread * omp_get_thread_num(); vcl_size_t thread_stop = std::min<vcl_size_t>(thread_start + work_per_thread, size1); NumericT thread_sum = 0; for(vcl_size_t i = thread_start; i < thread_stop; i++) thread_sum += data_vec1[i * inc1 + start1]; thread_results[omp_get_thread_num()] = thread_sum; } // exclusive-scan of thread results: NumericT current_offset = 0; for (vcl_size_t i=0; i<thread_results.size(); ++i) { NumericT tmp = thread_results[i]; thread_results[i] = current_offset; current_offset += tmp; } // exclusive/inclusive scan of each segment with correct offset: #pragma omp parallel { vcl_size_t work_per_thread = (size1 - 1) / thread_results.size() + 1; vcl_size_t thread_start = work_per_thread * omp_get_thread_num(); vcl_size_t thread_stop = std::min<vcl_size_t>(thread_start + work_per_thread, size1); NumericT thread_sum = thread_results[omp_get_thread_num()]; if (is_inclusive) { for(vcl_size_t i = thread_start; i < thread_stop; i++) { thread_sum += data_vec1[i * inc1 + start1]; data_vec2[i * inc2 + start2] = thread_sum; } } else { for(vcl_size_t i = thread_start; i < thread_stop; i++) { NumericT tmp = data_vec1[i * inc1 + start1]; data_vec2[i * 
inc2 + start2] = thread_sum; thread_sum += tmp; } } } } else #endif { NumericT sum = 0; if (is_inclusive) { for(vcl_size_t i = 0; i < size1; i++) { sum += data_vec1[i * inc1 + start1]; data_vec2[i * inc2 + start2] = sum; } } else { for(vcl_size_t i = 0; i < size1; i++) { NumericT tmp = data_vec1[i * inc1 + start1]; data_vec2[i * inc2 + start2] = sum; sum += tmp; } } } }