void score(pcooc_vector_t ** cooc_vec_ptr,/*pcooc_vector_t * res_vec,*/global_score_type * g_scores,unsigned long mem,unsigned lagg, const char * testset,const char * ngramsfname,pindicator_vector_t * pairs2keep ) { double phase1span,phase2span; global_score_type gs; // cout<<"Scoring started..."<<endl; Crono timer; timer.start(); pcooc_vector_t * cooc_vec=*cooc_vec_ptr; pcooc_vector_t * res_vec=new pcooc_vector_t(cooc_vec->nb_sub_vecs()); cooc_vec->print_sizes(); gs.all_occurs=cooc_vec->size(); cout<<endl; cout<<"-------------------------\n" "Sorting (first run)\n" "-------------------------"<<endl; sort_by_key (cooc_vec,mem); timer.stop(); phase1span=timer.span(); cout<<"\nDone sorting partial sets\nPartial sorting took "<<timer.formatted_span()<<endl; cout<<"\n-------------------------\n" "Merging partial sorts\n" "-------------------------"<<endl; timer.reset(); timer.start(); aggregation_func selector=&max_double; switch(lagg){ case OCC_AGG: selector=&max_occur;break; case AVG_AGG: selector=&average;break; } Merger<cooc_type> * /*pcooc_vector_t::merge_iterator*/ g_merger=new Merger<cooc_type>(cooc_vec);//->get_merge_iterator(); #pragma omp parallel shared(cooc_vec,res_vec, g_merger,selector) { g_merger->init(omp_get_thread_num()); #pragma omp barrier while(true){ score_one_source(res_vec,g_merger,selector); if(!(*g_merger)) break; // cout<<omp_get_thread_num()<<": "<</*g_merger.count()<<*/"\t"; } } timer.stop(); cout<<"Done merging in: "<<timer.formatted_span()<<endl; cout<<//"\n-------------------------------------------------------\n"<< "\nFirst run of scoring done \nFirst run, including partial sorting, took "<< timer.formatted_span(phase1span)<<endl; delete g_merger; delete *cooc_vec_ptr; // cooc_vec->clear(); //second sort by target sentences IDs *cooc_vec_ptr=new pcooc_vector_t(res_vec->nb_sub_vecs()); cooc_vec=*cooc_vec_ptr; phase1span+=timer.span(); timer.reset(); timer.start(); cout<<"\nNumber of unique pairs: "<<res_vec->size()<<endl; res_vec->print_sizes(); cout<<endl; cout<<"---------------------------\nResorting (second run)\n---------------------------"<<endl; sort_by_key (res_vec,mem); timer.stop(); phase2span=timer.span(); cout<<"\nDone sorting partial sets\nPartial sorting took "<<timer.formatted_span()<<endl; /*+++++++++++++++++++++++++++Allow subsampling at this stage++++++++++++++++++++++++++++++++++*/ pngram_vector_t * ngrams=NULL; if(testset != NULL || ngramsfname!= NULL){ ngrams= new pngram_vector_t(NB_WORKERS); timer.reset(); timer.start(); if(ngramsfname!= NULL){ cout<<"\n########################\n" "Loading the n-gram list\n" "########################\n"<<endl; pload_ngrams(ngramsfname, ngrams); cout<<"Done loading n-grams!\n"<<"n-gram list loaded in "; }else{ cout<<"\n########################\n" "Creating the n-gram list\n" "########################\n"<<endl; pget_ngrams(testset, ngrams); cout<<"Done creating n-grams!\n"<<"n-gram list created in "; } timer.stop(); cout<<timer.formatted_span()<<endl; timer.reset(); cout<<"\n------------------------\n" "Sorting the n-gram list\n" "------------------------\n"<<endl; timer.start(); sort_unique_ngrams(ngrams,mem); timer.stop(); cout<<"Sorting n-grams took "<<timer.formatted_span()<<endl; if(testset){ cout<<"\n------------------------\n" "Saving the n-gram list\n" "------------------------\n"<<endl; timer.reset(); timer.start(); psave_ngrams(rsplit(testset,PATH_SEP).second.append(".ngrams").c_str(),ngrams); timer.stop(); cout<<"Saving n-grams took "<<timer.formatted_span()<<endl; } } /*++++++++++++++++++++++++++END of ngram creation/loading+++++++++++++++++++++++++++++++++++++*/ if(!ngrams) cout<<"\n-------------------------\n" "Merging partial sorts\n" "-------------------------"<<endl; else cout<<"\n---------------------------------------\n" "Merging and sub-sampling partial sorts\n" "---------------------------------------"<<endl; timer.reset(); timer.start(); Merger<ngram_t> * /*pcooc_vector_t::merge_iterator*/ g_ngmerger=NULL; if(ngrams) g_ngmerger=new Merger<ngram_t>(ngrams); g_merger=new Merger<cooc_type>(res_vec);//itr=res_vec->get_merge_iterator(cmp_by_val); //int more=0; /* tmp_vec.clear(); unsigned long tocc, invn[3]; cooc_type::key_type new_trgt; cout<<"Merging the sorted parts\n"; */ unsigned long big_n1=0, big_n2=0,big_n3=0,big_n4=0, n_matches=0; #pragma omp parallel shared(g_merger,g_ngmerger,cooc_vec,res_vec,ngrams,pairs2keep),\ reduction(+:big_n1),reduction(+:big_n2),reduction(+:big_n3),reduction(+:big_n4), reduction(+:n_matches) { g_merger->init(omp_get_thread_num()); if(g_ngmerger) g_ngmerger->init(omp_get_thread_num()); #pragma omp barrier while(true){ n_matches+=score_one_source(cooc_vec,&big_n1,&big_n2,&big_n3,&big_n4,g_merger,g_ngmerger,pairs2keep);//score_one_source(&lock,&lq,res_vec,g_merger); if(!(*g_merger)) break; // cout<<omp_get_thread_num()<<": "<</*g_merger.count()<<*/"\t"; } } timer.stop(); cout<<"Done merging in: "<<timer.formatted_span()<<endl; cout<<"Disposing unneeded resources\n"; timer.start(); delete g_merger; delete res_vec; if(ngrams){ cout<<"\nNumber of matching pairs to provided ngrams: "<<n_matches<<" ("<<100*double(n_matches)/cooc_vec->size()<<"%)"<<endl; delete g_ngmerger; delete ngrams; } gs.big_n1=big_n1; gs.big_n2=big_n2; gs.big_n3=big_n3; gs.big_n4=big_n4; //cout<<"Phrases more than 4 times: "<<more<<endl; gs.all=cooc_vec->size(); if(g_scores){ if(gs.big_n1==0){ cerr<<"Warning: N1 was found to be zero, taking N1=1!\n"; gs.big_n1=1; } if(gs.big_n2==0){ cerr<<"Warning: N2 was found to be zero, taking N2=1!\n"; gs.big_n2=1; } if(gs.big_n3==0){ cerr<<"Warning: N3 was found to be zero, taking N3=1!\n"; gs.big_n3=1; } memcpy(g_scores,&gs,sizeof(global_score_type)); } timer.stop(); cout<<"\nSecond run of scoring done\nSecond run, including partial sorting, took "<< timer.formatted_span(phase2span)<<endl; cout<<"\nTwo phase phrase scoring took "<<timer.formatted_span(phase1span+phase2span)<<endl<<endl; //calc corresponding scores }
template<class T, class H> void CDR_Test<T, H>::do_test (int total, int niter, int use_array, char* srcbuf, char* dstbuf, int src_offset, int dst_offset) { if (!use_array) { dst_offset = src_offset = 0; } ACE_DEBUG((LM_DEBUG, ACE_TEXT( "Starting Test for %s: %d elements " ) ACE_TEXT( "%susing arrays.\n" ), H::name (), total, ((use_array) ? ACE_TEXT( "" ) : ACE_TEXT( "not " )))); if (!use_array && (total % 4) != 0) { int lasttotal = total; total -= (total % 4); ACE_DEBUG((LM_DEBUG, ACE_TEXT( "Rounding from %d to %d elements.\n" ), lasttotal, total)); } char* src = ACE_ptr_align_binary(srcbuf, H::size ()); T* idata = reinterpret_cast<T*> (src); idata += src_offset; src = reinterpret_cast<char*> (idata); { int i; for (i = 0; i < total; i++) { idata[i] = CDR_Test<T, H>::checkval (i); } } ACE_DEBUG((LM_DEBUG, ACE_TEXT( "Writing data...\n" ))); char* toread = 0; { ACE_TEST_ASSERT(use_array || total % 4 == 0); double totalsecs = 0.0; int n; for (n = 0; n < niter; n++) { size_t size = H::size () * (dst_offset + total) + ACE_CDR::MAX_ALIGNMENT; ACE_OutputCDR os (dstbuf, size); // This is intrusive... char* const end = os.begin ()->wr_ptr() + size; do_seal (end); double secs = 0.0; if (use_array) { { int i; for (i = 0; i < dst_offset; i++) { os << T(0); } } if (n == 0) { ACE_DEBUG((LM_DEBUG, ACE_TEXT ("* src align = %d, dst align = %d\n"), tellalign (src), tellalign (os.begin ()->wr_ptr ()))); } Crono crono; crono.start (); H::write_array (os, idata, total); crono.stop (); secs = crono.read_seconds (); } else { int i = 0; for (; i < dst_offset; i++) { os << T(0); } i = 0; Crono crono; crono.start(); while (i < total) { os << idata[i++]; os << idata[i++]; os << idata[i++]; os << idata[i++]; // static char rs[32 + 1]; // CDR_Test<T,H>::ttoh (idata[i], rs); // ACE_DEBUG ((LM_DEBUG, "Write idata[%d] = %s\n", i, rs)); // os << idata[i]; // i++; } crono.stop (); secs = crono.read_seconds (); } if (!check_seal(end)) { ACE_ERROR((LM_ERROR, ACE_TEXT( "Broken seal, aborting.\n" ))); ACE_OS::exit(1); } totalsecs += secs; if (n == niter - 1) { toread = os.begin ()->rd_ptr (); } } totalsecs = totalsecs / niter; ACE_DEBUG((LM_DEBUG, ACE_TEXT ("Writing to stream %d %s values: %f seconds.\n"), total, H::name (), totalsecs)); } { int i; for (i = 0; i < total; i++) { idata[i] = 0; } } ACE_DEBUG((LM_DEBUG, ACE_TEXT( "Reading them back in opposing byte order...\n" ))); const int opposite_byte_order = 1 - ACE_CDR_BYTE_ORDER; { double totalsecs = 0.0; int n; for (n = 0; n < niter; n++) { ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("====== Read iteration %d\n"), n)); size_t size = (total + dst_offset) * H::size (); ACE_InputCDR is (toread, size, opposite_byte_order); // This is intrusive... char* const end = is.rd_ptr () + size; do_seal (end); double secs = 0.0; if (use_array) { { int i; for (i = 0; i < dst_offset; i++) { T v; is >> v; } } if (n == 0) { ACE_DEBUG((LM_DEBUG, ACE_TEXT ("* src align = %d, dst align = %d\n"), tellalign (is.rd_ptr ()), tellalign (src))); } Crono crono; crono.start (); H::read_array (is, idata, total); crono.stop (); secs = crono.read_seconds (); // Testing for good bit value. Try reading atleast 10 // times the size of total. It should fail with good bit // set to 0. H::read_array (is, idata, 10 * total); if (is.good_bit () != 0) { ACE_ERROR ((LM_ERROR, ACE_TEXT ("Test for good bit failed in %s Array_test\n"), H::name ())); } } else { int i = 0; Crono crono; crono.start (); while (i < total) { #if 0 T v; is >> v; static char rs[32 + 1]; CDR_Test<T,H>::ttoh (v, rs); ACE_DEBUG ((LM_DEBUG, "Read idata[%d] = %s\n", i, rs)); idata[i] = v; i++; #else is >> idata[i++]; is >> idata[i++]; is >> idata[i++]; is >> idata[i++]; #endif /* 0 */ } crono.stop (); secs = crono.read_seconds (); } totalsecs += secs; if (!check_seal (end)) { ACE_ERROR((LM_ERROR, ACE_TEXT( "Broken seal, aborting.\n" ))); ACE_OS::exit(1); } } totalsecs = totalsecs / niter; ACE_DEBUG((LM_DEBUG, ACE_TEXT ("Reading from stream %d %s values") ACE_TEXT (" (byte swapping): %f seconds.\n"), total, H::name (), totalsecs)); }