/**
 * Processes one sequence: looks up every k-mer of the read in `quasiDico`,
 * gathers the per-k-mer counts, computes the share of positions covered by
 * at least one indexed k-mer inside the best window, and writes one text
 * line to `outFile`.
 *
 * Output (written under `synchro`):
 *   - "<index> <mean> <median> <min> <max> <percentage>\n" when statistics
 *     could be computed AND the percentage is >= `threshold`;
 *   - nothing when statistics could be computed but the percentage is below
 *     `threshold`;
 *   - "<index> none\n" when `mean_median_min_max` reports failure.
 *
 * Reads rejected by the low-complexity filter (when `keep_low_complexity`
 * is false) are skipped without any output.
 *
 * @param seq the sequence to analyse.
 */
void operator() (Sequence& seq){
    // A window size of 0, or one larger than the read, means "use the full read length as window".
    int used_windows_size=windows_size;
    if (windows_size==0 || windows_size>seq.getDataSize()){
        used_windows_size=seq.getDataSize();
    }
    if(!keep_low_complexity && !is_high_complexity(seq,kmer_size)){return;}

    const int seq_size = static_cast<int>(seq.getDataSize());

    bool exists = false;
    unsigned char count = 0;
    itKmer->setData (seq.getData());

    // For each k-mer start position: number of occurrences reported by quasiDico (0 when absent).
    vector<int> values;
    // position_shared[p] is true when position p is covered by at least one k-mer with count > 0.
    // The (size, false) constructor already clears every entry; no explicit reset loop is needed.
    // NOTE: an older, faster "covered positions" computation existed but was not compatible
    // with the windowed method (max_populated_window), hence this per-position marking.
    vector<bool> position_shared(seq.getDataSize(), false);

    int position=0;
    for (itKmer->first(); !itKmer->isDone(); itKmer->next()){
        quasiDico->get_value((*itKmer)->value().getVal(),exists,count);
        if(!exists) {
            count=0;
        }
        values.push_back(count);
        if (count>0) { // TODO: OPTIMIZABLE.
            // Strict '<' bound: index seq_size is one past the end of position_shared.
            for (int pos=position;pos<position+kmer_size && pos<seq_size;pos++) position_shared[pos]=true;
        }
        position++;
    }

    const int mpw = max_populated_window(position_shared,used_windows_size);
    const float percentage_span_kmer = 100*mpw/float(used_windows_size);

    float mean;
    int median, min, max;
    if(mean_median_min_max(values, mean, median, min, max)){
        // Only reads reaching the threshold are reported; format the line only when it is needed.
        if (percentage_span_kmer>=threshold) {
            string toPrint (to_string(seq.getIndex())+" "+to_string(mean)+" "+to_string(median)+" "+to_string(min)+" "+to_string(max)+" "+to_string(percentage_span_kmer));
            toPrint.append("\n");

            // Every lock() is paired with exactly one unlock(); the lock is held only around the write.
            synchro->lock();
            fwrite(toPrint.c_str(), sizeof(char), toPrint.size(), outFile);
            synchro->unlock ();
        }
    }
    else{
        string toPrint (to_string(seq.getIndex())+" none\n");
        synchro->lock();
        fwrite(toPrint.c_str(), sizeof(char), toPrint.size(), outFile);
        synchro->unlock ();
    }
}
Exemple #2
0
 /**
  * Membership predicate on sequence indexes.
  *
  * @param seq the sequence whose index (seq.getIndex()) is looked up.
  * @return true when `indexes.find` locates that index, i.e. the lookup
  *         result differs from `indexes.end()`; false otherwise.
  */
 bool operator ()  (Sequence& seq) const
 {
     const bool is_present = !(indexes.find (seq.getIndex()) == indexes.end());
     return is_present;
 }