示例#1
0
/**
 * Worker loop that processes blocks of reads for one node of the pipeline.
 *
 * Rank 0 reads up to SEQUENCES_FOR_BLOCK reads at a time from
 * search->input_file1; every other rank obtains its block through
 * search->receive_from_previous(). Each read is optionally quality-trimmed
 * (rank 0 only), filtered, searched against the reference, and appended to the
 * outgoing batch. The last rank (my_rank == nprocs-1) prints the batch to the
 * SAM output, all other ranks forward it with search->send_to_next().
 * The loop stops as soon as a block containing zero reads is obtained.
 *
 * @param search  module instance providing configuration, indexes and I/O
 * @param id      identifier handed through to receive_from_previous() and
 *                send_to_next()
 */
/* static */
void Module_DMAP::generic_worker_single_thr(Module_DMAP * search, int id) {
	Mask * sequences;
	int read_seq;
	vector<Mask> printable_solutions;
	Transmitting_Result received;

	for (;;) {
		/* Obtain the next block of reads. */
		if (search->my_rank == 0) {
			sequences = new Mask[SEQUENCES_FOR_BLOCK];
			read_seq = read_sequences(search->input_file1, SEQUENCES_FOR_BLOCK, sequences, search->fastqformat, search->gui_output);
			// Nothing was read: release the block right away. Doing this here
			// also covers the very first read, so an empty input no longer
			// leaks the initial allocation.
			if (read_seq == 0)
				delete [] sequences;
		} else {
			received = search->receive_from_previous(id);
			sequences = received.first;
			read_seq = received.second;
			// NOTE(review): when a zero-length block is received the buffer in
			// received.first is not freed here -- confirm that
			// receive_from_previous() does not allocate in that case.
		}
		if (read_seq == 0)
			break;

		Items solutions;
		t_errors calculated_errors;

		for (int i = 0; i < read_seq; i++) {
			Mask & s = sequences[i];

			// Trimming is applied only on the first node.
			if (search->my_rank == 0 and search->trim)
				s.quality_trimming_MOTT(search->min_phred_value_MOTT,search->min_mean_quality,search->min_size);

			// Discarded reads are flagged and passed on without being searched.
			if (s.status_discarded()) {
				if (s.status_low_complexity())
					s.low_complexity = true;
				else
					s.low_quality = true;
				printable_solutions.push_back(s);
				continue;
			}

			// Error budget: derived from the usable read length when
			// auto_errors is set, otherwise the configured fixed value.
			if (search->auto_errors)
				calculated_errors = round((double)s.get_good_length() / search->errors_rate);
			else
				calculated_errors = search->common_errors_allowed;
			// On later nodes a read that already carries alignments (algn > 0)
			// with fewer errors tightens the budget to that error count.
			if (search->my_rank != 0 and s.algn > 0 and s.NM < calculated_errors)
				calculated_errors = s.NM;

			// Count 'N'/'n' bases in the good region, stopping as soon as the
			// budget is exceeded; such reads are passed on unaligned.
			t_errors count = 0;
			for (t_pattern_length pos = s.get_good_region_start()-1; (pos < s.get_good_region_stop()) and (count <= calculated_errors); pos++)
				if (s.sequence[pos] == 'N' or s.sequence[pos] == 'n')
					count++;
			if (count > calculated_errors) {
				printable_solutions.push_back(s);
				continue;
			}

			/** ALIGNMENT **/
			solutions.clear();

			// Contamination screening happens on the first node only; any hit
			// marks the read as contaminated.
			if (search->my_rank == 0 and search->contamination_check) {
				search->CR.search(s.get_good_sequence(),solutions,calculated_errors);
				if (solutions.size() > 0)
					s.contaminated = true;
			}

			if (not s.contaminated)
				search->H.search(s.get_good_sequence(),solutions,calculated_errors);

			// No hit: forward the read unchanged. (A gapped-search fallback
			// used to live here but is disabled.)
			if (solutions.size() == 0) {
				printable_solutions.push_back(s);
				continue;
			}

			// Reporting every alignment is not supported.
			if (search->printAll) {
				ERROR_CHANNEL << "--print-all option not implemented yet!" << endl;
				exit(3);
			}

			// Sort and de-duplicate the hits before picking one.
			sort(solutions.begin(), solutions.end(), ResultItem::less());
			solutions.erase(unique(solutions.begin(), solutions.end(), ResultItem::equal()), solutions.end());

			Random_Choice_Result r;
			// The new hits improve on the stored alignment when the first
			// sorted hit has fewer errors than the read currently records.
			bool improved = (s.NM + s.NM_gap) > (solutions.at(0).errors);

			if (improved) {
				// NOTE(review): s.algn is left untouched in this branch; the
				// disabled gapped code reset it to the number of new hits.
				// Confirm whether that reset is also wanted here.
				r = Module_DMAP::random_choice_from_previous(0,solutions.size());
			} else {
				r = Module_DMAP::random_choice_from_previous(s.algn,solutions.size());
				s.algn += solutions.size();
			}

			// r.first set means "keep the alignment already stored in the
			// read"; otherwise r.second indexes the hit to adopt.
			if (not r.first) {
				const ResultItem & HM = solutions.at(r.second);
				s.HI = 1;
				s.IH = 1;
				s.primary = true;
				s.globalPosition = HM.globalPosition;
				s.strand = HM.strand;
				s.NM = HM.errors;
				s.NM_gap = 0;
				if ((search->my_rank == 0) and s.contaminated) {
					// Hit came from the contamination reference.
					s.contig = search->CR.globalToLocal.searchContig(HM.globalPosition); // find the contig/scaffold
					s.position = HM.globalPosition - search->CR.globalToLocal.startPositions[s.contig] + 1;
					s.contig = search->contig_conversion.convert(s.contig);
				} else {
					s.contig = search->H.globalToLocal.searchContig(HM.globalPosition); // find the contig/scaffold
					s.position = HM.globalPosition - search->H.globalToLocal.startPositions[s.contig] + 1;
					s.contig = search->contig_conversion.convert(s.contig);
				}
			}
			printable_solutions.push_back(s);
		}

		if (search->my_rank == (search->nprocs-1)) {
			// Last node: write everything out.
			for (unsigned int k = 0; k < printable_solutions.size(); k++)
				search->output_samfile.print_output(printable_solutions.at(k));
			search->processed += read_seq;
		} else {
			// Intermediate node: hand the batch to the next one.
			search->send_to_next(printable_solutions,id);
		}

		delete [] sequences;
		printable_solutions.clear();
	}

}
示例#2
0
/**
 * Decodes an instance into up to m_Agenda label sequences.
 *
 * For every position the per-label unigram score is the sum of
 * param->value() over the item's features for that label. Transition scores
 * are read from param at index
 * numFeatures * numLabels + prevLabel * numLabels + currLabel, where
 * prevLabel == numLabels is used for the first position. Each
 * (position, label) cell keeps a KHeap of DecodeState limited via
 * setK(m_Agenda); transitions with m_Legal[prev][curr] == 0 are skipped.
 *
 * @param inst   instance whose items() are decoded
 * @param param  parameter store queried through value()
 * @return newly allocated DecodeResults; it is empty when the instance has no
 *         items or no legal path exists. Each appended Labels object is
 *         allocated here as well.
 */
DecodeResults *
SegmentDecoder :: decode(Instance *inst, Parameter *param) {

    Items *items = inst->items();
    int len = items->size();
    int numFeatures = m_Model->getAlphabet("FEATURES")->size();
    int numLabels   = m_Model->getAlphabet("LABELS")->size();

    // An empty instance has no final position to read results from; indexing
    // states[len - 1] below would be out of bounds, so return no results.
    if (len <= 0) {
        return new CppDecodeResults();
    }

    // Unigram score of every label at every position.
    vector<vector<double> > uniScoreCache(len, vector<double>(numLabels, 0.0));
    for (int i = 0; i < len; ++ i) {
        Item *item = items->at(i);
        for (int label = 0; label < numLabels; ++ label) {
            double total = 0.0;
            int sz = item->size(label);
            for (int j = 0; j < sz; ++ j) {
                total += param->value(item->at(j, label));
            }
            uniScoreCache[i][label] = total;
        }
    }

    // Transition scores; the extra row numLabels is the one used at position 0.
    vector<vector<double> > biScoreCache(numLabels + 1, vector<double>(numLabels));
    for (int prevLabel = 0; prevLabel <= numLabels; ++ prevLabel) {
        for (int currLabel = 0; currLabel < numLabels; ++ currLabel) {
            biScoreCache[prevLabel][currLabel] = param->value(
                    numFeatures * numLabels
                    + prevLabel * numLabels
                    + currLabel);
        }
    }

    // One bounded heap per (position, label). Kept as raw arrays because the
    // copy semantics of KHeap are not visible here; released before returning.
    KHeap<DecodeState> **states = new KHeap<DecodeState> *[len];
    for (int i = 0; i < len; ++ i) {
        states[i] = new KHeap<DecodeState>[numLabels];
        for (int j = 0; j < numLabels; ++ j) {
            states[i][j].setK(m_Agenda);
        }
    }

    // Position 0: seed every legal label from the start row.
    for (int currLabel = 0; currLabel < numLabels; ++ currLabel) {
        if (m_Legal[numLabels][currLabel] == 0) {
            continue;
        }
        double score = uniScoreCache[0][currLabel] + biScoreCache[numLabels][currLabel];
        states[0][currLabel].insert(DecodeState(currLabel, score, NULL));
    }

    // Remaining positions: extend every kept state of the previous position.
    for (int i = 1; i < len; ++ i) {
        for (int currLabel = 0; currLabel < numLabels; ++ currLabel) {
            for (int prevLabel = 0; prevLabel < numLabels; ++ prevLabel) {
                if (m_Legal[prevLabel][currLabel] == 0)
                    continue;
                KHeap<DecodeState> &prevHeap = states[i - 1][prevLabel];
                for (int j = 0; j < prevHeap.size(); ++ j) {
                    DecodeState *prev = prevHeap.at(j);
                    double score = prev->score +
                        uniScoreCache[i][currLabel] + biScoreCache[prevLabel][currLabel];
                    states[i][currLabel].insert(DecodeState(currLabel, score, prev));
                }
            }
        }
    }

    // Gather the final-position states of all labels and sort them.
    vector<DecodeState> result_cache;
    for (int label = 0; label < numLabels; ++ label) {
        KHeap<DecodeState> &lastHeap = states[len - 1][label];
        for (int i = 0; i < lastHeap.size(); ++ i) {
            result_cache.push_back( *lastHeap.at(i) );
        }
    }
    sort( result_cache.begin(), result_cache.end() );

    DecodeResults *ret = new CppDecodeResults();

    // Results are taken from the back of the sorted cache
    // (presumably best score first -- depends on DecodeState's operator<).
    const int numCandidates = static_cast<int>(result_cache.size());
    for (int i = 0; (i < m_Agenda) && (i < numCandidates); ++ i) {
        Labels* single = new CppLabels(len);

        // Follow the back-pointers from the last position to the first.
        DecodeState *now = &result_cache[numCandidates - i - 1];
        for (int pos = len - 1; pos >= 0; -- pos, now = now->prev) {
            single->set(now->label, pos);
        }
        ret->append( single );
    }

    // The back-pointers reference states owned by these heaps, so they are
    // released only after every path has been read out.
    for (int i = 0; i < len; ++ i) {
        delete []states[i];
    }
    delete []states;

    return ret;
}