Exemple #1
0
// Align the haplotype, and then its reverse complement, to the reference via
// LRAlignment::bwaswAlignment using the BWT/SSA pair held by referenceIndex.
// Every hit whose query span (q_end - q_start) equals the haplotype length is
// appended to outAlignments; all other hits are dropped.
void HapgenUtil::alignHaplotypeToReferenceBWASW(const std::string& haplotype,
        const BWTIndexSet& referenceIndex,
        HapgenAlignmentVector& outAlignments)
{
    PROFILE_FUNC("HapgenUtil::alignHaplotypesToReferenceBWASW")

    LRAlignment::LRParams params;
    params.zBest = 20;

    // Pass 0 uses the haplotype as given, pass 1 its reverse complement
    for(size_t strand = 0; strand < 2; ++strand)
    {
        const bool is_reverse = (strand == 1);
        std::string query = is_reverse ? reverseComplement(haplotype) : haplotype;

        LRAlignment::LRHitVector hits;
        LRAlignment::bwaswAlignment(query, referenceIndex.pBWT, referenceIndex.pSSA, params, hits);

        // Turn each full-length hit into an alignment record
        for(size_t h = 0; h < hits.size(); ++h)
        {
            int q_span = hits[h].q_end - hits[h].q_start;

            // Partial alignments of the query are not reported
            if(q_span != (int)haplotype.length())
                continue;

            HapgenAlignment aln(hits[h].targetID, hits[h].t_start, hits[h].length, hits[h].G, is_reverse);
            outAlignments.push_back(aln);
        }
    }
}
extern std::string consensus(const Contig* contig, const std::vector<Read*>& reads) {

    int size = contig->getParts().size();
    if (size == 0) {
      return "";
    }

    auto& first = contig->getParts().front();
    POA::Graph graph(first.type() ? reads[first.src]->reverse_complement() : reads[first.src]->sequence(), "seq0");

    for (int i = 1; i < size; ++i) {
      const auto& curr = contig->getParts()[i];
      const auto& curr_seq = curr.type() ? reads[curr.src]->reverse_complement() : reads[curr.src]->sequence();
      const int offset = std::max((int) (curr.offset - THRESHOLD * curr_seq.length()), (int) (BAND_PERCENTAGE * curr_seq.length()));

      Timer t;
      t.start();
      POA::Alignment aln(const_cast<string&>(curr_seq), graph);
      aln.align_banded_starting_at(offset, BAND_PERCENTAGE * curr_seq.length());
      t.stop();
      t.print("consensus", "poa");

      graph.insertSequenceAlignment(aln, curr_seq, "seq" + std::to_string(i));
    }

    string consensus;
    graph.generate_consensus(&consensus);
    return consensus;
}
// Returns true once currentExecutionCount has gone past executionCount; in
// that case the counter is reset to 0 and "removing" is logged. Returns false
// (and changes nothing) otherwise.
bool TaskManagerSchedulerCount::canRemoveFromTaskManager()
{
	// NOTE(review): functionName is presumably read by the aln logging macro — confirm
	static const PROGMEM prog_char functionName[] = "canRemoveFromTaskManager";

	const bool limitExceeded = currentExecutionCount > executionCount;
	if( !limitExceeded )
		return false;

	aln( "removing" )

	// Reset the counter before reporting that the task can be removed
	currentExecutionCount = 0;
	return true;
}
Exemple #4
0
    // Build a consensus of the given sequences: the first sequence seeds a
    // Graph as "seq0", each later sequence is aligned to the graph and
    // inserted as "seq<index>", and the consensus is then generated from the
    // graph. Returns "" when no sequences are given.
    string consensus(const vector<string>& sequences) {
        if (sequences.empty()) {
            return "";
        }

        Graph graph(sequences.front(), "seq0");

        // Walk the remaining sequences, tracking the index used for the label
        size_t label_idx = 1;
        for (auto it = sequences.begin() + 1; it != sequences.end(); ++it, ++label_idx) {
            Alignment aln(const_cast<string&>(*it), graph);
            aln.align();
            graph.insertSequenceAlignment(aln, *it, "seq" + to_string(label_idx));
        }

        string result;
        graph.generate_consensus(&result);
        return result;
    }
// Runs one pass of the task: hands the stream to restServer.processStream()
// and logs whether processing finished or will continue on a later pass.
void RS232ToWiFiTask::run()
{
	// NOTE(review): functionName is presumably read by the aln logging macro — confirm
	static const PROGMEM prog_char functionName[] = "run";

	// An earlier revision (previously kept here as commented-out code) echoed
	// incoming bytes back to the stream and guarded processing with a
	// read-bytes limit that reset streamReadBytesCounting when exceeded.
	// TODO (from the original author): the read-bytes limit (65535) and the
	// timeout (10 s) should be parameters passed to the constructor.

	if( !restServer.processStream( stream ) )
	{
		aln( "RS232<->WiFi connection will be also processed in the future" )
	}
	else
	{
		aln( "all operations finished on RS232<->WiFi" )
	}
}
void SendResponseSubTask::setWork( bool _isGETHttpMethod , bool _sendResponseInJSON, bool _sendResponseAsServiceMetaDescription , ServiceDescription* _selectdServices , unsigned char _selectedServicesCount )
{
	static const PROGMEM prog_char functionName[] = "setWork";
	//static const char* prefix = "SendResponseSubTask:setWork():";

	isGETHttpMethod = _isGETHttpMethod;
	sendResponseInJSON = _sendResponseInJSON;
	sendResponseAsServiceMetaDescription = _sendResponseAsServiceMetaDescription;
	selectdServices = _selectdServices;
	selectedServicesCount = _selectedServicesCount;

	if( sendResponseAsServiceMetaDescription ) sendResponseInJSON = true;

	//all( p << prefix << "isGETHttpMethod = " << isGETHttpMethod << ", sendResponseInJSON = " << sendResponseInJSON << ", sendResponseAsServiceMetaDescription = " << sendResponseAsServiceMetaDescription << ", selectedServicesCount = " << selectedServicesCount << endl; )
	//all( wpln( "isGETHttpMethod = %u, sendResponseInJSON = %u, sendResponseAsServiceMetaDescription = %u, selectedServicesCount = %u" , isGETHttpMethod , sendResponseInJSON , sendResponseAsServiceMetaDescription , selectedServicesCount ) )
	aln( "isGETHttpMethod = %u, sendResponseInJSON = %u, sendResponseAsServiceMetaDescription = %u, selectedServicesCount = %u" , isGETHttpMethod , sendResponseInJSON , sendResponseAsServiceMetaDescription , selectedServicesCount )

	currentPageRenderer = sendResponseInJSON ? jsonPageRenderer : htmlPageRenderer;

	//if( sendResponseInJSON )
	//	currentPageRenderer = jsonPageRenderer;
	//else
	//	currentPageRenderer = htmlPageRenderer;
}
Exemple #7
0
// Run the NGS rate analysis described by params.
//
// When params.ngs_file is not set, only computePairCount() is run and the
// function returns. Otherwise the function:
//   1. loads the NGSAlignment and builds an NGSTree with a model named
//      "<model>+F<ncategory>" ("GTR" is used when no model name was given,
//      and the frequency type is then set to FREQ_ESTIMATE);
//   2. optimizes the model parameters and prints the log-likelihood;
//   3. fits a single-rate model per position, then an equal-rate null model
//      and a two-rate model on the summed pair frequencies;
//   4. writes the report to "<out_prefix>.ngs" (the null-model fit is written
//      to "<out_prefix>.ngs_e");
//   5. runs computePairCount() with the first null-model rate when
//      params.ngs_mapped_reads is set.
//
// Side effects: params.model_name is overwritten with the composed model name,
// and the global verbose_mode is temporarily set to VB_QUIET during step 3.
void runNGSAnalysis(Params &params) {

    time_t begin_time;
    time(&begin_time);

    if (!params.ngs_file) {
        computePairCount(params, NULL, 0.0);
        return;
    }

    // read input file, initialize NGSAlignment
    NGSAlignment aln(params.ngs_file);
    cout.setf(ios::fixed,ios::floatfield);

    //params.freq_type = FREQ_ESTIMATE;

    // initialize NGSTree
    NGSTree tree(params, &aln);
    aln.tree = &tree;
    ModelsBlock *models_block = new ModelsBlock;

    // initialize Model
    // The composed name is built in a stream rather than a fixed-size char
    // buffer: the user-supplied model name has no length limit, so the former
    // sprintf into char[20] could overflow the stack buffer.
    string original_model = params.model_name;
    stringstream model_name;
    if (params.model_name == "") {
        model_name << "GTR+F" << aln.ncategory;
        params.freq_type = FREQ_ESTIMATE;
    }
    else
        model_name << params.model_name << "+F" << aln.ncategory;
    params.model_name = model_name.str();
    tree.setModelFactory(new ModelFactory(params, &tree, models_block));
    tree.setModel(tree.getModelFactory()->model);
    tree.setRate(tree.getModelFactory()->site_rate);

    delete models_block;

    int model_df = tree.getModel()->getNDim() + tree.getRate()->getNDim();
    cout << endl;
    cout << "Model of evolution: " << tree.getModelName() << " (" << model_df << " free parameters)" << endl;
    cout << endl;

    // optimize model parameters and rate scaling factors
    cout << "Optimizing model parameters" << endl;
    double bestTreeScore = tree.getModelFactory()->optimizeParameters(false, true);
    cout << "Log-likelihood: " << bestTreeScore << endl;


    DoubleMatrix part_rate(aln.ncategory);
    StrVector rate_name;


    int i, j;

    // Column labels for the rate report: the error term first, then one label
    // per rate parameter.
    rate_name.push_back("Hete_error");

    if (tree.getModel()->isReversible()) {
        // reversible model: one label per unordered state pair, then one per state
        for (i = 0; i < aln.num_states-1; i++)
            for (j = i+1; j < aln.num_states; j++) {
                stringstream x;
                x << aln.convertStateBackStr(i) << "<->" << aln.convertStateBackStr(j);
                rate_name.push_back(x.str());
            }
        for (i = 0; i < aln.num_states; i++) {
            stringstream x;
            x << aln.convertStateBackStr(i);
            rate_name.push_back(x.str());
        }
    } else {
        // non-reversible model: one label per ordered pair of distinct states
        for (i = 0; i < aln.num_states; i++)
            for (j = 0; j < aln.num_states; j++) if (j != i) {
                    stringstream x;
                    x << aln.convertStateBackStr(i) << "->" << aln.convertStateBackStr(j);
                    rate_name.push_back(x.str());
                }
    }


    // Silence verbose output while the per-position models are fitted
    VerboseMode vb_saved = verbose_mode;
    verbose_mode = VB_QUIET;

    cout << endl << "--> INFERING RATE ASSUMING POSITION-SPECIFIC MODEL..." << endl << endl;
    for (int pos = 0; pos < aln.ncategory; pos++) {
        cout << "Position " << pos+1 << " / ";
        // each position owns a num_states x num_states slice of pair_freq
        double *pair_pos = aln.pair_freq + (pos*aln.num_states*aln.num_states);
        testSingleRateModel(params, aln, tree, original_model, pair_pos, part_rate[pos], rate_name, false, NULL);
    }


    verbose_mode = vb_saved;

    // Pair frequencies summed by aln.computeSumPairFreq(); released at the end
    double *sum_freq = new double[aln.num_states*aln.num_states];
    cout << endl << "-->INFERING RATE UNDER EQUAL-RATE NULL MODEL..." << endl << endl;
    aln.computeSumPairFreq(sum_freq);
    DoubleVector null_rate;
    string out_file = params.out_prefix;
    out_file += ".ngs_e";
    for (i = 0; i < aln.num_states*aln.num_states; i++)
        cout << sum_freq[i] << " ";
    cout << endl;
    testSingleRateModel(params, aln, tree, original_model, sum_freq, null_rate, rate_name, true, out_file.c_str());

    DoubleVector two_rate;

    cout << endl << "-->INFERING RATE UNDER TWO-RATE MODEL..." << endl << endl;
    testTwoRateModel(params, aln, tree, original_model, sum_freq, two_rate, rate_name, true, NULL);


    // report running results
    out_file = params.out_prefix;
    out_file += ".ngs";
    reportNGSAnalysis(out_file.c_str(), params, aln, tree, part_rate, rate_name);

    if (params.ngs_mapped_reads) {
        computePairCount(params, &tree, null_rate[0]);
    }


    time_t end_time;
    time(&end_time);

    cout << "Total run time: " << difftime(end_time, begin_time) << " seconds" << endl << endl;
    delete [] sum_freq;
}
Exemple #8
0
// Align the haplotype to the reference genome represented by the BWT/SSA pair
void HapgenUtil::alignHaplotypeToReferenceKmer(size_t k,
                                               const std::string& haplotype,
                                               const BWTIndexSet& referenceIndex,
                                               const ReadTable* pReferenceTable,
                                               HapgenAlignmentVector& outAlignments)
{
    PROFILE_FUNC("HapgenUtil::alignHaplotypesToReferenceKmer")
    int64_t max_interval_size = 4;

    if(haplotype.size() < k)
        return;

    std::vector<int> event_count_vector;
    std::vector<HapgenAlignment> tmp_alignments;
    int min_events = std::numeric_limits<int>::max();

    // Align forward and reverse haplotype to reference
    for(size_t i = 0; i <= 1; ++i)
    {
        bool is_reverse = i == 1;
        std::string query = is_reverse ? reverseComplement(haplotype) : haplotype;

        // Find shared kmers between the haplotype and the reference
        CandidateVector candidates;

        size_t nqk = query.size() - k + 1;
        for(size_t j = 0; j < nqk; ++j)
        {
            std::string kmer = query.substr(j, k);

            // Find the interval of this kmer in the reference
            BWTInterval interval = BWTAlgorithms::findInterval(referenceIndex, kmer);
            if(!interval.isValid() || interval.size() >= max_interval_size)
                continue; // not found or too repetitive

            // Extract the reference location of these hits
            for(int64_t k = interval.lower; k  <= interval.upper; ++k)
            {
                SAElem elem = referenceIndex.pSSA->calcSA(k, referenceIndex.pBWT);

                // Make a candidate alignment
                CandidateKmerAlignment candidate;
                candidate.query_index = j;
                candidate.target_index = elem.getPos();
                candidate.target_extrapolated_start = candidate.target_index - candidate.query_index;
                candidate.target_extrapolated_end = candidate.target_extrapolated_start + query.size();
                candidate.target_sequence_id = elem.getID();
                candidates.push_back(candidate);
            }
        }

        // Remove duplicate candidates
        std::sort(candidates.begin(), candidates.end(), CandidateKmerAlignment::sortByStart);
        CandidateVector::iterator new_end = std::unique(candidates.begin(), candidates.end(), CandidateKmerAlignment::equalByStart);
        candidates.resize(new_end - candidates.begin());
        
        for(size_t j = 0; j < candidates.size(); ++j)
        {
            // Extract window around reference
            size_t window_size = 200;
            int ref_start = candidates[j].target_extrapolated_start - window_size;
            int ref_end = candidates[j].target_extrapolated_end + window_size;
            const SeqItem& ref_record = pReferenceTable->getRead(candidates[j].target_sequence_id);
            const DNAString& ref_sequence = ref_record.seq;
            if(ref_start < 0)
                ref_start = 0;

            if(ref_end > (int)ref_sequence.length())
                ref_end = ref_sequence.length();

            std::string ref_substring = ref_sequence.substr(ref_start, ref_end - ref_start);

            // Align haplotype to the reference
            SequenceOverlap overlap = alignHaplotypeToReference(ref_substring, query);
            if(overlap.score < 0 || !overlap.isValid())
                continue;

            int alignment_start = ref_start + overlap.match[0].start;
            int alignment_end = ref_start + overlap.match[0].end; // inclusive
            int alignment_length = alignment_end - alignment_start + 1;

            // Crude count of the number of distinct variation events
            bool has_indel = false;
            int num_events = overlap.edit_distance;
            std::stringstream c_parser(overlap.cigar);
            int len;
            char t;
            while(c_parser >> len >> t) 
            {
                assert(len > 0);

                // Only count one event per insertion/deletion
                if(t == 'D' || t == 'I')
                {
                    num_events -= (len - 1);
                    has_indel = true;
                }
            }

            // Skip poor alignments
            double mismatch_rate = 1.0f - (overlap.getPercentIdentity() / 100.f);
            if(mismatch_rate > 0.05f || overlap.total_columns < 50)
            {
                if(Verbosity::Instance().getPrintLevel() > 4)
                {
                    printf("Haplotype Alignment - Ignoring low quality alignment (%.3lf, %dbp, %d events) to %s:%d\n", 
                        1.0f - mismatch_rate, overlap.total_columns, num_events, ref_record.id.c_str(), ref_start);
                }
                continue;
            }

            bool is_snp = !has_indel && overlap.edit_distance == 1;

            HapgenAlignment aln(candidates[j].target_sequence_id, 
                                alignment_start, 
                                alignment_length, 
                                overlap.score, 
                                num_events,
                                is_reverse, 
                                is_snp);

            tmp_alignments.push_back(aln);
            event_count_vector.push_back(num_events);
            if(Verbosity::Instance().getPrintLevel() > 4)
            {
                printf("Haplotype Alignment - Accepting alignment (%.3lf, %dbp, %d events) to %s:%d\n", 
                    1.0f - mismatch_rate, overlap.total_columns, num_events, ref_record.id.c_str(), ref_start);
            }            
            // Record the best edit distance
            if(num_events < min_events) 
                min_events = num_events;
        }
    }

    // Copy the best alignments into the output
    int MAX_DIFF_TO_BEST = 10;
    int MAX_EVENTS = 8;
    assert(event_count_vector.size() == tmp_alignments.size());
    for(size_t i = 0; i < event_count_vector.size(); ++i)
    {

        if(event_count_vector[i] <= MAX_EVENTS && event_count_vector[i] - min_events <= MAX_DIFF_TO_BEST)
            outAlignments.push_back(tmp_alignments[i]);
        else if(Verbosity::Instance().getPrintLevel() > 3)
            printf("Haplotype Alignment - Ignoring alignment with too many events (%d)\n", event_count_vector[i]);

    }
}
Exemple #9
0
// Align the haplotype to the reference genome represented by the BWT/SSA pair
void HapgenUtil::alignHaplotypeToReferenceKmer(size_t k,
        const std::string& haplotype,
        const BWTIndexSet& referenceIndex,
        const ReadTable* pReferenceTable,
        HapgenAlignmentVector& outAlignments)
{
    PROFILE_FUNC("HapgenUtil::alignHaplotypesToReferenceKmer")
    int64_t max_interval_size = 4;

    if(haplotype.size() < k)
        return;

    std::vector<int> event_count_vector;
    std::vector<HapgenAlignment> tmp_alignments;
    int min_events = std::numeric_limits<int>::max();

    // Align forward and reverse haplotype to reference
    for(size_t i = 0; i <= 1; ++i)
    {
        bool is_reverse = i == 1;
        std::string query = is_reverse ? reverseComplement(haplotype) : haplotype;

        // Find shared kmers between the haplotype and the reference
        CandidateVector candidates;

        size_t nqk = query.size() - k + 1;
        for(size_t j = 0; j < nqk; ++j)
        {
            std::string kmer = query.substr(j, k);

            // Find the interval of this kmer in the reference
            BWTInterval interval = BWTAlgorithms::findInterval(referenceIndex, kmer);
            if(!interval.isValid() || interval.size() >= max_interval_size)
                continue; // not found or too repetitive

            // Extract the reference location of these hits
            for(int64_t k = interval.lower; k  <= interval.upper; ++k)
            {
                SAElem elem = referenceIndex.pSSA->calcSA(k, referenceIndex.pBWT);

                // Make a candidate alignment
                CandidateKmerAlignment candidate;
                candidate.query_index = j;
                candidate.target_index = elem.getPos();
                candidate.target_extrapolated_start = candidate.target_index - candidate.query_index;
                candidate.target_extrapolated_end = candidate.target_extrapolated_start + query.size();
                candidate.target_sequence_id = elem.getID();
                candidates.push_back(candidate);
            }
        }

        // Remove duplicate candidates
        std::sort(candidates.begin(), candidates.end(), CandidateKmerAlignment::sortByStart);
        CandidateVector::iterator new_end = std::unique(candidates.begin(), candidates.end(), CandidateKmerAlignment::equalByStart);
        candidates.resize(new_end - candidates.begin());

        for(size_t j = 0; j < candidates.size(); ++j)
        {
            // Extract window around reference
            size_t window_size = 200;
            int ref_start = candidates[j].target_extrapolated_start - window_size;
            int ref_end = candidates[j].target_extrapolated_end + window_size;

            const DNAString& ref_sequence = pReferenceTable->getRead(candidates[j].target_sequence_id).seq;
            if(ref_start < 0)
                ref_start = 0;

            if(ref_end > (int)ref_sequence.length())
                ref_end = ref_sequence.length();

            std::string ref_substring = ref_sequence.substr(ref_start, ref_end - ref_start);

            // Align haplotype to the reference
            SequenceOverlap overlap = Overlapper::computeOverlap(query, ref_substring);

            // Skip terrible alignments
            double percent_aligned = (double)overlap.getOverlapLength() / query.size();
            if(percent_aligned < 0.95f)
                continue;
            /*
            // Skip alignments that are not full-length matches of the haplotype
            if(overlap.match[0].start != 0 || overlap.match[0].end != (int)haplotype.size() - 1)
                continue;
            */
            int alignment_start = ref_start + overlap.match[1].start;
            int alignment_end = ref_start + overlap.match[1].end; // inclusive
            int alignment_length = alignment_end - alignment_start + 1;

            // Crude count of the number of distinct variation events
            int num_events = overlap.edit_distance;
            std::stringstream c_parser(overlap.cigar);
            int len;
            char t;
            while(c_parser >> len >> t)
            {
                assert(len > 0);

                // Only count one event per insertion/deletion
                if(t == 'D' || t == 'I')
                    num_events -= (len - 1);
            }


            HapgenAlignment aln(candidates[j].target_sequence_id, alignment_start, alignment_length, overlap.score, is_reverse);
            tmp_alignments.push_back(aln);
            event_count_vector.push_back(num_events);

            // Record the best edit distance
            if(num_events < min_events)
                min_events = num_events;
        }
    }

    // Copy the best alignments into the output
    int MAX_DIFF_TO_BEST = 10;
    int MAX_EVENTS = 8;
    assert(event_count_vector.size() == tmp_alignments.size());
    for(size_t i = 0; i < event_count_vector.size(); ++i)
    {
        if(event_count_vector[i] <= MAX_EVENTS && event_count_vector[i] - min_events <= MAX_DIFF_TO_BEST)
            outAlignments.push_back(tmp_alignments[i]);
    }
}