Example #1
0
// Drive the SNP heritability estimation across the whole input.
//
// The function repeatedly (1) asks genotypeFileHandler for more genotype
// blocks, (2) lets Linkage build the LD structure for the SNPs currently held
// in memory, (3) hands the result to Decomposition, and (4) discards the part
// of the window that is no longer needed before reading on. Progress is
// reported on stderr through progress().
//
// Parameters:
//   genotypeFileHandler - source of genotype blocks (read through getBlock()).
//   snpList             - SNP records; this function reads getLoc() / getChr()
//                         from them and forwards the list to the helpers.
//   regionList          - forwarded unchanged to Decomposition::run().
//
// Throws:
//   std::runtime_error if retainLastBlock is set while windowEnd is not, or if
//   no block boundary is left once the decomposition step has run.
//
// NOTE(review): `boundary` is used here as a list of indices into
// `snpLoc` / `genotype` that mark where each block starts; that reading is
// inferred from how it is indexed below - confirm against
// GenotypeFileHandler::getBlock and Linkage::construct.
void SnpEstimation::estimate(GenotypeFileHandler &genotypeFileHandler, boost::ptr_vector<Snp> &snpList, boost::ptr_vector<Region> &regionList){
    // Genotypes from the reference panel; the main working container of this function
    boost::ptr_deque<Genotype> genotype;
    // For every entry of `genotype`, the index of the corresponding SNP in snpList
    std::deque<size_t> snpLoc;
    // Set when the current window has to be closed (comment in the original:
    // end of chromosome / region)
    bool windowEnd = false;
    // Initialize the linkage and decomposition helpers with the configured thread count
    Linkage linkage(m_thread);
    Decomposition decompose(m_thread);
    fprintf(stderr, "Estimate SNP heritability\n\n");

    size_t doneItems = 0;
    size_t totalSnp = snpList.size();
    std::string chr = "";

    // Becomes true (set by getBlock) once the whole genome has been read
    bool completed = false;
    std::vector<size_t> boundary;
    // Forwarded to decompose.run(); true for the first run of every fresh window
    bool starting = true;
    while(!completed){
        // Keep doing this until the whole genome is read
        progress(doneItems, totalSnp, chr);
        // True when the window ends because the newest block lies further than
        // m_blockSize away from its predecessor; that block is then kept as the
        // seed of the next window instead of being discarded.
        bool retainLastBlock=false;
        // Fill the window until it holds 5 boundaries, the input is exhausted,
        // or the window has to be closed.
        while(boundary.size() < 5 && !completed && !windowEnd){
            // getBlock is invoked twice per pass, first with the last argument
            // false and then with true. NOTE(review): the original comment calls
            // this flag "finalizeBuff"; its exact meaning is defined in
            // GenotypeFileHandler and is not visible here.
            genotypeFileHandler.getBlock(snpList, genotype, snpLoc, windowEnd, completed,boundary, false);
            bool boundChange = false;
            linkage.construct(genotype, snpLoc, boundary, snpList, m_ldCorrection, boundChange);

            if(boundChange && boundary.back()==snpLoc.size()){
                    // The newest boundary sits one past the last loaded SNP, so it
                    // does not start a block: drop it and close the window.
                    windowEnd=true;
                    boundary.pop_back();
            }
            else if(boundChange&&snpList.at(snpLoc[boundary.back()]).getLoc()- snpList.at(snpLoc[boundary.back()-1]).getLoc() > m_blockSize){
                    // The SNP at the newest boundary is more than m_blockSize away
                    // from the SNP right before it: close the window but keep the
                    // last block for the next one.
                    retainLastBlock = true;
                    windowEnd=true;
            }
            genotypeFileHandler.getBlock(snpList, genotype, snpLoc, windowEnd, completed,boundary, true);
            linkage.construct(genotype, snpLoc, boundary, snpList, m_ldCorrection, boundChange);
        }
        if(windowEnd && !retainLastBlock && boundary.size() > 2){ //Check whether if we need to merge the two blocks
            size_t indexOfLastSnpOfSecondLastBlock = snpLoc.at(boundary.back()-1); // just in case
            size_t lastSnp = snpLoc.back();
            // If the very last SNP is still within m_blockSize of the end of the
            // second-to-last block, fold the last block into it and rebuild the linkage.
            if(snpList.at(lastSnp).getLoc()-snpList.at(indexOfLastSnpOfSecondLastBlock).getLoc() <= m_blockSize){
                boundary.pop_back();
                bool boundChange=false;
                linkage.construct(genotype, snpLoc, boundary, snpList, m_ldCorrection, boundChange);
            }
        }
        if(retainLastBlock && !windowEnd) throw std::runtime_error("Impossible combination of windowEnd and retain last block!");
        decompose.run(linkage, snpLoc, boundary, snpList, windowEnd, !retainLastBlock, starting, regionList);
        // boundary.back() on an empty vector is undefined behaviour; fail with a
        // clear error instead of reading an invalid element below.
        if(boundary.empty()) throw std::runtime_error("No block boundary available after decomposition!");
        // Progress bookkeeping: snpList index and chromosome of the SNP at the last boundary
        doneItems= snpLoc.at(boundary.back());
        chr = snpList[snpLoc[boundary.back()]].getChr();

        if(retainLastBlock){
            // Drop everything in front of the last boundary; the retained block
            // becomes the first (and only) block of a fresh window.
            size_t update = boundary.back();
            genotype.erase(genotype.begin(), genotype.begin()+update);
            snpLoc.erase(snpLoc.begin(), snpLoc.begin()+update);
            boundary.clear();
            boundary.push_back(0);
            linkage.clear(update);
            starting = true;
            windowEnd = false;
            retainLastBlock = false;
        }
        else if(windowEnd){
            // The window is closed and nothing is carried over: start from scratch.
            snpLoc.clear();
            boundary.clear();
            genotype.clear();
            linkage.clear();
            starting = true;
            windowEnd = false;
            retainLastBlock = false;
        }
        else{
            // Same window continues: discard the first block (everything before
            // boundary[1]) and rebase the remaining boundaries onto the shortened
            // containers.
            starting = false;
            size_t update=boundary[1];
            genotype.erase(genotype.begin(), genotype.begin()+update);
            snpLoc.erase(snpLoc.begin(), snpLoc.begin()+update);
            linkage.clear(update);
            for(size_t i = 0; i < boundary.size()-1; ++i) boundary[i] = boundary[i+1]-update;
            boundary.pop_back();
        }
    }
    progress(totalSnp, totalSnp, "");
    fprintf(stderr, "\n\nEstimated the SNP Heritability, now proceed to output\n");
}