Пример #1
0
// Attempts to find the next significant CpG site in cpg_stream, i.e. the next
// line whose corrected p-value lies in [0, cutoff).
//
// Every line is parsed as whitespace-separated fields in this order:
//   chrom position sign name raw_pval adjusted_pval corrected_pval
//   test_cov test_meth rest_cov rest_meth
//
// Outputs:
//   cpg          on success, gets the chromosome and the interval
//                [position, position + 1) of the significant site.
//   skipped_any  true if at least one non-significant line was consumed
//                during this call.
//   sig_raw      on success, true if the raw p-value is also in [0, cutoff).
//   test_cov, test_meth, rest_cov, rest_meth
//                overwritten by every parsed line, so after the call they
//                hold the values of the last line that was read.
//
// Returns true if a significant site was found and false once the stream is
// exhausted. Because failbit exceptions are enabled on the per-line parser, a
// line that cannot be parsed raises std::ios_base::failure.
static bool
read_next_significant_cpg(istream &cpg_stream, GenomicRegion &cpg,
                          double cutoff, bool &skipped_any, bool &sig_raw,
                          size_t &test_cov, size_t &test_meth,
                          size_t &rest_cov, size_t &rest_meth) {
  skipped_any = false;
  sig_raw = false;
  string cpg_encoding;

  while (getline(cpg_stream, cpg_encoding)) {
    string chrom, name, sign;
    size_t position = 0;
    double raw_pval = 0.0, adjusted_pval = 0.0, corrected_pval = 0.0;

    // Make a malformed line fail loudly instead of being silently misread.
    std::istringstream iss(cpg_encoding);
    iss.exceptions(std::ios::failbit);
    iss >> chrom >> position >> sign >> name >> raw_pval
        >> adjusted_pval >> corrected_pval
        >> test_cov >> test_meth >> rest_cov >> rest_meth;

    if (0 <= corrected_pval && corrected_pval < cutoff) {
      cpg.set_chrom(chrom);
      cpg.set_start(position);
      cpg.set_end(position + 1);
      sig_raw = (0 <= raw_pval && raw_pval < cutoff);
      return true;
    }
    skipped_any = true;
  }

  return false;
}
Пример #2
0
/**
 * \brief TODO
 */
bool
Exon::sameRegion(const GenomicRegion &r) const {
  return ((r.get_chrom() == this->region.get_chrom()) &&
          (r.get_start() == this->region.get_start()) &&
          (r.get_end() == this->region.get_end()) &&
          (r.get_strand() == this->region.get_strand()));
}
Пример #3
0
/**
 * \brief Does this exon partially overlap the given genomic region? This
 *        function doesn't consider an exact region match (i.e. same chrom,
 *        start, end, and strand) to be a 'partial overlap'.
 */
bool
Exon::partialOverlap(const GenomicRegion &r) const {
  if (r.get_chrom() != this->region.get_chrom()) return false;
  if (r.get_strand() != this->region.get_strand()) return false;
  // by now they must be one the same chromosome and strand... first see if
  // they match exactly
  if ((r.get_start() == this->region.get_start()) &&
      (r.get_end() == this->region.get_end())) return false;
  // not an exact match
  return (r.overlaps(this->region));
}
Пример #4
0
static void
convert_coordinates(const unordered_map<size_t, size_t> &cpgs,
                    GenomicRegion &region)  {
  const unordered_map<size_t, size_t>::const_iterator
    start_itr(cpgs.find(region.get_start()));
  const unordered_map<size_t, size_t>::const_iterator
    end_itr(cpgs.find(region.get_end()));
  if (start_itr == cpgs.end() || end_itr == cpgs.end())
    throw runtime_error("could not convert:\n" + region.tostring());
  region.set_start(start_itr->second);
  region.set_end(end_itr->second);
}
Пример #5
0
static void
get_chrom(const MappedRead &mr, 
	  const chrom_file_map &chrom_files, 
	  GenomicRegion &chrom_region, string &chrom) {
  
  const chrom_file_map::const_iterator fn(chrom_files.find(mr.r.get_chrom()));
  if (fn == chrom_files.end())
    throw SMITHLABException("could not find chrom: " + mr.r.get_chrom());
  
  chrom.clear();
  read_fasta_file(fn->second, mr.r.get_chrom(), chrom);
  if (chrom.empty()) 
    throw SMITHLABException("could not find chrom: " + mr.r.get_chrom());
  
  chrom_region.set_chrom(mr.r.get_chrom());
}
Пример #6
0
static void
get_chrom(const MappedRead &mr,
          const vector<string> &all_chroms,
          const unordered_map<string, size_t> &chrom_lookup,
          GenomicRegion &chrom_region, string &chrom) {

  const unordered_map<string, size_t>::const_iterator
    the_chrom(chrom_lookup.find(mr.r.get_chrom()));
  if (the_chrom == chrom_lookup.end())
    throw runtime_error("could not find chrom: " + mr.r.get_chrom());

  chrom = all_chroms[the_chrom->second];
  if (chrom.empty())
    throw runtime_error("could not find chrom: " + mr.r.get_chrom());

  chrom_region.set_chrom(mr.r.get_chrom());
}
Пример #7
0
static void
get_chrom(const bool VERBOSE, const GenomicRegion &r,
          const unordered_map<string, string>& chrom_files,
      GenomicRegion &chrom_region,  string &chrom) {
  const unordered_map<string, string>::const_iterator
                              fn(chrom_files.find(r.get_chrom()));
  if (fn == chrom_files.end())
    throw runtime_error("could not find chrom: " + r.get_chrom());
  chrom.clear();
  read_fasta_file(fn->second, r.get_chrom(), chrom);
  if (chrom.empty())
    throw runtime_error("could not find chrom: " + r.get_chrom());
  else {
    chrom_region.set_chrom(r.get_chrom());
  }
}
Пример #8
0
void
merge(istream &cpg_stream, ostream &dmr_stream, double cutoff) {

  bool skipped_last_cpg, sig_raw;
  GenomicRegion dmr;
  dmr.set_name("dmr");
  size_t dmr_test_cov = 0; size_t dmr_test_meth = 0;
  size_t dmr_rest_cov = 0; size_t dmr_rest_meth = 0;
  size_t test_cov = 0; size_t test_meth = 0;
  size_t rest_cov = 0; size_t rest_meth = 0;

  // Find the first significant CpG, or terminate the function if none exist.
  if (!read_next_significant_cpg(cpg_stream, dmr, cutoff, skipped_last_cpg,
                            sig_raw, test_cov, test_meth, rest_cov, rest_meth))
    return;

  dmr.set_score(sig_raw);
  dmr_test_cov += test_cov;
  dmr_test_meth += test_meth;
  dmr_rest_cov += rest_cov;
  dmr_rest_meth += rest_meth;

  GenomicRegion cpg;
  cpg.set_name("dmr");

  while(read_next_significant_cpg(cpg_stream, cpg, cutoff, skipped_last_cpg,
                          sig_raw, test_cov, test_meth, rest_cov, rest_meth)) {

    if (skipped_last_cpg || cpg.get_chrom() != dmr.get_chrom()) {
      if (dmr.get_score() != 0)
        dmr_stream << dmr.get_chrom() << '\t'
                   << dmr.get_start() << '\t'
                   << dmr.get_end()   << '\t'
                   << dmr.get_name()  << '\t'
                   << dmr.get_score() << '\t'
                   << double(dmr_test_meth)/dmr_test_cov -
                      double(dmr_rest_meth)/dmr_rest_cov << std::endl;
      dmr = cpg;
      dmr.set_score(sig_raw);
      dmr_test_cov = test_cov;
      dmr_test_meth = test_meth;
      dmr_rest_cov = rest_cov;
      dmr_rest_meth = rest_meth;
    } else {
      dmr.set_end(cpg.get_end());
      dmr.set_score(dmr.get_score() + sig_raw);
      dmr_test_cov += test_cov;
      dmr_test_meth += test_meth;
      dmr_rest_cov += rest_cov;
      dmr_rest_meth += rest_meth;
    }
  }

  dmr_stream << dmr << std::endl;
}
Пример #9
0
/**
 * Processes one tab-separated input line describing a split, and writes the
 * line extended by the best annotation result to the file writer.
 *
 * Two passes are made over the children of the split's chromosome entry:
 * first with the last argument of findChildrenAt set to false and results
 * from queryGeneSame, then (only if the first pass produced no result) with
 * that argument set to true and results from queryGeneDifferent. Nothing is
 * written when neither pass produces a result.
 *
 * @param sLine the input line; its last character is dropped before the
 *              result is appended (presumably a trailing line break --
 *              confirm against the caller)
 * @param pData not used in this function
 *
 * NOTE(review): the objects obtained here through new, findChildrenAt and
 * queryGene* are never released in this function; ownership is not visible
 * from here and should be confirmed.
 */
void GffSplitLocator::process(std::string& sLine, void* pData) {

    // The very first call is skipped (presumably a header line -- confirm).
    if (m_iLines++ == 0)
        return;

    m_pLineElements->clear();

    this->split(sLine, '\t', m_pLineElements);
    // chrom = 0, strand = 1, start = 2, end = 3
    // NOTE(review): the code below reads start from element 1 and end from
    // element 2, which disagrees with the column layout stated above --
    // confirm which of the two is correct.

    // at() throws std::out_of_range on short lines; std::stoi throws on
    // non-numeric fields.
    uint32_t iStart = std::stoi( m_pLineElements->at(1) );
    uint32_t iEnd = std::stoi( m_pLineElements->at(2) );

    // NOTE(review): pChrom is dereferenced below without a null check.
    GffEntry* pChrom = m_pGffLoader->getChromosome( &(m_pLineElements->at(0)) );


    // NOTE(review): allocated with new and not deleted in this function.
    GenomicRegion* pSplit = new GenomicRegion(iStart, iEnd);

    // First pass: candidates found with the last argument set to false.
    std::vector<GffEntry*>* pGenes = pChrom->findChildrenAt(pSplit, m_pSearchLevel, false);
    sSplitResults* pResult = NULL;
    std::stringstream oOutLine;

    if (pGenes->size() == 1)
    {

        /**
         * A single gene explains this split
         */

        pResult = this->queryGeneSame(pGenes->at(0), pSplit);

    }

    if (pGenes->size() > 1)
    {

        // Several candidates: keep the first result, and replace it by a
        // later one when that one has a flag set which the kept result
        // lacks. Flags are checked in the order bSameGene, bExonsOfGene,
        // bSameTranscriptInOneGene.
        for (uint32_t i = 0; i < pGenes->size(); ++i)
        {

            GffEntry* pGene = pGenes->at(i);
            sSplitResults* pGeneResult = this->queryGeneSame(pGene, pSplit);

            if (pResult == NULL) {
                pResult = pGeneResult;
            } else {

                if ((pResult->bSameGene == false) && (pGeneResult->bSameGene == true))
                {
                    pResult = pGeneResult;
                    continue;
                }

                if ((pResult->bExonsOfGene == false) && (pGeneResult->bExonsOfGene == true))
                {
                    pResult = pGeneResult;
                    continue;
                }

                if ((pResult->bSameTranscriptInOneGene == false) && (pGeneResult->bSameTranscriptInOneGene == true))
                {
                    pResult = pGeneResult;
                    continue;
                }

            }


        }


    }

    if (pResult != NULL)
    {

        // Output: input line without its last character, followed by the
        // tab-separated result.
        oOutLine << sLine.substr(0, sLine.length()-1) << pResult->toString('\t') << std::endl;
        std::string sFullLine = oOutLine.str();
        m_pFileWriter->writeDirect( sFullLine );

        return;

    }

    // Second pass: no result so far, repeat the search with the last
    // argument set to true.
    pGenes = pChrom->findChildrenAt(pSplit, m_pSearchLevel, true);
    std::vector<GffEntry*>* pRegions = NULL;
    GffEntry* pRegion = NULL;


    if (pGenes->size() == 1)
    {

        /**
         * A single gene explains this split
         */
        GffEntry* pGene = pGenes->at(0);

        // Look up regions at the end of the split that is not the one
        // tested against the gene: the split end if the gene contains the
        // split start, the split start otherwise.
        if (pGene->contains( pSplit->getStart() ))
            pRegions = pChrom->findChildrenAt(NULL, pSplit->getEnd());
        else
            pRegions = pChrom->findChildrenAt(NULL, pSplit->getStart());

        // pRegion stays NULL when nothing was found at that position.
        if (pRegions->size() > 0)
            pRegion = pRegions->at(0);

        pResult = this->queryGeneDifferent(pGene, pRegion, pSplit);

    }

    if (pGenes->size() > 1)
    {

        // Same selection rule as in the first pass, using queryGeneDifferent.
        for (uint32_t i = 0; i < pGenes->size(); ++i)
        {

            GffEntry* pGene = pGenes->at(i);

            if (pGene->contains( pSplit->getStart() ))
                pRegions = pChrom->findChildrenAt(NULL, pSplit->getEnd());
            else
                pRegions = pChrom->findChildrenAt(NULL, pSplit->getStart());

            // NOTE(review): pRegion is not reset per iteration, so when
            // pRegions is empty the region of an earlier iteration is
            // passed on -- confirm this is intended.
            if (pRegions->size() > 0)
                pRegion = pRegions->at(0);

            sSplitResults* pGeneResult = this->queryGeneDifferent(pGene, pRegion, pSplit);

            if (pResult == NULL) {
                pResult = pGeneResult;
            } else {

                if ((pResult->bSameGene == false) && (pGeneResult->bSameGene == true))
                {
                    pResult = pGeneResult;
                    continue;
                }

                if ((pResult->bExonsOfGene == false) && (pGeneResult->bExonsOfGene == true))
                {
                    pResult = pGeneResult;
                    continue;
                }

                if ((pResult->bSameTranscriptInOneGene == false) && (pGeneResult->bSameTranscriptInOneGene == true))
                {
                    pResult = pGeneResult;
                    continue;
                }
            }


        }


    }

    if (pResult != NULL)
    {

        // Same output format as after the first pass.
        oOutLine << sLine.substr(0, sLine.length()-1) << pResult->toString('\t') << std::endl;
        std::string sFullLine = oOutLine.str();
        m_pFileWriter->writeDirect( sFullLine );

        return;

    }

    // Only reached when neither pass produced a result; nothing is written.
    m_pLineElements->clear();
}
Пример #10
0
// Returns true if the site (chrom, position) lies after region r: either the
// chromosome of r compares less than chrom, or both are on the same
// chromosome and r ends at or before position.
static bool
succeeds(const string &chrom, const size_t position,
         const GenomicRegion &r) {
  if (r.get_chrom() < chrom)
    return true;
  return chrom == r.get_chrom() && r.get_end() <= position;
}
Пример #11
0
// Returns true if the site (chrom, position) lies before region r: either
// chrom compares less than the chromosome of r, or both are on the same
// chromosome and position is strictly smaller than the start of r.
static bool
precedes(const string &chrom, const size_t position,
         const GenomicRegion &r) {
  if (chrom < r.get_chrom())
    return true;
  return chrom == r.get_chrom() && position < r.get_start();
}
Пример #12
0
// Returns true if region a ends strictly before region b ends. Only the end
// coordinates are compared; chromosome and strand are ignored.
static inline bool
end_less(const GenomicRegion &a, const GenomicRegion &b) {
  const size_t a_end = a.get_end();
  const size_t b_end = b.get_end();
  return a_end < b_end;
}
Пример #13
0
// Returns true if regions a and b have the same end coordinate. Only the end
// coordinates are compared; chromosome and strand are ignored.
static inline bool
same_end(const GenomicRegion &a, const GenomicRegion &b) {
  const size_t a_end = a.get_end();
  const size_t b_end = b.get_end();
  return a_end == b_end;
}
Пример #14
0
// Returns true if region a starts at or before the start of region b. Only
// the start coordinates are compared; chromosome and strand are ignored.
static inline bool
start_leq(const GenomicRegion &a, const GenomicRegion &b) {
  const size_t a_start = a.get_start();
  const size_t b_start = b.get_start();
  return a_start <= b_start;
}
Пример #15
0
// Returns true if the strand of region a compares less than OR EQUAL to the
// strand of region b. Despite the name, the comparison is non-strict, so two
// regions on the same strand yield true.
// NOTE(review): a non-strict comparison is not a valid ordering predicate
// for std::sort and similar algorithms -- confirm how callers use this.
static inline bool
strand_less(const GenomicRegion &a, const GenomicRegion &b) {
  return b.get_strand() >= a.get_strand();
}