Exemple #1
0
int runFishhook(int argc, char** argv) {

  parseFishOptions(argc, argv);

  if (opt::verbose) {
    std::cerr << "FishHook Params: " << std::endl 
	      << "\tWidth: " << SeqLib::AddCommas(opt::width) << std::endl
	      << "\tEvents: " << opt::events << std::endl
	      << "\tCoverage Mask: " << opt::coverage << std::endl
	      << "\tSlop: " << SeqLib::AddCommas(opt::slop) << std::endl
	      << "\tInterval Tracks: " << std::endl;
    for (auto& i : opt::interval_files)
      std::cerr << "\t-- " << i << std::endl;
    std::cerr << "\tScored Tracks: " << std::endl;
    for (auto& i : opt::scored_files)
      std::cerr << "\t-- " << i << std::endl;
    std::cerr << "\tSequence Features: " << std::endl;
    for (auto& i : opt::seq_features)
      std::cerr << "\t-- " << i << std::endl;

  }

  // read in the covariate tracks
  SeqHashMap<std::string, Fractions> intervals;
  
  // read a header for info
  SeqLib::BamReader rdr;
  if (!rdr.Open(opt::bam)) {
    std::cerr << "Error: Could not read BAM supplied by -b: " << opt::bam << std::endl;
    exit(EXIT_FAILURE);
  }
  hdr = rdr.Header();
  
  // read in the reference genome
  SeqLib::RefGenome ref;
  if (!ref.LoadIndex(opt::refgenome)) {
      if (opt::seq_features.size()) {
	std::cerr << "Error: Could not read referene genome supplied by -G: " << opt::refgenome << std::endl;
	exit(EXIT_FAILURE);
      }
  }

  // read in the events
  if (opt::verbose) std::cerr << "...reading events " << opt::events << std::endl;
  EventList events;
  if (!events.readFromBed(opt::events, hdr)) {
    std::cerr << "Error: Could not read events BED: " << opt::events << std::endl;
    exit(EXIT_FAILURE);
  }
  if (opt::verbose) std::cerr << "...read in " << SeqLib::AddCommas(events.size()) << " events" << std::endl;
  events.CreateTreeMap();
  
  // create the tiled regions
  FishHookTiles fish(opt::width, opt::slop, hdr.GetHeaderSequenceVector());
  if (opt::verbose)
    std::cerr << "...constructed " << SeqLib::AddCommas(fish.size()) << " fishhook intervals" << std::endl;
  fish.CreateTreeMap();

  // read the coverage mask
  SeqLib::GRC cov;
  if (!opt::coverage.empty()) {
    if (opt::verbose) std::cerr << "...reading coverage mask " << opt::coverage << std::endl;
    cov.ReadBED(opt::coverage, hdr);
    if (opt::verbose) std::cerr << "...read in " << SeqLib::AddCommas(cov.size()) << " covered regions " << std::endl;
    if (!cov.size()) {
      std::cerr << "Non-empty coverage track read with 0 regions. Check that is non-empty BED" << std::endl;
      exit(EXIT_FAILURE);
    }
    if (opt::verbose) std::cerr << "...creating interval tree map on covered regions and overlapping with tiles" << std::endl;
    cov.CreateTreeMap();

    // find covered amount per tile
    std::vector<int32_t> q, s;
    SeqLib::GRC ovlp;
    // fish is subject
    if (fish.size() > cov.size()) // do in most efficient order
      ovlp = cov.FindOverlaps(fish, q, s, false);
    else
      ovlp = fish.FindOverlaps(cov, s, q, false);
    if (opt::verbose) std::cerr << "..." << SeqLib::AddCommas(ovlp.size()) << " regions are covered" << std::endl;

    // set the amount covered by each
    for (size_t i = 0; i < ovlp.size(); ++i) {
      fish[s[i]].covered += (double)ovlp[i].Width() / fish[s[i]].Width();
    }

    // mask the events
    q.clear(); s.clear(); ovlp.clear();
    // events is subject
    if (events.size() > cov.size()) // do in most efficient order
      ovlp = cov.FindOverlaps(events, q, s, false);
    else
      ovlp = events.FindOverlaps(cov, s, q, false);

    EventList newe;
    // set the amount covered by each
    for (size_t i = 0; i < ovlp.size(); ++i) {
      newe.add(Event(ovlp[i], events.at(s[i]).id));
    }
    events = newe;
    events.CreateTreeMap();

    if (opt::verbose) std::cerr << "...kept " << SeqLib::AddCommas(events.size()) << " events after mask" << std::endl;
    
  } else {
    for (auto& i : fish)
      i.covered = 1; // the entire thing is covered if no mask provided
  }

  // read in the interval tracks
  for (auto& i : opt::interval_files)
    read_track(i, intervals, cov, false);
  for (auto& i : opt::scored_files)
    read_track(i, intervals, cov, true);

  // count events per tile (also de-dupes on patient per bin)
  fish.CountEvents(events);
  // overlap the covariates with the tiles
  for (auto& i : intervals) {
    fish.AddIntervalCovariate(i.first, i.second);
  }

  // make the matrix
  FishModel fm;
  fm.AddTiles(fish);
  
  fm.SetNumThreads(opt::num_threads);
  fm.EstimateOLS();
  fm.CooksDistance(fm.GetOLS());

  // write the covariates
  fish.PrintBEDHeader(std::cout);
  fish.PrintBED(std::cout, hdr);

  

  return 0;
}