/**
 * Smooth the input with a Loess filter.
 *
 * The Loess object is constructed with span divided by size(y,0), both
 * converted to the real type associated with T (presumably the fraction
 * of samples used by each local fit -- confirm against Loess).
 *
 * @param  y       Data to smooth
 * @param  span    Numerator of the ratio handed to Loess (default 3)
 * @param  method  Accepted for interface compatibility; not used in this body
 * @return         Result of Loess<T>::lowess applied to y
 */
template<class T> inline static Matrix<T>
smooth (const Matrix<T>& y, const size_t& span = 3, const INTERP::Method& method = INTERP::AKIMA) {

    typedef typename TypeTraits<T>::RT real_type;

    // Ratio of the requested span to the extent of y along dimension 0.
    const real_type ratio = (real_type)span / (real_type)size(y,0);

    Loess<T> smoother (ratio);
    return smoother.lowess(y);

}
/** main loop */ void run() { set<string> all_chromosomes; /** data shuttle, will be used by the pileup function */ PileupShuttle shuttle; shuttle.owner=this; /* loop over the exon */ size_t exon_index=0; while( exon_index< this->exons.size()) { WHERE(exon_index << "/"<<this->exons.size() ); shuttle.exon = this->exons.at(exon_index); /* delete exon size<=25 */ if(shuttle.exon->size()<= this->minimum_exon_size) { this->exons.erase(exon_index); continue; } //get gc% for this segement shuttle.exon->gc_percent=gcPercentAt( shuttle.exon->segment.chrom.c_str(), shuttle.exon->segment.start, shuttle.exon->segment.end ); shuttle.exon->coverage.resize(this->bamFiles.size(),0); /** loop over each sample */ for(shuttle.bam_index=0; shuttle.bam_index< this->bamFiles.size(); ++shuttle.bam_index ) { shuttle.bamFile = this->bamFiles.at(shuttle.bam_index); int32_t tid=shuttle.bamFile->findTidByName(shuttle.exon->segment.chrom.c_str()); if(tid<0) THROW("chromosome name not indexed in tid."); bam_plbuf_t *buf= ::bam_plbuf_init(scan_depth_func, &shuttle); ::bam_fetch( shuttle.bamFile->bamPtr(), shuttle.bamFile->bamIndex(), tid, shuttle.exon->segment.start, shuttle.exon->segment.end, buf, BamFile2::fetch_func ); bam_plbuf_push(0, buf); bam_plbuf_destroy(buf); //normalize to exon length shuttle.exon->coverage[shuttle.bam_index]/=(double)( shuttle.exon->size() ); } /* min-depth */ if(shuttle.exon->min_depth()<=0) { this->exons.erase(exon_index); continue; } /* mediane_depthh */ if(shuttle.exon->median_depth()<10) { this->exons.erase(exon_index); continue; } all_chromosomes.insert(shuttle.exon->segment.chrom); ++exon_index; }/* end of loop over exons */ vector<Exon*> sort_on_gc; for(size_t i=0;i< this->exons.size();++i) { sort_on_gc.push_back(this->exons.at(i)); } /* sort on GC% */ std::sort(sort_on_gc.begin(),sort_on_gc.end(),cmp_on_gc); fstream os("jeter.tar",ios::out); Tar tarball(os); std::string prefix("redondepth"); std::string filename; FILE* gnuplotout= safe_tmpfile(); /* reset 
gnuplot */ fprintf(gnuplotout, "set term postscript\n" "set output \"redondepth.ps\"\n" ); /* loop over each individual */ for(size_t sampleidx1=0;sampleidx1< this->bamFiles.size();++sampleidx1) { char folder_name[50]; sprintf(folder_name,"SAMPLE%d",(int)(1+sampleidx1)); string prefix2(prefix); prefix2.append("/").append(folder_name); /* initialize */ for(size_t i=0;i< sort_on_gc.size();++i) { Exon* exon=sort_on_gc.at(i); exon->normalized_coverage.clear(); } /* loop over the other individuals */ for(size_t sampleidx2=0;sampleidx2< this->bamFiles.size();++sampleidx2) { /* skip if same individual */ if(sampleidx1==sampleidx2) continue; vector<double> x; vector<double> y; /** loop over each exon and build lowess X/Y */ for(size_t i=0;i< sort_on_gc.size();++i) { Exon* exon=sort_on_gc.at(i); x.push_back(exon->gc_percent); y.push_back(log2(exon->coverage[sampleidx1]/exon->coverage[sampleidx2])); } auto_ptr<vector<double> > y_prime=loessAlgo.lowess(&(x.front()),&(y.front()),x.size()); /* loop over all exons and push normalized ratio*/ for(size_t i=0;i< sort_on_gc.size();++i) { Exon* exon=sort_on_gc.at(i); double l=log2(exon->coverage[sampleidx1]/exon->coverage[sampleidx2]); exon->normalized_coverage.push_back(l-y_prime->at(i)); } } for(std::set<string>::iterator r=all_chromosomes.begin(); r!=all_chromosomes.end(); ++r) { FILE* outdat = safe_tmpfile(); for(size_t i=0;i< this->exons.size();++i) { Exon* exon=this->exons.at(i); if(exon->segment.chrom.compare(*r)!=0) continue; fprintf( outdat, "%d\t%f\n", exon->segment.start, exon->median_normalized_coverage() ); } string filename_dat(prefix2); filename_dat.append("/").append(*r).append(".dat"); tarball.putFile(outdat,filename_dat.c_str()); fclose(outdat); fprintf(gnuplotout,"set title \"SAMPLE-%d %s\"\n",(int)(1+sampleidx1),r->c_str()); fprintf(gnuplotout,"set xlabel \"Position\"\n"); fprintf(gnuplotout,"set ylabel \"Median depth\"\n"); fprintf(gnuplotout,"set yrange [-2:2]\n"); fprintf(gnuplotout,"plot \"%s/%s.dat\" using 
1:2 notitle\n",folder_name,r->c_str()); } } filename.assign(prefix).append("/gnuplot.txt"); tarball.putFile(gnuplotout,filename.c_str()); tarball.finish(); fclose(gnuplotout); os.flush(); os.close(); }