Example #1
0
/**
 * @brief  Smooth y with a LOWESS fit.
 *
 * A Loess<T> object is constructed from the ratio span / size(y,0) and its
 * lowess() result for y is returned.
 *
 * @param  y       Data to smooth.
 * @param  span    Numerator of the ratio handed to Loess<T> (default 3);
 *                 presumably the number of samples per local fit - TODO confirm.
 * @param  method  Interpolation method; not used by this implementation.
 * @return         Whatever Loess<T>::lowess(y) yields, as a Matrix<T>.
 *
 * NOTE(review): size(y,0) == 0 makes the ratio a division by zero - confirm
 * callers never pass an empty matrix.
 */
template<class T> inline static Matrix<T>
smooth (const Matrix<T>& y, const size_t& span = 3, const INTERP::Method& method = INTERP::AKIMA) {
    typedef typename TypeTraits<T>::RT real_type;

    // Both operands are converted to the real type before dividing.
    const real_type window = static_cast<real_type>(span);
    const real_type extent = static_cast<real_type>(size(y,0));

    Loess<T> fitter (window/extent);
    return fitter.lowess(y);
}
Example #2
0
	/** main loop */
	void run()
	    {
	    set<string> all_chromosomes;
	    /** data shuttle, will be used by the pileup function */
	    PileupShuttle shuttle;
	    shuttle.owner=this;

	    /* loop over the exon */
	    size_t exon_index=0;
	    while( exon_index< this->exons.size())
		{
		WHERE(exon_index << "/"<<this->exons.size() );
		shuttle.exon = this->exons.at(exon_index);

		/* delete exon size<=25 */
		if(shuttle.exon->size()<= this->minimum_exon_size)
		    {
		    this->exons.erase(exon_index);
		    continue;
		    }


		//get gc% for this segement
		shuttle.exon->gc_percent=gcPercentAt(
			shuttle.exon->segment.chrom.c_str(),
			shuttle.exon->segment.start,
			shuttle.exon->segment.end
			);




		shuttle.exon->coverage.resize(this->bamFiles.size(),0);

		 /** loop over each sample */
		for(shuttle.bam_index=0;
			shuttle.bam_index< this->bamFiles.size();
			++shuttle.bam_index
			)
		    {
		    shuttle.bamFile = this->bamFiles.at(shuttle.bam_index);
		    int32_t tid=shuttle.bamFile->findTidByName(shuttle.exon->segment.chrom.c_str());
		    if(tid<0) THROW("chromosome name not indexed in tid.");
		    bam_plbuf_t *buf= ::bam_plbuf_init(scan_depth_func, &shuttle);
		    ::bam_fetch(
			    shuttle.bamFile->bamPtr(),
			    shuttle.bamFile->bamIndex(),
			    tid,
			    shuttle.exon->segment.start,
			    shuttle.exon->segment.end,
			    buf,
			    BamFile2::fetch_func
			    );
		    bam_plbuf_push(0, buf);
		    bam_plbuf_destroy(buf);

		    //normalize to exon length
		    shuttle.exon->coverage[shuttle.bam_index]/=(double)( shuttle.exon->size()  );
		    }

		/* min-depth */
		if(shuttle.exon->min_depth()<=0)
		    {
		    this->exons.erase(exon_index);
		    continue;
		    }

		/* mediane_depthh */
		if(shuttle.exon->median_depth()<10)
		    {
		    this->exons.erase(exon_index);
		    continue;
		    }

		all_chromosomes.insert(shuttle.exon->segment.chrom);
		++exon_index;
		}/* end of loop over exons */

	   vector<Exon*> sort_on_gc;
	   for(size_t i=0;i< this->exons.size();++i)
	       {
	       sort_on_gc.push_back(this->exons.at(i));
	       }
	   /* sort on GC% */
	   std::sort(sort_on_gc.begin(),sort_on_gc.end(),cmp_on_gc);


	   fstream os("jeter.tar",ios::out);
	   Tar tarball(os);
	   std::string prefix("redondepth");
	   std::string filename;
	   FILE* gnuplotout= safe_tmpfile();


	       /* reset gnuplot */
	       fprintf(gnuplotout,
		       "set term postscript\n"
		       "set output \"redondepth.ps\"\n"
		       );


	   /* loop over each individual */
	   for(size_t sampleidx1=0;sampleidx1< this->bamFiles.size();++sampleidx1)
	       {
	       char folder_name[50];
	       sprintf(folder_name,"SAMPLE%d",(int)(1+sampleidx1));
	       string prefix2(prefix);
	       prefix2.append("/").append(folder_name);



	       /* initialize */
	       for(size_t i=0;i< sort_on_gc.size();++i)
		   {
		   Exon* exon=sort_on_gc.at(i);
		   exon->normalized_coverage.clear();
		   }

	       /* loop over the other individuals */
	       for(size_t sampleidx2=0;sampleidx2< this->bamFiles.size();++sampleidx2)
	       	       {
		       /* skip if same individual */
		       if(sampleidx1==sampleidx2) continue;
		       vector<double> x;
		       vector<double> y;
		       /** loop over each exon and build lowess X/Y */
		       for(size_t i=0;i< sort_on_gc.size();++i)
			   {
			   Exon* exon=sort_on_gc.at(i);
			   x.push_back(exon->gc_percent);
			   y.push_back(log2(exon->coverage[sampleidx1]/exon->coverage[sampleidx2]));
			   }
		       auto_ptr<vector<double> > y_prime=loessAlgo.lowess(&(x.front()),&(y.front()),x.size());

		       /* loop over all exons and push normalized ratio*/
		       for(size_t i=0;i< sort_on_gc.size();++i)
			   {
			   Exon* exon=sort_on_gc.at(i);
			   double l=log2(exon->coverage[sampleidx1]/exon->coverage[sampleidx2]);
			   exon->normalized_coverage.push_back(l-y_prime->at(i));
			   }
	       	       }


	       for(std::set<string>::iterator r=all_chromosomes.begin();
		       r!=all_chromosomes.end();
		       ++r)
		   {
		   FILE* outdat = safe_tmpfile();

		   for(size_t i=0;i< this->exons.size();++i)
		       {
		       Exon* exon=this->exons.at(i);
		       if(exon->segment.chrom.compare(*r)!=0) continue;

		       fprintf(
			       outdat,
			       "%d\t%f\n",
			       exon->segment.start,
			       exon->median_normalized_coverage()
			   );
		       }

		   string filename_dat(prefix2);
		   filename_dat.append("/").append(*r).append(".dat");
		   tarball.putFile(outdat,filename_dat.c_str());
		   fclose(outdat);
		   fprintf(gnuplotout,"set title \"SAMPLE-%d %s\"\n",(int)(1+sampleidx1),r->c_str());
		   fprintf(gnuplotout,"set xlabel \"Position\"\n");
		   fprintf(gnuplotout,"set ylabel \"Median depth\"\n");
		   fprintf(gnuplotout,"set yrange [-2:2]\n");
		   fprintf(gnuplotout,"plot \"%s/%s.dat\" using 1:2 notitle\n",folder_name,r->c_str());
		   }
	       }
	    filename.assign(prefix).append("/gnuplot.txt");
	    tarball.putFile(gnuplotout,filename.c_str());
	    tarball.finish();
	    fclose(gnuplotout);
	    os.flush();
	    os.close();
	    }