Exemple #1
0
/*
 * Pearson chi-square goodness-of-fit test of binned counts against a
 * Poisson distribution.
 *
 *   observed  counts per bin; observed[k] is compared with the expected
 *             count nsamp * P(k; lambda) for k = 0 .. kmax-1
 *   lambda    mean of the reference Poisson distribution
 *   kmax      number of bins
 *   nsamp     scale factor turning the Poisson pdf into expected counts
 *
 * Returns the p-value gsl_sf_gamma_inc_Q((kmax-1)/2, chisq/2), i.e. the
 * test assumes kmax-1 degrees of freedom.  Terminates the program if the
 * scratch vector cannot be allocated.
 */
double chisq_poisson(unsigned int *observed,double lambda,int kmax,unsigned int nsamp)
{

 unsigned int k,nbins;
 double *expected;
 double diff,delchisq,chisq,pvalue;

 /*
  * The loops index with an unsigned counter, so a negative kmax must not
  * be allowed to wrap around to a huge bin count.
  */
 nbins = (kmax > 0) ? (unsigned int) kmax : 0;

 /*
  * Allocate a vector for the expected value of the bin frequencies up
  * to kmax-1.
  */
 expected = (double *)malloc(nbins*sizeof(double));
 if(expected == NULL && nbins > 0){
   fprintf(stderr,"chisq_poisson: cannot allocate %u expected bin values\n",nbins);
   exit(EXIT_FAILURE);
 }
 for(k = 0;k < nbins;k++){
   expected[k] = nsamp*gsl_ran_poisson_pdf(k,lambda);
 }

 /*
  * Compute Pearson's chisq for this vector of the data with poisson
  * expected values.  A bin whose expected count evaluates to zero makes
  * its contribution (and therefore chisq) non-finite.
  */
 chisq = 0.0;
 for(k = 0;k < nbins;k++){
   diff = (double) observed[k] - expected[k];
   delchisq = diff*diff/expected[k];
   chisq += delchisq;
   if(verbose == D_CHISQ || verbose == D_ALL){
     printf("%u:  observed = %f,  expected = %f, delchisq = %f, chisq = %f\n",
        k,(double)observed[k],expected[k],delchisq,chisq);
   }
 }

 if(verbose == D_CHISQ || verbose == D_ALL){
   printf("Evaluated chisq = %f for %u k values\n",chisq,nbins);
 }

 /*
  * Now evaluate the corresponding pvalue.  The only real question
  * is what is the correct number of degrees of freedom.  We have
  * kmax bins, so it should be kmax-1.
  */
 pvalue = gsl_sf_gamma_inc_Q((double)(kmax-1)/2.0,chisq/2.0);
 if(verbose == D_CHISQ || verbose == D_ALL){
   printf("pvalue = %f in chisq_poisson.\n",pvalue);
 }

 free(expected);

 return(pvalue);

}
Exemple #2
0
/*
 * Build the log-probability tables used elsewhere in the program.
 *
 * log_lambda_pmf[k] = log Poisson(k; params->lambda)  for k = 0 .. nrules-1
 * log_eta_pmf[k]    = log Poisson(k; params->eta)     for k = 0 .. MAX_RULE_CARDINALITY
 * eta_norm          = P(X <= MAX_RULE_CARDINALITY) - P(X = 0) under Poisson(params->eta)
 *
 * All three are names declared outside this function.  Returns 0 on
 * success, or the current errno if either table cannot be allocated.
 */
int
compute_pmf(int nrules, params_t *params)
{
	int k;

	/* Table over the number of rules. */
	log_lambda_pmf = malloc(nrules * sizeof(double));
	if (log_lambda_pmf == NULL)
		return (errno);

	k = 0;
	while (k < nrules) {
		log_lambda_pmf[k] =
		    log(gsl_ran_poisson_pdf(k, params->lambda));
		if (debug > 100)
			printf("log_lambda_pmf[ %d ] = %6f\n",
			    k, log_lambda_pmf[k]);
		k++;
	}

	/* Table over rule cardinalities, inclusive of the maximum. */
	log_eta_pmf = malloc((1 + MAX_RULE_CARDINALITY) * sizeof(double));
	if (log_eta_pmf == NULL)
		return (errno);

	k = 0;
	while (k <= MAX_RULE_CARDINALITY) {
		log_eta_pmf[k] =
		    log(gsl_ran_poisson_pdf(k, params->eta));
		if (debug > 100)
			printf("log_eta_pmf[ %d ] = %6f\n",
			    k, log_eta_pmf[k]);
		k++;
	}

	/*
	 * For simplicity, assume that all the cardinalities
	 * <= MAX_RULE_CARDINALITY appear in the mined rules
	 */
	eta_norm = gsl_cdf_poisson_P(MAX_RULE_CARDINALITY, params->eta)
	    - gsl_ran_poisson_pdf(0, params->eta);

	if (debug > 10)
		printf("eta_norm(Beta_Z) = %6f\n", eta_norm);

	return (0);
}
Exemple #3
0
/*
 * Pseudo-likelihood of the genetic data recorded for the pair (i, j).
 *
 * The pair is looked up as (min(i,j), max(i,j)) in dnainfo->nbcommon,
 * dnainfo->transi and dnainfo->transv.  If the nbcommon entry is below 1
 * the function returns par->weightNaGen.  Otherwise, with T = |ti - tj|
 * and n the nbcommon entry, it returns
 *
 *   alpha     * [ Pois(transi; nu1*T*n)         + Pois(transv; nu2*T*n) ]
 * + (1-alpha) * [ Pois(transi; nu1*(T+2*tau)*n) + Pois(transv; nu2*(T+2*tau)*n) ]
 */
double compute_genlike(int i, int j, double ti, double tj, double nu1, double nu2, double alpha, double tau, struct dna_dist *dnainfo, struct param *par){
	/* order the indices so that the smaller one selects the row */
	const int lo = (i>j) ? j : i;
	const int hi = (i>j) ? i : j;
	double T, ncommon, direct, indirect;
	unsigned int ntransi, ntransv;

	/* absolute time separation between the two dates */
	if(ti>tj){
		T = ti-tj;
	} else {
		T = tj-ti;
	}

	/* no genetic data for this pair: fall back to the configured weight */
	ncommon = dnainfo->nbcommon->rows[lo]->values[hi];
	if(ncommon < 1){
		return par->weightNaGen;
	}

	/* genetic information available */
	ntransi = (unsigned int) dnainfo->transi->rows[lo]->values[hi];
	ntransv = (unsigned int) dnainfo->transv->rows[lo]->values[hi];

	direct = gsl_ran_poisson_pdf(ntransi, nu1*T*ncommon)
		+ gsl_ran_poisson_pdf(ntransv, nu2*T*ncommon);
	indirect = gsl_ran_poisson_pdf(ntransi, nu1*(T+2.0*tau)*ncommon)
		+ gsl_ran_poisson_pdf(ntransv, nu2*(T+2.0*tau)*ncommon);

	return alpha * direct + (1.0-alpha) * indirect;
} /* end compute_genlike */
 /**
  * Slides this sequence ([begin_, end_), size_ characters) along the long
  * read and, for every offset at which it fits, counts the mismatching
  * sites and adds the Poisson probability of that count to the result.
  *
  * @param long_read_begin  first character of the long read
  * @param long_read_end    one past the last character of the long read;
  *                         the read must be at least size_ - 1 characters
  *                         long (asserted)
  * @param mean_number_of_errors_per_site  currently unused
  * @return the sum of the per-offset Poisson probabilities (not a log)
  *
  * NOTE(review): the Poisson mean is hard-coded to 1.0/0.0107 and the
  * mean_number_of_errors_per_site argument is ignored -- confirm intent.
  */
 inline double calc_probability_of_sequence(
         const CharacterStateVectorType::const_iterator& long_read_begin,
         const CharacterStateVectorType::const_iterator& long_read_end,
         double mean_number_of_errors_per_site) const {
     // One past the last offset at which the whole sequence still fits.
     const CharacterStateVectorType::const_iterator stop_pos = long_read_end - this->size_ + 1;
     assert(stop_pos >= long_read_begin);
     double prob = 0.0;
     for (CharacterStateVectorType::const_iterator start_pos = long_read_begin;
             start_pos < stop_pos;
             ++start_pos) {
         // Mismatch count = sum over sites of (a != b).  std::not_equal_to
         // replaces std::not2(std::equal_to<>), which was removed in C++20.
         const unsigned long num_mismatches = std::inner_product(
                 this->begin_, this->end_, start_pos,
                 0, std::plus<unsigned int>(),
                 std::not_equal_to<CharacterStateVectorType::value_type>());
         // prob += gsl_ran_binomial_pdf(num_mismatches, mean_number_of_errors_per_site, this->size_);
         prob += gsl_ran_poisson_pdf(num_mismatches, 1.0/0.0107);
     }
     // return std::log(prob);
     return prob;
 }
Exemple #5
0
/*
 * Micro-benchmark comparing the cost of gsl_ran_binomial_pdf and
 * gsl_ran_poisson_pdf.  Each pmf is evaluated M times on randomly drawn
 * (T, N) pairs and the wall-clock time of each loop is printed in whole
 * seconds.  The computed probabilities themselves are discarded.
 */
int main(){
	int i;
	const int M = 5e7;
	time_t t1, t2, t;
	double mu= 1e-7;
	int L=3e6, T, N;

	/* INIT GSL RNG*/
	t = time(NULL); // time in seconds, used to change the seed of the random generator
	gsl_rng * rng;
 	const gsl_rng_type *typ;
	gsl_rng_env_setup();
	typ=gsl_rng_default;
	rng=gsl_rng_alloc(typ);
	gsl_rng_set(rng,t); // changes the seed of the random generator


	/* first pass: binomial pmf (the message below calls it "Bernoulli") */
	printf("\n - performing %d computations of Bernoulli prob mass fct - ", M);
	time(&t1);
	for(i=0;i<M;i++){
		T=gsl_rng_uniform_int(rng,100); /* time */
		N=gsl_rng_uniform_int(rng, 20); /* nb of mutations*/
		gsl_ran_binomial_pdf((unsigned int) N, mu, T*L);
	}
	time(&t2);
	printf("\nTime ellapsed: %d ", (int) (t2-t1));


	/* second pass: Poisson pmf with mean mu*T*L */
	printf("\n - performing %d computations of Poisson prob mass fct- ", M);
	time(&t1);
	for(i=0;i<M;i++){
		T=gsl_rng_uniform_int(rng,100); /* time */
		N=gsl_rng_uniform_int(rng, 20); /* nb of mutations*/
		gsl_ran_poisson_pdf((unsigned int) N, mu*T*L);
	}
	time(&t2);
	printf("\nTime ellapsed: %d \n", (int) (t2-t1));

	/* release the generator allocated by gsl_rng_alloc */
	gsl_rng_free(rng);

	return 0;
}
/**
 * Computes the distribution of the number of DSBs per domain by mixing
 * Poisson distributions over the dose bins, together with summary figures.
 *
 * For every dose bin j the mean number of DSBs is
 *   f_d_Gy[j] * enhancement_factor[j] * DSB_per_Gy_per_domain
 * and p_DSB[i] is the sum over j of Poisson(i; mean_j) * f[j] * f_dd_Gy[j].
 *
 * @param[in]  n_bins_f               number of dose bins
 * @param[in]  f_d_Gy                 per-bin dose values (length n_bins_f)
 * @param[in]  f_dd_Gy                per-bin weights multiplied with f (length n_bins_f)
 * @param[in]  f                      per-bin frequencies (length n_bins_f)
 * @param[in]  enhancement_factor     per-bin factor; a NaN entry makes every p_DSB[i] NaN
 * @param[in]  DSB_per_Gy_per_domain  scale from dose to mean DSB number
 * @param[in]  domains_per_nucleus    multiplier applied to the summary figures
 * @param[in]  max_number_of_DSBs     number of entries written to p_DSB (i = 0 .. max-1)
 * @param[out] p_DSB                  probability of i DSBs (length max_number_of_DSBs)
 * @param[out] total_pDSBs            sum of the non-NaN p_DSB[i]
 * @param[out] total_nDSBs            sum of i * p_DSB[i], times domains_per_nucleus
 * @param[out] number_of_iDSBs        p_DSB[1], times domains_per_nucleus
 * @param[out] number_of_cDSBs        sum of p_DSB[i] for i > 1, times domains_per_nucleus
 * @param[out] avg_number_of_DSBs_in_cDSBs  sum of i * p_DSB[i] for i > 1 divided by
 *                                    the sum of p_DSB[i] for i > 1
 *
 * If the scratch buffer cannot be allocated, every output is set to NaN.
 */
void AT_get_DSB_distribution(const long     n_bins_f,
		const double         f_d_Gy[],
		const double         f_dd_Gy[],
		const double         f[],
		const double         enhancement_factor[],
		const double         DSB_per_Gy_per_domain,
		const long           domains_per_nucleus,
		const long			 max_number_of_DSBs,
		double				 p_DSB[],
		double*				 total_pDSBs,
		double*              total_nDSBs,
		double*				 number_of_iDSBs,
		double*              number_of_cDSBs,
		double*              avg_number_of_DSBs_in_cDSBs){


	  // Compute average number of DSBs per domain for each dose bin
	  double* avg_DSB = (double*)calloc(n_bins_f, sizeof(double));

	  // Allocation failure: report it through NaN outputs instead of dereferencing NULL
	  if(avg_DSB == NULL && n_bins_f > 0){
		  for(long i = 0; i < max_number_of_DSBs; i++){
			  p_DSB[i] = NAN;
		  }
		  *total_pDSBs = NAN;
		  *total_nDSBs = NAN;
		  *number_of_iDSBs = NAN;
		  *number_of_cDSBs = NAN;
		  *avg_number_of_DSBs_in_cDSBs = NAN;
		  return;
	  }

	  for(long i = 0; i < n_bins_f; i++){
		  avg_DSB[i] = f_d_Gy[i] * enhancement_factor[i] * DSB_per_Gy_per_domain;
	  }

	  // Reset variables
	  *total_pDSBs = 0.0;
	  *total_nDSBs = 0.0;
	  *number_of_iDSBs = 0.0;
	  *number_of_cDSBs = 0.0;
	  *avg_number_of_DSBs_in_cDSBs = 0.0;

	  // For all possible number of DSBs (incl. 0)
	  for(long i = 0; i < max_number_of_DSBs; i++){
		  p_DSB[i] = 0.0;								// Set DSB prob to zero
		  // For all dose bins
		  for(long j = 0; j < n_bins_f; j++){
			 if(isnan(enhancement_factor[j])){			// Set DSB prob to NAN if no enhancement factor is defined
				p_DSB[i] = NAN;
				break;
			 }
			 // Add p of i DSBs in case of dose j (i.e. average no of DSBs j)
			 // weighted by the probability (f * dd)
			 if(avg_DSB[j] > 0.0){
				 p_DSB[i] += gsl_ran_poisson_pdf(i, avg_DSB[j]) * f[j] * f_dd_Gy[j];
			 }else{
				 if(i == 0){
					 p_DSB[i] += f[j] * f_dd_Gy[j]; // gsl_ran_poisson_pdf cannot compute P(i, 0.0) = 0 (i > 0) or 1 (i = 0)
				 }
			 }
		  }
		  if(!isnan(p_DSB[i])){
			  *total_pDSBs += p_DSB[i];
			  *total_nDSBs += i*p_DSB[i];
			  if(i == 1){
				  *number_of_iDSBs += p_DSB[i];
			  }
			  if(i > 1){
				  *number_of_cDSBs += p_DSB[i];
				  *avg_number_of_DSBs_in_cDSBs += p_DSB[i] * i;
			  }
		  }

	  }

	  *total_nDSBs     *= domains_per_nucleus;
	  *number_of_iDSBs *= domains_per_nucleus;
	  *number_of_cDSBs *= domains_per_nucleus;
	  // number_of_cDSBs is already scaled here, so the domain factor cancels;
	  // the division is by zero when no probability mass lies at i > 1.
	  *avg_number_of_DSBs_in_cDSBs *= domains_per_nucleus / *number_of_cDSBs;

	  // The per-bin means are only needed inside this function
	  free(avg_DSB);
}
Exemple #7
0
/*
 * Poisson probability mass P(X = x) for mean m.
 *
 * gsl_ran_poisson_pdf takes the count as an unsigned int, so a negative x
 * or one above the unsigned int range would silently wrap to a different
 * count.  Negative counts lie outside the support (probability 0); counts
 * too large for the GSL interface are also reported as 0.
 */
double pdf_Poisson(double m, long x)
{
    if (x < 0 || (unsigned long) x > (unsigned long) ~0u) {
        return 0.0;
    }
    return gsl_ran_poisson_pdf((unsigned int) x, m);
}
/* Poisson pmf at n for the fixed mean 30.0. */
double
test_poisson_large_pdf (unsigned int n)
{
  const double mu = 30.0;
  const double density = gsl_ran_poisson_pdf (n, mu);

  return density;
}
/* Poisson pmf at n for the fixed mean 5.0. */
double
test_poisson_pdf (unsigned int n)
{
  const double mu = 5.0;
  const double density = gsl_ran_poisson_pdf (n, mu);

  return density;
}
Exemple #10
0
/*
 * Command-line driver: parses the options, validates input/output paths,
 * opens the forward/reverse read files, determines truncation limits,
 * fills the distance density with a Poisson pmf around lambdaD, runs the
 * MCMC parameter estimation and writes the resulting parameters.
 *
 * Returns -1 when argument validation, file opening or parameter
 * estimation fails; otherwise falls off the end of main (status 0).
 *
 * NOTE(review): iff, ifr, fileNames and the min/max arrays are
 * variable-length arrays, which is a compiler extension in C++.
 */
int main(int argc, char* argv[]) 
{
	// parameters that you can set. 
	string infF    = "";
	string infR    = "";
	string infPath = "";
	string outI    = "oIter.txt";
	string outP    = "oPar.txt";
	string outCorr = "oCorr.txt";
	string outCnt  = "oCnt.txt";
	string outPath = "";
	string outStub = "";
	//bool useemp    = false;
	double truncLimit = 0.99;
	int verbose    = 3;
	int lambdaD    = 147;
	int minD       = 130;
	int maxD       = 180;
	int sampleSize = -1;
	int burnins    = 1000;
	int iterations = 10000;
	int seed       = -1;
	// Unknowns
	double lambdaF[2];
	double lambdaR[2];
	double pF[2];
	double pR[2];

	/*
	 *  Read and check input parameters
	 */

	string errorLine =  "usage " + 
		string(argv[0]) + 
		" [parameters] \n" +
		"\t-iF      <forward reads binary file> \n" +
		"\t-iR      <reverse reads binary file> \n" +
		"\t-iPath   <path to forward and reverse reads binary files \n" +
		"            (overrides iR and iF if set)> \n" +
		"\t-oPath   <outdirectory, where all output files are put. \n" + 
		"\t          NOT created - needs to exists. Defaults to where \n"+
		"\t          the program is executed from.>\n" +
		"\t-oStub   <outfile names stub, \n" +
		"\t          defaults -oIter to <-oStub_>oIter.txt, \n" + 
		"\t          defaults -oPar to <-oStub_>oPar.txt> \n" + 
		"\t          defaults -oCorr to <-oStub_>oCorr.txt> \n" +
		"\t          defaults -oCnt to <-oStub_>oCnt.txt> \n" + 
		"\t-oIter   <outfile, where to write MCMC parameter\n" +
		"\t          estimates for each iteration,default generated from -oStub>\n" +
		"\t-oPar    <parameter-file, where to write MCMC\n" +
		"\t          final parameter estimates, default generated from -oStub> \n" +
         /*
		   "\t-oCorr   <out-file, lists correlation coefficients between forward\n" +
		   "\t          and reverse reads in [mind,maxd] \n" +
		   "\t          default generated from -oStub> \n" +
		 */
		"\t-oCnt    <out-file, lists forward and reverse read count frequencies\n" +
		"\t          default generated from -oStub> \n" +
		"\t-trunc   <truncation limit in (0,1], default 0.99.> \n" +
		"\t-size    <sample size, default all observations.> \n" +
		"\t-burnin  <number of MCMC burnin iterations,\n" +
		"\t          default 1000.> \n" +
		"\t-iter    <number of MCMC iterations\n" +
		"\t          (non-burnin iterations), default 10000.> \n" +
		"\t-seed    <set seed to random number generator.> \n" +
		"\t-ld      <distance-lambda, default 147 bp.> \n" +
		"\t-mind    <min distance, default 130 bp.> \n" +
		"\t-maxd    <max distance, default 180 bp.> \n" +
		/*
		  "\t-useemp  <toggle use of data-driven distance distribution, or poisson\n" +
		  "\t          around distance-lambda. default off.>\n" +
		*/
		// the default below matches the initial value of `verbose` above
		"\t-v       <verbose level 0-3. default 3>";
	
	bool fail = false;
	string failmessage = "";

	// Every option handled in the loop below consumes the following
	// argument as its value.
	static const char* const valueOptions[] = {
		"-iF", "-iR", "-iPath", "-oPath", "-oStub", "-oIter", "-oPar",
		"-oCnt", "-v", "-seed", "-trunc", "-size", "-burnin", "-iter",
		"-ld", "-mind", "-maxd"
	};
	const size_t valueOptionCnt = sizeof(valueOptions) / sizeof(valueOptions[0]);
	
	for (int i = 1; i < argc; i++)
	{
		// A recognised option in the last position has no value: stop here
		// rather than reading argv[argc] through argv[++i].
		if (i + 1 >= argc)
		{
			bool needsValue = false;
			for (size_t o = 0; o < valueOptionCnt; o++)
			{
				if (strcmp(argv[i], valueOptions[o]) == 0)
					needsValue = true;
			}
			if (needsValue)
			{
				failmessage.append("Missing value for argument: ");
				failmessage.append(argv[i]);
				failmessage.append("\n");
				fail = true;
				break;
			}
		}

	    if(strcmp(argv[i],"-iF") == 0)
			infF.assign(argv[++i]);
	    else if(strcmp(argv[i],"-iR") == 0)
			infR.assign(argv[++i]);
		else if(strcmp(argv[i],"-iPath") == 0)
			infPath.assign(argv[++i]);
	    else if(strcmp(argv[i],"-oPath") == 0)
			outPath.assign(argv[++i]);
	    else if(strcmp(argv[i],"-oStub") == 0)
			outStub.assign(argv[++i]);
	    else if(strcmp(argv[i],"-oIter") == 0)
			outI.assign(argv[++i]);
	    else if(strcmp(argv[i],"-oPar") == 0)
			outP.assign(argv[++i]);
	    /*
		  else if(strcmp(argv[i],"-oCorr") == 0)
			outCorr.assign(argv[++i]);
		*/
	    else if(strcmp(argv[i],"-oCnt") == 0)
			outCnt.assign(argv[++i]);
	    /*
		  else if (strcmp(argv[i],"-useemp") == 0)
			useemp = true;
		*/
	    else if (strcmp(argv[i],"-v") == 0)
			verbose = atoi(argv[++i]);
	    else if (strcmp(argv[i],"-seed") == 0)
			seed = atoi(argv[++i]);
	    else if (strcmp(argv[i],"-trunc") == 0)
			truncLimit = atof(argv[++i]);
	    else if (strcmp(argv[i],"-size") == 0)
			sampleSize = atoi(argv[++i]);
	    else if (strcmp(argv[i],"-burnin") == 0)
			burnins = atoi(argv[++i]);
	    else if (strcmp(argv[i],"-iter") == 0)
			iterations = atoi(argv[++i]);
	    else if (strcmp(argv[i],"-ld") == 0)
			lambdaD = atoi(argv[++i]);
	    else if (strcmp(argv[i],"-mind") == 0)
			minD = atoi(argv[++i]);
	    else if (strcmp(argv[i],"-maxd") == 0)
			maxD = atoi(argv[++i]);
	    else
		{
			failmessage.assign("Unknown argument: ");
			failmessage.append(argv[i]);
			failmessage.append("\n");
			fail = true;
		}
	}
	
	if (truncLimit <= 0 || truncLimit > 1)
	{
	    failmessage.append("-trunc value does not make sense.\n");
	    fail = true;
	}

	// distDens below holds maxD-minD+1 entries, so the range must not be empty
	if (minD > maxD)
	{
	    failmessage.append("-mind must not be larger than -maxd.\n");
	    fail = true;
	}

	bool infPathSpec = false;
	if (strcmp(infPath.c_str(), "") != 0)
	{
		infPathSpec = true;
	    DIR *d = opendir(infPath.c_str());
	    if(d)
		{
			closedir(d);
		}
	    else
		{
			failmessage.append("-iPath does not exist.\n");
			fail = true;
		}
	}
	
	if (strcmp(infF.c_str(), "") == 0)
	{
		if (!infPathSpec)
		{
			failmessage.append("-iF or -iPath must be specified.\n");
			fail = true;
		}
	}
	
	if (strcmp(infR.c_str(), "") == 0)
	{
		if (!infPathSpec)
		{
			failmessage.append("-iR or -iPath must be specified.\n");
			fail = true;
		}
	}
		
	if (strcmp(outI.c_str(), "") == 0)
	{
	    failmessage.append("invalid -oIter.\n");
	    fail = true;
	}
		
	if (strcmp(outP.c_str(), "") == 0)
	{
	    failmessage.append("invalid -oPar.\n");
	    fail = true;
	}

	if (strcmp(outCorr.c_str(), "") == 0)
	{
	    failmessage.append("invalid -oCorr.\n");
	    fail = true;
	}

	if (strcmp(outCnt.c_str(), "") == 0)
	{
	    failmessage.append("invalid -oCnt.\n");
	    fail = true;
	}
	
	if (strcmp(outPath.c_str(), "") != 0)
	{
	    DIR* d = opendir(outPath.c_str());
	    if(d)
		{
			closedir(d);
		}
	    else
		{
			failmessage.append("-oPath does not exist.\n");
			fail = true;
		}
	}

	int infFCnt = 1;
	if (infPathSpec)
	{
		infFCnt = countFiles(infPath);
		if (infFCnt < 1)
		{
			failmessage.append("ERROR: infile path \"");
			failmessage.append(infPath.c_str());
			failmessage.append("\" does not contain a positive number of F and R binary files, aborting.\n");
			fail = true;
		}
	}
	
	if (fail)
	{
		cerr << endl << failmessage.c_str() << endl << errorLine << endl;
		return(-1);
	}
	
	// Prefix the output names with the stub and then with the output path.
	// The path is concatenated as-is, without adding a separator.
	if(strcmp(outStub.c_str(),"") != 0)
	{
	    outI = outStub + "_" + outI;
	    outP = outStub + "_" + outP;
	    outCorr = outStub + "_" + outCorr;
		outCnt = outStub + "_" + outCnt;
	}
	
	if(strcmp(outPath.c_str(),"") != 0)
	{
	    outI = outPath + outI;
	    outP = outPath + outP;
	    outCorr = outPath + outCorr;
		outCnt = outPath + outCnt;
	}

	if (seed < -1)
		seed = -1;

	ifstream iff[infFCnt];
	ifstream ifr[infFCnt];
	string fileNames[infFCnt];
	
	if (infPathSpec)
	{
		if (openFiles(infPath, iff, ifr, fileNames) != infFCnt)
		{
			failmessage.append("ERROR: all files in \"");
			failmessage.append(infPath.c_str());
			failmessage.append("\" could not be opened, aborting.\n");
			fail = true;
		}
	}
	else
	{
		iff[0].open(infF.c_str(),ios::binary);
		ifr[0].open(infR.c_str(),ios::binary);
		if (iff[0].fail())
		{
			failmessage.append("ERROR: Forward reads binary file \"");
			failmessage.append(infF.c_str());
			failmessage.append("\" could not be opened, aborting.\n");
			fail = true;
		}
		if (ifr[0].fail())
		{
			failmessage.append("ERROR: Reverse reads binary file \"");
			failmessage.append(infR.c_str());
			failmessage.append("\" could not be opened, aborting.\n");
			fail = true;
		}
	}
	
/*
	iff.open(infF.c_str(),ios::binary);
	if (iff.fail())
	{
		failmessage.append("ERROR: Forward reads binary file \"");
		failmessage.append(infF.c_str());
		failmessage.append("\" could not be opened, aborting.\n");
		fail = true;
	}
	ifstream ifr;
	ifr.open(infR.c_str(),ios::binary);
	if (ifr.fail())
	{
		failmessage.append("ERROR: Reverse reads binary file \"");
		failmessage.append(infR.c_str());
		failmessage.append("\" could not be opened, aborting.\n");
		fail = true;
	}
*/
	
	ofstream ofi;
	ofi.open(outI.c_str(),ios::trunc);
	if (ofi.fail())
	{
		failmessage.append("ERROR: Output file \"");
		failmessage.append(outI.c_str());
		failmessage.append("\" could not be created, aborting.\n");
		fail = true;
	}

	ofstream ofc;
	ofc.open(outCorr.c_str(),ios::trunc);
	if (ofc.fail())
	{
		failmessage.append("ERROR: Output file \"");
		failmessage.append(outCorr.c_str());
		failmessage.append("\" could not be created, aborting.\n");
		fail = true;
	}

	ofstream ofcnt;
	ofcnt.open(outCnt.c_str(),ios::trunc);
	if (ofcnt.fail())
	{
		failmessage.append("ERROR: Output file \"");
		failmessage.append(outCnt.c_str());
		failmessage.append("\" could not be created, aborting.\n");
		fail = true;
	}
	ofstream ofp;
	int truncValF,truncValR;
	int estMinD = minD, estMaxD = maxD, estLambdaD = lambdaD;
	double* distDens;
	vector<string> distDensV;

	ofp.open(outP.c_str(),ios::trunc);
	if (ofp.fail())
	{
		failmessage.append("ERROR: Paramater file \"");
		failmessage.append(outP.c_str());
		failmessage.append("\" could not be created, aborting.\n");
		fail = true;
	}

	if (fail)
	{
		cerr << endl << failmessage.c_str() << endl << errorLine << endl;
		return(-1);
	}
	
	// Record the full command line at the top of the iteration file
	ofi << "! Command:";
	for (int i = 0; i < argc; i++)
		ofi << " " << argv[i];
	ofi << endl;
	
	vlevel = verbose;
	
	/*
	 * Check file lengths
	 */

	int minPos[infFCnt], maxPos[infFCnt], minF[infFCnt], maxF[infFCnt], minR[infFCnt], maxR[infFCnt];
	checkFileLengths(iff, ifr, minPos, maxPos, minR, minF, maxF, maxR, infFCnt);
	
	/*
	 *  Estimate parameters
	 */
	
	ofp << "! Command:";
	for (int i = 0; i < argc; i++)
		ofp << " " << argv[i];
	ofp << endl;
	
	vcerr(1) << "*** Identifying truncation limits and data distributions ***" << endl;
	calculateTruncVal(iff,ifr,&ofcnt,
					  &truncValF, &truncValR, truncLimit,
					  minPos, maxPos,
					  minR, minF,
					  maxF, maxR, infFCnt);
	ofcnt.close();
	
	distDens = new double[maxD-minD+1];

	// The empirical distance estimate is only run for a single file pair
	if (infFCnt == 1)
	{
		estimateDistance(&iff[0],&ifr[0],&ofc,
						 &estMinD, &estMaxD, &estLambdaD,
						 minD, maxD,
						 minPos[0], maxPos[0],
						 minR[0], minF[0],
						 maxF[0], maxR[0],
						 truncValF, truncValR,distDens,0.3);
		ofc.close();
	}
	
	/*
	if (useemp)
	{
		vcerr(2) << "\t* Estimating forward-reverse distance" << endl;
		estimateDistance(&iff,&ifr,&ofc,
						 &estMinD, &estMaxD, &estLambdaD,
						 minD, maxD,
						 minPos, maxPos,
						 minR, minF,
						 maxF, maxR,
						 truncValF, truncValR,distDens,0.3);
		
		ofc.close();
	}
	else
	{
	*/
	// Overwrite distDens with the Poisson pmf around lambdaD on [minD, maxD]
	for (int dist = minD; dist <= maxD; dist++)
		distDens[dist-minD] = gsl_ran_poisson_pdf(dist, lambdaD);
	/*
	  }
	*/
	
	vcerr(1) << "*** Parameter estimation ***" << endl;
	if (estimateParameters(iff,ifr,&ofi,
						   estMinD,estMaxD,estLambdaD,
						   pF, pR, lambdaF, lambdaR,
						   sampleSize,burnins,iterations,
						   seed,
						   minPos, maxPos,
						   minR, minF,
						   maxF, maxR,
						   truncValF, truncValR, infFCnt) < 0)
	{
		cerr << "ERROR: estimateParameters failed, aborting." << endl;
		
		for (int i=0; i<infFCnt; i++)
		{
			iff[i].close();
			ifr[i].close();
		}		
		ofi.close();
		ofp.close();
		delete[] distDens;
		return(-1);
	}
 	else
 	{
 		if (writeParameters(&ofp, pF, pR, lambdaF, lambdaR, lambdaD,
							truncValF, truncValR,
							minD, maxD, estLambdaD,
							estMinD, estMaxD, distDens) < 0)
 		{
 			cerr << "WARNING: could not write parameters to file, skipping." << endl;
 		}
 	}
	ofi.close();
	ofp.close();

	for (int i=0; i<infFCnt; i++)
	{
		iff[i].close();
		ifr[i].close();
	}

	delete[] distDens;
	
	vcerr(3) << setprecision(3);
	vcerr(3) << endl << "\t\tParameter estimates:" << endl;
	vcerr(3) << "\t\tpF: S = " << pF[0] << " NotS = " << pF[1] << endl;
	vcerr(3) << "\t\tpR: E = " << pR[0] << " NotE = " << pR[1] << endl;
	vcerr(3) << "\t\tlambdaF: S = " << lambdaF[0] << " NotS = " << lambdaF[1] << endl;
	vcerr(3) << "\t\tlambdaR: E = " << lambdaR[0] << " NotE = " << lambdaR[1] << endl;
	vcerr(3) << "\t\testMinD:  = " << estMinD << " estMaxd = " << estMaxD << endl;
}
/*
 * Builds the DFSP lookup table by uniformization.
 *
 * For every degree of freedom i (Ndofs = Ncells*Mspecies) a unit vector is
 * propagated with the Poisson-weighted powers of the uniformized matrix
 * I + D/lambda_max, where D is the sparse matrix (jcD, irD, prD) of the
 * model.  The resulting probability vector is sorted, truncated and stored
 * as a normalised CDF in column i of the output sparse matrix.
 *
 *   model            supplies Ncells, Mspecies and the sparse matrix jcD/irD/prD
 *   tauD             proposed time step; halved until the Poisson tail beyond
 *                    MAX_ITER terms is at most error_tolerance/2
 *   error_tolerance  truncation tolerance
 *   max_jump_in      stored in the table; negative selects the default of 3
 *   report_level     >0 prints start/finish lines, >1 prints progress details
 *
 * Returns a malloc'ed dfsp_table whose jcD/irD/prD arrays are also heap
 * allocated.
 *
 * NOTE(review): allocation results are not checked, and a model whose
 * lambda_max is 0 leads to a division by zero when rescaling -- confirm
 * callers rule these out.
 */
dfsp_table*create_dfsp_lookuptable(urdme_model *model, const double tauD, const double error_tolerance, const int max_jump_in,
                          const int report_level){
	
    //----------------------------------------
    int Ndofs = model->Ncells*model->Mspecies;
    dfsp_table*table = (dfsp_table*)malloc(sizeof(dfsp_table));  //the return element
    //----------------------------------------
    int max_jump = max_jump_in;
    if(max_jump<0){ max_jump=3; } // set default
    //----------------------------------------
	int last_percent_reported=0;
    //----------------------------------------
    clock_t start_timer,end_timer;
    double elapsed_time;
   
  
    int i,j,k,m;
    //----------------------------------------
    if(report_level>0){ printf("Starting State-Space Exploration (uniformization): tau=%e tol=%e max=%i\n",tauD,error_tolerance,max_jump); }

    start_timer=clock();

    /* To hold the output.  The row/value arrays start out NULL so that they
       are well defined even when Ndofs is 0 and can be grown with realloc. */
    size_t *jcD_out,*irD_out=NULL;
    double *prD_out=NULL;
	
    /* Uniformization parameters. */
    
    /*
     * MAX_ITER affects the chosen timestep by the solver,
     * since we modify the timestep such that uniformization converges
     * in at most this number of iterations
     */
    
    int MAX_ITER = 50;
    double lambda_max;
	double poisspdf[MAX_ITER],totp=0.0,max_error=0.0,rhs;

	size_t ix;
    
    jcD_out = (size_t *)malloc((Ndofs+1)*sizeof(size_t));
    jcD_out[0] = 0;
	
    /* To hold the current pvd */
    double *pdvi,*temp1,*temp2;
    pdvi = (double *)malloc(Ndofs*sizeof(double));
    temp1 = (double *)malloc(Ndofs*sizeof(double));
    temp2 = (double *)malloc(Ndofs*sizeof(double));

    /* Compute lambda_max: the largest negated diagonal entry of D */
    lambda_max = 0.0;
    for (i=0;i<Ndofs;i++){
      for (j=model->jcD[i];j<model->jcD[i+1];j++){	
        if (model->irD[j]==i)
          if (-model->prD[j]>lambda_max)
            lambda_max = -model->prD[j];
      }
    }
    if (report_level>1){
        printf("lambda_max %.4e\n",lambda_max);
    }
	
	/*
       We get a proposed timestep passed to the function (from error estimation). 
       If this is too large for uniformization to converge in MAX_ITER iterations,
	   we reduce the timestep.
     */
    double dt = tauD;
	totp = 1.0;
	do {
		totp=1.0;
		for (i=0; i<MAX_ITER; i++) {
			totp-=gsl_ran_poisson_pdf(i,lambda_max*dt);
		}
        if(totp>error_tolerance/2.0){
            dt/=2.0;
        }
	}while (totp>error_tolerance/2.0);
    if (dt<tauD){
       if (report_level>1){
          printf("Uniformization: overriding suggested tauD. Using tau_d = %e\n",dt);
        }
    }
	
    int start,stop;
    size_t nnz_coli=0;
    size_t nnz_coli_T=0;
    
	double cumsum;
	size_t *index;
	index = (size_t *)malloc(Ndofs*sizeof(size_t));
    
    /* Create the uniformized matrix. If memory is an issue, it is not necessary to  
	   form A expplicitly, but the code will run faster with A. */
    size_t *jcA,*irA;
    double *prA;
	int nnztot = model->jcD[Ndofs];
	
    jcA = (size_t *)malloc((Ndofs+1)*sizeof(size_t));
    irA = (size_t *)malloc(nnztot*sizeof(size_t));
    prA = (double *)malloc(nnztot*sizeof(double));
	
    memcpy(jcA,model->jcD,(Ndofs+1)*sizeof(size_t));
    memcpy(irA,model->irD,nnztot*sizeof(size_t));
    memcpy(prA,model->prD,nnztot*sizeof(double));
    
    /* Rescaled matrix. Obs. we do not add the eye-matrix here, since this may
     * cause error in the case of an all zero column. Instead we do that in 
     * the main matrix-vector multiply loop */ 
    for (i=0;i<Ndofs;i++){
      for (j=jcA[i];j<jcA[i+1];j++){
		prA[j]=prA[j]/lambda_max;
      }
    }
	
    /* Compute the Poisson PDF and determine how many iterations we need to do in the main loop. */
    int NUM_ITER=MAX_ITER;
    totp = 1.0;
    for (k=0;k<MAX_ITER;k++){
        poisspdf[k] = gsl_ran_poisson_pdf(k,lambda_max*dt);
        totp-=poisspdf[k];
        if (totp <= error_tolerance/2.0){
            NUM_ITER = k+1;
            break;
        }
    }
	
    for(i=0;i<Ndofs;i++){
        
       /* report every 5% */
       if(report_level>1){
            int cur_percent = (int) floor(((double)i/(double)Ndofs)*100.0);
            if(cur_percent > last_percent_reported + 4){
                last_percent_reported = cur_percent;
                end_timer=clock();
                elapsed_time = (double)(end_timer-start_timer)/CLOCKS_PER_SEC;
                printf("%i%% complete\t\telapsed: %es\n",last_percent_reported,elapsed_time);
            }
        }
        
        /* Initial condition: unit probability mass in state i */
		memset(pdvi,0,Ndofs*sizeof(double));
		memset(temp1,0,Ndofs*sizeof(double));
		memset(temp2,0,Ndofs*sizeof(double));

		temp1[i]=1.0;
        
		totp=1.0;
		for(k=0; k<NUM_ITER; k++){
		  
			/* Add to pdvi */
			cblas_daxpy(Ndofs,poisspdf[k],temp1,1,pdvi,1);
			
			/* Sparse matrix-dense vector product. */
			for (m=0;m<Ndofs;m++){
				rhs = temp1[m];
				if (rhs > 0.0){ // > works since we know that temp1 will always be positive. 
                    
                   start = jcA[m];
                   stop  = jcA[m+1];
				   /* handle the (stop-start) % 4 leading entries one by one,
				      then the remainder in groups of four */
				   ix = (stop-start) % 4;
				   for (j=start; j<start+ix; j++) {
					  temp2[irA[j]] += rhs*prA[j];  
					  
				   }
				   for (j=start+ix; j+3<stop; j += 4) {
					   temp2[irA[j]]   += rhs*prA[j];  
					   temp2[irA[j+1]] += rhs*prA[j+1];  
					   temp2[irA[j+2]] += rhs*prA[j+2];  
					   temp2[irA[j+3]] += rhs*prA[j+3];
				   }
				   /* Add unit diagonal */
				   temp2[m] += 1.0*rhs;
				}
			}
			
			memcpy(temp1,temp2,Ndofs*sizeof(double));
			memset(temp2,0,Ndofs*sizeof(double));
        
		}
		
		/* Sort pdvi; gsl_sort_index yields ascending order, so the largest
		   entries are read from the end of index[] below */
		memset(index,0,Ndofs*sizeof(size_t));
		gsl_sort_index(index,pdvi,1,(size_t)Ndofs);
        
		/* Count the number of non-zeros in this column (PDV): keep at least
		   as many entries as the column of D has, then stop once the
		   discarded mass drops below the tolerance */
		int min_col_sz = model->jcD[i+1]-model->jcD[i]; //size of the column of the D matrix
        cumsum = 0.0; j=Ndofs-1; nnz_coli=0;
        for (j=Ndofs-1; j>=0; j--){
            cumsum+=pdvi[index[j]];
            if (pdvi[index[j]]==0.0)
                break;
            nnz_coli++;
			if(nnz_coli>=min_col_sz && 1.0-cumsum < error_tolerance){ 
				break;
			}
        }
        nnz_coli_T+=nnz_coli;
		
		/* Assemble into the lookup-table (sparse matrix) */
		jcD_out[i+1]=jcD_out[i]+(size_t)nnz_coli;  //record the begining of this colum
		
		/* Grow the output arrays; realloc on a NULL pointer acts as malloc,
		   which covers the first column */
		irD_out = (size_t*) realloc(irD_out,jcD_out[i+1]*sizeof(size_t));
		prD_out = (double*) realloc(prD_out,jcD_out[i+1]*sizeof(double));

        /* For optimization purposes, we store the CDF rather than the PDF 
           here, since all we are going to do with this matrix is 
           inverse transform sampling during the DFSP step. */
		start = (int)jcD_out[i]; k=0; cumsum = 0.0;
		for (k=0;k<nnz_coli;k++){
			irD_out[start+k]  = index[Ndofs-k-1];  
			cumsum += pdvi[index[Ndofs-k-1]];
			prD_out[start+k] = cumsum;
		}
		
		/**
          * Renormalize, so that the PDF sums to 1.0
          * We spread the epsilon error equal on all the remaining states.
          * This is why we needed to compute to tol/2 accuracy.
        */
		for (k=0; k<nnz_coli; k++) {
			prD_out[start+k]/=cumsum;
		}
	
    }
    
    end_timer=clock();
    elapsed_time = (double)(end_timer-start_timer)/CLOCKS_PER_SEC;
   
    
    /* Stats */
    
    /* Average serch depth */
    /*double sdepth=0.0;
    for (i=0;i<Ndofs;i++){
        k=0;
        start = jcD_out[i];
        stop  = jcD_out[i+1];
        for (j=start; j<stop;j++){
           if(prD_out[j]>0.5) 
                break;
           k++;
        }      
        if (k>sdepth)
            sdepth = k;
    }*/
    
    /* Largest number of stored entries in any column */
    int nnzi;
    int maxnnzi = 0;
    for (i=0;i<Ndofs;i++){
       nnzi = jcD_out[i+1]-jcD_out[i];
       if (nnzi>maxnnzi)
           maxnnzi = nnzi;
    }
    
    if(report_level>0){ printf("\tComplete: elapsed: %es, error=%e Nmax=%i\n",elapsed_time,error_tolerance,maxnnzi);}
    if(report_level>1){printf("Number of iterations: %i\n",NUM_ITER); }
   // printf("Max search depth: %f\n",sdepth);
    //----------------------------------------
   
    table->Ndofs = Ndofs;
    /* NOTE(review): max_error is never updated after its initialisation to
       0.0, so the table always records 0.0 here -- confirm this is intended. */
    table->error_tolerance = max_error;
    table->tau_d = dt; 
    table->max_jump = max_jump;
   
    table->jcD = jcD_out;
    table->irD = irD_out;
    table->prD = prD_out;
	#ifdef DFSP_PROFILER
		profiler_addmemory("Lookup tables",(Ndofs+1)*sizeof(size_t) + jcD_out[Ndofs]*(sizeof(size_t)+sizeof(double)));
	#endif
    //----------------------------------------
    free(index);
    free(pdvi);
    free(temp1);
    free(temp2);
    
    free(jcA);
    free(irA);
    free(prA);
    //----------------------------------------
    return table;
}
Exemple #12
0
/*
 * Poisson probability mass P(X = k) for the integer mean lambda.
 *
 * gsl_ran_poisson_pdf takes the count as an unsigned int, so a negative k
 * would wrap to a huge count in the implicit conversion.  Negative counts
 * lie outside the support and are reported as probability 0.
 */
double pw_get_poisson(int k, int lambda){
  if (k < 0) {
    return 0.0;
  }
  return gsl_ran_poisson_pdf((unsigned int) k, lambda);
}
Exemple #13
0
/*
 * Command-line lookup of a density value, p-value or q-value for a
 * standard distribution.
 *
 * The distribution is selected by the option letter together with the
 * first character of its argument; its parameters and the number to look
 * up are then read from the following argv entries.  Without -p/-q the
 * pdf (or pmf) at the number is printed; with -p the lower-tail CDF and
 * with -q the upper-tail CDF.
 *
 * NOTE(review): the argv[optind + n] reads are not checked against argc.
 * 'H'/'h' are not in the getopt string, so that case is never selected,
 * and because 'p' is declared without an argument the Poisson branch
 * depends on optarg being set for it -- confirm against the getopt
 * implementation in use.
 */
int main(int argc, char **argv){
    distlist distribution = Normal;
    char	 msg[10000];
    int      c;   /* getopt returns int; storing it in char breaks the -1 test where char is unsigned */
    int      pval = 0, qval = 0;
    double   param1 = GSL_NAN, param2 =GSL_NAN, findme = GSL_NAN;
    char     number[1000];
	/* bounded write: argv[0] is inserted three times and may be arbitrarily long */
	snprintf(msg, sizeof(msg), "%s [opts] number_to_lookup\n\n"
    "Look up a probability or p-value for a given standard distribution.\n"
    "[This is still loosely written and counts as beta. Notably, negative numbers are hard to parse.]\n"
    "E.g.:\n"
    "%s -dbin 100 .5 34\n"
    "sets the distribution to a Binomial(100, .5), and find the odds of 34 appearing.\n"
    "%s -p 2     \n"
    "find the area of the Normal(0,1) between -infty and 2.  \n"
    "\n"
    "-pval Find the p-value: integral from -infinity to your value\n"
    "-qval Find the q-value: integral from your value to infinity\n"
    "\n"
    "After giving an optional -p or -q, specify the distribution. \n"
    "Default is Normal(0, 1). Other options:\n"
    "\t\t-binom Binomial(n, p)\n"
    "\t\t-beta Beta(a, b)\n"
    "\t\t-f F distribution(df1, df2)\n"
    "\t\t-norm Normal(mu, sigma)\n"
    "\t\t-negative bin Negative binomial(n, p)\n"
    "\t\t-poisson Poisson(L)\n"
    "\t\t-t t distribution(df)\n"
    "I just need enough letters to distinctly identify a distribution.\n"
, argv[0], argv[0], argv[0]); 

    opterr=0;
	if(argc==1){
		printf("%s", msg);
		return 0;
	}
	while ((c = getopt (argc, argv, "B:b:F:f:N:n:pqT:t:")) != -1){
		switch (c){
		  case 'B':
		  case 'b':
              /* -bi... selects Binomial, -be... selects Beta */
              if (optarg[0]=='i')
                  distribution = Binomial;
              else if (optarg[0]=='e')
                  distribution = Beta;
            else {
                printf("I can't parse the option -b%s\n", optarg);
                exit(0);
            }
              param1 = atof(argv[optind]);
              param2 = atof(argv[optind+1]);
              findme =  atof(argv[optind+2]);
			  break;
          case 'F':
          case 'f':
            distribution = F;
            param1 = atof(argv[optind]);
            findme =  atof(argv[optind+1]);
            break;
          case 'H':
		  case 'h':
			printf("%s", msg);
			return 0;
          case 'n':
          case 'N':
            if (optarg[0]=='o'){ //normal
                  param1 = atof(argv[optind]);
                  param2 = atof(argv[optind+1]);
                  findme =  atof(argv[optind+2]);
            } else if (optarg[0]=='e'){
                  distribution = Negbinom;
                  param1 = atof(argv[optind]);
                  param2 = atof(argv[optind+1]);
                  findme =  atof(argv[optind+2]);
            } else {
                printf("I can't parse the option -n%s\n", optarg);
                exit(0);
            }
			  break;
          case 'p':
            if (!optarg || optarg[0] == 'v')
                pval++;
            else if (optarg[0] == 'o'){
                distribution = Poisson;
                param1 = atof(argv[optind]);
                findme =  atof(argv[optind+1]);
            } else {
                printf("I can't parse the option -p%s\n", optarg);
                exit(0);
            }
            break;
          case 'q':
            qval++;
            break;
          case 'T':
          case 't':
            distribution = T;
            param1 = atof(argv[optind]);
            findme =  atof(argv[optind+1]);
            break;
          case '?'://probably a negative number
            /* rebuild the digits getopt took for an option and store the
               negated value in the first slot that is still unset */
            if (optarg)
                 snprintf(number, 1000, "%c%s", optopt, optarg);
            else snprintf(number, 1000, "%c", optopt);
            if (gsl_isnan(param1)) param1 = -atof(number);
            else if (gsl_isnan(param2)) param2 = -atof(number);
            else if (gsl_isnan(findme)) findme = -atof(number);
		}
	}
    if (gsl_isnan(findme)) findme =  atof(argv[optind]);
    //defaults, as promised
    if (gsl_isnan(param1)) param1 = 0;
    if (gsl_isnan(param2)) param2 = 1;
    if (!pval && !qval){
        /* Normal: shift the argument by the mean, as the CDF branches below
           do; the density itself must not have the mean added to it */
        double val =
        distribution == Beta ? gsl_ran_beta_pdf(findme, param1, param2)
        : distribution == Binomial ? gsl_ran_binomial_pdf(findme, param2, param1)
        : distribution == F ? gsl_ran_fdist_pdf(findme, param1, param2) 
        : distribution == Negbinom ? gsl_ran_negative_binomial_pdf(findme, param2, param1)
        : distribution == Normal ? gsl_ran_gaussian_pdf(findme-param1, param2)
        : distribution == Poisson ? gsl_ran_poisson_pdf(findme, param1) 
        : distribution == T ? gsl_ran_tdist_pdf(findme, param1) : GSL_NAN;
        printf("%g\n", val); 
        return 0;
    }
    if (distribution == Binomial){
        printf("Sorry, the GSL doesn't have a Binomial CDF.\n");
        return 0; }
    if (distribution == Negbinom){
        printf("Sorry, the GSL doesn't have a Negative Binomial CDF.\n");
        return 0; }
    if (distribution == Poisson){
        printf("Sorry, the GSL doesn't have a Poisson CDF.\n");
        return 0; }
    if (pval){
        double val =
        distribution == Beta ? gsl_cdf_beta_P(findme, param1, param2)
        : distribution == F ? gsl_cdf_fdist_P(findme, param1, param2) 
        : distribution == Normal ? gsl_cdf_gaussian_P(findme-param1, param2)
        : distribution == T ? gsl_cdf_tdist_P(findme, param1) : GSL_NAN;
        printf("%g\n", val); 
        return 0;
    }
    if (qval){
        double val =
        distribution == Beta ? gsl_cdf_beta_Q(findme, param1, param2)
        : distribution == F ? gsl_cdf_fdist_Q(findme, param1, param2) 
        : distribution == Normal ? gsl_cdf_gaussian_Q(findme-param1, param2)
        : distribution == T ? gsl_cdf_tdist_Q(findme, param1) : GSL_NAN;
        printf("%g\n", val); 
    }
}
Exemple #14
0
/*
 * Poisson probability mass P(X = k) for mean mu.
 *
 * gsl_ran_poisson_pdf takes the count as an unsigned int, so a negative k
 * would wrap to a huge count in the implicit conversion.  Negative counts
 * lie outside the support and are reported as probability 0.
 */
double
gen_poission_pdf(int k, double mu)
{
	if (k < 0)
		return (0.0);
	return (gsl_ran_poisson_pdf((unsigned int)k, mu));
}