示例#1
0
double apop_rng_GHgB3(gsl_rng * r, double* a){
    Apop_assert_nan((a[0]>0) && (a[1] > 0) && (a[2] > 0), "apop_GHgB3_rng took a zero parameter; bad.");
double		aa	= gsl_ran_gamma(r, a[0], 1),
		b	= gsl_ran_gamma(r, a[1], 1),
		c	= gsl_ran_gamma(r, a[2], 1);
int		p	= gsl_ran_poisson(r, aa*b/c);
	return p;
}
示例#2
0
double
gsl_ran_beta (const gsl_rng * r, const double a, const double b)
{
  double x1 = gsl_ran_gamma (r, a, 1.0);
  double x2 = gsl_ran_gamma (r, b, 1.0);

  return x1 / (x1 + x2);
}
示例#3
0
double
gsl_ran_fdist (const gsl_rng * r, const double nu1, const double nu2)
{

  double Y1 =  gsl_ran_gamma (r, nu1 / 2, 2.0);
  double Y2 =  gsl_ran_gamma (r, nu2 / 2, 2.0);

  double f = (Y1 * nu2) / (Y2 * nu1);

  return f;
}
示例#4
0
void Metropolis::regression_adapt(int numSteps, int stepSize)
{
	std::vector<STM::ParName> parNames (parameters.names());
	
	std::map<STM::ParName, std::map<std::string, double *> > regressionData;
	for(const auto & par : parNames)
	{
		regressionData[par]["log_variance"] = new double [numSteps];
		regressionData[par]["variance"] = new double [numSteps];
		regressionData[par]["acceptance"] = new double [numSteps];
	}
	
	for(int i = 0; i < numSteps; i++)
	{
		// compute acceptance rates for the current variance term
		parameters.set_acceptance_rates(do_sample(stepSize));
	
		for(const auto & par : parNames)
		{
			// save regression data for each parameter
			regressionData[par]["log_variance"][i] = std::log(parameters.sampler_variance(par));
			regressionData[par]["variance"][i] = parameters.sampler_variance(par);
			regressionData[par]["acceptance"][i] = parameters.acceptance_rate(par);
			
			// choose new variances at random for each parameter; drawn from a gamma with mean 2.38 and sd 2
			parameters.set_sampler_variance(par, gsl_ran_gamma(rng.get(), 1.4161, 1.680672));
		}
	
	}
	
	// perform regression for each parameter and clean up
	for(const auto & par : parNames)
	{
		// first compute the correlation for variance and log_variance, use whichever is higher
		double corVar = gsl_stats_correlation(regressionData[par]["variance"], 1,
				regressionData[par]["acceptance"], 1, numSteps);
		double corLogVar = gsl_stats_correlation(regressionData[par]["log_variance"], 1,
				regressionData[par]["acceptance"], 1, numSteps);

		double beta0, beta1, cov00, cov01, cov11, sumsq, targetVariance;
		if(corVar >= corLogVar)
		{
			gsl_fit_linear(regressionData[par]["variance"], 1, 
					regressionData[par]["acceptance"], 1, numSteps, &beta0, &beta1, 
					&cov00, &cov01, &cov11, &sumsq);
			targetVariance = (parameters.optimal_acceptance_rate() - beta0)/beta1;
		} else
		{
			gsl_fit_linear(regressionData[par]["log_variance"], 1, 
					regressionData[par]["acceptance"], 1, numSteps, &beta0, &beta1,
					&cov00, &cov01, &cov11, &sumsq);
			targetVariance = std::exp((parameters.optimal_acceptance_rate() - beta0)/beta1);
		}
		parameters.set_sampler_variance(par, targetVariance);

		delete [] regressionData[par]["log_variance"];
		delete [] regressionData[par]["variance"];
		delete [] regressionData[par]["acceptance"];
	}
}
示例#5
0
文件: dirichlet.c 项目: lemahdi/mglib
void
gsl_ran_dirichlet (const gsl_rng * r, const size_t K,
                   const double alpha[], double theta[])
{
  size_t i;
  double norm = 0.0;

  for (i = 0; i < K; i++)
    {
      theta[i] = gsl_ran_gamma (r, alpha[i], 1.0);
    }
  
  for (i = 0; i < K; i++)
    {
      norm += theta[i];
    }

  if (norm < GSL_SQRT_DBL_MIN)  /* Handle underflow */
    {
      ran_dirichlet_small (r, K, alpha, theta);
      return;
    }

  for (i = 0; i < K; i++)
    {
      theta[i] /= norm;
    }
}
示例#6
0
unsigned int
gsl_ran_negative_binomial (const gsl_rng * r, double p, double n)
{
  double X = gsl_ran_gamma (r, n, 1.0) ;
  unsigned int k = gsl_ran_poisson (r, X*(1-p)/p) ;
  return k ;
}
示例#7
0
文件: GSLRNG.c 项目: B-Rich/breve
int GSLRNG_gamma(stEval *args, stEval *result, void *i) {
    gsl_rng *r = STPOINTER(&args[0]);
    double a = STDOUBLE(&args[1]);
	double b = STDOUBLE(&args[2]);
    STDOUBLE(result) = gsl_ran_gamma(r,a,b);
    return EC_OK;
}
示例#8
0
double pp_model::draw_mean_from_posterior(changepoint *obj1, changepoint *obj2){
   calculate_posterior_mean_parameters(obj1,obj2);
   if(!m_rng){
      m_rng = gsl_rng_alloc(gsl_rng_taus);
      gsl_rng_set (m_rng,0);
   }
   m_mean=gsl_ran_gamma(m_rng,m_alpha_star,1.0/m_beta_star);
   return m_mean;
}
示例#9
0
文件: test.c 项目: CNMAT/gsl
double
test_gamma_vlarge (void)
{
  /* Scale the distribution to get it into the range [-5,5] */
  double c = 2.71828181565;
  double b = 6.32899304917e-10;
  double d = 1e4;
  return (gsl_ran_gamma (r_global, 4294967296.0, b) - c) * d;
}
示例#10
0
int DPMHC_K(struct str_DPMHC *ptr_DPMHC_data)
{
    int i_K = ptr_DPMHC_data->i_K;
    gsl_vector *v_u = ptr_DPMHC_data->v_u;
    gsl_vector *v_v = ptr_DPMHC_data->v_v;
    gsl_vector *v_w  = ptr_DPMHC_data->v_w;
    gsl_matrix *m_DPtheta = ptr_DPMHC_data->m_DPtheta;
    double d_DPalpha = ptr_DPMHC_data->d_DPalpha;

    int K_tmp, K_new,j;
    double a,v_j,w_j,csum,min_u;
    //gsl_vector_view theta_j;

  //int k_asset_number = P -> size1; /* number of assets in model */

    K_tmp = i_K;
    min_u = gsl_vector_min ( v_u );
    a = 1.0 - min_u;

    if( a == 1.0 )
        printf("**********min_u = %g *************\n",min_u);

    csum = 0.0;
    j=0;

    while ( csum <= a ){

        /* check if new v_j,w_j and theta_j should be generated */
        if( j >= K_tmp ){

            v_j = gsl_ran_beta ( rng , 1.0, d_DPalpha );
            vset( v_v, j, v_j);

            w_j = v_j * (vget( v_w, j-1 )/vget(v_v,j-1))*(1.0-vget(v_v,j-1));
            vset( v_w, j, w_j);

        /* generate new mu, xi, tau from prior G_0 */
            mset(m_DPtheta, j, 0,
                ptr_DPMHC_data->d_m0 + gsl_ran_gaussian_ziggurat(rng, sqrt(ptr_DPMHC_data->d_s2m)));

            mset(m_DPtheta, j, 1,
                gsl_ran_gaussian_ziggurat(rng, ptr_DPMHC_data->d_A));

            mset(m_DPtheta, j, 2,
                gsl_ran_gamma(rng, 0.5, 0.5) );
        }

        csum += vget(v_w,j);
        K_new = j + 1;
        j++;
    }

    ptr_DPMHC_data->i_K = K_new;

    return 0;
}
示例#11
0
int MoveRho::move()
{
    int r=param->getRecTree()->numRecEdge();
    double T=param->getRecTree()->getTTotal();
    double rho=param->getRho();
    double rho2=gsl_ran_gamma(rng,1.0+r,1.0/(param->hyperPriorOfRho()+T*0.5));
    param->setRho(rho2);
    dlog(1)<<"Gibbs update of rho from "<<rho<<" to "<<rho2<<"..."<<endl;
    numcalls++;numaccept++;
    return(1);
}
示例#12
0
void librdist_gamma(gsl_rng *rng, int argc, void *argv, int bufc, float *buf){
	t_atom *av = (t_atom *)argv;
	if(argc != librdist_getnargs(ps_gamma)){
		return;
	}
	const double a = librdist_atom_getfloat(av);
	const double b = librdist_atom_getfloat(av + 1);
	int i;
	for(i = 0; i < bufc; i++)
	       	buf[i] = (float)gsl_ran_gamma(rng, a, b);
}
 virtual double sample(gsl_rng* rng, arma::uword i, arma::uword j) {
   auto shape = lf->f(wshape(i,j));
   auto scale = lf->f(wscale(i,j));
   auto z = gsl_ran_gamma(rng, shape, scale);
   //LOG_IF(fatal, (z < 1e-320) || (!isfinite(z)))
   //       << "shape=" << shape
   //      << " scale=" << scale
   //       << " z=" << z;
   z = max(z, min_gamma_sample);
   assert(z >= 1e-300);
   return z;
 }
示例#14
0
int DPMHC_tau_smplr(struct str_DPMHC *ptr_DPMHC_data)
{
    int i,j;
    int i_n = ptr_DPMHC_data->v_y->size;
    int i_K = ptr_DPMHC_data->i_K;
    int i_nj;
    double d_muj,d_yi;
    double d_yhat;

    double d_xij,d_tauj;



    gsl_vector *v_y = ptr_DPMHC_data->v_y;
    gsl_vector_int *vi_S = ptr_DPMHC_data->vi_S;
    gsl_vector_int *vi_n = ptr_DPMHC_data->vi_n;
    gsl_matrix *m_theta = ptr_DPMHC_data->m_DPtheta;


 //  printf("\ni_K = %d\n",i_K);
    for(j=0;j<i_K;j++){

        d_muj = mget(m_theta,j, 0);
        d_xij = mget(m_theta,j, 1);

        d_yhat = 0.;
        i_nj = 0;
        for(i=0;i<i_n;i++){
            if( vget_int(vi_S,i) == j ){
                d_yi = vget(v_y,i);
                d_yhat += (d_yi/fabs(d_xij) - d_muj/fabs(d_xij)) * (d_yi/fabs(d_xij) - d_muj/fabs(d_xij));
                i_nj++;
            }
        }
        if (vget_int(vi_n,j) != i_nj){
            fprintf(stderr,"Error in  DPMN_tau_smplr(): vi_n[%d] does not equal i_nj\n", j);
            exit(1);
        }

        d_tauj = gsl_ran_gamma(rng, 0.5 + (double)i_nj/2.0, 0.5 + d_yhat/2.0);

        mset(m_theta,j, 2, d_tauj);


       // printf("%d: eta = %g lambda^2 = %g\n",j, mget(m_theta,j,0), mget(m_theta,j,1) );
     }





    return 0;
}
示例#15
0
文件: dpeta.c 项目: juapebe/HPC
void DP_eta_theta(PARAM *param, PRIOR *prior, DATA *data, const gsl_rng *r, int pid, int *inuse) {
  int i, j, id, accept, pass;
  float Delta, mhratio, newval, scale, tmp_lambda, tmp;
  scale = prior->gamma_eta[pid] / (1.0 - prior->gamma_eta[pid]);
  if(inuse[pid] == 0) {
    pass = 0;
    while(!pass) {
      newval = 1.0 / gsl_ran_gamma(r, 100.0, 1.0);
      if(newval < 2.0) pass = 1;
    }
    Delta = newval - prior->theta_eta[pid];
    prior->theta_eta[pid] = newval;
  }
  else {
    /* metropolis-hastings */
    mhratio = 0.0;
    Delta = gsl_ran_gaussian(r, 0.1);
    if(prior->theta_eta[pid] + Delta <= 0.0 || prior->theta_eta[pid] + Delta > 2.0) {
      accept = 0;
    }
    else {
      for(i=0;i<data->nprey;i++) {
        if(prior->w_eta[i] == pid) {
          for(j=0;j<data->preyNinter[i];j++) {
            id = data->p2i[i][j];
            if(param->Z[data->a2u[id]] && data->miss[id] == 0) {
              tmp_lambda = param->lambda_true[id];
              tmp = data->d[id];
              mhratio += log_gaussian(tmp, (tmp_lambda), prior->theta_eta[pid]+Delta)
                       - log_gaussian(tmp, (tmp_lambda), prior->theta_eta[pid]);
            }
          }
        }
      }
      mhratio += log_inv_gamma( (prior->theta_eta[pid]+Delta), prior->shape_eta, prior->scale_eta)
               - log_inv_gamma( prior->theta_eta[pid], prior->shape_eta, prior->scale_eta);
      accept = gsl_ran_flat(r, 0.0, 1.0) <= GSL_MIN(1.0, exp(mhratio)) ? 1 : 0 ;
    }

    /* if accepted, update param and lambda */
    if(accept) {
      prior->theta_eta[pid] += Delta;
      for(i=0;i<data->nprey;i++) {
        if(prior->w_eta[i] == pid) {
          param->eta[i] += Delta;
        }
      }
    }
  }
}
示例#16
0
文件: bayes_gfl.c 项目: tansey/gfl
double sample_lambda_doublepareto2(const gsl_rng *random, double *beta, 
                               int dk_rows, int *dk_rowbreaks, int *dk_cols, double *deltak,
                               double a, double b, double gamma, double *tau)
{
    int i;
    double *x;
    double lambda;

    x = (double *) malloc(dk_rows * sizeof(double));

    /* Sample the global lambda parameter */
    lambda = gsl_ran_gamma(random, a + gamma * dk_rows, 1.0 / (b + vec_sum(dk_rows, tau)));

    /* Sample the local tau parameters */
    mat_dot_vec(dk_rows, dk_rowbreaks, dk_cols, deltak, beta, x);
    vec_abs(dk_rows, x);
    for (i = 0; i < dk_rows; i++){
        tau[i] = gsl_ran_gamma(random, gamma+1, 1.0 / (x[i] + lambda));
    }

    free(x);

    return lambda;
}
示例#17
0
文件: bayes_gfl.c 项目: tansey/gfl
void sample_tau_laplace_gamma(const gsl_rng *random, double *beta,
                              int dk_rows, int *dk_rowbreaks, int *dk_cols, double *dk_vals,
                              double lambda, double tau_hyperparameter, 
                              double *tau)
{
    int i;
    int prev_break;
    double x;

    prev_break = 0;
    for(i = 0; i < dk_rows; i++){
        x = lambda + fabs(vec_dot_beta(dk_rowbreaks[i] - prev_break, dk_cols + prev_break, dk_vals + prev_break, beta));
        tau[i] = gsl_ran_gamma(random, tau_hyperparameter+1, 1.0 / x);
        prev_break = dk_rowbreaks[i];
    }
}
示例#18
0
void main(){
	int N=50000;
	int thin=1000;
	int i,j;
	gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937);
	double x=0;
	double y=0;
	printf("Iter x y\n");
	for (i=0;i<N;i++) {
		for (j=0;j<thin;j++) {
			x=gsl_ran_gamma(r,3.0,1.0/(y*y+4));
			y=1.0/(x+1)+gsl_ran_gaussian(r,1.0/sqrt(2*x+2));
		}
		printf("%d %f %f\n",i,x,y);
	}
}
示例#19
0
文件: bayes_gfl.c 项目: tansey/gfl
double sample_lambda_laplace(const gsl_rng *random, double *beta, 
                               int dk_rows, int *dk_rowbreaks, int *dk_cols, double *deltak,
                               double a, double b)
{
    double *x;
    double lambda;

    x = (double *) malloc(dk_rows * sizeof(double));
    mat_dot_vec(dk_rows, dk_rowbreaks, dk_cols, deltak, beta, x);
    vec_abs(dk_rows, x);

    lambda = gsl_ran_gamma(random, a+dk_rows, 1.0 / (b + vec_sum(dk_rows, x)));

    free(x);

    return lambda;
}
示例#20
0
double draw_gamma_or_uniform(const gsl_rng * rng, double shape, double scale) {
    double draw;
    if ((shape > 0.0) && (scale > 0.0)) {
        draw = gsl_ran_gamma(rng, shape,
                scale);
    }
    else {
        double tau_a, tau_b;
        tau_a = fabs(shape);
        tau_b = fabs(scale);
        if (tau_a < tau_b) {
            draw = gsl_ran_flat(rng, tau_a, tau_b);
        }
        else {
            draw = gsl_ran_flat(rng, tau_b, tau_a);
        }
    }
    return draw;
}
示例#21
0
int ran_dirichlet(const gsl_rng *r, const gsl_vector *alpha,
		  gsl_vector *sample)
{
    const int k = alpha->size;
    double sum =0.0;
    double alphai =0.0;
    double xi;
    int i;
    
    for(i=0; i<k; i++)
    {
	alphai = gsl_vector_get(alpha, i);
	xi = gsl_ran_gamma(r, alphai, 1.0);
	gsl_vector_set(sample, i, xi);
	sum += xi;
    }
    gsl_vector_scale(sample, 1/sum);

    return 0;
}
示例#22
0
int DPMHC_init(struct str_DPMHC *ptr_DPMHC_data, int i_draws){

    int j;
    int i_T = (ptr_DPMHC_data->vi_S)->size;
    if (i_T == 0){
        fprintf(stderr,"Error in DPMHC_init(): DPMHC_alloc() has not been called.\n");
        exit(1);
    }

    int i_K;
    gsl_matrix *m_DPtheta = ptr_DPMHC_data->m_DPtheta;

    ptr_DPMHC_data->m_DPmcmc = gsl_matrix_alloc(i_draws, 2); // for draw of i_K and d_DPalpha

    // initialize slice truction to K = 4 and one alive cluster, i_m = 1
    i_K = ptr_DPMHC_data->i_K = 4;
    ptr_DPMHC_data->i_m = 1;
    gsl_vector_int_set_all(ptr_DPMHC_data->vi_S,0);
    vset_int(ptr_DPMHC_data->vi_n,0,i_T);

    // draw DP precision parameter d_DPalpha ~ Gamma(a,b)
    double d_DPalpha;
    d_DPalpha = ran_gamma(rng, ptr_DPMHC_data->d_a, ptr_DPMHC_data->d_b);
    ptr_DPMHC_data->d_DPalpha = d_DPalpha;

    // Draw initial mixture locations for K clusters
    for(j = 0; j < i_K; j++){

        mset(m_DPtheta, j, 0,
                ptr_DPMHC_data->d_m0 + gsl_ran_gaussian_ziggurat(rng, sqrt(ptr_DPMHC_data->d_s2m)));

        mset(m_DPtheta, j, 1,
                gsl_ran_gaussian_ziggurat(rng, ptr_DPMHC_data->d_A));

        mset(m_DPtheta, j, 2,
                gsl_ran_gamma(rng, 0.5, 0.5) );

    }

    return 0;
}
示例#23
0
文件: rnd.cpp 项目: cran/mvabund
int rmvt(const gsl_rng *r, const unsigned int n, const gsl_vector *location, const gsl_matrix *scale, const unsigned int dof, gsl_vector *result)
{
    unsigned int k;
    gsl_matrix *work = gsl_matrix_alloc(n,n);
    double ax = 0.5*dof; 

    ax = gsl_ran_gamma(r,ax,(1/ax));     /* gamma distribution */

    gsl_matrix_memcpy(work,scale);
    gsl_matrix_scale(work,(1/ax));       /* scaling the matrix */
    gsl_linalg_cholesky_decomp(work);

    for(k=0; k<n; k++)
	gsl_vector_set( result, k, gsl_ran_ugaussian(r) );

    gsl_blas_dtrmv(CblasLower, CblasNoTrans, CblasNonUnit, work, result);
    gsl_vector_add(result, location);

    gsl_matrix_free(work);

    return 0;
}
示例#24
0
void
gsl_ran_dirichlet (const gsl_rng * r, const size_t K,
                   const double alpha[], double theta[])
{
  size_t i;
  double norm = 0.0;

  for (i = 0; i < K; i++)
    {
      theta[i] = gsl_ran_gamma (r, alpha[i], 1.0);
    }

  for (i = 0; i < K; i++)
    {
      norm += theta[i];
    }

  for (i = 0; i < K; i++)
    {
      theta[i] /= norm;
    }
}
示例#25
0
文件: dirichlet.c 项目: lemahdi/mglib
static void 
ran_dirichlet_small (const gsl_rng * r, const size_t K,
                     const double alpha[], double theta[])
{
  size_t i;
  double norm = 0.0, umax = 0;

  for (i = 0; i < K; i++)
    {
      double u = log(gsl_rng_uniform_pos (r)) / alpha[i];
      
      theta[i] = u;

      if (u > umax || i == 0) {
        umax = u;
      }
    }
  
  for (i = 0; i < K; i++)
    {
      theta[i] = exp(theta[i] - umax);
    }
  
  for (i = 0; i < K; i++)
    {
      theta[i] = theta[i] * gsl_ran_gamma (r, alpha[i] + 1.0, 1.0);
    }

  for (i = 0; i < K; i++)
    {
      norm += theta[i];
    }

  for (i = 0; i < K; i++)
    {
      theta[i] /= norm;
    }
}
示例#26
0
double
gsl_ran_gamma (const gsl_rng * r, const double a, const double b)
{
  /* assume a > 0 */

  if (a < 1)
    {
      double u = gsl_rng_uniform_pos (r);
      return gsl_ran_gamma (r, 1.0 + a, b) * pow (u, 1.0 / a);
    }

  {
    double x, v, u;
    double d = a - 1.0 / 3.0;
    double c = (1.0 / 3.0) / sqrt (d);

    while (1)
      {
        do
          {
            x = gsl_ran_gaussian_ziggurat (r, 1.0);
            v = 1.0 + c * x;
          }
        while (v <= 0);

        v = v * v * v;
        u = gsl_rng_uniform_pos (r);

        if (u < 1 - 0.0331 * x * x * x * x) 
          break;

        if (log (u) < 0.5 * x * x + d * (1 - v + log (v)))
          break;
      }
    
    return b * d * v;
  }
}
示例#27
0
double
gsl_ran_chisq (const gsl_rng * r, const double nu)
{
  double chisq = 2 * gsl_ran_gamma (r, nu / 2, 1.0);
  return chisq;
}
示例#28
0
int main(int argc, char* argv[])
{	int N;
	N = atoi(argv[1]);
	int width  = N; 
	int height = N;
  
	int i,j,k,r;
	long delta_ms; 
	gsl_rng * rgen = gsl_rng_alloc(gsl_rng_taus);
	double a,b;
 	a=31,15;
 	b=-1,444445;
	struct timeval T1, T2;

	float * A = (float *)malloc(sizeof(float)*width*height);
	float * B = (float *)malloc(sizeof(float)*width*height);
	float * C = (float *)malloc(sizeof(float)*width*height);
	float * Res = (float *)malloc(sizeof(float)*width);
	float * D= (float *)malloc(sizeof(float)*width*height);

	cl_device_id device_id = NULL;
	cl_context context = NULL;
	cl_command_queue command_queue = NULL;
	cl_mem memobjA = NULL;
	cl_mem memobjB = NULL;
	cl_mem memobjC = NULL;
	cl_mem memobjRes = NULL;
	cl_mem rowA = NULL;
	cl_mem colC = NULL;
	cl_program program = NULL;
	cl_kernel kernelMatrixMult = NULL;
	cl_kernel kernelVectMult = NULL;
	cl_kernel kernelVectSred = NULL;
	cl_platform_id platform_id = NULL;
	cl_uint ret_num_devices;
	cl_uint ret_num_platforms;
	cl_int ret;

	//char string[MEM_SIZE];

	FILE *fp;
	char fileName[] = "./multi.cl";
	char *source_str;
	size_t source_size;
	int row = width;
	int col = height;

	/* Load the source code containing the kernel*/
	fp = fopen(fileName, "r");
	if (!fp) {
		printf("Failed to load kernel.\n");
		exit(1);
	}

	source_str = (char*)malloc(MAX_SOURCE_SIZE);
	source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
	fclose( fp );

	/* Get Platform and Device Info */
	ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
	ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices);
	/* Create OpenCL context */
	context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
	/* Create Command Queue */
	command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
	/* Create Kernel Program from the source */
	program = clCreateProgramWithSource(context, 1, (const char **)&source_str,(const size_t *)&source_size, &ret);
	/* Build Kernel Program */
	ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
	/* Create OpenCL Kernel */
	kernelMatrixMult = clCreateKernel(program, "matrixMultiplication", &ret);
	kernelVectMult = clCreateKernel(program, "matrixVectorMultiplication", &ret);
	kernelVectSred = clCreateKernel(program, "matrixVectorSred", &ret);

	/* Create Memory Buffer */
	memobjA = clCreateBuffer(context, CL_MEM_READ_WRITE, width * height * sizeof(float), NULL, &ret);
	memobjB = clCreateBuffer(context, CL_MEM_READ_WRITE, width * height * sizeof(float), NULL, &ret);
	memobjC = clCreateBuffer(context, CL_MEM_READ_WRITE, width * height * sizeof(float), NULL, &ret);
	memobjRes = clCreateBuffer(context, CL_MEM_READ_WRITE, width * sizeof(float), NULL, &ret);
	rowA = clCreateBuffer(context, CL_MEM_READ_WRITE,  sizeof(int), NULL, &ret);
	colC = clCreateBuffer(context, CL_MEM_READ_WRITE,  sizeof(int), NULL, &ret);

	gettimeofday(&T1, NULL);
        printf("Started\n");

	for(r=0; r<2; r++){
		//generate matrix
		printf("Matrix A Result  %i\n|",r);
		for(i = 0;i < width; i++) {
			for(j=0;j<height;j++) {
			    *(A+i*height+j) = gsl_ran_gamma(rgen,a,b);;
			    printf("%f|",*(A+i*height+j));
			}
			printf("\n");
		}

		// Copy the lists A and B to their respective memory buffers
		ret = clEnqueueWriteBuffer(command_queue,memobjA, CL_TRUE, 0, width * height * sizeof(float), A, 0, NULL, NULL);;
		ret = clEnqueueWriteBuffer(command_queue, rowA, CL_TRUE, 0, sizeof(int), &row, 0, NULL, NULL);
		ret = clEnqueueWriteBuffer(command_queue, colC, CL_TRUE, 0, sizeof(int), &col, 0, NULL, NULL);	
		/* Set OpenCL Kernel Arguments */
		ret = clSetKernelArg(kernelVectMult, 0, sizeof(cl_mem), (void *)&memobjA);
		ret = clSetKernelArg(kernelVectMult, 1, sizeof(cl_mem), (void *)&memobjB);
		ret = clSetKernelArg(kernelVectMult, 2, sizeof(int), (void *)&row);
		ret = clSetKernelArg(kernelVectMult, 3, sizeof(int), (void *)&col);
		/* Execute OpenCL Kernel */
		size_t globalThreads[2] = {width, height};
		size_t localThreads[2] = {1,1};
		clEnqueueNDRangeKernel(command_queue, kernelVectMult, 2, NULL, globalThreads, localThreads, NULL, 0, NULL);
		/* Copy results from the memory buffer */
		ret = clEnqueueReadBuffer(command_queue, memobjB, CL_TRUE, 0, width * height * sizeof(float),B, 0, NULL, NULL);
		
		printf("Matrix Mult Result  %i\n|",r);		
		for(i = 0;i < width; i++) {
			for(j=0;j<height;j++) {
			    printf("%f|",*(B+i*height+j));
			}
			printf("\n");
		}

		// Copy the lists A and B to their respective memory buffers
		ret = clEnqueueWriteBuffer(command_queue,memobjA, CL_TRUE, 0, width * height * sizeof(float), A, 0, NULL, NULL);
		ret = clEnqueueWriteBuffer(command_queue, memobjB, CL_TRUE, 0, width * height * sizeof(float), B, 0, NULL, NULL);
		ret = clEnqueueWriteBuffer(command_queue, rowA, CL_TRUE, 0, sizeof(int), &row, 0, NULL, NULL);
		ret = clEnqueueWriteBuffer(command_queue, colC, CL_TRUE, 0, sizeof(int), &col, 0, NULL, NULL);	
		/* Set OpenCL Kernel Arguments */
		ret = clSetKernelArg(kernelMatrixMult, 0, sizeof(cl_mem), (void *)&memobjA);
		ret = clSetKernelArg(kernelMatrixMult, 1, sizeof(cl_mem), (void *)&memobjB);
		ret = clSetKernelArg(kernelMatrixMult, 2, sizeof(cl_mem), (void *)&memobjC);
		ret = clSetKernelArg(kernelMatrixMult, 3, sizeof(int), (void *)&row);
		ret = clSetKernelArg(kernelMatrixMult, 4, sizeof(int), (void *)&col);
		/* Execute OpenCL Kernel */
		clEnqueueNDRangeKernel(command_queue, kernelMatrixMult, 2, NULL, globalThreads, localThreads, NULL, 0, NULL);
		/* Copy results from the memory buffer */
		ret = clEnqueueReadBuffer(command_queue, memobjC, CL_TRUE, 0, width * height * sizeof(float), C, 0, NULL, NULL);

		printf("Matrix Result GPU - multiplication %i\n|",r);
		for(i = 0;i < width; i++) {
			for(j=0;j<height;j++) {
			    printf("%f|",*(C + i*height + j));
			}
			printf("\n");
		}

		// Copy the lists A and B to their respective memory buffers
		ret = clEnqueueWriteBuffer(command_queue,memobjC, CL_TRUE, 0, width * height * sizeof(float), C, 0, NULL, NULL);
		ret = clEnqueueWriteBuffer(command_queue, rowA, CL_TRUE, 0, sizeof(int), &row, 0, NULL, NULL);	
		/* Set OpenCL Kernel Arguments */
		ret = clSetKernelArg(kernelVectSred, 0, sizeof(cl_mem), (void *)&memobjC);
		ret = clSetKernelArg(kernelVectSred, 1, sizeof(cl_mem), (void *)&memobjRes);
		ret = clSetKernelArg(kernelVectSred, 2, sizeof(int), (void *)&row);		
		/* Execute OpenCL Kernel */
		size_t global_item_size = 4;
		size_t local_item_size = 1;
		/* Execute OpenCL kernel as data parallel */
		ret = clEnqueueNDRangeKernel(command_queue, kernelVectSred, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
		//ret =  clEnqueueNDRangeKernel(command_queue, kernelVectSred, 2, NULL, globalThreads, localThreads, NULL, 0, NULL);
		/* Copy results from the memory buffer */
		ret = clEnqueueReadBuffer(command_queue, memobjRes, CL_TRUE, 0, width * sizeof(float),Res, 0, NULL, NULL);

		printf("Matrix Result GPU - Res vector %i\n|",r);
		for(i = 0;i < width; i++) {
			printf("%f|",*(Res+i));
		}
		printf("\n");

		shellsort(Res,width);
		
		printf("Matrix Sorted %i\n|",r);

		for(i = 0;i < width; i++) {
			printf("%f|",*(Res+i));
		}
		printf("\n");

	}
 	printf("\nStopt\n");
        gettimeofday(&T2, NULL);
    	delta_ms =  1000*(T2.tv_sec - T1.tv_sec) + (T2.tv_usec - T1.tv_usec)/1000;
    	printf("\nN=%d. Milliseconds passed: %ld\n", N, delta_ms);

	ret = clFlush(command_queue);
	ret = clFinish(command_queue);
	ret = clReleaseKernel(kernelVectMult);
	ret = clReleaseKernel(kernelMatrixMult);
	ret = clReleaseKernel(kernelVectSred);
	ret = clReleaseProgram(program);
	ret = clReleaseMemObject(memobjA);
	ret = clReleaseMemObject(memobjB);
	ret = clReleaseMemObject(memobjC);
	ret = clReleaseCommandQueue(command_queue);
	ret = clReleaseContext(context);
	free(source_str);	
	return 0;
}
示例#29
0
int
main (int argc, char *argv[])
{
  size_t i,j;
  size_t n = 0;
  double mu = 0, nu = 0, nu1 = 0, nu2 = 0, sigma = 0, a = 0, b = 0, c = 0;
  double zeta = 0, sigmax = 0, sigmay = 0, rho = 0;
  double p = 0;
  double x = 0, y =0, z=0  ;
  unsigned int N = 0, t = 0, n1 = 0, n2 = 0 ;
  unsigned long int seed = 0 ;
  const char * name ;
  gsl_rng * r ;

  if (argc < 4) 
    {
      printf (
"Usage: gsl-randist seed n DIST param1 param2 ...\n"
"Generates n samples from the distribution DIST with parameters param1,\n"
"param2, etc. Valid distributions are,\n"
"\n"
"  beta\n"
"  binomial\n"
"  bivariate-gaussian\n"
"  cauchy\n"
"  chisq\n"
"  dir-2d\n"
"  dir-3d\n"
"  dir-nd\n"
"  erlang\n"
"  exponential\n"
"  exppow\n"
"  fdist\n"
"  flat\n"
"  gamma\n"
"  gaussian-tail\n"
"  gaussian\n"
"  geometric\n"
"  gumbel1\n"
"  gumbel2\n"
"  hypergeometric\n"
"  laplace\n"
"  landau\n"
"  levy\n"
"  levy-skew\n"
"  logarithmic\n"
"  logistic\n"
"  lognormal\n"
"  negative-binomial\n"
"  pareto\n"
"  pascal\n"
"  poisson\n"
"  rayleigh-tail\n"
"  rayleigh\n"
"  tdist\n"
"  ugaussian-tail\n"
"  ugaussian\n"
"  weibull\n") ;
      exit (0);
    }

  argv++ ; seed = atol (argv[0]); argc-- ;
  argv++ ; n = atol (argv[0]); argc-- ;
  argv++ ; name = argv[0] ; argc-- ; argc-- ;

  gsl_rng_env_setup() ;

  if (gsl_rng_default_seed != 0) {
    fprintf(stderr, 
            "overriding GSL_RNG_SEED with command line value, seed = %ld\n", 
            seed) ;
  }
  
  gsl_rng_default_seed = seed ;

  r = gsl_rng_alloc(gsl_rng_default) ;


#define NAME(x) !strcmp(name,(x))
#define OUTPUT(x) for (i = 0; i < n; i++) { printf("%g\n", (x)) ; }
#define OUTPUT1(a,x) for(i = 0; i < n; i++) { a ; printf("%g\n", x) ; }
#define OUTPUT2(a,x,y) for(i = 0; i < n; i++) { a ; printf("%g %g\n", x, y) ; }
#define OUTPUT3(a,x,y,z) for(i = 0; i < n; i++) { a ; printf("%g %g %g\n", x, y, z) ; }
#define INT_OUTPUT(x) for (i = 0; i < n; i++) { printf("%d\n", (x)) ; }
#define ARGS(x,y) if (argc != x) error(y) ;
#define DBL_ARG(x) if (argc) { x=atof((++argv)[0]);argc--;} else {error( #x);};
#define INT_ARG(x) if (argc) { x=atoi((++argv)[0]);argc--;} else {error( #x);};

  if (NAME("bernoulli"))
    {
      ARGS(1, "p = probability of success");
      DBL_ARG(p)
      INT_OUTPUT(gsl_ran_bernoulli (r, p));
    }
  else if (NAME("beta"))
    {
      ARGS(2, "a,b = shape parameters");
      DBL_ARG(a)
      DBL_ARG(b)
      OUTPUT(gsl_ran_beta (r, a, b));
    }
  else if (NAME("binomial"))
    {
      ARGS(2, "p = probability, N = number of trials");
      DBL_ARG(p)
      INT_ARG(N)
      INT_OUTPUT(gsl_ran_binomial (r, p, N));
    }
  else if (NAME("cauchy"))
    {
      ARGS(1, "a = scale parameter");
      DBL_ARG(a)
      OUTPUT(gsl_ran_cauchy (r, a));
    }
  else if (NAME("chisq"))
    {
      ARGS(1, "nu = degrees of freedom");
      DBL_ARG(nu)
      OUTPUT(gsl_ran_chisq (r, nu));
    }
  else if (NAME("erlang"))
    {
      ARGS(2, "a = scale parameter, b = order");
      DBL_ARG(a)
      DBL_ARG(b)
      OUTPUT(gsl_ran_erlang (r, a, b));
    }
  else if (NAME("exponential"))
    {
      ARGS(1, "mu = mean value");
      DBL_ARG(mu) ;
      OUTPUT(gsl_ran_exponential (r, mu));
    }
  else if (NAME("exppow"))
    {
      ARGS(2, "a = scale parameter, b = power (1=exponential, 2=gaussian)");
      DBL_ARG(a) ;
      DBL_ARG(b) ;
      OUTPUT(gsl_ran_exppow (r, a, b));
    }
  else if (NAME("fdist"))
    {
      ARGS(2, "nu1, nu2 = degrees of freedom parameters");
      DBL_ARG(nu1) ;
      DBL_ARG(nu2) ;
      OUTPUT(gsl_ran_fdist (r, nu1, nu2));
    }
  else if (NAME("flat"))
    {
      ARGS(2, "a = lower limit, b = upper limit");
      DBL_ARG(a) ;
      DBL_ARG(b) ;
      OUTPUT(gsl_ran_flat (r, a, b));
    }
  else if (NAME("gamma"))
    {
      ARGS(2, "a = order, b = scale");
      DBL_ARG(a) ;
      DBL_ARG(b) ;
      OUTPUT(gsl_ran_gamma (r, a, b));
    }
  else if (NAME("gaussian"))
    {
      ARGS(1, "sigma = standard deviation");
      DBL_ARG(sigma) ;
      OUTPUT(gsl_ran_gaussian (r, sigma));
    }
  else if (NAME("gaussian-tail"))
    {
      ARGS(2, "a = lower limit, sigma = standard deviation");
      DBL_ARG(a) ;
      DBL_ARG(sigma) ;
      OUTPUT(gsl_ran_gaussian_tail (r, a, sigma));
    }
  else if (NAME("ugaussian"))
    {
      ARGS(0, "unit gaussian, no parameters required");
      OUTPUT(gsl_ran_ugaussian (r));
    }
  else if (NAME("ugaussian-tail"))
    {
      ARGS(1, "a = lower limit");
      DBL_ARG(a) ;
      OUTPUT(gsl_ran_ugaussian_tail (r, a));
    }
  else if (NAME("bivariate-gaussian"))
    {
      ARGS(3, "sigmax = x std.dev., sigmay = y std.dev., rho = correlation");
      DBL_ARG(sigmax) ;
      DBL_ARG(sigmay) ;
      DBL_ARG(rho) ;
      OUTPUT2(gsl_ran_bivariate_gaussian (r, sigmax, sigmay, rho, &x, &y), 
              x, y);
    }
  else if (NAME("dir-2d"))
    {
      OUTPUT2(gsl_ran_dir_2d (r, &x, &y), x, y);
    }
  else if (NAME("dir-3d"))
    {
      OUTPUT3(gsl_ran_dir_3d (r, &x, &y, &z), x, y, z);
    }
  else if (NAME("dir-nd"))
    {
      double *xarr;  
      ARGS(1, "n1 = number of dimensions of hypersphere"); 
      INT_ARG(n1) ;
      xarr = (double *)malloc(n1*sizeof(double));

      for(i = 0; i < n; i++) { 
        gsl_ran_dir_nd (r, n1, xarr) ; 
        for (j = 0; j < n1; j++) { 
          if (j) putchar(' '); 
          printf("%g", xarr[j]) ; 
        } 
        putchar('\n'); 
      } ;

      free(xarr);
    }  
  else if (NAME("geometric"))
    {
      ARGS(1, "p = bernoulli trial probability of success");
      DBL_ARG(p) ;
      INT_OUTPUT(gsl_ran_geometric (r, p));
    }
  else if (NAME("gumbel1"))
    {
      ARGS(2, "a = order, b = scale parameter");
      DBL_ARG(a) ;
      DBL_ARG(b) ;
      OUTPUT(gsl_ran_gumbel1 (r, a, b));
    }
  else if (NAME("gumbel2"))
    {
      ARGS(2, "a = order, b = scale parameter");
      DBL_ARG(a) ;
      DBL_ARG(b) ;
      OUTPUT(gsl_ran_gumbel2 (r, a, b));
    }
  else if (NAME("hypergeometric"))
    {
      ARGS(3, "n1 = tagged population, n2 = untagged population, t = number of trials");
      INT_ARG(n1) ;
      INT_ARG(n2) ;
      INT_ARG(t) ;
      INT_OUTPUT(gsl_ran_hypergeometric (r, n1, n2, t));
    }
  else if (NAME("laplace"))
    {
      ARGS(1, "a = scale parameter");
      DBL_ARG(a) ;
      OUTPUT(gsl_ran_laplace (r, a));
    }
  else if (NAME("landau"))
    {
      ARGS(0, "no arguments required");
      OUTPUT(gsl_ran_landau (r));
    }
  else if (NAME("levy"))
    {
      ARGS(2, "c = scale, a = power (1=cauchy, 2=gaussian)");
      DBL_ARG(c) ;
      DBL_ARG(a) ;
      OUTPUT(gsl_ran_levy (r, c, a));
    }
  else if (NAME("levy-skew"))
    {
      ARGS(3, "c = scale, a = power (1=cauchy, 2=gaussian), b = skew");
      DBL_ARG(c) ;
      DBL_ARG(a) ;
      DBL_ARG(b) ;
      OUTPUT(gsl_ran_levy_skew (r, c, a, b));
    }
  else if (NAME("logarithmic"))
    {
      ARGS(1, "p = probability");
      DBL_ARG(p) ;
      INT_OUTPUT(gsl_ran_logarithmic (r, p));
    }
  else if (NAME("logistic"))
    {
      ARGS(1, "a = scale parameter");
      DBL_ARG(a) ;
      OUTPUT(gsl_ran_logistic (r, a));
    }
  else if (NAME("lognormal"))
    {
      ARGS(2, "zeta = location parameter, sigma = scale parameter");
      DBL_ARG(zeta) ;
      DBL_ARG(sigma) ;
      OUTPUT(gsl_ran_lognormal (r, zeta, sigma));
    }
  else if (NAME("negative-binomial"))
    {
      ARGS(2, "p = probability, a = order");
      DBL_ARG(p) ;
      DBL_ARG(a) ;
      INT_OUTPUT(gsl_ran_negative_binomial (r, p, a));
    }
  else if (NAME("pareto"))
    {
      ARGS(2, "a = power, b = scale parameter");
      DBL_ARG(a) ;
      DBL_ARG(b) ;
      OUTPUT(gsl_ran_pareto (r, a, b));
    }
  else if (NAME("pascal"))
    {
      ARGS(2, "p = probability, n = order (integer)");
      DBL_ARG(p) ;
      INT_ARG(N) ;
      INT_OUTPUT(gsl_ran_pascal (r, p, N));
    }
  else if (NAME("poisson"))
    {
      ARGS(1, "mu = scale parameter");
      DBL_ARG(mu) ;
      INT_OUTPUT(gsl_ran_poisson (r, mu));
    }
  else if (NAME("rayleigh"))
    {
      ARGS(1, "sigma = scale parameter");
      DBL_ARG(sigma) ;
      OUTPUT(gsl_ran_rayleigh (r, sigma));
    }
  else if (NAME("rayleigh-tail"))
    {
      ARGS(2, "a = lower limit, sigma = scale parameter");
      DBL_ARG(a) ;
      DBL_ARG(sigma) ;
      OUTPUT(gsl_ran_rayleigh_tail (r, a, sigma));
    }
  else if (NAME("tdist"))
    {
      ARGS(1, "nu = degrees of freedom");
      DBL_ARG(nu) ;
      OUTPUT(gsl_ran_tdist (r, nu));
    }
  else if (NAME("weibull"))
    {
      ARGS(2, "a = scale parameter, b = exponent");
      DBL_ARG(a) ;
      DBL_ARG(b) ;
      OUTPUT(gsl_ran_weibull (r, a, b));
    }
  else
    {
      fprintf(stderr,"Error: unrecognized distribution: %s\n", name) ;
    }

  return 0 ;
}
示例#30
0
int
main (int argc, char *argv[])
{
  double N1, N2, Nanc, NancLower, *uniqTauArray = NULL, *taxonTauArray = NULL,
         *descendant1ThetaArray = NULL, *descendant2ThetaArray = NULL,
         *ancestralThetaArray = NULL, spTheta, thetaMean, tauequalizer, gaussTime = 0.0,
         mig, rec, BottStr1, BottStr2, BottleTime;
  double *recTbl;
  int tauClass, *PSIarray = NULL, i;
  unsigned int numTauClasses = -1, u, locus, taxonID, zzz;
  unsigned long randSeed;
  unsigned long long rep;
  extern const gsl_rng *gBaseRand;
  int comp_nums (const void *, const void *);

  int b_constrain = 0;
  int *subParamConstrainConfig = NULL;

#ifndef HOMOGENEOUS_MUT
  double *mutScalerTbl;
#endif

  /* set up gParam and gMutParam, as well as gConParam if constrain */
  LoadConfiguration (argc, argv);

  /* set the lower Nanc */
  /* NancLower = 0.00001 * gParam.lowerTheta; */
  /* if (NancLower < 0.00000000004) { /1* 4 * (mu=10^(-11)) * (Ne=1) *1/ */
  /*   NancLower = 0.00000000004; */
  /* } */

  /* set b_constrain to 1 if constrain */
  if (gParam.constrain > 0)
    {
      //initialize constrain indicator
      b_constrain = 1;

      //initialize subParamConstrainConfig array
      subParamConstrainConfig = calloc (NUMBER_OF_CONPARAM, sizeof (int));
      if (subParamConstrainConfig == NULL)
	{
	  fprintf (stderr,
		   "ERROR: Not enough memory for subParamConstrainConfig\n");
	  exit (EXIT_FAILURE);
	}

      for (i = 0; i < strlen (gParam.subParamConstrain); i++)
	{
	  char a = (gParam.subParamConstrain)[i];

	  if (a == '1')
	    subParamConstrainConfig[i] = 1;
	  else if (a == '0')
	    subParamConstrainConfig[i] = 0;
	  else {
	    fprintf(stderr, "ERROR: subParamConstrain string in the config file"
		    "should be either 0 or 1\n");
	    exit (EXIT_FAILURE);
	  }
	}
    }

  /* for initiating the gsl random number generator */
  /* initialize PRNG */
  srand (gParam.prngSeed);	/* Better way of seeding here ? */
  randSeed = rand ();
  if (debug_level > 0)
    randSeed = 1;

  gBaseRand = gsl_rng_alloc (gsl_rng_mt19937);	/* set the base PRNG to
						   Mersenne Twister */
  gsl_rng_set (gBaseRand, randSeed);	/* seed the PRNG */

  /* print out all of the parameters */
  if(gParam.printConf) {
    PrintParam(stdout);
    exit (0);
  }

  /* set up arrays */
  /* Sizes are set to the number of taxon pairs (Max number of tau's) */
  if ((b_constrain == 1) && (subParamConstrainConfig[0] == 1)) {
    uniqTauArray = calloc (gParam.numTaxonLocusPairs, sizeof (double));
    PSIarray = calloc (gParam.numTaxonLocusPairs, sizeof (int));
    taxonTauArray = calloc(gParam.numTaxonLocusPairs, sizeof (double));
  } else {
    uniqTauArray = calloc (gParam.numTaxonPairs, sizeof (double));
    PSIarray = calloc (gParam.numTaxonPairs, sizeof (int));
    taxonTauArray = calloc(gParam.numTaxonPairs, sizeof (double));
  }
  descendant1ThetaArray = calloc (gParam.numTaxonPairs, sizeof (double));
  descendant2ThetaArray = calloc (gParam.numTaxonPairs, sizeof (double));
  ancestralThetaArray = calloc (gParam.numTaxonPairs, sizeof (double));

  recTbl = calloc (gParam.numLoci, sizeof (double));

  if (uniqTauArray == NULL || PSIarray == NULL || recTbl == NULL ||
          taxonTauArray == NULL || descendant1ThetaArray == NULL ||
          descendant2ThetaArray == NULL || ancestralThetaArray == NULL)
    {
      fprintf (stderr, "ERROR: Not enough memory for uniqTauArray, PSIarray, or recTbl\n");
      exit (EXIT_FAILURE);
    }

  /* deal with num tau classes */
  if (b_constrain == 0 || subParamConstrainConfig[0] != 1)
    {
      /* fixed numTauClasses configuration */
      if (gParam.numTauClasses != 0)
	{
	  if (gParam.numTauClasses > gParam.numTaxonPairs)
	    {
	      fprintf (stderr, "WARN: numTauClasses (%u) is larger than "
		       "numTaxonPairs (%u). Setting numTauClasses to %u",
		       gParam.numTauClasses, gParam.numTaxonPairs,
		       gParam.numTaxonPairs);
	      gParam.numTauClasses = gParam.numTaxonPairs;
	    }
	  numTauClasses = gParam.numTauClasses;
	}
    }  /* when tau is constrained numTauClasses are set later */

  /* deal with the case when tau is constrained */
  if ((b_constrain == 1) && (subParamConstrainConfig[0] == 1)) {
    int jj, kk;
    double *tempTauArray;
    if ((tempTauArray = calloc(gParam.numTaxonLocusPairs, sizeof(double))) 
	== NULL) {
      fprintf (stderr, "ERROR: Not enough memory for tempTauArray\n");
      exit (EXIT_FAILURE);
    }
    for (jj = 0; jj < gParam.numTaxonLocusPairs; jj++) {
      tempTauArray[jj] = (gConParam.conData[jj]).conTau;
    }
    numTauClasses = UniqueDouble(tempTauArray, uniqTauArray, 
			   gParam.numTaxonLocusPairs, DBL_EPSILON);
    
    if (gParam.numTauClasses != numTauClasses) {
      fprintf (stderr, "WARN: tau's are constrained and found %u different "
	       "classes in the constrain table. But numTauClasses = %u was set."
	       " Using the value found in the constrain table.\n", numTauClasses,
	       gParam.numTauClasses);
      gParam.numTauClasses = numTauClasses;
    } 
    
    /* count tau's to create PSIarray */
    for (jj = 0; jj < gParam.numTaxonLocusPairs; jj++) {
      PSIarray[jj] = 0;
    }
    for (jj = 0; jj < gParam.numTaxonLocusPairs; jj++) {
      for (kk = 0; kk < numTauClasses; kk++) {
	/* there shouldn't be fabs() below */
	if (tempTauArray[jj] - uniqTauArray[kk] < DBL_EPSILON) {
	  PSIarray[kk]++;
	  break;
	}
      }
    }
    free (tempTauArray);
  }

#ifndef HOMOGENEOUS_MUT
  if ((mutScalerTbl = calloc(gParam.numLoci, sizeof(double))) == NULL) {
    fprintf (stderr, "ERROR: Not enough memory for mutScalerTbl\n");
    exit(EXIT_FAILURE);
  }
#endif

  thetaMean = 1.0;
  if (gParam.timeInSubsPerSite == 0) {
    thetaMean = (gParam.lowerTheta + gParam.upperTheta) / 2.0;
  }

  /* Beginning of the main loop */
  for (rep = 0; rep < gParam.reps; rep++)
    {
      int lociTaxonPairIDcntr = 1;
      /*
       * Each taxon pair was separated at a time tau in the past.  Of
       * all pairs, some of them may have been separated at the same
       * time.  numTauClasses is the number of classes with different
       * divergence time.
       *
       * If gParam.numTauClasses is not set, we are sampling
       * numTauClasses from a uniform prior dist'n.
       */
      if (gParam.numTauClasses == 0)
	{			/* numTauClasses is NOT fixed */
	  numTauClasses =
	    1 + gsl_rng_uniform_int (gBaseRand, gParam.numTaxonPairs);
	}
      
      /* create the recombination rate table for each gene */
      rec = gsl_ran_flat (gBaseRand, 0.0, gParam.upperRec);
      for (u=0; u < gParam.numLoci; u++)
	{
	  /* all loci shares same recombination rate */
	  recTbl[u] = rec;
	  /* each locus has different recomb. rate 
	     recTbl[u] = gsl_ran_flat (gBaseRand, 0.0, gParam.upperRec);
	  */
	}
      
#ifndef HOMOGENEOUS_MUT
      /* create regional heterogeneity in the mutation rate */
      if (gParam.numLoci > 1) {
	double shape, scale;
	
	/* arbitrary sample the shape parameter from uniform dist'n */
	shape = gsl_ran_flat(gBaseRand, 1.0, 20);
	/* shape = 1 is exponential with lambda=1, 
	   larger shape -> normal dist'n with smaller var */
	scale = 1/shape; /* E[x] = 1, Var[x] = shape * scale^2 = 1/shape */
	
	/* use gamma */
	for (u=0; u < gParam.numLoci; u++) {
	  mutScalerTbl[u] = gsl_ran_gamma(gBaseRand, shape, scale);
	}
      } else {
	mutScalerTbl[0] = 1.0;
      }
#endif

      // Randomly generate TauArray only when NOT constrain
      if ((b_constrain == 0) || (subParamConstrainConfig[0] != 1))
	{
	  int counter;
	  /* sample tau's from uniform prior dist'n */
	  for (u = 0; u < numTauClasses; u++)
// JRO - modified - 11/17/2011
//	    uniqTauArray[u] = gsl_ran_flat (gBaseRand, 0.0, gParam.upperTau);
	    uniqTauArray[u] = gsl_ran_flat (gBaseRand, gParam.lowerTau,
	                                    gParam.upperTau);

          qsort(uniqTauArray, numTauClasses, sizeof(double),comp_nums);

          for (counter = 0; counter < numTauClasses; counter++) 
	    {
	      taxonTauArray[counter] = uniqTauArray[counter];
	      PSIarray[counter] = 1;
	    }

          for (counter = numTauClasses; 
	       counter < gParam.numTaxonPairs; counter++)
	    {
	      tauClass = gsl_rng_uniform_int(gBaseRand, numTauClasses);
	      taxonTauArray[counter] = uniqTauArray[tauClass];
	      PSIarray[tauClass] = PSIarray[tauClass] + 1;
	    }

	  /* randomly shuflling the order of taxonTauArray */
	  gsl_ran_shuffle(gBaseRand, taxonTauArray, 
			  gParam.numTaxonPairs, sizeof (double));
	}
      
      for (taxonID = 0; taxonID < gParam.numTaxonPairs; taxonID++)
	{
	  //Check upperAncPopSize before doing anything
	  /* ancestral population size prior */
	  if (gParam.upperAncPopSize < gParam.lowerTheta)
	    {
	      fprintf (stderr,
		       "The upper bound (%lf * %lf) of ancestral pop. size is "
		       "smaller than the lower bound (%lf)\n",
		       gParam.upperAncPopSize, gParam.upperTheta, gParam.lowerTheta);
	      exit (EXIT_FAILURE);
	    }

	  constrainedParameter conTaxonPairDat;

	  /* Population sizes during the bottleneck after the divergence of 2 
	     pops. This is same as the population sizes, immediately after the 
	     divergence/separation of the 2 pops. These are relative sizes. */
	  BottStr1 = gsl_ran_flat (gBaseRand, 0.01, 1.0);
	  BottStr2 = gsl_ran_flat (gBaseRand, 0.01, 1.0);

	  /* After the populations diverge, they experience pop. bottleneck.
	     Then the population size exponentially grows until current size.
	     BottleTime indicate the time when population started to grow.  
	     BottleTime of 1 means, populations start to expand immediately
	     after divergence. Closer to 0 means, populations hasn't started
	     to expand until very recently.  */
	  BottleTime = gsl_ran_flat (gBaseRand, 0.000001, 1.0);

	  /* migration rate prior */
	  mig = gsl_ran_flat (gBaseRand, 0.0, gParam.upperMig);
	  /* spTheta prior */
	  while ((spTheta = gsl_ran_flat (gBaseRand, gParam.lowerTheta,
					  gParam.upperTheta)) <= 0);

	  /* The ratio of current population sizes.  The populations
	     exponentially grow to these sizes after bottkleneck is done. */
	  /* both ends excluded for symmetry */
	  while ((N1 = gsl_ran_flat (gBaseRand, 0.01, 1.99)) == 0.01)
	    ;
	  
	  N2 = 2.0 - N1;

	  /* The upper limit of ancestral theta is defined by the product
	     of upper Theta (e.g. 40) and upper AncPopSize (e.g. 0.5) */
	  /* JRO - changing the following hard coded lower limit on ancestral
	     theta to the lower limit specified by user */
	  /* Nanc = gsl_ran_flat (gBaseRand, 0.01,
			       gParam.upperAncPopSize * gParam.upperTheta);*/
	  Nanc = gsl_ran_flat (gBaseRand, gParam.lowerTheta,
			       gParam.upperAncPopSize * gParam.upperTheta);

      descendant1ThetaArray[taxonID] = spTheta * N1;
      descendant2ThetaArray[taxonID] = spTheta * N2;
      ancestralThetaArray[taxonID] = Nanc;
	  
	  /* pick a tau for every taxon-pair with replacement from the
	     array of X taxon-pairs, where X is a uniform discrete RV
	     from 1 to number of taxon-pairs */
	  if ((b_constrain == 0) || (subParamConstrainConfig[0] != 1))
	    {
	      gaussTime = taxonTauArray[taxonID];
	    }

	  /* use the following if simulating a particular fixed history */
	  /* gaussTime = uniqTauArray[taxonID]; */
	  
	  /* print out the results by going through each locus */
	  for (locus = 0; locus < gParam.numLoci; locus++)
	    {
	      double locTheta, thisNanc, scaledGaussTime, scaledBottleTime;
	      /* check if this locus exist for this taxon pair */
	      /* this table contains 0-offset index for corresponding 
		 taxon:locus mutPara */
	      int mpIndex = gMutParam.locTbl->tbl[taxonID][locus];
	      
	      if(mpIndex<0) { /* this taxon:locus is not in the data */
		continue;
	      }

	      if (b_constrain == 1)
		{  /* If constrained, override with the fixed paras */
		  /* This part is not debugged well 2/14/2008, Naoki */
		  int mpIndex = gMutParam.locTbl->tbl[taxonID][locus];
		  conTaxonPairDat = gConParam.conData[mpIndex];

		  /* tau */
		  /* This allow that tau could differ between loci
		     within a single taxon pair */
		  if (subParamConstrainConfig[0] == 1)
		    gaussTime = conTaxonPairDat.conTau;

		  /** bottleneck priors **/
		  /* severity of bottle neck (how small the pop become) */
		  /* these should be [0,1] */
		  if (subParamConstrainConfig[1] == 1)
		    BottStr1 = conTaxonPairDat.conBottPop1;
		  if (subParamConstrainConfig[2] == 1)
		    BottStr2 = conTaxonPairDat.conBottPop2;
		  
		  /* timing of bottle neck */
		  /* should be [0,1] */
		  if (subParamConstrainConfig[3] == 1)
		    BottleTime = conTaxonPairDat.conBottleTime;
		  
		  /* migration rate prior */
		  if (subParamConstrainConfig[4] == 1)
		    mig = conTaxonPairDat.conMig;
		  
		  /* theta per site */
		  if (subParamConstrainConfig[5] == 1)
		    spTheta = conTaxonPairDat.conTheta;
		  
		  /* population sizes immediately after the separation, and 
		     what it grows to after the bottleneck (today) */
		  /* (0.01, 1.99) */
		  if (subParamConstrainConfig[6] == 1) {
		    N1 = conTaxonPairDat.conN1;
		    N2 = 2.0 - N1;
		  }
		  
		  /* The upper limit of ancestral theta is defined by the 
		     product of upper Theta (e.g. 40) and upper 
		     AncPopSize (e.g. 0.5), then converted to relative size 
		     to spTheta */
		  if (subParamConstrainConfig[7] == 1)
		    Nanc = conTaxonPairDat.conNanc * gParam.upperTheta;
		  
		  /* recombination rate per neighboring site */
		  if (subParamConstrainConfig[8] == 1)
		    recTbl[locus] = conTaxonPairDat.conRec;
		}  /* end of constrai */

	      /* access sample sizes, mutational model for this taxon:locus */
	      mutParameter taxonPairDat;
	      taxonPairDat = gMutParam.data[mpIndex];
	      
	      /* scale the theta for each locus */
	      /* Note that species wide theta (represents pop size) is 
	         4 Ne mu with mu per site, not per gene.
		 Assumes mu is constant.  This may be a problem with
	         mitochondoria */
	      locTheta = spTheta * taxonPairDat.seqLen * 
		taxonPairDat.NScaler * taxonPairDat.mutScaler;
#ifndef HOMOGENEOUS_MUT
	      locTheta *=  mutScalerTbl[locus];
#endif

	      /* thisNanc is basically a random deviate from a uniform dist'n:
		 [gParam.lowerTheta / spTheta, 
		   gParam.upperAncPopSize * gParam.upperTheta/spTheta) 
		 For example, if upperTheta = 10 & upperAncPopSize = 0.5,
		 upperAncTheta become 10 * 0.5 = 5.
		 msDQH specify the past population sizes in terms of the 
		 ratio of N_anc / N_theta, so the following division
		 by locTheta is required.
	      */
	      /* thisNanc = Nanc * taxonPairDat.seqLen / locTheta; */
	      thisNanc = Nanc / spTheta; /* this can be done outside of locus loop */

	      /* this scaling is done inside of locus loop to accomodate 
		 the gamma dist'n of mut rate for each locus */

	      /* tauequalizer = gParam.upperTheta / */ 
		/* 2 / (spTheta * taxonPairDat.NScaler); */
          tauequalizer = thetaMean / (spTheta * taxonPairDat.NScaler);
	      /* WORK, CONFIRM THIS. Naoki Nov 2, 2009.  IT USED TO BE
		 tauequalizer = gParam.upperTheta * taxonPairDat.seqLen / 
		 2 / locTheta;

	      */

	      /* Division by 2 is coming from N1 + N2 = 2.
		 We are considering that N_0 in theta_0 (=4 N_0 mu) specified 
		 for -t option (we use -t locTheta) of msDQH is equal to 
		 (N1+N2)/2 */

	      scaledGaussTime = gaussTime * tauequalizer;
	      /* 1 unit of tau (gaussTime) = 2 N_max (N_max is the 
		 N assumed in upperTheta) */
	      /* I think we should get rid of /2 from tauequalizer */

          /* JRO: Yes the following is weird and the threshold of 0.0001
           * coalescent units can actually be thousands of generations which
           * is not trivial. Also, the hack to avoid unrealistic growth rates
           * is the wrong approach. If the div time is essentially zero, then
           * there should simply be no bottleneck. Updating to make the
           * threshold smaller, and simply preventing a bottleneck if the
           * div time is smaller.*/
	      /* The following if is a little weird */
	      /* if (scaledGaussTime < 0.0001) { */
		/* scaledGaussTime  = 0.0001; */
		/* scaledBottleTime = 0.00005; */
	      /* } else { */
		/* scaledBottleTime = BottleTime * 0.95 * scaledGaussTime; */
	      /* } */
            if (scaledGaussTime < 0.000001) {
                // no bottleneck if div time is essentially zero
                BottStr1 = 1.0;
                BottStr2 = 1.0;
            }
            scaledBottleTime = BottleTime * 0.95 * scaledGaussTime;
	      
	      if (debug_level)
		fprintf (stderr, 
			 "DEBUG: scaled BottleTime:%lf\tgaussTime:%lf\n",
			 scaledBottleTime, scaledGaussTime);

	      /* We can send some extra info to msbayes.pl here */
	      printf ("%u %u %u ", lociTaxonPairIDcntr, taxonID+1, locus+1);
	      lociTaxonPairIDcntr ++; /* seriral id: 1 to # taxon:locus pairs */
	      printf ("%.17lf %.17lf %.17lf %.17lf ",
		      locTheta, scaledGaussTime, mig, 
		      recTbl[locus] * (taxonPairDat.seqLen - 1));
	      printf ("%.17lf %.17lf %.17lf ", scaledBottleTime, 
		      BottStr1 * N1, BottStr2 * N2);
	      printf ("%u %u %u %lf %lf %lf ",
		      taxonPairDat.numPerTaxa,
		      taxonPairDat.sample[0], taxonPairDat.sample[1],
		      taxonPairDat.tstv[0], taxonPairDat.tstv[1],
		      taxonPairDat.gamma);
	      printf ("%u %.17lf %.17lf %.17lf ",
		      taxonPairDat.seqLen, N1, N2, thisNanc);
	      printf ("%lf %lf %lf %lf\n",
		      taxonPairDat.freqA, taxonPairDat.freqC,
		      taxonPairDat.freqG, taxonPairDat.freqT);

	      /* These feed into the system command line (msDQH) within
	         the perl shell msbayes.  Some of these are used directly
	         by msDQH, but some are also passed on to the sumstats
	         programs via the msDQH commabnd line, .... like bp[taxonID],
	         theta, gaussTime, NumPerTax[taxonID], yy, */
	    }
	}

      /* The followings are used to calculate prior, processed in msbayes.pl */
      printf ("# TAU_PSI_TBL setting: %d realizedNumTauClasses: %u", 
	      gParam.numTauClasses, numTauClasses);
      printf(" tauTbl:");
      for (zzz = 0; zzz < gParam.numTaxonPairs; zzz++)
          printf (",%.11lf", taxonTauArray[zzz]);
      printf(" d1ThetaTbl:");
      for (zzz = 0; zzz < gParam.numTaxonPairs; zzz++)
          printf (",%lf", descendant1ThetaArray[zzz]);
      printf(" d2ThetaTbl:");
      for (zzz = 0; zzz < gParam.numTaxonPairs; zzz++)
          printf (",%lf", descendant2ThetaArray[zzz]);
      printf(" aThetaTbl:");
      for (zzz = 0; zzz < gParam.numTaxonPairs; zzz++)
          printf (",%lf", ancestralThetaArray[zzz]);
      printf("\n");

    }

  free (uniqTauArray);
  free (taxonTauArray);
  free (PSIarray);
  free (descendant1ThetaArray);
  free (descendant2ThetaArray);
  free (ancestralThetaArray);
  free (recTbl);
  free (subParamConstrainConfig);
  exit (0);
}