/*
 * Randomly initialise model->mu.
 *
 * Every entry mu[j][n] (j < model->m, n < model->n) is set to
 * model->nu + 1.0 plus one draw from gsl_rng_uniform(r).  One Dirichlet
 * vector whose concentrations all equal model->nu is also sampled per row;
 * its values are not copied into the model, but the call is kept so the
 * generator r advances exactly as it did before.
 *
 * corpus and ss are not read by this function.
 */
void random_initialize_model(vblda_model * model, vblda_corpus* corpus, vblda_ss* ss){
	double* draw = (double*) malloc(sizeof(double)*model->n);
	double* conc = (double*) malloc(sizeof(double)*model->n);
	int row, col;

	/* Both scratch vectors start out zeroed. */
	for (col = 0; col < model->n; col++){
		draw[col] = 0.0;
		conc[col] = 0.0;
	}

	for (row = 0; row < model->m; row++){
		/* Symmetric concentration vector for this row's Dirichlet draw. */
		for (col = 0; col < model->n; col++){
			conc[col] = model->nu;
		}
		gsl_ran_dirichlet (r, model->n, conc, draw);

		/*
		 * Earlier variants, kept for reference:
		 *   model->mu[row][col] = draw[col];
		 *   model->mu[row][col] = gsl_ran_gamma(r, 100.0, 1.0/100.);
		 */
		for (col = 0; col < model->n; col++){
			model->mu[row][col] = model->nu + 1.0 + gsl_rng_uniform(r);
		}
	}

	free(draw);
	free(conc);
}
/*
 * Sample-based estimate of a normalization constant.
 *
 * Author's derivation: assume a normalized distribution P(x) and a
 * sample-generating distribution p(x) such that P(x) = z p(x); we want z.
 * The domain of x is the 4-simplex with volume V.  Each sample point is
 * taken to sit in its own sub-volume, and the N sub-volumes together tile V.
 * Treating the density as constant over a sub-volume (equal to its value at
 * the sample point), and giving every sub-volume mass 1/N out of a total
 * mass of 1, volume = mass / density, so summing over sub-volumes:
 *
 *   sum_i{ (1/N) / P(x_i) }       = V
 *   sum_i{ (1/N) / (z p(x_i)) }   = V
 *   (1/(N z)) sum_i{ 1/p(x_i) }   = V
 *   z = (1/(N V)) sum_i{ 1/p(x_i) }  ~  (1/N) sum_i{ 1/p(x_i) }
 *
 * populate_points fills `npoints` entries of `points`.  For each entry it
 * draws x from a Dirichlet with parameters alpha_dist, stores the density and
 * log-density of x under both alpha_dist (d, ln_d) and alpha_weights
 * (o, ln_o), copies `name` into the entry's `dist` field with strcpy (the
 * caller must make sure it fits), and records npoints in num_samples.
 * *dist_norm receives the mean of 1/d over all points, i.e. the
 * (1/N) sum_i{ 1/p(x_i) } estimate above.
 *
 * A log-space accumulation (safe_sum of -ln_d, then exp) was tried at some
 * point and is not used.
 */
void populate_points(struct wpoint *points, unsigned npoints, double *alpha_dist, double *alpha_weights, gsl_rng *rand, const char *name, double *dist_norm)
{
    double inv_density_total = 0;
    unsigned idx;

    for (idx = 0; idx != npoints; ++idx)
    {
        struct wpoint *pt = &points[idx];

        /* Draw the point itself. */
        gsl_ran_dirichlet(rand, NUCS, alpha_dist, pt->x);

        /* Densities under the sampling and the weighting parameters. */
        pt->d = gsl_ran_dirichlet_pdf(NUCS, alpha_dist, pt->x);
        pt->o = gsl_ran_dirichlet_pdf(NUCS, alpha_weights, pt->x);
        pt->ln_d = gsl_ran_dirichlet_lnpdf(NUCS, alpha_dist, pt->x);
        pt->ln_o = gsl_ran_dirichlet_lnpdf(NUCS, alpha_weights, pt->x);

        /* Bookkeeping: which distribution produced it and the batch size. */
        strcpy(pt->dist, name);
        pt->num_samples = npoints;

        inv_density_total += 1.0 / pt->d;
    }

    inv_density_total /= (double)npoints;
    *dist_norm = inv_density_total;
}
/*
 * Print one line per GSL Dirichlet routine so the output can be compared
 * against another binding.
 *
 * Line 1: a random draw (alpha printed first, then the drawn theta).
 * Line 2: the pdf at a fixed theta (theta printed first, then alpha).
 * Line 3: the log-pdf at the same fixed theta, same column order.
 */
void test_dirichlet(void){
  double alpha[SIZE] = { .4, .9, .4, .2 };
  double theta[SIZE] = { 0.0, 0.0, 0.0, 0.0 };
  double density;
  double log_density;

  /* Random draw. */
  gsl_ran_dirichlet (rng, SIZE, alpha, theta);
  printf("gsl_ran_dirichlet\t");
  print_double_array(alpha, SIZE);
  printf("\t");
  print_double_array(theta, SIZE);
  printf("\n");

  /* Replace the draw with a fixed evaluation point. */
  theta[0] = 0.107873072217;
  theta[1] = 0.518033738502;
  theta[2] = 0.220000000209;
  theta[3] = 0.154093189072;

  /* theta and alpha flipped in perl interface */
  density = gsl_ran_dirichlet_pdf (SIZE, alpha, theta);
  printf("gsl_ran_dirichlet_pdf\t");
  print_double_array(theta, SIZE);
  printf("\t");
  print_double_array(alpha, SIZE);
  printf("\t%.12f\n", density);

  log_density = gsl_ran_dirichlet_lnpdf (SIZE, alpha, theta);
  printf("gsl_ran_dirichlet_lnpdf\t");
  print_double_array(theta, SIZE);
  printf("\t");
  print_double_array(alpha, SIZE);
  printf("\t%.12f\n", log_density);
}
void rdirichlet(double alpha, int* n, int K, double* theta) { //alpha - symmetric hyperparameter //n - integer hyperparameter offsets (can be posterior counts) int k; double* a = (double*) malloc(sizeof(double)*K); for (k=0; k < K; k++) a[k] = alpha + n[k]; gsl_ran_dirichlet (RANDOM_NUMBER, (size_t)K, a, theta); free(a); }
/* DIRICHLET */

/*
 * OCaml stub around gsl_ran_dirichlet.
 *
 * alpha and theta are OCaml float arrays; theta is overwritten with the draw.
 * When their lengths differ, GSL_ERROR is raised with GSL_EBADLEN and the
 * sampler is not called.  Returns unit.
 */
CAMLprim value ml_gsl_ran_dirichlet(value rng, value alpha, value theta)
{
  const size_t n_alpha = Double_array_length(alpha);
  const size_t n_theta = Double_array_length(theta);

  if(n_theta != n_alpha)
    {
      GSL_ERROR("alpha and theta must have same size", GSL_EBADLEN);
    }

  gsl_ran_dirichlet(Rng_val(rng),
                    n_alpha,
                    Double_array_val(alpha),
                    Double_array_val(theta));
  return Val_unit;
}
/* linear interpolation: */ if (i >= PDFLEN - 1) { i = PDFLEN - 1; pdf_x = y * pdf_stdnormal[i]; } else pdf_x = y * (pdf_stdnormal[i] + (z - i * X_STEP_PDF) * (pdf_stdnormal[i + 1] - pdf_stdnormal[i]) / X_STEP_PDF); return (pdf_x); STOP: return (-1.0); # undef CUR_PROC } /* double ighmm_rand_normal_density_approx */ double ighmm_rand_dirichlet(int seed, int len, double *alpha, double *theta){ if (seed != 0) { GHMM_RNG_SET(RNG, seed); } #ifdef DO_WITH_GSL gsl_ran_dirichlet(RNG, len, alpha, theta); #else printf("not implemted without gsl. Compile with gsl to use dirichlet"); #endif }
/*
 * Sampler hook: one draw from a two-component Dirichlet with very small
 * parameters (2.5e-3, 5.0e-3); returns the first component.
 */
double
test_dirichlet_small (void)
{
  double alpha[2] = { 2.5e-3, 5.0e-3};
  double theta[2] = { 0.0, 0.0 };
  const size_t n_components = sizeof (alpha) / sizeof (alpha[0]);

  gsl_ran_dirichlet (r_global, n_components, alpha, theta);

  return theta[0];
}
/*
 * Sampler hook: one draw from a two-component Dirichlet with parameters
 * (2.5, 5.0); returns the first component.
 *
 * This is a bit of a lame test, since when K=2, the Dirichlet distribution
 * becomes a beta distribution.
 */
double
test_dirichlet (void)
{
  double alpha[2] = { 2.5, 5.0 };
  double theta[2] = { 0.0, 0.0 };
  const size_t n_components = sizeof (alpha) / sizeof (alpha[0]);

  gsl_ran_dirichlet (r_global, n_components, alpha, theta);

  return theta[0];
}
/*
 * Fill buf with consecutive Dirichlet samples.
 *
 *   rng  - GSL generator to draw from
 *   argc - number of Dirichlet parameters (k)
 *   argv - array of argc t_atom values holding the parameters
 *   bufc - capacity of buf, in floats
 *   buf  - output; sample s occupies buf[s*k] .. buf[s*k + k - 1]
 *
 * bufc / k whole samples are written (integer division); any trailing
 * elements of buf that cannot hold a full sample are left untouched.
 * Nothing is done when argc or bufc is not positive.
 *
 * Fixes relative to the previous version: every sample used to be written
 * to buf[0..k-1], so only the last draw survived and the rest of the buffer
 * was never filled; and argc == 0 led to a division by zero.
 */
void librdist_dirichlet(gsl_rng *rng, int argc, void *argv, int bufc, float *buf){
	t_atom *av = (t_atom *)argv;
	size_t i, s;

	if(argc <= 0 || bufc <= 0){
		return;
	}

	const size_t k = (size_t)argc;
	const size_t nsamples = (size_t)bufc / k;
	double alpha[k];
	double theta[k];

	for(i = 0; i < k; i++){
		alpha[i] = librdist_atom_getfloat(av + i);
	}

	for(s = 0; s < nsamples; s++){
		float *out = buf + s * k;	/* start of this sample's slot */
		gsl_ran_dirichlet(rng, k, alpha, theta);
		for(i = 0; i < k; i++){
			out[i] = (float)theta[i];
		}
	}
}
//#define SEQ_DEPTH 100//测序深度 int create_sampling(double *alpha,double *theta,int nclass) { //int n; //n=SAMPLE_CNT;//dirchlet sampling count /* set up GSL RNG */ const gsl_rng_type *T; gsl_rng *r ; gsl_rng_env_setup(); T=gsl_rng_default; r=gsl_rng_alloc(T); /* end of GSL setup */ gsl_ran_dirichlet(r,nclass,alpha,theta); gsl_rng_free(r); return(0); }
double mc_integrate(gsl_matrix *saxs_pre, gsl_vector *saxs_exp, gsl_vector *err_saxs, int k, int N) { double energy_final; double *alphas; double *samples; double *alpha_ens; size_t Ntrials = 10000; alphas = (double * ) malloc( k * sizeof( double )); samples = (double * ) malloc( k * sizeof( double )); alpha_ens = (double * ) malloc( k * sizeof( double )); const gsl_rng_type *T; gsl_rng *r; gsl_vector *weights = gsl_vector_alloc(k); gsl_vector *saxs_ens = gsl_vector_alloc(N); for (int i = 0; i<k; i++) alphas[i] = 0.5; double energy_trial=0.0; double saxs_scale = 0.0; double alpha_zero; for (int i=0; i<Ntrials; i++) { gsl_ran_dirichlet(r, k, alphas, samples); alpha_zero = 0.0; for (int j = 0; j < k; j++) { alpha_zero += samples[j]; } for( int j = 0; j< N; j++) { alpha_ens[j] = 0.0; for (int l = 0; l < k; l++) { alpha_ens[j]+=gsl_matrix_get(saxs_pre,j,l)*samples[l]; } gsl_vector_set(weights, j, alpha_ens[j]/alpha_zero); } saxs_scale = SaxsScaleMean(weights,saxs_exp,err_saxs,N); gsl_blas_dgemv(CblasNoTrans, 1.0, saxs_pre, weights, 0.0, saxs_ens); energy_trial+=ModelEvidenceEnergy(saxs_ens,saxs_exp,err_saxs,saxs_scale,N); } energy_final/=Ntrials; gsl_rng_free (r); return energy_final; }
/*
 * Check the sample mean of gsl_ran_dirichlet against the expected mean.
 *
 * DIRICHLET_K parameters are drawn from gsl_ran_exponential(r_global, 0.1),
 * N Dirichlet vectors are sampled and summed per component, and for each
 * component the observed mean is compared with alpha[k] / alpha_sum.  The
 * component fails when the deviation, scaled by sqrt(N) / sd, exceeds 3.
 */
void
test_dirichlet_moments (void)
{
  double alpha[DIRICHLET_K];
  double theta[DIRICHLET_K];
  double theta_sum[DIRICHLET_K];
  double alpha_sum = 0.0;
  int comp, draw;

  /* Random parameters; running totals start at zero. */
  for (comp = 0; comp < DIRICHLET_K; comp++)
    {
      alpha[comp] = gsl_ran_exponential (r_global, 0.1);
      alpha_sum += alpha[comp];
      theta_sum[comp] = 0.0;
    }

  /* Accumulate N draws component-wise. */
  for (draw = 0; draw < N; draw++)
    {
      gsl_ran_dirichlet (r_global, DIRICHLET_K, alpha, theta);
      for (comp = 0; comp < DIRICHLET_K; comp++)
        {
          theta_sum[comp] += theta[comp];
        }
    }

  /* Compare each observed mean with its expectation. */
  for (comp = 0; comp < DIRICHLET_K; comp++)
    {
      double mean = alpha[comp] / alpha_sum;
      double sd = sqrt ((alpha[comp] * (1. - alpha[comp] / alpha_sum)) /
                        (alpha_sum * (alpha_sum + 1.)));
      double obs_mean = theta_sum[comp] / N;
      double sigma = sqrt ((double) N) * fabs (mean - obs_mean) / sd;
      int status = (sigma > 3.0);

      gsl_test (status,
                "test gsl_ran_dirichlet: mean (%g observed vs %g expected)",
                obs_mean, mean);
    }
}
// Sample topic parameters // Function written from perspective of sampling user topics parameters // Switch roles of user-item inputs to sample item topics parameters void sampleTopicParams(const mxArray* exampsByUser, int KU, int numUsers, double alpha, double* logthetaU, uint32_t* zU){ // Array of static random number generators gsl_rng** rngs = getRngArray(); // Prior term for Dirichlet const double ratio = alpha/KU; // Allocate memory for storing topic counts double* counts[MAX_NUM_THREADS]; for(int thread = 0; thread < MAX_NUM_THREADS; thread++) counts[thread] = mxMalloc(KU*sizeof(**counts)); #pragma omp parallel for for(int u = 0; u < numUsers; u++){ int thread = omp_get_thread_num(); // Initialize to prior term for(int i = 0; i < KU; i++) counts[thread][i] = ratio; // Iterate over user's examples computing sufficient stats mxArray* exampsArray = mxGetCell(exampsByUser, u); mwSize len = mxGetN(exampsArray); uint32_t* examps = (uint32_t*) mxGetData(exampsArray); for(int j = 0; j < len; j++) counts[thread][zU[examps[j]-1]-1]++; // Sample new topic parameters double* logthetaPtr = logthetaU + u*KU; gsl_ran_dirichlet(rngs[omp_get_thread_num()], KU, counts[thread], logthetaPtr); // Take logs for(int i = 0; i < KU; i++) logthetaPtr[i] = log(logthetaPtr[i]); } // Clean up for(int thread = 0; thread < MAX_NUM_THREADS; thread++) mxFree(counts[thread]); }
/*
 * Generate a pair of 4-component barycentric points (pc.c1, pc.c2) that are
 * `dist` apart.
 *
 *   dist == 1 : two different corners, chosen uniformly.
 *   dist == 0 : one Dirichlet(alpha) draw, used for both points.
 *   dist > 0.95 (and not 1): unsupported; prints a message and exits.
 *   otherwise : c1 is redrawn until its distance to the corner belonging to
 *               its smallest component is at least dist / 0.95; c2 is then
 *               c1 displaced by `dist` along a random unit direction,
 *               retried until the result lies inside the simplex.  For
 *               dist > 0.9 the draw uses concentrations of 0.1 instead of
 *               alpha.
 */
struct pair_comp gen_pair_comp(double *alpha, double dist, gsl_rng *rg)
{
    struct pair_comp pc;
    double alpha_low[] = { 0.1, 0.1, 0.1, 0.1 };

    if (dist == 1)
    {
        /* Second corner is offset by 1..3 from the first, so they differ. */
        unsigned long first = gsl_rng_uniform_int(rg, 4);
        unsigned long second = (first + 1 + gsl_rng_uniform_int(rg, 3)) % 4;
        memcpy(pc.c1, bary_corners[first], sizeof(pc.c1));
        memcpy(pc.c2, bary_corners[second], sizeof(pc.c2));
        return pc;
    }

    if (dist == 0)
    {
        /* dist is 0 here, so the low-concentration variant never applies. */
        gsl_ran_dirichlet(rg, 4, alpha, pc.c1);
        memcpy(pc.c2, pc.c1, sizeof(pc.c2));
        return pc;
    }

    if (dist > 0.95)
    {
        fprintf(stderr, "Cannot handle distances not equal to 1 but > 0.95\n");
        exit(1);
    }

    {
        double *conc = (dist > 0.9 ? alpha_low : alpha);
        double s1[3], s_corner[3], sph[3], s1p[3];
        double reach = -1.0;
        double smallest;
        unsigned comp, smallest_at;

        /* generate a valid c1 */
        while (reach < dist * (1.0 / 0.95))
        {
            gsl_ran_dirichlet(rg, 4, conc, pc.c1);

            /* Locate the smallest component (first one wins on ties). */
            smallest = pc.c1[0];
            smallest_at = 0;
            for (comp = 1; comp != 4; ++comp)
            {
                if (smallest > pc.c1[comp])
                {
                    smallest = pc.c1[comp];
                    smallest_at = comp;
                }
            }

            barycentric_to_simplex(pc.c1, s1);
            barycentric_to_simplex(bary_corners[smallest_at], s_corner);
            reach = dist3(s_corner, s1);
        }

        /* sample from the unit sphere until we find a point in the simplex */
        do
        {
            unit_sphere3(sph, rg);
            for (comp = 0; comp != 3; ++comp)
                s1p[comp] = s1[comp] + dist * sph[comp];
        }
        while (!inside_simplex(s1p));

        simplex_to_barycentric(s1p, pc.c2);
    }

    /* Debug trace (disabled):
     * fprintf(stderr,
     *         "%5.3f,%5.3f,%5.3f,%5.3f\t"
     *         "%5.3f,%5.3f,%5.3f,%5.3f\t"
     *         "%f\t%f\n",
     *         pc.c1[0], pc.c1[1], pc.c1[2], pc.c1[3],
     *         pc.c2[0], pc.c2[1], pc.c2[2], pc.c2[3],
     *         dist,
     *         dist4_scaled(pc.c1, pc.c2));
     */
    return pc;
}
int main() { gsl_rng * r; const gsl_rng_type * T; gsl_rng_env_setup(); T = gsl_rng_default; r = gsl_rng_alloc (T); // printf ("generator type: %s\n", gsl_rng_name (r)); // printf ("seed = %lu\n", gsl_rng_default_seed); // printf ("first value = %lu\n", gsl_rng_get (r)); // Step 1 int k = 3; // k is the # of communities const double alpha = double (1.0) / k; // printf ("k = %d and alpha = %f\n", k, alpha); double eta = 1; // eta is the hidden variable of the Beta distribution double comm_str [k]; for (uint32_t i = 0; i < k; ++i) comm_str[i] = gsl_ran_beta(r, eta, eta); // Step 2 int n = 10; // Number of nodes double theta = 1; // Used for gsl_ran_dirichlet double alpha_array [k]; // Array for gsl_ran_dirichlet for (uint32_t i = 0; i < k; ++i) alpha_array[i] = alpha; double pi [n][k]; // Array to store pi // Populate pi for (uint32_t i = 0; i < n; ++i) gsl_ran_dirichlet(r, k, alpha_array, pi[i]); // // Convert to binary // for (uint32_t i = 0; i < n; ++i){ // for (uint32_t j = 0; j < k ; ++j){ // if (pi[i][j] > 0.5){ // pi[i][j] = 1; // } // else{ // pi[i][j] = 0; // } // } // } // for (uint32_t i = 0; i < n; ++i){ // for (uint32_t j = 0; j < k ; ++j){ // printf("%d, %d, %f\n", i, j ,pi[i][j]); // } // } // Step 4 double mean = 0.0; // Set mean double var = 1; // Set var double prob = 0.5; // Set probability for Bernoulli // Save input values std::ofstream inputs ("inputs.txt"); if (inputs.is_open()){ inputs << "k = " << k << "\n"; inputs << "alpha = " << alpha << "\n"; inputs << "eta = " << eta << "\n"; inputs << "mean = " << mean << "\n"; inputs << "var = " << var << "\n"; inputs << "prob = " << prob << "\n"; inputs << "comm_str = "; for (uint32_t i = 0; i < k; ++i) inputs << comm_str[i] << " "; inputs << "\n" << "n = " << n << "\n" << "pi = " << "\n"; for (uint32_t i = 0; i < n; ++i){ for (uint32_t j = 0; j < k ; ++j){ inputs << pi[i][j] << "\t"; } inputs << "\n"; } inputs.close(); } // Save attribute matrix std::ofstream attributes ("attributes.txt"); if 
(attributes.is_open()){ for (uint32_t i = 0; i < n; ++i){ attributes << gsl_ran_gaussian(r, var) << "\t"; attributes << gsl_ran_gaussian(r, var) << "\t"; attributes << gsl_ran_bernoulli(r, prob) << "\t"; attributes << gsl_ran_bernoulli(r, prob) << "\t"; attributes << "\n"; } attributes.close(); } // Step 3 int num_edge = 20; // Define number of edges double epsilon = 1e-30; int adj_matrix [n][n]; // Populate adjancency matrix with 0s for (uint32_t i = 0; i < n; ++i) for (uint32_t j = 0; j < n ; ++j) adj_matrix[i][j] = 0; int count = 0; while (count < num_edge){ // printf("count %d\n", count); int a = gsl_rng_uniform_int(r, n); int b = gsl_rng_uniform_int(r, n); // printf("%d, %d\n", a, b); if (a == b){ continue; } double a_probs [k]; double b_probs [k]; for (uint32_t i = 0; i < k; ++i){ a_probs[i] = pi[a][i]; b_probs[i] = pi[b][i]; } int a_val = gsl_ran_discrete(r, gsl_ran_discrete_preproc(k, a_probs)); int b_val = gsl_ran_discrete(r, gsl_ran_discrete_preproc(k, b_probs)); if (a_val == b_val){ // printf("%d, %d\n", a_val, b_val); int x = gsl_ran_bernoulli(r, comm_str[a_val]); if (x == 1){ // printf("x = 1\n"); if (adj_matrix[a][b] == 0){ adj_matrix[a][b] = 1; ++count; } } } else{ int x = gsl_ran_bernoulli(r, epsilon); if (x == 1){ // printf("x = 1\n"); if (adj_matrix[a][b] == 0){ adj_matrix[a][b] = 1; ++count; } } } // for (uint32_t j = 0; j < k ; ++j){ // if (std::pi[a][j] == pi[b][j]){ // int x = gsl_ran_bernoulli(r, comm_str[j]); // // printf("Match 1 %d\n", x); // if (x == 1){ // if (adj_matrix[a][b]) // continue; // else // adj_matrix[a][b] = 1; // run_eps = 0; // ++count; // continue; // } // } // if (run_eps == 1){ // int x = gsl_ran_bernoulli(r, epsilon); // // printf("0 %d\n", x); // if (x == 1){ // if (adj_matrix[a][b]) // continue; // else // adj_matrix[a][b] = 1; // ++count; // continue; // } // } // } } std::ofstream matrix ("matrix.txt"); if (matrix.is_open()){ for (uint32_t i = 0; i < n; ++i){ for (uint32_t j = 0; j < n ; ++j){ if (adj_matrix [i][j]) 
matrix << i << "\t" << j << '\n'; } } matrix.close(); } }