// Log-likelihood contribution of one document's topic assignments.
//
// Starts from the precomputed log_doc_normalizer_, adds
// LogGamma(count + ll_alpha) for every occupied slot of the document's
// topic counter, adds LogGamma(ll_alpha) for each of the remaining
// (K_ - nonzero) topics, and subtracts LogGamma(num_words + ll_alpha * K_).
//
// Returns 0.0 for an empty document. Aborts via CHECK_EQ if the result is NaN.
double LDAStats::ComputeOneDocLLH(LDADocument* doc) {
  // Empty documents contribute nothing; return before paying for the
  // topic-counter allocation and fill.
  const int num_words = doc->size();
  if (num_words == 0) return 0.0;

  wood::light_hash_map doc_topic_counter(1024);
  doc->GetDocTopicCounter(doc_topic_counter);

  const int32_t capacity = doc_topic_counter.capacity();
  const int32_t* key = doc_topic_counter.key();
  const int32_t* value = doc_topic_counter.value();

  // Fixed prior used for likelihood evaluation; the constructor's
  // log_doc_normalizer_ is computed with the same 0.01.
  real_t ll_alpha = 0.01;

  double one_doc_llh = log_doc_normalizer_;
  int32_t nonzero_num = 0;
  for (int i = 0; i < capacity; ++i) {
    // Only slots with a positive key are counted (presumably the map's
    // marker for an occupied slot -- confirm against light_hash_map).
    if (key[i] > 0) {
      one_doc_llh += LogGamma(value[i] + ll_alpha);
      ++nonzero_num;
    }
  }
  // Topics absent from the document each contribute LogGamma(ll_alpha).
  one_doc_llh += (K_ - nonzero_num) * LogGamma(ll_alpha);
  one_doc_llh -= LogGamma(num_words + ll_alpha * K_);

  // x != x only holds for NaN.
  CHECK_EQ(one_doc_llh, one_doc_llh) << "one_doc_llh is nan.";
  return one_doc_llh;
}
// Log-likelihood contribution of a single document's topic assignments,
// evaluated with the sampler's ll_alpha_ prior. The doc-topic histogram is
// rebuilt on the fly into the reusable doc_topic_counter_ member.
// Returns 0 for a document with no tokens.
double LightDocSampler::ComputeOneDocLLH(LDADocument* doc) {
  const int token_total = doc->size();
  if (token_total == 0) {
    return 0;
  }

  // Rebuild the histogram of topic -> count for this document.
  doc_topic_counter_.clear();
  doc->GetDocTopicCounter(doc_topic_counter_);

  const int32_t slot_count = doc_topic_counter_.capacity();
  int32_t* slot_keys = doc_topic_counter_.key();
  int32_t* slot_values = doc_topic_counter_.value();

  double llh = log_doc_normalizer_;
  int32_t topics_present = 0;
  for (int slot = 0; slot < slot_count; ++slot) {
    if (!(slot_keys[slot] > 0)) {
      continue;  // slot not counted
    }
    llh += LogGamma(slot_values[slot] + ll_alpha_);
    ++topics_present;
  }

  // Topics that never occur in the document.
  llh += (K_ - topics_present) * LogGamma(ll_alpha_);
  llh -= LogGamma(token_total + ll_alpha_ * K_);
  return llh;
}
double LightDocSampler::ComputeWordLLH(int32_t lower, int32_t upper) { // word_llh is P(w|z). double word_llh = 0; double zero_entry_llh = LogGamma(beta_); // Since some vocabs are not present in the corpus, use num_words_seen to // count # of words in corpus. int num_words_seen = 0; for (int w = lower; w < upper; ++w) { auto word_topic_row = get_word_row(w); int32_t total_count = 0; double delta = 0; if (word_topic_row.is_dense()) { int32_t* memory = word_topic_row.memory(); int32_t capacity = word_topic_row.capacity(); int32_t count; for (int i = 0; i < capacity; ++i) { count = memory[i]; total_count += count; delta += LogGamma(count + beta_); } } else { int32_t* key = word_topic_row.key(); int32_t* value = word_topic_row.value(); int32_t capacity = word_topic_row.capacity(); int32_t count; int32_t nonzero_num = 0; for (int i = 0; i < capacity; ++i) { if (key[i] > 0) { count = value[i]; total_count += count; delta += LogGamma(count + beta_); ++nonzero_num; } } delta += (K_ - nonzero_num) * zero_entry_llh; } if (total_count) { word_llh += delta; } } return word_llh; }
/*************************************************************************** * L O G F A C T * * * * Returns the natural logarithm of n factorial. For efficiency, some * * of the values are cached, so they need be computed only once. * * * ***************************************************************************/ double LogFact( int n ) { static const int Cache_Size = 100; static double c[ Cache_Size ] = { 0.0 }; // Cache some of the values. if( n <= 1 ) return 0.0; if( n < Cache_Size ) { if( c[n] == 0.0 ) c[n] = LogGamma((double)(n+1)); return c[n]; } return LogGamma((double)(n+1)); // gamma(n+1) == n! }
// Topic-level normalisation term of the word log-likelihood.
//
// Computes num_topics * (LogGamma(beta * V) - V * LogGamma(beta)) and then
// subtracts LogGamma(n_k + V * beta) for every topic k, where n_k is read
// from row 0 of the summary table and V is Config::num_vocabs.
double Eval::NormalizeWordLLH(Trainer* trainer) {
  Row<int64_t>& topic_totals = trainer->GetRow<int64_t>(kSummaryRow, 0);

  const auto per_topic_constant =
      LogGamma(Config::beta * Config::num_vocabs) -
      Config::num_vocabs * LogGamma(Config::beta);
  double llh = Config::num_topics * per_topic_constant;

  int32_t topic = 0;
  while (topic < Config::num_topics) {
    llh -= LogGamma(topic_totals.At(topic) +
                    Config::num_vocabs * Config::beta);
    ++topic;
  }
  return llh;
}
// Returns ln(n!) as LogGamma(n + 1). Values for 2 <= n < 100 are cached in a
// function-local table after their first evaluation (a zero entry means
// "not yet computed"); n <= 1 returns 0.
double LogFactorial( int n )
{
  static double memo[ 100 ];

  if ( n <= 1 )
    return 0.;

  // Outside the table: no caching.
  if ( n >= 100 )
    return LogGamma( n + 1. );

  if ( memo[n] == 0. )
    memo[n] = LogGamma( n + 1. );
  return memo[n];
}
double Eval::ComputeOneWordLLH(int32_t word, Trainer* trainer) { Row<int32_t>& params = trainer->GetRow<int32_t>( kWordTopicTable, word); if (params.NonzeroSize() == 0) return 0.0; double word_llh = 0.0; int32_t nonzero_num = 0; RowIterator<int32_t> iter = params.Iterator(); while (iter.HasNext()) { word_llh += LogGamma(iter.Value() + Config::beta); ++nonzero_num; iter.Next(); } word_llh += (Config::num_topics - nonzero_num) * LogGamma(Config::beta); return word_llh; }
// Topic-level normalisation term of the word log-likelihood:
// K_ * log_topic_normalizer_ minus LogGamma(summary_row_[k] + beta_sum_)
// for every topic k.
double LightDocSampler::NormalizeWordLLH() {
  double llh = K_ * log_topic_normalizer_;
  int topic = 0;
  while (topic < K_) {
    llh -= LogGamma(summary_row_[topic] + beta_sum_);
    ++topic;
  }
  return llh;
}
// Builds a sampler over externally owned model state.
//
// The word-topic table, summary row, alias tables and beta-proposal state are
// bound by reference; scalar hyper-parameters are copied. The body derives
// beta_sum_, alpha_ and the fixed likelihood prior ll_alpha_ (0.01),
// precomputes the two log-likelihood normalisers, and sizes the per-topic and
// per-thread work buffers.
LightDocSampler::LightDocSampler(
    int32_t K,
    int32_t V,
    int32_t num_threads,
    int32_t mh_step,
    float beta,
    float alpha_sum,
    std::vector<lda::hybrid_map> &word_topic_table,
    std::vector<int64_t> &summary_row,
    std::vector<lda::hybrid_alias_map> &alias_kv,
    int32_t &beta_height,
    float& beta_mass,
    std::vector<wood::alias_k_v> &beta_k_v)
    : doc_topic_counter_(1024),
      word_topic_table_(word_topic_table),
      summary_row_(summary_row),
      alias_k_v_(alias_kv),
      beta_height_(beta_height),
      beta_mass_(beta_mass),
      beta_k_v_(beta_k_v),
      K_(K),
      V_(V),
      num_threads_(num_threads),
      mh_step_for_gs_(mh_step),
      beta_(beta),
      alpha_sum_(alpha_sum) {
  // Derived hyper-parameters.
  beta_sum_ = beta_ * V_;
  alpha_ = alpha_sum_ / K_;

  // Prior used only when evaluating the log-likelihood.
  ll_alpha_ = static_cast<lda::real_t>(0.01);
  ll_alpha_sum_ = ll_alpha_ * K_;

  // Normalisers that do not depend on the current counts.
  log_doc_normalizer_ = LogGamma(ll_alpha_ * K_) - K_ * LogGamma(ll_alpha_);
  log_topic_normalizer_ = LogGamma(beta_sum_) - V_ * LogGamma(beta_);

  // Working storage: per-topic buffers and one delta list per thread.
  alias_rng_.Init(K_);
  q_w_proportion_.resize(K_);
  delta_summary_row_.resize(K_);
  word_topic_delta_.resize(num_threads_);
  rehashing_buf_ = new int32_t[K_ * 2];
}
// Reads the topic-model configuration from the global context and
// precomputes the count-independent log-likelihood normalisers.
// Aborts via CHECK_NE if "num_vocabs" reads back as -1.
LDAStats::LDAStats() {
  util::Context& ctx = util::Context::get_instance();

  // Model dimensions.
  K_ = ctx.get_int32("num_topics");
  V_ = ctx.get_int32("num_vocabs");
  CHECK_NE(-1, V_);

  // Dirichlet hyper-parameters and their sums.
  beta_ = ctx.get_double("beta");
  beta_sum_ = beta_ * V_;
  alpha_ = ctx.get_double("alpha");
  alpha_sum_ = K_ * alpha_;

  num_threads_ = ctx.get_int32("num_worker_threads");

  // The document normaliser uses a fixed prior of 0.01 rather than alpha_
  // (the alpha_-based form would be
  //  LogGamma(alpha_sum_) - K_ * LogGamma(alpha_)).
  real_t ll_alpha = 0.01;
  log_doc_normalizer_ = LogGamma(K_ * ll_alpha) - K_ * LogGamma(ll_alpha);
  log_topic_normalizer_ = LogGamma(beta_sum_) - V_ * LogGamma(beta_);
}
// Topic-level normalisation term of the word log-likelihood:
// K_ * log_topic_normalizer_ minus LogGamma(n_k + beta_sum_) for each topic,
// with n_k taken from the given summary row.
// Aborts via CHECK if a count is negative or the running value becomes NaN.
double LDAStats::NormalizeWordLLH(petuum::ClientSummaryRow& summary_row) {
  double llh = K_ * log_topic_normalizer_;
  int topic = 0;
  while (topic < K_) {
    const int64_t topic_total = summary_row.GetSummaryCount(topic);
    CHECK_GE(topic_total, 0);
    llh -= LogGamma(topic_total + beta_sum_);
    // x != x only holds for NaN.
    CHECK_EQ(llh, llh) << "word_llh is nan after -LogGamma";
    ++topic;
  }
  return llh;
}
float LogModBesselFirstAlt(int orderX2, float x, float accuracy, int limit) { // Special case problem values... if (orderX2==0) return LogModBesselFirst(orderX2, x, accuracy, limit); if (x<1e-12) return -1e32; accuracy = log(accuracy); // Create the very first term, set ret to be it... float term = x - 0.5 * log(2.0 * M_PI * x) - LogGamma(orderX2+1); float inc_gam = LogLowerIncompleteGamma(orderX2 + 1, 2.0 * x); term += inc_gam; int sign = 1; float ret = term; // Keep summing in terms until the desired accuracy is reached, or we obtain a term with zero in, meaning we have obtained 100% accuracy and can stop... int n; float log2x = log(2.0 * x); for (n=1; n<limit; n++) { // Move to next inc_gam, factoring it into the term... term -= inc_gam; float smo = 0.5 * (orderX2 + 2*n - 1); inc_gam += log(smo); inc_gam += log(1.0 - exp(smo * log2x - 2.0 * x - inc_gam)); term += inc_gam; // Update term... int mult2X = 1 - orderX2 + 2 * (n - 1); if (mult2X==0) break; // Term has zero in - this term and all further add nil. if (mult2X<0) { sign *= -1; mult2X = -mult2X; } term += log(0.5 * mult2X); term -= log(n) + log2x; // Add in or subtract in term - we can always assume its less than the previous... ret += log(1.0 + sign * exp(term - ret)); // If the accuracy is high enough, terminate... if (term<accuracy) break; } // Return... return ret; }
double WordSampler::ComputeWordLikelihood() { Context& context = Context::get_instance(); int32_t num_topics = context.get_int32("num_topics"); double beta = context.get_double("beta"); double word_log_likelihood = 0.0; int32_t num_non_zero = 0; int32_t word_topic_count; int length = topic_counts_.length; for (int j = 0; j < length; ++j) { cnt_topic_t cnt_top = topic_counts_.items[j]; int word_topic_count = cnt_top.choose.cnt; word_log_likelihood += LogGamma(word_topic_count + beta_); } // This can be removed if this function is solely used to compute delta. word_log_likelihood -= length * LogGamma(beta_); return word_log_likelihood; }
// Log-likelihood contribution of one document's topic assignments under a
// symmetric prior Config::alpha.
//
// doc_topic_counter is scratch storage: it is cleared and refilled with this
// document's topic counts. Returns 0.0 for an empty document.
double Eval::ComputeOneDocLLH(Document* doc,
                              Row<int32_t>& doc_topic_counter) {
  if (doc->Size() == 0) {
    return 0.0;
  }

  // Count-independent normaliser.
  double llh = LogGamma(Config::num_topics * Config::alpha) -
               Config::num_topics * LogGamma(Config::alpha);

  int32_t topics_present = 0;
  doc_topic_counter.Clear();
  doc->GetDocTopicVector(doc_topic_counter);

  // Topics that occur in the document.
  for (Row<int32_t>::iterator it = doc_topic_counter.Iterator();
       it.HasNext(); it.Next()) {
    llh += LogGamma(it.Value() + Config::alpha);
    ++topics_present;
  }

  // Topics that do not occur, then the document-length term.
  llh += (Config::num_topics - topics_present) * LogGamma(Config::alpha);
  llh -= LogGamma(doc->Size() + Config::alpha * Config::num_topics);
  return llh;
}
double BirthDeathImmuneUpdater::Logposterior(double value, curr_par_obj* current_values, mach_dat_obj* data, global_par_obj* global_pars) { curr_par_obj* ptr; double deathdatemultiplier=0.0; double runcount; double loglike; double kappa = hyperpar[0]; double theta = hyperpar[1]; if(value == current_value[0]) { ptr = current_values; } else { if(value == proposal_value[0]) { ptr = temp_parameters; } else { cout << "BirthDeathImmuneUpdater Error: neither proposal nor current value entered into log posterior" << endl; } } if(ptr->Deathdate() < ptr->T) { deathdatemultiplier = global_pars->survival_rate[ptr->Deathdate()]; } loglike = (ptr->Is_immune())*log(global_pars->immune_rate) + (1-ptr->Is_immune())*( log(1.0-global_pars->immune_rate) + log(1.0-deathdatemultiplier) + sum_nonoff_transitions ); loglike += LogGamma((double)(ptr->NumOffOffTrans())+kappa) - ((double)(ptr->NumOffOffTrans())+kappa)*log((double)(ptr->NumOffSeq()) + (1.0/theta) ); runcount = (double)first_nonoff_transition - (double)(ptr->Birthdate()); if( runcount > 0) loglike -= LogGamma(runcount + 1.0); runcount = (double)(ptr->Deathdate()) - (double)last_nonoff_transition; if( runcount > 0) loglike -= LogGamma(runcount + 1.0); return(loglike); }
void LDAStats::ComputeWordLLHSummary(int32_t ith_llh, int iter) { double word_llh = log_topic_normalizer_; // log(\prod_j (1 / \gamma(n_j^* + W\beta))) term. petuum::RowAccessor summary_row_acc; summary_table_.Get(0, &summary_row_acc); const auto& summary_row = summary_row_acc.Get<petuum::DenseRow<int32_t> >(); for (int k = 0; k < K_; ++k) { word_llh -= LogGamma(summary_row[k] + beta_sum_); CHECK_EQ(word_llh, word_llh) << "word_llh is nan after -LogGamma(summary_row[k] + beta_). " << "summary_row[k] = " << summary_row[k]; } CHECK_EQ(word_llh, word_llh) << "word_llh is nan."; llh_table_.Inc(ith_llh, kColIdxLLHTableLLH, word_llh); // Since only 1 client should call this ComputeWordLLH, we set the first // column to be iter-# llh_table_.Inc(ith_llh, kColIdxLLHTableIter, static_cast<double>(iter)); }
// Reads the topic-model configuration from the global context, fills the
// LogGamma lookup tables, precomputes the log-likelihood normalisers and
// binds the parameter-server tables.
// Aborts via CHECK_NE if "num_vocabs" reads back as -1.
LDAStats::LDAStats() {
  Context& ctx = Context::get_instance();

  // Model dimensions and Dirichlet hyper-parameters.
  K_ = ctx.get_int32("num_topics");
  V_ = ctx.get_int32("num_vocabs");
  CHECK_NE(-1, V_);
  beta_ = ctx.get_double("beta");
  beta_sum_ = beta_ * V_;
  alpha_ = ctx.get_double("alpha");
  alpha_sum_ = K_ * alpha_;

  // Lookup tables: entry n holds LogGamma(n + offset).
  loggamma_alpha_offset_.resize(kNumLogGammaAlpha_);
  loggamma_alpha_sum_offset_.resize(kNumLogGammaAlphaSum_);
  loggamma_beta_offset_.resize(kNumLogGammaBeta_);
  for (int n = 0; n < kNumLogGammaAlpha_; ++n) {
    loggamma_alpha_offset_[n] = LogGamma(n + alpha_);
  }
  for (int n = 0; n < kNumLogGammaAlphaSum_; ++n) {
    loggamma_alpha_sum_offset_[n] = LogGamma(n + alpha_sum_);
  }
  for (int n = 0; n < kNumLogGammaBeta_; ++n) {
    loggamma_beta_offset_[n] = LogGamma(n + beta_);
  }

  // Count-independent normalisers; the topic one already includes the
  // factor K_.
  log_doc_normalizer_ = LogGamma(alpha_sum_) - K_ * LogGamma(alpha_);
  log_topic_normalizer_ = K_ * (LogGamma(beta_sum_) - V_ * LogGamma(beta_));

  // Parameter-server tables.
  const int32_t summary_id = ctx.get_int32("summary_table_id");
  const int32_t word_topic_id = ctx.get_int32("word_topic_table_id");
  const int32_t llh_id = ctx.get_int32("llh_table_id");
  summary_table_ = petuum::PSTableGroup::GetTableOrDie<int>(summary_id);
  word_topic_table_ = petuum::PSTableGroup::GetTableOrDie<int>(word_topic_id);
  llh_table_ = petuum::PSTableGroup::GetTableOrDie<double>(llh_id);
}
void StateUpdater::Propose(curr_par_obj* current_values, mach_dat_obj* data, global_par_obj* global_pars) { double val; int range; int end_range; short int original_state; short int states_to_choose[2]; int pre_index_nondecay; int post_index_nondecay; int path_off_off_transitions; int path_num_off_runs; int j; double alphaval; double lambdaval; double spikemult; int beginend[4]; int addsub[2]; vector<double> prop1_trans; vector<double> prop1_pois; vector<double> prop2_trans; vector<double> prop2_pois; vector<double> curr_trans; vector<double> curr_pois; double s1, s2, s3; double kappa, theta; kappa = hyperpar[0]; theta = hyperpar[1]; curr_hfun1 = curr_hfun2 = prop_hfun1 = prop_hfun2 = 0.0; /*uniform assumption of weights */ if(current_values->Deathdate() >= current_values->T) end_range = current_values->T-1; else end_range = current_values->Deathdate(); //range = end_range - current_values->birthdate + 1; distributions->Simulate(3, 1, &val, (double)current_values->Birthdate(), (double)(end_range)+1.0); index_selected = (int)floor(val); if( (index_selected > end_range) || (index_selected < current_values->Birthdate())) { cout << "In StateUpdater::Logposterior : index selected is out of range" << endl; } if(PRINT_MCMC_STEP) { cout << "_" << data->uid << "_" << index_selected << "_" << "ct_" << data->GetTotalCount(index_selected); } // if((index_selected != current_values->beginval) && (index_selected != current_values->endval)) // { // if((current_values->state[index_selected - 1] == 1) || (current_values->state[index_selected + 1] == 1)) //cout << endl << " spike vicinty : "<< current_values->state[index_selected - 1] << " " << current_values->state[index_selected] << " " << current_values->state[index_selected + 1] << endl; // } temp_parameters->SetFromCopy(current_values); //current_values->Print(0); //temp_parameters->Print(0); original_state = current_values->State(index_selected); current_value[0] = (double) original_state; PathToChange(index_selected, 
current_values, data, beginend); pre_index_nondecay = beginend[0]; post_index_nondecay = beginend[1]; path_off_off_transitions = beginend[2]; path_num_off_runs = beginend[3]; curr_hfun1 = EtaPropH(index_selected,pre_index_nondecay, post_index_nondecay, data, temp_parameters, global_pars, &(curr_trans), &(curr_pois)); if(data->GetTotalCount(index_selected) > 0) { /* This may need to be adapted for transition states with prob 0 (eg spike->spike) */ if(temp_parameters->State(index_selected) == 1) proposal_value[0] = 2; else proposal_value[0] = 1; states_to_choose[0] = (short int)(proposal_value[0]); states_to_choose[1] = -1; temp_parameters->SetState(index_selected, (short int)(proposal_value[0] )); prop_hfun1 = EtaPropH(index_selected, pre_index_nondecay, post_index_nondecay, data, temp_parameters, global_pars, &(prop1_trans), &(prop1_pois)); if(prop_hfun1 == (-1.0*INFINITY)) { //cout << endl << prop_hfun1 << endl << (short int)prop_hfun1 ; //cout << endl << "Cannot change state: " << endl << "st:"<< current_values->state[max(0,index_selected - 1)] << " " << original_state << " " << current_values->state[min(current_values->T, index_selected + 1)] << endl << "fl:"<< data->fl[max(0,index_selected - 1)] << " " << data->fl[index_selected] << " " << data->fl[min(current_values->T, index_selected + 1)] << endl; proposal_value[0] = original_state; } prop_hfun2 = 0.0; curr_hfun2 = 0.0; temp_parameters->SetState(index_selected, original_state); } else { /* y = 0; choose between two other states */ /*And do a lambda update as well*/ prop_hfun2 = 0.0; curr_hfun2 = 0.0; j = 0; spikemult = 0.5; if(original_state != 0) { states_to_choose[j] = 0; j++; } if(original_state != 2) { states_to_choose[j] = 2; j++; } if(original_state != 1) { states_to_choose[j] = 1; j++; spikemult = 0.8; } distributions->Simulate(3, 1, &alphaval, 0.0, 1.0); if(alphaval < (spikemult)) proposal_value[0] = states_to_choose[0]; else proposal_value[0] = states_to_choose[1]; 
temp_parameters->SetState(index_selected, (short int)(proposal_value[0])); //cout << "current state = " << (int)(original_state) << " num-off-off, num-off-seq = " << current_values->NumOffOffTrans() << ", " << current_values->NumOffSeq() << endl; //cout << "proposed state= " << (int)(proposal_value[0]) << " num-off-off, num-off-seq = " << temp_parameters->NumOffOffTrans() << ", " << temp_parameters->NumOffSeq() << endl; //if(temp_parameters->NumOffSeq() > 0) distributions->Simulate(4,1, &lambdaval,(double)(temp_parameters->NumOffOffTrans())+kappa,(double)1.0/(((double)(temp_parameters->NumOffSeq()))+ (1.0/theta) )); //else //lambdaval = current_values->off_lambda; //cout << "Lambda simulated = " << lambdaval << " current = " << current_values->off_lambda << endl; /* if( (lambdaval < hyperpar[0]) || (lambdaval > hyperpar[1])) { lambdaval = current_values->off_lambda; //proposal_value[0] = (double)(current_values->State(index_selected)); } else { temp_parameters->off_lambda = lambdaval; }*/ prop_hfun1 = EtaPropH(index_selected, pre_index_nondecay, post_index_nondecay, data, temp_parameters, global_pars, &(prop1_trans), &(prop1_pois)); //add in log jump probabilities prop_hfun1 += LogSpikeProb(0.8, (short int)original_state, (short int)(proposal_value[0])); curr_hfun1 += LogSpikeProb(0.8, (short int)(proposal_value[0]), (short int)original_state); //add in the extra bits from the gamma proposal constants prop_hfun1 += LogGamma((double)((double)(temp_parameters->NumOffOffTrans()) + hyperpar[0])); curr_hfun1 += LogGamma((double)((double)(current_values->NumOffOffTrans()) + hyperpar[0])); prop_hfun1 += (double)((double)(current_values->NumOffOffTrans())+kappa)*log((double)(current_values->NumOffSeq()) + (1.0/theta) ); //curr_hfun1 += ScaledGammaLogConstant(hyperpar[0], hyperpar[1], (double)(current_values->NumOffOffTrans())+1.0 , 1.0/double(current_values->NumOffSeq())); curr_hfun1 += 
(double)((double)(temp_parameters->NumOffOffTrans())+kappa)*log((double)(temp_parameters->NumOffSeq()) + (1.0/theta) ); //prop_hfun1 += ScaledGammaLogConstant(hyperpar[0], hyperpar[1], (double)(temp_parameters->NumOffOffTrans())+1.0 , 1.0/double(temp_parameters->NumOffSeq())); //last bit: add in the exp for the additional path variables //get addition/subtraction values for the proposed path as opposed to original StateRunChanges(current_values, temp_parameters, index_selected, pre_index_nondecay, post_index_nondecay, addsub); prop_hfun1 += temp_parameters->off_lambda*( (double)path_num_off_runs + (double)(addsub[1]) ); curr_hfun1 += current_values->off_lambda*((double)path_num_off_runs); prop_hfun1 += -1.0*((double)path_off_off_transitions + (double)(addsub[0]))* log(temp_parameters->off_lambda); curr_hfun1 += -1.0*((double)path_off_off_transitions)*log(current_values->off_lambda); } /* for(j = 0; j < 2; j++) { temp_parameters->SetState(index_selected, states_to_choose[j]); if(j == 0) prop_hfun1 = EtaPropH(index_selected, pre_index_nondecay, post_index_nondecay, data, temp_parameters, global_pars, &(prop1_trans), &(prop1_pois)); if(j == 1) prop_hfun2 = EtaPropH(index_selected, pre_index_nondecay, post_index_nondecay, data, temp_parameters, global_pars, &(prop2_trans), &(prop2_pois)); } if( ( prop_hfun1 == (-1.0*INFINITY)) || (prop_hfun2 == (-1.0*INFINITY))) { //cout << endl<<"_ct_0s impossible jump " << endl; //cout << endl << "_ct_0s Cannot change state at index: "<< index_selected << endl << "_ct_0s st:"<< current_values->state[max(0,index_selected - 1)] << " " << original_state << " " << current_values->state[min(current_values->T, index_selected + 1)] << endl << "_ct_0s fl:"<< data->fl[max(0,index_selected - 1)] << " " << data->fl[index_selected] << " " << data->fl[min(current_values->T, index_selected + 1)] << endl; if(prop_hfun1 != (-1.0*INFINITY)) proposal_value[0] = states_to_choose[0]; else if(prop_hfun2 != (-1.0*INFINITY)) proposal_value[0] = 
states_to_choose[1]; else proposal_value[0] = original_state; } else { distributions->Simulate(3, 1, &alphaval, 0.0, 1.0 + exp(prop_hfun2 - prop_hfun1)); if(alphaval < 1.0) { proposal_value[0] = states_to_choose[0]; curr_hfun2 = prop_hfun1; } else { proposal_value[0] = states_to_choose[1]; curr_hfun2 = prop_hfun2; } } */ return; }
// Adapter that exposes LogGamma under the name lgamma; forwards x unchanged.
inline double lgamma(double x)
{
  const double log_gamma_of_x = LogGamma(x);
  return log_gamma_of_x;
}
// Returns LogGamma(val + beta_), served from the precomputed
// loggamma_beta_offset_ table when val lies inside it.
//
// A negative val must not index the table: the earlier `val < size` test
// alone let it through and read out of bounds. Such values now take the
// direct LogGamma path like any other value outside the table.
double LDAStats::GetLogGammaBetaOffset(int val) {
  if (val >= 0 && val < kNumLogGammaBeta_) {
    return loggamma_beta_offset_[val];
  }
  return LogGamma(val + beta_);
}
// Compares LogGamma against a table of reference values and prints the cases
// with the worst absolute and the worst relative error.
//
// Fixes relative to the earlier version:
//  * the relative error is measured against |expected|, so cases with a
//    negative expected value can be selected;
//  * the running maximum stores the relative error (it used to store the
//    absolute error, which corrupted later comparisons);
//  * the output is labelled "LogGamma(" instead of "Gamma(".
void TestLogGamma()
{
    struct TestCase
    {
        double input;
        double expected;
    };

    TestCase test[] =
    {
        { 1e-12, 27.6310211159 },
        { 0.9999, 5.77297915613e-05 },
        { 1.0001, -5.77133422205e-05 },
        { 3.1, 0.787375083274 },
        { 6.3, 5.30734288962 },
        { 11.9999, 17.5020635801 },
        { 12, 17.5023078459 },
        { 12.0001, 17.5025521125 },
        { 27.4, 62.5755868211 }
    };

    size_t numTests = sizeof(test) / sizeof(TestCase);

    double worst_absolute_error = 0.0;
    double worst_relative_error = 0.0;
    size_t worst_absolute_error_case = 0;
    size_t worst_relative_error_case = 0;

    for (size_t t = 0; t < numTests; t++)
    {
        double computed = LogGamma(test[t].input);
        double absolute_error = fabs(computed - test[t].expected);
        double relative_error = absolute_error / fabs(test[t].expected);

        if (absolute_error > worst_absolute_error)
        {
            worst_absolute_error = absolute_error;
            worst_absolute_error_case = t;
        }

        if (relative_error > worst_relative_error)
        {
            worst_relative_error = relative_error;
            worst_relative_error_case = t;
        }
    }

    size_t t = worst_absolute_error_case;
    double x = test[t].input;
    double y = test[t].expected;
    std::cout << "Worst absolute error: "
        << fabs(LogGamma(x) - y)
        << "\nLogGamma( "
        << x
        << ") computed as "
        << LogGamma(x)
        << " but exact value is "
        << y
        << "\n";

    t = worst_relative_error_case;
    x = test[t].input;
    y = test[t].expected;
    // The printed relative error keeps its sign; selection above is by
    // magnitude.
    std::cout << "Worst relative error: "
        << (LogGamma(x) - y) / y
        << "\nLogGamma( "
        << x
        << ") computed as "
        << LogGamma(x)
        << " but exact value is "
        << y
        << "\n";
}
double Gamma( double x // We require x > 0 ) { if (x <= 0.0) { std::stringstream os; os << "Invalid input argument " << x << ". Argument must be positive."; throw std::invalid_argument(os.str()); } // Split the function domain into three intervals: // (0, 0.001), [0.001, 12), and (12, infinity) /////////////////////////////////////////////////////////////////////////// // First interval: (0, 0.001) // // For small x, 1/Gamma(x) has power series x + gamma x^2 - ... // So in this range, 1/Gamma(x) = x + gamma x^2 with error on the order of x^3. // The relative error over this interval is less than 6e-7. const double gamma = 0.577215664901532860606512090; // Euler's gamma constant if (x < 0.001) return 1.0 / (x * (1.0 + gamma * x)); /////////////////////////////////////////////////////////////////////////// // Second interval: [0.001, 12) if (x < 12.0) { // The algorithm directly approximates gamma over (1,2) and uses // reduction identities to reduce other arguments to this interval. double y = x; int n = 0; bool arg_was_less_than_one = (y < 1.0); // Add or subtract integers as necessary to bring y into (1,2) // Will correct for this below if (arg_was_less_than_one) { y += 1.0; } else { n = static_cast<int>(floor(y)) - 1; // will use n later y -= n; } // numerator coefficients for approximation over the interval (1,2) static const double p[] = { -1.71618513886549492533811E+0, 2.47656508055759199108314E+1, -3.79804256470945635097577E+2, 6.29331155312818442661052E+2, 8.66966202790413211295064E+2, -3.14512729688483675254357E+4, -3.61444134186911729807069E+4, 6.64561438202405440627855E+4 }; // denominator coefficients for approximation over the interval (1,2) static const double q[] = { -3.08402300119738975254353E+1, 3.15350626979604161529144E+2, -1.01515636749021914166146E+3, -3.10777167157231109440444E+3, 2.25381184209801510330112E+4, 4.75584627752788110767815E+3, -1.34659959864969306392456E+5, -1.15132259675553483497211E+5 }; double num = 0.0; double den = 1.0; int i; 
double z = y - 1; for (i = 0; i < 8; i++) { num = (num + p[i]) * z; den = den * z + q[i]; } double result = num / den + 1.0; // Apply correction if argument was not initially in (1,2) if (arg_was_less_than_one) { // Use identity gamma(z) = gamma(z+1)/z // The variable "result" now holds gamma of the original y + 1 // Thus we use y-1 to get back the orginal y. result /= (y - 1.0); } else { // Use the identity gamma(z+n) = z*(z+1)* ... *(z+n-1)*gamma(z) for (i = 0; i < n; i++) result *= y++; } return result; } /////////////////////////////////////////////////////////////////////////// // Third interval: [12, infinity) if (x > 171.624) { // Correct answer too large to display. Force +infinity. double temp = DBL_MAX; return temp * 2.0; } return exp(LogGamma(x)); }
// Sums the word-topic log-likelihood term over the rows of a model slice that
// belong to `thread_id`, i.e. indices Begin(thread_id) .. End(thread_id).
//
// For each row, accumulates LogGamma(count + beta_). Dense rows are scanned
// entry by entry; sparse rows visit only slots with a positive key and, when
// at least one such slot exists, add LogGamma(beta_) for each of the
// remaining topics. Rows whose total count is zero contribute nothing.
// Aborts via CHECK on a negative count or a NaN result.
double LDAStats::ComputeOneSliceWordLLH(
    ModelSlice& word_topic_table,
    int32_t thread_id) {
  double word_llh = 0;
  // Contribution of a topic with zero count for a word.
  const double zero_entry_llh = LogGamma(beta_);

  for (int32_t word_index = Begin(thread_id);
       word_index != End(thread_id); ++word_index) {
    hybrid_map row = word_topic_table.GetRowByIndex(word_index);
    int32_t total_count = 0;
    double delta = 0;

    if (row.is_dense()) {
      int32_t* memory = row.memory();
      const int32_t capacity = row.capacity();
      for (int i = 0; i < capacity; ++i) {
        const int32_t count = memory[i];
        CHECK_LE(0, count) << "negative count . " << count;
        total_count += count;
        delta += LogGamma(count + beta_);
      }
    } else {
      int32_t* key = row.key();
      int32_t* value = row.value();
      const int32_t capacity = row.capacity();
      int32_t nonzero_num = 0;
      for (int i = 0; i < capacity; ++i) {
        if (key[i] > 0) {
          const int32_t count = value[i];
          CHECK_LE(0, count) << "negative count . " << count;
          total_count += count;
          delta += LogGamma(count + beta_);
          ++nonzero_num;
        }
      }
      // Topics not stored in the sparse row have zero count.
      if (nonzero_num != 0)
        delta += (K_ - nonzero_num) * zero_entry_llh;
    }

    // Only rows with a non-zero total contribute.
    if (total_count) {
      word_llh += delta;
    }
  }

  // x != x only holds for NaN.
  CHECK_EQ(word_llh, word_llh) << "word_llh is nan.";
  return word_llh;
}
// Returns LogGamma(val + alpha_sum_), served from the precomputed
// loggamma_alpha_sum_offset_ table when val lies inside it.
//
// A negative val must not index the table: the earlier `val < size` test
// alone let it through and read out of bounds. Such values now take the
// direct LogGamma path like any other value outside the table.
double LDAStats::GetLogGammaAlphaSumOffset(int val) {
  if (val >= 0 && val < kNumLogGammaAlphaSum_) {
    return loggamma_alpha_sum_offset_[val];
  }
  return LogGamma(val + alpha_sum_);
}