template <typename T, typename U>  // template header assumed; the snippet uses T and U as iterator types
inline void verifyEqualRanges(T startT, T endT, U startU, U endU, Verdict verdict = Verdict::WA) {
    T itT = startT;
    U itU = startU;
    while (true) {
        if (itT == endT && itU == endU)
            return;
        if (itT == endT || itU == endU)
            QUIT(verdict, "Lengths differ, " << expectation(rangeToString(startT, endT), rangeToString(startU, endU)));
        if (*itT != *itU)
            QUIT(verdict, expectation(rangeToString(startT, endT), rangeToString(startU, endU)));
        ++itT;
        ++itU;
    }
}
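// A minimal standalone sketch of the same lockstep-comparison idea, assuming
// nothing from the checker library above; rangesEqual is local to the sketch.
// Walking both ranges together distinguishes "lengths differ" from "element differs".
#include <iostream>
#include <vector>

template <typename ItA, typename ItB>
bool rangesEqual(ItA aBegin, ItA aEnd, ItB bBegin, ItB bEnd) {
    while (aBegin != aEnd && bBegin != bEnd) {
        if (*aBegin != *bBegin) return false;  // element mismatch
        ++aBegin; ++bBegin;
    }
    return aBegin == aEnd && bBegin == bEnd;   // both exhausted => same length
}

int main() {
    std::vector<int> expected{1, 2, 3}, actual{1, 2};
    std::cout << (rangesEqual(expected.begin(), expected.end(),
                              actual.begin(), actual.end())
                  ? "equal" : "differ") << '\n';  // prints "differ"
}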
std::vector<double> DiffusionProcess::simulate1path(const std::vector<double>& dDates) const {
    std::size_t iNDates = dDates.size();
    std::vector<double> dResult(iNDates, dX0_);
    std::tr1::normal_distribution<double> dist(0.0, 1.0);
    double dOldValue = dX0_;
    for (std::size_t iDate = 1; iDate < iNDates; ++iDate) {
        double t0 = dDates[iDate - 1], dt = dDates[iDate] - t0;
        dOldValue = expectation(t0, dOldValue, dt) + stdev(t0, dOldValue, dt) * dist(*m_eng);
        if (bFloorSimulation_ && dOldValue < dFloor_) {
            dResult[iDate] = dFloor_;
            if (bStartFromFloor_)
                dOldValue = dFloor_;
        } else if (bCapSimulation_ && dOldValue > dCap_) {
            dResult[iDate] = dCap_;
            if (bStartFromCap_)
                dOldValue = dCap_;
        } else {
            dResult[iDate] = dOldValue;
        }
    }
    return dResult;
}
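// Self-contained sketch of the per-step update used above,
//   x_{t+dt} = E[x_{t+dt} | x_t] + StdDev[x_{t+dt} | x_t] * Z,  Z ~ N(0,1),
// specialized to geometric Brownian motion. The drift/volatility choice is an
// illustrative assumption, not the class's actual dynamics.
#include <cmath>
#include <cstddef>
#include <iostream>
#include <random>
#include <vector>

int main() {
    const double mu = 0.05, sigma = 0.2, x0 = 100.0;
    std::mt19937 eng(42);
    std::normal_distribution<double> z(0.0, 1.0);
    std::vector<double> dates{0.0, 0.25, 0.5, 0.75, 1.0};
    double x = x0;
    for (std::size_t i = 1; i < dates.size(); ++i) {
        double dt = dates[i] - dates[i - 1];
        double mean  = x * std::exp(mu * dt);                                    // conditional expectation
        double stdev = mean * std::sqrt(std::exp(sigma * sigma * dt) - 1.0);     // conditional std. dev.
        x = mean + stdev * z(eng);
    }
    std::cout << "x(T) = " << x << '\n';
}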
inline Disposable<Array> StochasticProcess1D::expectation(
                                   Time t0, const Array& x0, Time dt) const {
#if defined(QL_EXTRA_SAFETY_CHECKS)
    QL_REQUIRE(x0.size() == 1, "1-D array required");
#endif
    Array a(1, expectation(t0, x0[0], dt));
    return a;
}
void IBM1::em(int T) {
    for (int i = 0; i < T; ++i) {
        cout << "Doing EM iteration " << i + 1 << " for IBM1." << endl;
        expectation();
        maximization();
    }
}
T read(IStream& stream) const {
    Case streamCase = effectiveCase(stream);
    std::string input = stream.readToken();
    bool is_signed = std::numeric_limits<T>::is_signed;
    if (!is_signed && input[0] == '-')
        stream.quit(Verdict::PE, expectation("Unsigned integer", input));
    const char* usedValue = input.c_str();
    size_t length = input.length();
    bool negative = false;
    if (input[0] == '-') {
        negative = true;
        ++usedValue;
        --length;
    }
    static const std::vector<int> maxArray = absToArray(std::numeric_limits<T>::max());
    if (length > maxArray.size())
        stream.quit(Verdict::PE, expectation("Integer", input));
    // Size the digit buffer to this token on every call: a buffer fixed at
    // maxArray.size() would keep stale digits past `length` and break both
    // the emptiness check and the vector comparisons below.
    static std::vector<int> digits;
    digits.assign(length, 0);
    for (size_t i = 0; i < length; ++i) {
        try {
            digits[i] = digitValue(usedValue[i], streamCase);
        } catch (NotDigitException& e) {
            stream.quit(Verdict::PE, expectation("Digit", e.character));
        }
        if (digits[i] >= radix)
            stream.quit(Verdict::PE, expectation("Digit in radix " + toString(radix), usedValue[i]));
    }
    static const std::vector<int> minArray = absToArray(std::numeric_limits<T>::min());
    if (negative && digits == minArray)
        return std::numeric_limits<T>::min();
    if (digits.empty())
        stream.quit(Verdict::PE, expectation("Integer", input));
    if (digits[0] == 0 && (negative || length > 1))
        stream.quit(Verdict::PE, expectation("Integer", input));
    if (length == maxArray.size() && digits > maxArray)
        stream.quit(Verdict::PE, expectation("Integer", input));
    T result = 0;
    for (size_t i = 0; i < length; ++i)
        result = result * radix + digits[i];
    if (negative)
        result = -result;
    return result;
}
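// Minimal standalone illustration of the overflow check used above: compare
// the token's digits against the digits of numeric_limits<T>::max(), digit by
// digit, instead of multiplying and hoping. All names here are local to the
// sketch; leading zeros are rejected implicitly by the length comparison.
#include <iostream>
#include <limits>
#include <string>
#include <vector>

std::vector<int> digitsOf(unsigned long long v) {
    std::vector<int> d;
    do { d.insert(d.begin(), int(v % 10)); v /= 10; } while (v != 0);
    return d;
}

bool fitsInInt(const std::string& token) {
    const std::vector<int> maxDigits = digitsOf(std::numeric_limits<int>::max());
    std::vector<int> d;
    for (char c : token) {
        if (c < '0' || c > '9') return false;
        d.push_back(c - '0');
    }
    if (d.size() != maxDigits.size()) return d.size() < maxDigits.size();
    return d <= maxDigits;  // same length, so lexicographic == numeric compare
}

int main() {
    std::cout << fitsInInt("2147483647") << ' ' << fitsInInt("2147483648") << '\n';  // 1 0
}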
/**
 * If expectations are to be recorded, record the bitmap expectations into the
 * global expectations array.
 * As with reading expectations, the key combines the filename parameter with
 * the preferred config (as specified by "--config"), matching the pattern of
 * gm_json.py's IMAGE_FILENAME_PATTERN: "filename_config.png".
 */
static void write_expectations(const skiagm::BitmapAndDigest& bitmapAndDigest, const char* filename) {
    const SkString name_config = create_json_key(filename);
    if (!FLAGS_createExpectationsPath.isEmpty()) {
        // Create an Expectations object and add it to the list to write.
        skiagm::Expectations expectation(bitmapAndDigest);
        Json::Value value = expectation.asJsonValue();
        gExpectationsToWrite[name_config.c_str()] = value;
    }
}
template <class __URng_Poisson_Type, class __URng_Exp_Type>  // template header assumed from the class's parameters
inline const Real GeneralizedJcirProcess<__URng_Poisson_Type, __URng_Exp_Type>::evolve(
        Time t0, Real x0, Time dt, Real dw) const {
    Real xContinuous, totalJump = 0.;
    xContinuous = apply(expectation(t0, x0, dt), stdDeviation(t0, x0, dt) * dw);
    InverseCumulativePoisson invP(jumpIntensity_ * dt);
    int Pt = (int)invP(URng_Poisson_.next().value);
    // Draw Pt exponential jump sizes by inverse-transform sampling.
    for (int i = 0; i < Pt; ++i)
        totalJump += -mean * std::log(1 - URng_Exp_.next().value);
    return apply(xContinuous, totalJump);
}
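// Standalone sketch of the same jump mechanism with the standard <random>
// library: a Poisson draw gives the number of jumps in dt, and each jump size
// is exponential via inverse-transform sampling. Parameter values are illustrative.
#include <cmath>
#include <iostream>
#include <random>

int main() {
    const double jumpIntensity = 3.0;   // jumps per unit time
    const double meanJumpSize  = 0.1;
    const double dt            = 0.25;
    std::mt19937 eng(7);
    std::poisson_distribution<int> nJumps(jumpIntensity * dt);
    std::uniform_real_distribution<double> u(0.0, 1.0);
    double totalJump = 0.0;
    int Pt = nJumps(eng);
    for (int i = 0; i < Pt; ++i)
        totalJump += -meanJumpSize * std::log(1.0 - u(eng));  // Exp(1/mean) via inverse CDF
    std::cout << Pt << " jumps, total size " << totalJump << '\n';
}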
void mmEM::training(param myParam) {
    vector_2str wordX;
    vector_2str wordY;
    bool stillTrain = true;

    // reading file //
    readFileXY(myParam, myParam.inputFilename, &wordX, &wordY);

    // initialization //
    cout << "Initialization ... ";
    initialization(myParam, wordX, wordY);

    // maximization //
    cout << "Maximization ... " << endl;
    maximization(myParam);
    cout << endl;

    int atIter = 0;
    // still training //
    while (stillTrain) {
        atIter++;
        cout << "Iteration " << atIter << endl;

        // for each x and y pair //
        cout << "Expectation ... ";
        for (int i = 0; i < wordX.size(); i++) {
            // expectation //
            expectation(myParam, wordX[i], wordY[i]);
        }

        cout << "Maximization ... ";
        long double totalChange = maximization(myParam);
        cout << "Total probability change = " << totalChange << endl;

        // stop by the probability change condition //
        if ((totalChange <= myParam.cutOff) && (myParam.cutOff < 1)) {
            stillTrain = false;
        }
        // stop by the number of iteration condition //
        if ((myParam.cutOff >= 1) && (atIter >= myParam.cutOff)) {
            stillTrain = false;
        }
    }
    cout << endl;
}
GMMExpectationMaximization::Real GMMExpectationMaximization::getBIC(const MatrixX& dataset) const {
    const uint dim = dataset.cols();
    const uint num_gaussians = m_means.size();
    // Free parameters: one covariance (dim*(dim+1)/2) and one mean (dim) per
    // Gaussian, plus num_gaussians - 1 independent mixture weights.
    Real number_of_parameters = (num_gaussians * dim * (dim + 1) / 2) + num_gaussians * dim + num_gaussians - 1;
    uint data_count = dataset.rows();
    Real sum = 0.0;
    for (uint i = 0; i < data_count; i++)
        sum += log(expectation(dataset.row(i).transpose()));
    return -sum + (number_of_parameters / 2.0) * log(Real(data_count));
}
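// Standalone numeric check of the BIC formula above,
//   BIC = -log L + (p / 2) * log n,
// for a single 1-D Gaussian fitted to toy data (so p = 2: mean and variance).
#include <cmath>
#include <iostream>
#include <vector>

int main() {
    const double PI = 3.141592653589793;
    std::vector<double> data{1.0, 2.0, 3.0, 4.0};
    double n = data.size(), mean = 0.0, var = 0.0;
    for (double x : data) mean += x / n;
    for (double x : data) var += (x - mean) * (x - mean) / n;  // ML variance
    double logL = 0.0;
    for (double x : data)
        logL += -0.5 * std::log(2.0 * PI * var) - (x - mean) * (x - mean) / (2.0 * var);
    const double p = 2.0;  // free parameters: mean and variance
    std::cout << "BIC = " << -logL + (p / 2.0) * std::log(n) << '\n';
}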
void run() {
    _ns = string("perftest.") + name();
    client().dropCollection(ns());
    prep();
    int hlm = howLongMillis();
    dur::stats._intervalMicros = 0;  // no auto rotate
    dur::stats.curr->reset();
    Timer t;
    unsigned long long n = 0;
    const unsigned Batch = 50;
    do {
        unsigned i;
        for (i = 0; i < Batch; i++)
            timed();
        n += i;
    } while (t.millis() < hlm);
    client().getLastError();  // block until all ops are finished
    int ms = t.millis();
    say(n, ms, name());
    if (n < expectation()) {
        cout << "\ntest " << name() << " seems slow n:" << n
             << " ops/sec but expect greater than:" << expectation() << endl;
        cout << endl;
    }
    {
        const char* test2name = timed2();
        if (test2name) {
            dur::stats.curr->reset();
            Timer t;
            unsigned long long n = 0;
            while (1) {
                unsigned i;
                for (i = 0; i < Batch; i++)
                    timed2();
                n += i;
                if (t.millis() > hlm)
                    break;
            }
            int ms = t.millis();
            say(n, ms, test2name);
        }
    }
}
double EmpiricalDistribution::deviation(double a, double b) const {
    if (a > b)
        std::swap(a, b);
    a = a > min ? a : min;  // clamp [a, b] to the distribution's support
    b = b < max ? b : max;
    auto moment2 = [&](double t) { return t * t * density_func(t); };
    double ex = expectation(a, b);
    double prob = this->operator()(b) - this->operator()(a);
    double m2 = integrate(moment2, a, b);
    return m2 - ex * ex * prob;
}
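// Standalone sketch of the same computation: restricted second moment minus
// the squared restricted mean weighted by the interval probability, with a
// naive midpoint rule standing in for the class's integrate(); the uniform
// density is an illustrative choice.
#include <functional>
#include <iostream>

double integrate(const std::function<double(double)>& f, double a, double b, int steps = 10000) {
    double h = (b - a) / steps, s = 0.0;
    for (int i = 0; i < steps; ++i)
        s += f(a + (i + 0.5) * h) * h;  // midpoint rule
    return s;
}

int main() {
    auto density = [](double) { return 1.0; };  // uniform on [0, 1]
    double a = 0.0, b = 1.0;
    double prob = integrate(density, a, b);
    double ex   = integrate([&](double t) { return t * density(t); }, a, b) / prob;
    double m2   = integrate([&](double t) { return t * t * density(t); }, a, b);
    std::cout << "variance = " << m2 - ex * ex * prob << '\n';  // ~1/12 for U(0,1)
}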
void MP1::em(CorpusCache& cache, int round, const string& out, bool knsmoothing) {
    SimplePhraseTable& pt_ = *ppt_;
    alpha_ = 0.5;
    for (int i = 0; i < round; i++) {
        if (out != "") {
            pt_.print(out + ".m1." + to_string(i));
        }
        cerr << "round " << i << ", alpha:" << alpha_ << endl;
        expectation(cache);
        if (!knsmoothing)
            pt_.normalize();
        else
            pt_.knsmoothing();
    }
}
double DiffusionProcess::Generate1(double t0, double x0, double dt) const {
    std::tr1::normal_distribution<double> dist(expectation(t0, x0, dt), stdev(t0, x0, dt));
    return dist(*m_eng);
}
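// The std::tr1 distribution above maps directly onto C++11 <random>; a
// minimal standalone equivalent of one transition draw, with illustrative
// arithmetic-Brownian drift/volatility constants:
#include <cmath>
#include <iostream>
#include <random>

int main() {
    double x0 = 1.0, mu = 0.03, sigma = 0.2, dt = 0.1;
    std::mt19937 eng(1);
    // One step: mean x0 + mu*dt, standard deviation sigma*sqrt(dt).
    std::normal_distribution<double> dist(x0 + mu * dt, sigma * std::sqrt(dt));
    std::cout << dist(eng) << '\n';
}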
double *find_information(struct treenode *tree, struct treenode *tree2,
                         unsigned int e, int factor_flag, double factor) {
    extern double **var;
    extern double **expect;
    extern double **rootedexpect;
    extern int mode;
    extern int nodecount;
    extern int branches;
    extern FILE *variance_file_p;
    extern int is_kappa;
    int a, b, d;
    double *det;

    CreatePMats();

    /* Work out the information the user requires.
     * If sampling, then sample to estimate the nth percentile. */
    if (ISMODE(PERCENTILE)) {
        det = calloc(1, sizeof(double));
        det[0] = sample_percentile(tree, tree2, e, factor_flag, factor);
        if (ISMODE(CACHE))
            wipe_cache();
    }
    /* Else calculate the expected information matrix. */
    else {
        expectation(e, factor, factor_flag, tree, tree2);
        if (ISMODE(CACHE))
            wipe_cache();
        /* If the variances are wanted, then calculate and dump
         * the results to a file. */
        if (ISMODE(VARIANCE)) {
            if (ISMODE(HKY) && NOTMODE(NOKAPPA))
                is_kappa = 1;
            d = branches + is_kappa;
            /* If we are working with rooted trees, the variance matrix will
             * already be in rooted form and so of a different size. We must
             * use the rooted expectation tree to create the variance from
             * E(X^2). */
            if (ISMODE(ROOTED)) {
                d = nodecount + is_kappa + ((ISMODE(NODEASROOT)) ? 1 : 2);
                planttree(expect, rootedexpect);
                for (a = 0; a < d; a++)
                    for (b = 0; b < d; b++)
                        var[a][b] -= rootedexpect[a][b] * rootedexpect[a][b];
            }
            /* Same, but not rooted. */
            else {
                for (a = 0; a < d; a++)
                    for (b = 0; b < d; b++)
                        var[a][b] -= expect[a][b] * expect[a][b];
            }
            /* Dump the calculated variances to a file. */
            for (a = 0; a < d; a++) {
                for (b = 0; b < d; b++)
                    fprintf(variance_file_p, "%e,", var[a][b]);
                fprintf(variance_file_p, "\n");
            }
            fprintf(variance_file_p, "\n");
            is_kappa = 0;
        }
        /* Work out the information required (either returns the determinant
         * of the expected / estimated expected information, or an individual
         * element, depending on what was required).
         * Note: recalculates rooted-expect again if doing the variance
         * calculation, which is slightly wasteful. */
        det = determinant();
    }
    return det;
}
void em(char* dataset, int k, char* start, char* dir) {
    FILE* lhood_fptr;
    char string[100];
    int iteration;
    double convergence = 1, lhood = 0, lhood_old = 0;
    corpus* corpus;
    llna_model* model;
    llna_ss* ss;
    time_t t1, t2;
    double avg_niter, converged_pct, old_conv = 0;
    gsl_matrix *corpus_lambda, *corpus_nu, *corpus_phi_sum;
    short reset_var = 1;

    // read the data and make the directory
    corpus = read_data(dataset);
    mkdir(dir, S_IRUSR | S_IWUSR | S_IXUSR);

    // set up the log likelihood log file
    sprintf(string, "%s/likelihood.dat", dir);
    lhood_fptr = fopen(string, "w");

    // run em
    model = em_initial_model(k, corpus, start);
    ss = new_llna_ss(model);
    corpus_lambda = gsl_matrix_alloc(corpus->ndocs, model->k);
    corpus_nu = gsl_matrix_alloc(corpus->ndocs, model->k);
    corpus_phi_sum = gsl_matrix_alloc(corpus->ndocs, model->k);
    time(&t1);
    init_temp_vectors(model->k - 1);  // !!! hacky
    iteration = 0;
    sprintf(string, "%s/%03d", dir, iteration);
    write_llna_model(model, string);
    do {
        printf("***** EM ITERATION %d *****\n", iteration);

        expectation(corpus, model, ss, &avg_niter, &lhood,
                    corpus_lambda, corpus_nu, corpus_phi_sum,
                    reset_var, &converged_pct);
        time(&t2);
        convergence = (lhood_old - lhood) / lhood_old;
        // Cast the whole difference: the original "(int) t2 - t1" bound the
        // cast to t2 alone, mismatching the %5ld format specifier.
        fprintf(lhood_fptr, "%d %5.5e %5.5e %5ld %5.5f %1.5f\n",
                iteration, lhood, convergence, (long) (t2 - t1),
                avg_niter, converged_pct);

        if (((iteration % PARAMS.lag) == 0) || isnan(lhood)) {
            sprintf(string, "%s/%03d", dir, iteration);
            write_llna_model(model, string);
            sprintf(string, "%s/%03d-lambda.dat", dir, iteration);
            printf_matrix(string, corpus_lambda);
            sprintf(string, "%s/%03d-nu.dat", dir, iteration);
            printf_matrix(string, corpus_nu);
        }
        time(&t1);

        if (convergence < 0) {
            // Likelihood decreased: re-run variational inference with more
            // iterations (or a tighter tolerance) instead of maximizing.
            reset_var = 0;
            if (PARAMS.var_max_iter > 0)
                PARAMS.var_max_iter += 10;
            else
                PARAMS.var_convergence /= 10;
        } else {
            maximization(model, ss);
            lhood_old = lhood;
            reset_var = 1;
            iteration++;
        }

        fflush(lhood_fptr);
        reset_llna_ss(ss);
        old_conv = convergence;
    } while ((iteration < PARAMS.em_max_iter) &&
             ((convergence > PARAMS.em_convergence) || (convergence < 0)));

    sprintf(string, "%s/final", dir);
    write_llna_model(model, string);
    sprintf(string, "%s/final-lambda.dat", dir);
    printf_matrix(string, corpus_lambda);
    sprintf(string, "%s/final-nu.dat", dir);
    printf_matrix(string, corpus_nu);
    fclose(lhood_fptr);
}
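// Distilled shape of the EM driver above: alternate E and M steps, stop when
// the log-likelihood change falls below a tolerance or an iteration cap is
// hit. The "model" is a one-parameter placeholder; everything is illustrative.
#include <cmath>
#include <iostream>

int main() {
    const double tol = 1e-6;
    const int max_iter = 100;
    double lhood_old = -1e9;
    double theta = 0.3;  // stand-in parameter updated by the M-step
    for (int iter = 0; iter < max_iter; ++iter) {
        // E-step + log-likelihood (placeholder: a concave function of theta)
        double lhood = -(theta - 0.7) * (theta - 0.7);
        // M-step (placeholder update moving theta toward its optimum)
        theta += 0.5 * (0.7 - theta);
        double change = std::fabs(lhood - lhood_old);  // absolute change criterion
        lhood_old = lhood;
        if (change < tol) {
            std::cout << "converged after " << iter + 1 << " iterations\n";
            break;
        }
    }
    std::cout << "theta = " << theta << '\n';
}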
int EMclustering::EM(int k, int* IDX, bool spatial, bool att) {
    clusternum = k;

    // Pack the chosen attributes into the data matrix x (one column per
    // sample). A commented-out branch in the original also packed spatial
    // position, and optionally velocity, into x when `spatial` was set.
    MatrixXf x;
    if (att) {
        x.resize(1, dataDen.size());
        for (int i = 0; i < dataDen.size(); i++)
            x(0, i) = dataDen[i];
        if (k > dataDen.size())
            return -1;
    } else {
        x.resize(3, dataSize);
        for (int i = 0; i < dataSize; i++) {
            x(0, i) = dataVel[i][0];
            x(1, i) = dataVel[i][1];
            x(2, i) = dataVel[i][2];
        }
        if (k > dataSize)
            return -1;
    }

    // Initial responsibilities, then hard labels via the max responsibility.
    MatrixXf r = initialization(x, k);
    VectorXi label(r.rows());
    for (int i = 0; i < r.rows(); i++) {
        int index;
        r.row(i).maxCoeff(&index);
        label(i) = index;
    }
    // Drop the columns of r belonging to clusters that received no points.
    VectorXi tmpp(label.size());
    VectorXi tmp2 = unique(label, tmpp);
    int tmpd = tmp2.size();
    MatrixXf tmpr(r.rows(), tmpd);
    for (int i = 0; i < tmpd; i++)
        tmpr.col(i) = r.col(tmp2(i));
    r.resize(r.rows(), tmpd);
    r = tmpr;

    float tol = 1e-10;
    int max = 300;
    double llh = -9e+9;
    bool converged = false;
    int t = 1;
    int clusternum_error;
    MatrixXf tmpmodel;
    while (!converged && t < max) {
        t = t + 1;
        gaussian_model model = maximization(x, r);
        double tmpllh = llh;  // double, not float: avoid narrowing the log-likelihood
        r = expectation(x, model, llh);
        for (int i = 0; i < r.rows(); i++) {
            int index;
            r.row(i).maxCoeff(&index);
            label(i) = index;
        }
        VectorXi u = unique(label, tmpp);
        clusternum_error = clusternum - u.size();
        if (r.cols() == u.size()) {
            // std::abs, not integer abs(): llh is a double.
            converged = (llh - tmpllh) < tol * std::abs(llh);
        }
        // (The original carried a commented-out branch here that shrank r to
        // the surviving clusters when some clusters became empty.)
        tmpmodel.resize(model.mu.rows(), model.mu.cols());
        tmpmodel = model.mu;
        u.resize(0);
    }
    cerr << clusternum_error << endl;
    return_model = tmpmodel;
    if (converged)
        cerr << "Converged in " << t - 1 << endl;
    else
        cerr << max << endl;
    for (int i = 0; i < label.size(); i++)
        IDX[i] = label(i);

    x.resize(0, 0);
    r.resize(0, 0);
    tmpr.resize(0, 0);
    tmpmodel.resize(0, 0);
    label.resize(0);
    tmpp.resize(0);
    tmp2.resize(0);
    return clusternum_error;
}
template <typename T, typename Compare>  // template header assumed; Compare presumably defaults to std::less
inline void verifySorted(T start, T end, Verdict verdict = Verdict::WA, Compare comp = Compare()) {
    verify(std::is_sorted(start, end, comp), verdict,
           expectation("Sorted range", rangeToString(start, end)));
}
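// Standalone illustration of the std::is_sorted check that backs the verifier:
#include <algorithm>
#include <functional>
#include <iostream>
#include <vector>

int main() {
    std::vector<int> v{1, 2, 2, 5};
    std::cout << std::boolalpha
              << std::is_sorted(v.begin(), v.end()) << ' '                        // true (non-decreasing)
              << std::is_sorted(v.begin(), v.end(), std::greater<int>()) << '\n'; // false
}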
template <typename T, typename Equal>  // template header assumed; Equal presumably defaults to std::equal_to
inline void verifyEqual(T&& t, T&& u, Verdict verdict = Verdict::WA, Equal equal = Equal()) {
    verify(equal(t, u), verdict, expectation(t, u));
}
void mmEM::training(param myParam) {
    vector_2str wordX;
    vector_2str wordY;
    bool stillTrain = true;

    // reading file //
    readFileXY(myParam, myParam.inputFilename, &wordX, &wordY);

    // initialization //
    cout << "Initialization ... ";
    initialization(myParam, wordX, wordY);

    // maximization //
    cout << "Maximization ... " << endl;
    maximization(myParam);  // lhuang: M-step after initialize (count = 1)
    cout << endl;

    int atIter = 0;

    // still training //
    global_index--;
    vector<double> probs_vector(global_index), counts_vector(global_index),
                   newprobs_vector(global_index), alpha_vector(global_index);
    while (stillTrain) {
        atIter++;
        cout << "Iteration " << atIter << endl;

        // for each x and y pair //
        cout << "Expectation ... ";
        for (int i = 0; i < wordX.size(); i++) {
            // expectation //
            expectation(myParam, wordX[i], wordY[i]);
        }

        if (!myParam.ashish) {
            cout << "Maximization ... ";
            long double totalChange = maximization(myParam);  // lhuang: real M-step
            cout << "Total probability change = " << totalChange << endl;

            // stop by the probability change condition //
            if ((totalChange <= myParam.cutOff) && (myParam.cutOff < 1)) {
                stillTrain = false;
            }
        } else {
            cout << "Ashish";
            // Flatten the nested count/prob/alpha hash tables into vectors
            // indexed by the global pair index.
            double sum_probs = 0;
            for (hash_2StrDouble::iterator pos = index.begin(); pos != index.end(); pos++) {
                for (hash_StrDouble::iterator pos2 = (pos->second).begin();
                     pos2 != (pos->second).end(); pos2++) {
                    counts_vector[index[pos->first][pos2->first] - 1] = counts[pos->first][pos2->first];
                    probs_vector[index[pos->first][pos2->first] - 1] = probs[pos->first][pos2->first];
                    alpha_vector[index[pos->first][pos2->first] - 1] = alphas[pos->first][pos2->first];
                    sum_probs += probs_vector[index[pos->first][pos2->first] - 1];
                }
            }
            cout << "sum " << sum_probs << endl;

            sum_probs = 0;
            for (int i = 0; i < newprobs_vector.size(); i++) {
                sum_probs += probs_vector[i];
            }
            cout << "sum vector " << sum_probs << endl;

            projectedGradientDescentWithArmijoRule(counts_vector, probs_vector,
                                                   newprobs_vector, alpha_vector,
                                                   myParam.beta, myParam.pgdIter);

            // Write the optimized probabilities back into the hash tables.
            for (hash_2StrDouble::iterator pos = index.begin(); pos != index.end(); pos++) {
                for (hash_StrDouble::iterator pos2 = (pos->second).begin();
                     pos2 != (pos->second).end(); pos2++) {
                    probs[pos->first][pos2->first] =
                        newprobs_vector[index[pos->first][pos2->first] - 1];
                }
            }
            cout << endl;
        }

        // stop by the number of iteration condition //
        if ((myParam.cutOff >= 1) && (atIter >= myParam.cutOff)) {
            stillTrain = false;
        }
    }
    cout << endl;
}