/**
 * Draws one normally distributed deviate using the polar (rejection) form of
 * the Box-Muller transform.
 *
 * Each accepted point yields two deviates: one is returned immediately and
 * the other is kept in function-local static storage and handed out on the
 * next call. Because of that cached state the function is not reentrant.
 *
 * Relies on ran_uniform(), which is defined outside this block
 * (presumably uniform on [0, 1) -- confirm against its definition).
 */
double ran_normal()
{
    static int have_spare = 0;   // non-zero when `spare` holds an unused deviate
    static double spare;

    // Second call of a pair: hand out the cached deviate.
    if (have_spare != 0) {
        have_spare = 0;
        return spare;
    }

    double x;
    double y;
    double radius_sq;
    for (;;) {
        // Candidate point in the square [-1, 1] x [-1, 1].
        x = 2.0 * ran_uniform() - 1.0;
        y = 2.0 * ran_uniform() - 1.0;
        radius_sq = x * x + y * y;

        // Reject points outside the unit circle and the exact origin
        // (the latter would feed log(0) and a division by zero below).
        const bool rejected = (radius_sq >= 1.0 || radius_sq == 0.0);
        if (!rejected) {
            break;
        }
    }

    const double scale = sqrt(-2.0 * log(radius_sq) / radius_sq);
    spare = x * scale;
    have_spare = 1;
    return y * scale;
}
void logistic_regression::fit(std::string train_dir,double stop){ srand(time(0)); File_Control fc(train_dir); stop *= (1000*1000); long pos = 0; long neg = 0; no_train = 0; update = true; //训练第几遍文件 int indx = 0; for(int i=0;i<iter;i++){ std::cout<<"训练第"<<i<<"遍文件"<<std::endl; while(fc.has_next() && indx <= stop){ std::string next = fc.next(); if(next.size()>3){ //subsampling double ran = ran_uniform(); std::pair<int,Sparse_Vector> result = sparse_vector_form(next); if(result.first==1 || (ran<1)){ if (result.first==1) pos ++ ; else if (result.first==0) neg ++ ; one_round(result.first,result.second); } indx ++ ; if (indx%(verbose*1000)==0) std::cout<<"indx "<<std::setw(5)<<(double)indx/(1000*1000)<<"M |w "<<w.size()<<" |z "<<z.size() \ <<" |g "<<g.size()<<" |no_train "<<no_train<<" |n "<<n.size() \ <<" |pos "<<pos<<" |neg "<<neg<<std::endl; } } if(indx>=stop){ indx = 0; } indx = 0; fc.restart(); } }
const int poisson_random_number(const double lambda) //Poisson distribution { int k=0; const int max_k = 1000; double p = ran_uniform(); double P = exp(-lambda); double sum=P; if (sum>=p) return 0; for (k=1; k<max_k; ++k) { P*=lambda/(double)k; sum+=P; if (sum>=p) break; } return k; }