/**
 * Pick an action for `state` by sampling from exp(Q / temperature) weights.
 *
 * Each of the size[STATE_SIZE] candidate actions gets the weight
 * exp(table[idx + i] / temperature); the weights are cached in prob[] and an
 * action is drawn with probability proportional to its weight.
 *
 * @param state  state whose table row is looked up via getEntry()/getIndex()
 * @return       the sampled entry of value[STATE_SIZE]
 */
QAction TableUtility::select_action (QState state)
{
    const size_t idx = getIndex(getEntry(state, REAL_MINIMUM));
    const size_t n_actions = size[STATE_SIZE];
    assert(n_actions > 0);

    // Unnormalised weights and their sum.
    REAL total_prob = 0;
    for (size_t i = 0; i < n_actions; i++)
        total_prob += (prob[i] = exp(table[idx+i] / temperature));

    // Walk the cumulative distribution until the random threshold is used up.
    // The walk stops at the last action: floating-point rounding (or a draw
    // equal to total_prob) could otherwise leave p >= 0 after every weight was
    // subtracted and index prob[] past its end.
    REAL p = randu() * total_prob;
    size_t chosen = 0;
    for (; chosen + 1 < n_actions; chosen++)
    {
        p -= prob[chosen];
        if (p < 0)
            break;
    }

    return value[STATE_SIZE][chosen];
}
/*
 * bub_1_proc - process body: builds one extra object from the animation
 * frame that pa9 currently points at, inserts it into objlst, then sends the
 * process's own object (pa8) off with a randomised velocity, waits, deletes
 * it and ends the process.
 *
 * NOTE(review): `obj` is handed to gso_dmawnz() before any visible
 * assignment; presumably gso_dmawnz is a macro that assigns its first
 * argument - confirm, otherwise obj is used uninitialised.
 */
void bub_1_proc(void)
{
	ADDRESS animframe;
	OBJECT *obj;

	find_ani2_part2(ANIM_F1_FRIEND);

	/* frame address = long read through pa9, resolved against pa8's heap */
	animframe=(ADDRESS)COMPUTE_ADDR((current_proc->pa8)->oheap,GET_LONG(current_proc->pa9));

	/* keep pa8 in a11 so the new object can be lined up against it below */
	current_proc->a11=(ADDRESS)current_proc->pa8;
	gso_dmawnz(obj,(ADDRESS)animframe,(current_proc->pa8)->oheap,0);
	alloc_cache((OIMGTBL *)animframe,(current_proc->pa8)->oheap,obj);
	lineup_1pwm(obj,(OBJECT*)current_proc->a11);
	insert_object(obj,&objlst);

	/* advance pa9 by one ADDRESS (cast-as-lvalue: needs a compiler that accepts it) */
	((ADDRESS *)current_proc->pa9)++;
	framew(1);

	/* randomised x velocity in [SCX(0x20000), SCX(0x20000)+randu(...)], random y velocity */
	set_proj_vel(current_proc->pa8,SCX(0x20000)+randu(SCX(0x30000)),-1);
	(current_proc->pa8)->oyvel.pos=srandarc(SCY(0x20000));

	process_sleep(20+randu(30));
	framew(4);

	/* remove pa8's object and terminate this process */
	delobjp(current_proc->pa8);
	process_suicide();
}
/* NOTE: same algorithm as randmtzig_gv_randn() below; only the way the
 * random numbers are drawn differs. */
double randmtzig_randn (dsfmt_t *dsfmt)
{
    for (;;)
    {
        /* One random word supplies everything: bit 0 is the sign, the rest
         * is the magnitude, and the magnitude's low 8 bits pick the layer. */
        const randmtzig_uint64_t bits = randi(dsfmt);
        const randmtzig_int64_t mag = bits>>1;
        const int layer = (int)(mag&0xFF);
        const double x = ( bits&1 ? -mag : mag) * wi[layer];

        /* Fast path: 99.3% of the time we return here on the first try. */
        if (mag < (randmtzig_int64_t)ki[layer])
            return x;

        if (layer == 0)
        {
            /* Normal tail, following Marsaglia and Tsang (method of
             * Marsaglia[5]): generate x = -ln(U_1)/r, y = -ln(U_2) until
             * y+y > x*x, then return r+x.  That value always lies in the
             * positive tail, so a sign is needed; any random bit would do
             * and the word drawn above is reused.
             *
             * [PAK] The bottom 8 bits cannot be used for that, since they
             * are all 0 here - hence bit 8 (0x100). */
            double tail_x, tail_y;
            do
            {
                tail_x = - ZIGGURAT_NOR_INV_R * log (randu(dsfmt));
                tail_y = - log (randu(dsfmt));
            }
            while ( tail_y+tail_y <= tail_x*tail_x);
            return (mag&0x100 ? -ZIGGURAT_NOR_R-tail_x : ZIGGURAT_NOR_R+tail_x);
        }

        /* Wedge region: accept x when a uniform height between fi[layer]
         * and fi[layer-1] falls under exp(-x*x/2); otherwise start over. */
        if ((fi[layer-1] - fi[layer]) * randu(dsfmt) + fi[layer] < exp(-0.5*x*x))
            return x;
    }
}
/**
 * Initialise the table after all dimensions have been fixed.
 *
 * Requires free_dimen == 0 and an allocated table. Every entry is set to
 * randu()*0: the stored value is zero, but one random number is still drawn
 * per entry, so the generator advances exactly as before.
 * With OUTPUT_STAT_MIN_MAX defined, the per-dimension statistics are reset to
 * an inverted range (min = vmax, max = vmin).
 */
void TableUtility::init ()
{
    assert(free_dimen == 0);
    assert(table != NULL);

    for (size_t i = 0; i < total_size; i++)
        table[i] = randu()*0;

    // Cast explicitly so the format specifier matches whatever integer type
    // total_size has (the previous "%d" is wrong for a size_t).
    printf("total size is %lu\n", static_cast<unsigned long>(total_size));

#ifdef OUTPUT_STAT_MIN_MAX
    for (size_t i = 0; i < dimen; i++)
    {
        stat_min[i] = vmax[i];
        stat_max[i] = vmin[i];
    }
#endif
}
/*
Writes the gray-scale original and two degraded copies of an image to the
current directory ("original.jpg", "noiseType_1.jpg", "noiseType_2.jpg").
fname:   path to input image
*/
void Dip2::generateNoisyImages(string fname){

    // read the input as a single-channel image
    cout << "load original image" << endl;
    Mat img = imread(fname, 0);

    if (!img.data){
        cerr << "ERROR: file " << fname << " not found" << endl;
        cout << "Press enter to exit" << endl;
        cin.get();
        exit(-3);
    }

    // all further processing happens in 32-bit float
    img.convertTo(img,CV_32FC1);
    cout << "done" << endl;

    imwrite("original.jpg", img);

    cout << "generate noisy images" << endl;

    // scratch buffers with the same geometry as the input
    Mat noisy(img.size(), CV_32FC1);
    Mat kept(img.size(), CV_32FC1);

    // --- noise type 1 ---------------------------------------------------
    // A uniform random field decides per pixel: values up to the level are
    // zeroed in the image, values above (1 - level) are pushed to 255.
    float noiseLevel = 0.15;
    randu(noisy, 0, 1);
    threshold(noisy, kept, noiseLevel, 1, CV_THRESH_BINARY);
    multiply(kept,img,kept);
    threshold(noisy, noisy, 1-noiseLevel, 1, CV_THRESH_BINARY);
    noisy *= 255;
    noisy = kept + noisy;
    threshold(noisy, noisy, 255, 255, CV_THRESH_TRUNC);

    imwrite("noiseType_1.jpg", noisy);

    // --- noise type 2 ---------------------------------------------------
    // Additive zero-mean Gaussian noise, then clipped to [0, 255].
    noiseLevel = 50;
    randn(noisy, 0, noiseLevel);
    noisy = img + noisy;
    threshold(noisy,noisy,255,255,CV_THRESH_TRUNC);
    threshold(noisy,noisy,0,0,CV_THRESH_TOZERO);

    imwrite("noiseType_2.jpg", noisy);

    cout << "done" << endl;
    cout << "Please run now: dip2 restorate" << endl;
}
/*
 * boomb1 - process body: remembers the current object (pa8) in a11, runs the
 * get_jt_blood / a11_blood_lineup setup, gives pa8 a randomised velocity,
 * inserts it into objlst, animates it with a_bigger and hands it to
 * blood_death().
 *
 * NOTE(review): `get_jt_blood;` has no call parentheses, so it is presumably
 * a statement-like macro that replaces pa8 with a new blood object - confirm.
 */
void boomb1(void)
{
	/* cast-as-lvalue assignment: needs a compiler that accepts it */
	(OBJECT *)current_proc->a11=current_proc->pa8;
	get_jt_blood;
	a11_blood_lineup(SCX(0x0),SCY(0x20));

	/* random y velocity, x velocity of SCX(0x10000) plus a random extra */
	(current_proc->pa8)->oyvel.pos=srandarc(SCY(0x10000));
	set_proj_vel(current_proc->pa8,SCX(0x10000)+randu(SCX(0x20000)),-1);

	flip_single(current_proc->pa8);
	insert_object(current_proc->pa8,&objlst);

	/* pa9 selects the animation that framew() then steps through */
	current_proc->pa9=a_bigger;
	framew(6);

	blood_death(current_proc->pa8);
}
/*
   Generates <nsamples> samples from a multivariate normal distribution, where
   <mean>    - the average vector (row or column, <dim> elements),
   <cov>     - symmetric covariation matrix (<dim> x <dim>),
   <samples> - output, created as an nsamples x dim CV_32F matrix with one
               sample per row.
   Each sample is z * U + mean, where z is a row of independent N(0,1) values
   and U is the factor that Cholesky() produces from <cov>.
*/
void randMVNormal( InputArray _mean, InputArray _cov, int nsamples, OutputArray _samples )
{
    Mat mean = _mean.getMat(), cov = _cov.getMat();
    int dim = (int)mean.total();

    _samples.create(nsamples, dim, CV_32F);
    Mat samples = _samples.getMat();

    // The transform below needs standard-normal input; uniform [0,1) values
    // would yield samples with the wrong mean and covariance.
    randn(samples, 0., 1.);

    Mat utmat;
    Cholesky(cov, utmat);

    // sample * utmat is a 1 x dim row, so the added mean must be a row too:
    // transpose it only when it was supplied as a column vector.
    int flags = mean.rows == 1 ? 0 : GEMM_3_T;

    for( int i = 0; i < nsamples; i++ )
    {
        Mat sample = samples.row(i);
        gemm(sample, utmat, 1, mean, 1, sample, flags);
    }
}
// Creates the hidden-layer bias: a (hidden-layer-count x 1) CV_64F column
// filled with uniformly distributed random values between -0.9 and 0.9.
Mat ELM::generateBias(){
    Mat bias;
    bias.create(ELM::network->getHiddenLayerCount(), 1, CV_64F);
    randu(bias, Scalar::all(-0.9), Scalar::all(0.9));
    return bias;
}
/*
 * Draw one particle index by inverse-CDF sampling.
 *
 * cum_sum() fills sum[] from the N particles; the first index whose
 * cumulative value exceeds a random draw randu(0,1) is returned.
 *
 * Returns -1 when no index qualifies, or when N is not positive.
 * NOTE(review): sum[] lives on the stack, so a very large N can overflow it.
 */
int multimodal_resampling(struct particle p[], int N)
{
  /* a variable-length array with a non-positive size is undefined behaviour */
  if (N <= 0) {
    return -1; //error
  }

  double sum[N];
  cum_sum(p,sum,N);

  double x = randu(0,1);
  for(int i = 0;i<N;i++){
    if(sum[i]>x){
      return i;
    }
  }
  return -1; //error
}
// Row-wise complex-output DFT of small random real matrices: no output
// element may exceed 2*rows*cols in magnitude (inputs lie in [-1, 1)).
TEST(Core_DFT, complex_output2)
{
    for( int iter = 0; iter < 100; iter++ )
    {
        // random depth and random size in [1, 9] x [1, 9]
        const int depth = theRNG().uniform(0, 2) ? CV_64F : CV_32F;
        const int rows = theRNG().uniform(1, 10);
        const int cols = theRNG().uniform(1, 10);

        Mat src(rows, cols, depth), spectrum;
        randu(src, -1., 1.);
        dft(src, spectrum, DFT_ROWS | DFT_COMPLEX_OUTPUT);

        const double nrm = cvtest::norm(spectrum, NORM_INF);
        const double thresh = cols*rows*2;
        if( nrm <= thresh )
            continue;

        // dump the offending case before failing
        cout << "x: " << src << endl;
        cout << "out: " << spectrum << endl;
        ASSERT_LT(nrm, thresh);
    }
}
// Creates the input-to-hidden weight matrix: (hidden-layer-count x
// input-count) CV_64F, filled with uniformly distributed random values
// between -0.9 and 0.9.
Mat ELM::generateInputWeight(){
    Mat weight;
    weight.create(ELM::network->getHiddenLayerCount(), ELM::network->getInputCount(), CV_64F);
    randu(weight, Scalar::all(-0.9), Scalar::all(0.9));
    return weight;
}
/* Demo: fill one histogram with uniform and one with normal random numbers,
 * then dump and plot them. */
int main()
{
  h1 hist1, hist2;
  int i = 0;

  /* both histograms cover the range 0:100 */
  h1init(&hist1,0.,100.);
  h1init(&hist2,0.,100.);

  /* 1000 entries each, all with weight 1 */
  while (i < 1000) {
    h1fill(&hist1,randu(0.,100.),1.0);   /* uniform on 0..100 */
    h1fill(&hist2,randn(50.,20.),1.0);   /* normal, arguments 50 and 20 */
    i++;
  }

  /* hist1: screen dump, file dump, screen plot (X11 assumed), PostScript file */
  h1dump(&hist1,"");
  h1dump(&hist1,"hist1.dat");
  h1plot(&hist1,"");
  h1plot(&hist1,"hist1.ps");

  /* hist2: screen dump and screen plot only */
  h1dump(&hist2,"");
  h1plot(&hist2,"" );

  return 0;
}
void SaltAndPepperNoise(std::string destFoldPath, cv::Mat Im) { cv::Mat saltpepper_noise = cv::Mat::zeros(Im.rows, Im.cols, CV_8U); randu(saltpepper_noise, 0, 255); cv::Mat black = saltpepper_noise < 5; cv::Mat white = saltpepper_noise > 250; cv::Mat salt_img = Im.clone(); cv::Mat pepper_img = Im.clone(); cv::Mat saltpepper_img = Im.clone(); saltpepper_img.setTo(255, white); saltpepper_img.setTo(0, black); salt_img.setTo(255, white); pepper_img.setTo(0, black); cv::imwrite(destFoldPath + "_salt.jpg", salt_img); cv::imwrite(destFoldPath + "_pepper.jpg", pepper_img); cv::imwrite(destFoldPath + "_saltpepper.jpg", saltpepper_img); }
// Demo: fill one histogram with uniform and one with normal random numbers,
// then dump and plot both to screen and to files.
int main(){
  h1 hist1, hist2;
  int i = 0;

  // both histograms cover the range 0:100
  h1init(&hist1,0.,100.);
  h1init(&hist2,0.,100.);

  while (i < 1000) {
    h1fill(&hist1,randu(0.,100.));   // uniform distribution
    h1fill(&hist2,randn(50.,20.));   // normal distribution
    i++;
  }

  // hist1: screen dump, data file, screen plot (X11 assumed), pdf file
  h1dump(&hist1,"");
  h1dump(&hist1,"hist1.dat");
  h1plot(&hist1,"");
  h1plot(&hist1,"hist1.pdf");

  // hist2: data file, screen plot (X11 assumed), pdf file
  h1dump(&hist2,"hist2.dat");
  h1plot(&hist2,"" );
  h1plot(&hist2,"hist2.pdf" );

  return 0;
}
/**
   Draw a Poisson distributed random number with mean xm.

   Above a mean of 200 a rounded Gaussian with mean xm and variance xm is
   used instead. Otherwise the mean is consumed in pieces of at most 12; for
   each piece uniform numbers are multiplied together until the product
   drops to exp(-piece) or below, and the number of factors needed beyond
   the first is added to the result.
*/
long randp(rand_t *rstat, double xm){
    const long thres=200;/*use gaussian distribution instead*/
    long total=0;

    if(xm>thres){
	return (long)round(xm+randn(rstat)*sqrt(xm));
    }

    while(xm>0){
	const double piece = xm > 12 ? 12 : xm;
	const double floor_prod = exp(-piece);
	double count=-1;
	double prod=1;
	xm-=piece;
	while(prod>floor_prod){
	    count++;
	    prod*=randu(rstat);
	}
	total+=count;
    }
    return total;
}
// Plot test "BarPlot_ColourCycler": a BarPlot with five series labelled
// "1".."5" that takes its colours from the plot's colour cycler, which is
// set up from the RdYlBu diverging palette.
TEST(BarPlot, ColourCycler)
{
	TestPlotArg< BarPlot >("BarPlot_ColourCycler", [](Plot & plot)
	{
		plot.AddColourCycler(Palette::Diverging::RdYlBu);

		// shared x positions: ten random values scaled by 100 and rounded
		PVec x = (vec)(round(randu(10) * 100));

		// five (x, y) series, each with its own ten random y values
		BarPlot f(std::vector< std::pair< PVec, PVec >>
		{
			{ x, (vec)(randu(10) * 100)},
			{ x, (vec)(randu(10) * 100) },
			{ x, (vec)(randu(10) * 100) },
			{ x, (vec)(randu(10) * 100) },
			{ x, (vec)(randu(10) * 100) }
		}, { "1", "2", "3", "4", "5" });

		// hand the plot's cycler to the bar plot
		f.UseColourCycler(plot.GetColourCycler());
		return f;
	});
}
GLFFTWater::GLFFTWater(GLFFTWaterParams ¶ms) { #ifdef _WIN32 m_h = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4); m_dx = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4); m_dz = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4); m_w = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N)*(params.N)), 4); #else posix_memalign((void **)&m_h,4,sizeof(float)*(params.N+2)*(params.N)); posix_memalign((void **)&m_dx,4,sizeof(float)*(params.N+2)*(params.N)); posix_memalign((void **)&m_dz,4,sizeof(float)*(params.N+2)*(params.N)); posix_memalign((void **)&m_w,4,sizeof(float)*(params.N)*(params.N)); #endif m_htilde0 = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex)*(params.N)*(params.N)); m_heightmap = new float3[(params.N)*(params.N)]; m_params = params; std::tr1::mt19937 prng(1337); std::tr1::normal_distribution<float> normal; std::tr1::uniform_real<float> uniform; std::tr1::variate_generator<std::tr1::mt19937, std::tr1::normal_distribution<float> > randn(prng,normal); std::tr1::variate_generator<std::tr1::mt19937, std::tr1::uniform_real<float> > randu(prng,uniform); for(int i=0, k=0; i<params.N; i++) { float k_x = (-(params.N-1)*0.5f+i)*(2.f*3.141592654f / params.L); for(int j=0; j<params.N; j++, k++) { float k_y = (-(params.N-1)*0.5f+j)*(2.f*3.141592654f / params.L); float A = randn(); float theta = randu()*2.f*3.141592654f; float P = (k_x==0.f && k_y==0.0f) ? 0.f : sqrtf(phillips(k_x,k_y,m_w[k])); m_htilde0[k][0] = m_htilde0[k][1] = P*A*sinf(theta); } } m_kz = new float[params.N*(params.N / 2 + 1)]; m_kx = new float[params.N*(params.N / 2 + 1)]; const int hN = m_params.N / 2; for(int y=0; y<m_params.N; y++) { float kz = (float) (y - hN); for(int x=0; x<=hN; x++) { float kx = (float) (x - hN); float k = 1.f/sqrtf(kx*kx+kz*kz); m_kz[y*(hN+1)+x] = kz*k; m_kx[y*(hN+1)+x] = kx*k; } } if(!fftwf_init_threads()) { cerr << "Error initializing multithreaded fft." 
<< endl; } else { fftwf_plan_with_nthreads(2); } m_fftplan = fftwf_plan_dft_c2r_2d(m_params.N, m_params.N, (fftwf_complex *)m_h, m_h, FFTW_ESTIMATE); glGenTextures(1, &m_texId); glBindTexture(GL_TEXTURE_2D, m_texId); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB16F, params.N, params.N, 0, GL_RGB, GL_FLOAT, 0); glBindTexture(GL_TEXTURE_2D, 0); }
// Loads a network for the given framework, feeds it a random input and
// measures net.forward() inside the perf-sample macros.
//
// weights, proto    model files; resolved through findDataFile()
// halide_scheduler  scheduler file name for the Halide backend; "disabled"
//                   skips the test, empty means no scheduler file lookup
// input             input image; its contents are overwritten with uniform
//                   random values in [0, 1) even though it is passed as const&
// outputLayer       layer requested in the warm-up forward pass
// framework         "caffe", "torch" or "tensorflow"; anything else raises
//                   StsNotImplemented
void processNet(std::string weights, std::string proto, std::string halide_scheduler,
                const Mat& input, const std::string& outputLayer,
                const std::string& framework)
{
    // Skip when the OpenCL target is requested but OpenCL is unavailable.
    // Without HAVE_OPENCL the inner block is unconditional.
    if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL)
    {
#if defined(HAVE_OPENCL)
        if (!cv::ocl::useOpenCL())
#endif
        {
            throw cvtest::SkipTestException("OpenCL is not available/disabled in OpenCV");
        }
    }
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
        throw SkipTestException("Skip OpenCL target of Inference Engine backend");

    randu(input, 0.0f, 1.0f);

    weights = findDataFile(weights, false);
    if (!proto.empty())
        proto = findDataFile(proto, false);
    if (backend == DNN_BACKEND_HALIDE)
    {
        if (halide_scheduler == "disabled")
            throw cvtest::SkipTestException("Halide test is disabled");
        // scheduler files are named dnn/halide_scheduler_[opencl_]<name>
        if (!halide_scheduler.empty())
            halide_scheduler = findDataFile(std::string("dnn/halide_scheduler_") + (target == DNN_TARGET_OPENCL ? "opencl_" : "") + halide_scheduler, true);
    }

    // Pick the importer by framework name.
    if (framework == "caffe")
    {
        net = cv::dnn::readNetFromCaffe(proto, weights);
    }
    else if (framework == "torch")
    {
        net = cv::dnn::readNetFromTorch(weights);
    }
    else if (framework == "tensorflow")
    {
        net = cv::dnn::readNetFromTensorflow(weights, proto);
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown framework " + framework);

    net.setInput(blobFromImage(input, 1.0, Size(), Scalar(), false));
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);
    if (backend == DNN_BACKEND_HALIDE)
    {
        net.setHalideScheduler(halide_scheduler);
    }

    // Report memory use and FLOPs for a 1 x 3 x rows x cols input.
    MatShape netInputShape = shape(1, 3, input.rows, input.cols);
    size_t weightsMemory = 0, blobsMemory = 0;
    net.getMemoryConsumption(netInputShape, weightsMemory, blobsMemory);
    int64 flops = net.getFLOPS(netInputShape);
    CV_Assert(flops > 0);

    net.forward(outputLayer); // warmup

    std::cout << "Memory consumption:" << std::endl;
    std::cout << " Weights(parameters): " << divUp(weightsMemory, 1u<<20) << " Mb" << std::endl;
    std::cout << " Blobs: " << divUp(blobsMemory, 1u<<20) << " Mb" << std::endl;
    std::cout << "Calculation complexity: " << flops * 1e-9 << " GFlops" << std::endl;

    // Timed section: only the forward pass is measured.
    PERF_SAMPLE_BEGIN()
        net.forward();
    PERF_SAMPLE_END()

    SANITY_CHECK_NOTHING();
}
/* Checks tensor_stack2h_3d(): stacking a (3 x 2 x 4) and a (3 x 2 x 6)
 * tensor must give a (3 x 2 x 10) tensor whose first 4 slices along the last
 * dimension equal `a` and whose remaining 6 equal `b`. */
void Test_stack2h(CuTest * tc){

    printf("Testing function: tensor_stack2h_3d\n");

    size_t nvals_a[3] = {3, 2, 4};
    size_t nvals_b[3] = {3, 2, 6};
    const size_t count_a = nvals_a[0]*nvals_a[1]*nvals_a[2];
    const size_t count_b = nvals_b[0]*nvals_b[1]*nvals_b[2];
    size_t n;

    /* random contents for both inputs (a is filled before b is created) */
    struct tensor * a;
    init_tensor(&a, 3, nvals_a);
    for (n = 0; n < count_a; n++){
        a->vals[n] = randu();
    }

    struct tensor * b;
    init_tensor(&b, 3, nvals_b);
    for (n = 0; n < count_b; n++){
        b->vals[n] = randu();
    }

    struct tensor * c = tensor_stack2h_3d(a,b);

    /* shape of the result */
    CuAssertIntEquals(tc, 3, c->dim);
    CuAssertIntEquals(tc, nvals_a[0], c->nvals[0]);
    CuAssertIntEquals(tc, nvals_a[1], c->nvals[1]);
    CuAssertIntEquals(tc, nvals_a[2] + nvals_b[2], c->nvals[2]);

    /* element-wise comparison against whichever input each slice came from */
    size_t at_c[3];
    size_t at_src[3];
    size_t row, col, slice;
    for (row = 0; row < nvals_a[0]; row++){
        for (col = 0; col < nvals_a[1]; col++){
            at_c[0] = row;
            at_c[1] = col;
            at_src[0] = row;
            at_src[1] = col;
            for (slice = 0; slice < c->nvals[2]; slice++){
                double expected;
                double actual;
                at_c[2] = slice;
                actual = tensor_elem(c,at_c);
                if (slice < nvals_a[2]){
                    at_src[2] = slice;
                    expected = tensor_elem(a,at_src);
                }
                else{
                    at_src[2] = slice - nvals_a[2];
                    expected = tensor_elem(b,at_src);
                }
                CuAssertDblEquals(tc, expected, actual, 1e-14);
            }
        }
    }

    free_tensor(&a);
    free_tensor(&b);
    free_tensor(&c);
}
int main(int argc, char *argv[]){ enum{ P_EXE, P_FRAC, P_NSTEP, P_TOT, }; if(argc!=P_TOT){ info2("Usage: \n\tenv MVM_CLIENT=hostname MVM_PORT=port MVM_SASTEP=sastep ./mvm_cpu fraction nstep\n"); _Exit(0); } int fraction=strtol(argv[P_FRAC], NULL, 10); int nstep=strtol(argv[P_NSTEP], NULL, 10); int nstep0=nstep>1?20:0;//warm up dmat *d_saind=dread("NFIRAOS_saind"); const int nsa=(d_saind->nx-1)/fraction; int *saind=mymalloc((1+nsa),int); for(int i=0; i<nsa+1; i++){ saind[i]=(int)d_saind->p[i]; } dfree(d_saind); const int totpix=saind[nsa]; const int nact=6981;//active subapertures. int ng=nsa*2; float FSMdelta=-0.2; smat *dm=snew(nact,1); smat *mvm=snew(nact, ng); smat *mtch=snew(totpix*2,1); smat *grad=snew(ng,1); smat *im0=snew(totpix,3); short *pix=mymalloc(totpix,short); short *pixbias=mymalloc(totpix,short); { rand_t rseed; seed_rand(&rseed, 1); srandu(mvm, 1e-7, &rseed); srandu(mtch, 1, &rseed); for(int i=0; i<totpix; i++){ pix[i]=(short)(randu(&rseed)*25565); pixbias[i]=(short)(randu(&rseed)*1000); } } smat *mvmt=strans(mvm); int sastep=200;//how many subapertures each time int nrep=1; if(getenv("MVM_NREP")){ nrep=strtol(getenv("MVM_NREP"), NULL, 10); } if(getenv("MVM_SECT")){ sastep=nsa/strtol(getenv("MVM_SECT"), NULL, 10); } if(getenv("MVM_TRANS")){ use_trans=strtol(getenv("MVM_TRANS"), NULL, 10); } if(getenv("MVM_SASTEP")){ sastep=strtol(getenv("MVM_SASTEP"), NULL, 10); } info2("use_trans=%d, nrep=%d, sastep=%d\n", use_trans, nrep, sastep); int sock=-1; char* MVM_CLIENT=getenv("MVM_CLIENT"); if(MVM_CLIENT){ short port=(short)strtol(getenv("MVM_PORT"), NULL, 10); sock=connect_port(MVM_CLIENT, port, 0 ,1); if(sock!=-1) { info2("Connected\n"); int cmd[7]; cmd[0]=nact; cmd[1]=nsa; cmd[2]=sastep; cmd[3]=totpix; cmd[4]=nstep; cmd[5]=nstep0; cmd[6]=2; if(stwriteintarr(sock, cmd, 7) || stwriteintarr(sock, saind, nsa+1) || stwrite(sock, pix, sizeof(short)*totpix)){ close(sock); sock=-1; warning("Failed: %s\n", strerror(errno)); } } } int ready=0; if(sock!=-1 && 
stwriteint(sock, ready)){ warning("error send ready signal: %s\n", strerror(errno)); close(sock); sock=-1; } smat *timing=snew(nstep, 1); TIC; float timtot=0, timmax=0, timmin=INFINITY; set_realtime(-1, -20); for(int jstep=-nstep0; jstep<nstep; jstep++){ int istep=jstep<0?0:jstep; tic; double theta=M_PI*0.5*istep+FSMdelta; float cd=cos(theta); float sd=cos(theta); szero(dm); for(int isa=0; isa<nsa; isa+=sastep){ int npixleft; int nsaleft; if(nsa<isa+sastep){//terminate npixleft=totpix-saind[isa]; nsaleft=nsa-isa; }else{ npixleft=saind[isa+sastep]-saind[isa]; nsaleft=sastep; } short *pcur=pix+saind[isa]; if(sock!=-1){ if(stread(sock, pcur, sizeof(short)*npixleft)){ warning("failed: %s\n", strerror(errno)); close(sock); sock=-1; _Exit(1); } if(isa==0) tic; } //Matched filter mtch_do(mtch->p, pix, pixbias, grad->p+isa*2, im0->p, im0->p+totpix, im0->p+totpix*2, saind+isa, nsaleft, cd, sd); //MVM for(int irep=0; irep<nrep; irep++){ if(use_trans){ mvmt_do(mvmt->p+isa*2, grad->p+isa*2,dm->p, nact, nsaleft*2, ng); }else{ mvm_do(mvm->p+isa*2*nact, grad->p+isa*2, dm->p, nact, nsaleft*2); } } }//for isa if(sock!=-1){ if(stwrite(sock, dm->p, sizeof(float)*nact)){ warning("error write dmres: %s\n", strerror(errno)); close(sock); sock=-1; _Exit(1); } if(streadint(sock, &ready)){//acknowledgement. warning("error read ack failed: %s\n", strerror(errno)); close(sock), sock=-1; _Exit(1); } timing->p[istep]=ready*1.e-6; }else{ timing->p[istep]=toc3;//do not tic. } if(jstep==istep){ timtot+=timing->p[istep]; if(timmax<timing->p[istep]){ timmax=timing->p[istep]; } if(timmin>timing->p[istep]){ timmin=timing->p[istep]; } } }//for istep float timmean=timtot/nstep; info2("Timing is mean %.3f, max %.3f min %.3f. 
BW is %.1f of 51.2GB/s\n", timmean*1e3, timmax*1e3, timmin*1e3, nrep*(nact*ng+nact+ng)*sizeof(float)/timmean/(1024*1024*1024)); writebin(timing, "cpu_timing_%s", HOST); if(nstep==1){ writearr("cpu_pix", 1, sizeof(short), M_INT16, NULL, pix, totpix, 1); writearr("cpu_pixbias", 1, sizeof(short), M_INT16, NULL, pixbias, totpix, 1); writebin(dm, "cpu_dm"); writebin(grad, "cpu_grad"); writebin(mvm, "cpu_mvm"); writebin(mtch, "cpu_mtch"); } }
/** * The implementation of the particle filter using OpenMP for a single image * @see http://openmp.org/wp/ * @note This function is designed to work with a single image. In addition, it references a provided MATLAB function which takes the video, the objxy matrix and the x and y arrays as arguments and returns the likelihoods * @warning Use the other particle filter function for videos; the accuracy of this function decreases significantly as it is called repeatedly while processing video * @param I The image to be run * @param IszX The x dimension of the image * @param IszY The y dimension of the image * @param seed The seed array used for random number generation * @param Nparticles The number of particles to be used * @param x_loc The array that will store the x locations of the desired object * @param y_loc The array that will store the y locations of the desired object * @param prevX The starting x position of the object * @param prevY The starting y position of the object */ void particleFilter1F(int * I, int IszX, int IszY, int * seed, int Nparticles, double * x_loc, double * y_loc, double prevX, double prevY){ int max_size = IszX*IszY; /*original particle centroid*/ double xe = prevX; double ye = prevY; /*expected object locations, compared to center*/ int radius = 5; int diameter = radius*2 -1; int * disk = (int *)mxCalloc(diameter*diameter, sizeof(int)); strelDisk(disk, radius); int countOnes = 0; int x, y; for(x = 0; x < diameter; x++){ for(y = 0; y < diameter; y++){ if(disk[x*diameter + y] == 1) countOnes++; } } double * objxy = (double *)mxCalloc(countOnes*2, sizeof(double)); getneighbors(disk, countOnes, objxy, radius); /*initial weights are all equal (1/Nparticles)*/ double * weights = (double *)mxCalloc(Nparticles, sizeof(double)); #pragma omp parallel for shared(weights, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = 1/((double)(Nparticles)); } /*initial likelihood to 0.0*/ double * likelihood = (double 
*)mxCalloc(Nparticles, sizeof(double)); double * arrayX = (double *)mxCalloc(Nparticles, sizeof(double)); double * arrayY = (double *)mxCalloc(Nparticles, sizeof(double)); double * xj = (double *)mxCalloc(Nparticles, sizeof(double)); double * yj = (double *)mxCalloc(Nparticles, sizeof(double)); double * CDF = (double *)mxCalloc(Nparticles, sizeof(double)); double * u = (double *)mxCalloc(Nparticles, sizeof(double)); mxArray * arguments[4]; mxArray * mxIK = mxCreateDoubleMatrix(IszX, IszY, mxREAL); mxArray * mxObj = mxCreateDoubleMatrix(countOnes, 2, mxREAL); mxArray * mxX = mxCreateDoubleMatrix(1, Nparticles, mxREAL); mxArray * mxY = mxCreateDoubleMatrix(1, Nparticles, mxREAL); double * Ik = (double *)mxCalloc(IszX*IszY, sizeof(double)); mxArray * result = mxCreateDoubleMatrix(1, Nparticles, mxREAL); #pragma omp parallel for shared(arrayX, arrayY, xe, ye) private(x) for(x = 0; x < Nparticles; x++){ arrayX[x] = xe; arrayY[x] = ye; } int k; int indX, indY; /*apply motion model //draws sample from motion model (random walk). 
The only prior information //is that the object moves 2x as fast as in the y direction*/ #pragma omp parallel for shared(arrayX, arrayY, Nparticles, seed) private(x) for(x = 0; x < Nparticles; x++){ arrayX[x] += 1 + 5*randn(seed, x); arrayY[x] += -2 + 2*randn(seed, x); } /*particle filter likelihood*/ //get the current image for(x = 0; x < IszX; x++) { for(y = 0; y < IszY; y++) { Ik[x*IszX + y] = (double)I[x*IszY + y]; } } //copy arguments memcpy(mxGetPr(mxIK), Ik, sizeof(double)*IszX*IszY); memcpy(mxGetPr(mxObj), objxy, sizeof(double)*countOnes); memcpy(mxGetPr(mxX), arrayX, sizeof(double)*Nparticles); memcpy(mxGetPr(mxY), arrayY, sizeof(double)*Nparticles); arguments[0] = mxIK; arguments[1] = mxObj; arguments[2] = mxX; arguments[3] = mxY; mexCallMATLAB(1, &result, 4, arguments, "GetSimpleLikelihood"); memcpy(likelihood, result, sizeof(double)*Nparticles); /* update & normalize weights // using equation (63) of Arulampalam Tutorial*/ #pragma omp parallel for shared(Nparticles, weights, likelihood) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = weights[x] * exp(likelihood[x]); } double sumWeights = 0; #pragma omp parallel for private(x) reduction(+:sumWeights) for(x = 0; x < Nparticles; x++){ sumWeights += weights[x]; } #pragma omp parallel for shared(sumWeights, weights) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = weights[x]/sumWeights; } xe = 0; ye = 0; /* estimate the object location by expected values*/ #pragma omp parallel for private(x) reduction(+:xe, ye) for(x = 0; x < Nparticles; x++){ xe += arrayX[x] * weights[x]; ye += arrayY[x] * weights[x]; } x_loc[0] = xe+.5; y_loc[0] = ye+.5; /*display(hold off for now) //pause(hold off for now) //resampling*/ CDF[0] = weights[0]; for(x = 1; x < Nparticles; x++){ CDF[x] = weights[x] + CDF[x-1]; } double u1 = (1/((double)(Nparticles)))*randu(seed, 0); #pragma omp parallel for shared(u, u1, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ u[x] = u1 + x/((double)(Nparticles)); } int j, i; 
#pragma omp parallel for shared(CDF, Nparticles, xj, yj, u, arrayX, arrayY) private(i, j) for(j = 0; j < Nparticles; j++){ i = findIndex(CDF, Nparticles, u[j]); /*i = findIndexBin(CDF, 0, Nparticles, u[j]);*/ if(i == -1) i = Nparticles-1; xj[j] = arrayX[i]; yj[j] = arrayY[i]; } /*reassign arrayX and arrayY*/ #pragma omp parallel for shared(weights, arrayX, arrayY, xj, yj, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = 1/((double)(Nparticles)); arrayX[x] = xj[x]; arrayY[x] = yj[x]; } mxFree(disk); mxFree(weights); mxFree(objxy); mxFree(likelihood); mxFree(arrayX); mxFree(arrayY); mxFree(CDF); mxFree(u); mxFree(xj); mxFree(yj); mxFree(Ik); }
void function_minimizer::shmc_mcmc_routine(int nmcmc,int iseed0,double dscale, int restart_flag) { if (nmcmc<=0) { cerr << endl << "Error: Negative iterations for MCMC not meaningful" << endl; ad_exit(1); } uostream * pofs_psave=NULL; if (mcmc2_flag==1) { initial_params::restore_start_phase(); } initial_params::set_inactive_random_effects(); initial_params::set_active_random_effects(); int nvar_re=initial_params::nvarcalc(); int nvar=initial_params::nvarcalc(); // get the number of active parameters if (mcmc2_flag==0) { initial_params::set_inactive_random_effects(); nvar=initial_params::nvarcalc(); // get the number of active parameters } initial_params::restore_start_phase(); independent_variables parsave(1,nvar_re); // dvector x(1,nvar); // initial_params::xinit(x); // dvector pen_vector(1,nvar); // { // initial_params::reset(dvar_vector(x),pen_vector); // } initial_params::mc_phase=1; int old_Hybrid_bounded_flag=-1; int on,nopt = 0; //// ------------------------------ Parse input options // Step size. If not specified, will be adapted. If specified must be >0 // and will not be adapted. double eps=0.1; double _eps=-1.0; int useDA=1; // whether to adapt step size if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-hyeps",nopt))>-1) { if (!nopt) // not specified means to adapt, using function below to find reasonable one { cerr << "Warning: No step size given after -hyeps, ignoring" << endl; useDA=1; } else // read in specified value and do not adapt { istringstream ist(ad_comm::argv[on+1]); ist >> _eps; if (_eps<=0) { cerr << "Error: step size (-hyeps argument) needs positive number"; ad_exit(1); } else { eps=_eps; useDA=0; } } } // Chain number -- for console display purposes only int chain=1; if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-chain",nopt))>-1) { if (nopt) { int iii=atoi(ad_comm::argv[on+1]); if (iii <1) { cerr << "Error: chain must be >= 1" << endl; ad_exit(1); } else { chain=iii; } } } // Number of leapfrog steps. Defaults to 10. 
int L=10; if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-hynstep",nopt))>-1) { if (nopt) { int _L=atoi(ad_comm::argv[on+1]); if (_L < 1 ) { cerr << "Error: hynstep argument must be integer > 0 " << endl; ad_exit(1); } else { L=_L; } } } // Number of warmup samples if using adaptation of step size. Defaults to // half of iterations. int nwarmup= (int)nmcmc/2; if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-nwarmup",nopt))>-1) { if (nopt) { int iii=atoi(ad_comm::argv[on+1]); if (iii <=0 || iii > nmcmc) { cerr << "Error: nwarmup must be 0 < nwarmup < nmcmc" << endl; ad_exit(1); } else { nwarmup=iii; } } } // Target acceptance rate for step size adaptation. Must be // 0<adapt_delta<1. Defaults to 0.8. double adapt_delta=0.8; // target acceptance rate specified by the user if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-adapt_delta",nopt))>-1) { if (nopt) { istringstream ist(ad_comm::argv[on+1]); double _adapt_delta; ist >> _adapt_delta; if (_adapt_delta < 0 || _adapt_delta > 1 ) { cerr << "Error: adapt_delta must be between 0 and 1" " using default of 0.8" << endl; } else { adapt_delta=_adapt_delta; } } } // Use diagnoal covariance (identity mass matrix) int diag_option=0; if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcdiag"))>-1) { diag_option=1; cout << " Setting covariance matrix to diagonal with entries " << dscale << endl; } // Restart chain from previous run? int mcrestart_flag=option_match(ad_comm::argc,ad_comm::argv,"-mcr"); if(mcrestart_flag > -1){ cerr << endl << "Error: -mcr option not implemented for HMC" << endl; ad_exit(1); } if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcec"))>-1) { cerr << endl << "Error: -mcec option not yet implemented with HMC" << endl; ad_exit(1); // use_empirical_flag=1; // read_empirical_covariance_matrix(nvar,S,ad_comm::adprogram_name); } // Prepare the mass matrix for use. Depends on many factors below. 
dmatrix S(1,nvar,1,nvar); dvector old_scale(1,nvar); int old_nvar; // Need to grab old_scale values still, since it is scaled below read_covariance_matrix(S,nvar,old_Hybrid_bounded_flag,old_scale); if (diag_option) // set covariance to be diagonal { S.initialize(); for (int i=1;i<=nvar;i++) { S(i,i)=dscale; } } // How much to thin, for now fixed at 1. if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcsave"))>-1) { cerr << "Option -mcsave does not currently work with HMC -- every iteration is saved" << endl; ad_exit(1); } //// ------------------------------ End of input processing //// Setup more inputs and outputs pofs_psave= new uostream((char*)(ad_comm::adprogram_name + adstring(".psv"))); if (!pofs_psave|| !(*pofs_psave)) { cerr << "Error trying to open file" << ad_comm::adprogram_name + adstring(".psv") << endl; ad_exit(1); } if (mcrestart_flag == -1 ) { (*pofs_psave) << nvar; } // need to rescale the hessian // get the current scale dvector x0(1,nvar); dvector current_scale(1,nvar); initial_params::xinit(x0); int mctmp=initial_params::mc_phase; initial_params::mc_phase=0; initial_params::stddev_scale(current_scale,x0); initial_params::mc_phase=mctmp; // cout << "old scale=" << old_scale << endl; // cout << "current scale=" << current_scale << endl; // cout << "S before=" << S << endl; // I think this is only needed if mcmc2 is used?? 
// for (int i=1;i<=nvar;i++) // { // for (int j=1;j<=nvar;j++) // { // S(i,j)*=old_scale(i)*old_scale(j); // } // } if(diag_option){ for (int i=1;i<=nvar;i++) { for (int j=1;j<=nvar;j++) { S(i,j)*=current_scale(i)*current_scale(j); } } } // cout << "S after=" << S << endl; gradient_structure::set_NO_DERIVATIVES(); if (mcmc2_flag==0) { initial_params::set_inactive_random_effects(); } // Setup random number generator, based on seed passed int iseed=2197; if (iseed0) iseed=iseed0; random_number_generator rng(iseed); gradient_structure::set_YES_DERIVATIVES(); initial_params::xinit(x0); // Dual averaging components dvector epsvec(1,nmcmc+1), epsbar(1,nmcmc+1), Hbar(1,nmcmc+1); epsvec.initialize(); epsbar.initialize(); Hbar.initialize(); double time_warmup=0; double time_total=0; std::clock_t start = clock(); time_t now = time(0); tm* localtm = localtime(&now); cout << endl << "Starting static HMC for model '" << ad_comm::adprogram_name << "' at " << asctime(localtm); // write sampler parameters ofstream adaptation("adaptation.csv", ios::trunc); adaptation << "accept_stat__,stepsize__,int_time__,energy__,lp__" << endl; // Declare and initialize the variables needed for the algorithm dmatrix chd = choleski_decomp(S); // cholesky decomp of mass matrix dvector y(1,nvar); // unbounded parameters y.initialize(); // transformed params independent_variables z(1,nvar); z=chd*y; dvector gr(1,nvar); // gradients in unbounded space // Need to run this to fill gr with current gradients and initial NLL. double nllbegin=get_hybrid_monte_carlo_value(nvar,z,gr); if(std::isnan(nllbegin)){ cerr << "Starting MCMC trajectory at NaN -- something is wrong!" 
<< endl; ad_exit(1); } // initial rotated gradient dvector gr2(1,nvar); gr2=gr*chd; dvector p(1,nvar); // momentum vector p.fill_randn(rng); // Copy initial value to parsave in case first trajectory rejected initial_params::copy_all_values(parsave,1.0); double iaccept=0.0; // The gradient and params at beginning of trajectory, in case rejected. dvector gr2begin(1,nvar); gr2begin=gr2; dvector ybegin(1,nvar); ybegin=y; double nll=nllbegin; // if(useDA){ // eps=find_reasonable_stepsize(nvar,y,p,chd); // epsvec(1)=eps; epsbar(1)=eps; Hbar(1)=0; // } double mu=log(10*eps); // Start of MCMC chain for (int is=1;is<=nmcmc;is++) { // Random momentum for next iteration, only affects Ham values p.fill_randn(rng); double H0=nll+0.5*norm2(p); // Generate trajectory int divergence=0; for (int i=1;i<=L;i++) { // leapfrog updates gr, p, y, and gr2 by reference nll=leapfrog(nvar, gr, chd, eps, p, y, gr2); // Break trajectory early if a divergence occurs to save computation if(std::isnan(nll)){ divergence=1; break; } } // end of trajectory // Test whether to accept the proposed state double Ham=nll+0.5*norm2(p); // Update Hamiltonian for proposed set double alpha=min(1.0, exp(H0-Ham)); // acceptance ratio double rr=randu(rng); // Runif(1) if (rr<alpha && !divergence){ // accept iaccept++; // Update for next iteration: params, Hamiltonian and gr2 ybegin=y; gr2begin=gr2; nllbegin=nll; initial_params::copy_all_values(parsave,1.0); } else { // Reject and don't update anything to reuse initials for next trajectory y=ybegin; gr2=gr2begin; nll=nllbegin; } // Save parameters to psv file, duplicated if rejected (*pofs_psave) << parsave; // Adaptation of step size (eps). 
if(useDA && is <= nwarmup){ eps=adapt_eps(is, eps, alpha, adapt_delta, mu, epsvec, epsbar, Hbar); } adaptation << alpha << "," << eps << "," << eps*L << "," << H0 << "," << -nll << endl; if(is ==nwarmup) time_warmup = ( std::clock()-start)/(double) CLOCKS_PER_SEC; print_mcmc_progress(is, nmcmc, nwarmup, chain); } // end of MCMC chain // This final ratio should closely match adapt_delta if(useDA){ cout << "Final acceptance ratio=" << iaccept/nmcmc << " and target is " << adapt_delta<<endl; cout << "Final step size=" << eps << "; after " << nwarmup << " warmup iterations"<< endl; } else { cout << "Final acceptance ratio=" << iaccept/nmcmc << endl; } time_total = ( std::clock() - start ) / (double) CLOCKS_PER_SEC; print_mcmc_timing(time_warmup, time_total); // I assume this closes the connection to the file?? if (pofs_psave) { delete pofs_psave; pofs_psave=NULL; } } // end of HMC function
int main(int argc, char *argv[]){ double time=0; double xOld=0, yOld=0, v_xOld =0, v_yOld=0; double xNew=0, yNew=0, v_xNew=0, v_yNew=0; double K = 0.2; //k/m = constatnt K in code double k_over_m = 0; int count = 0, i=0; FILE *outp; h1 hist; const double deltaT = 0.01; outp = fopen("resist.dat", "w"); h1init(&hist, 700, 160., 490., "Distribution of Range with Varying Values of k/m"); h1labels(&hist, "Range in m", "Number of Occurences"); double angle = (pi/180)*atof(argv[1]); double initVelocity = atof(argv[2]); fprintf(outp, "# t x y v_x v_y\n"); fprintf(outp, "#--------------------\n"); v_xOld = initVelocity*cos(angle); v_yOld = initVelocity*sin(angle); while(yNew>=0){ v_xNew = xVelocity(v_xOld, deltaT, K); v_yNew = yVelocity(v_yOld, deltaT, K); xNew = position(xOld, v_xOld, deltaT); yNew = position(yOld, v_yOld, deltaT); time = deltaT*count; fprintf(outp, "%lf %lf %lf %lf %lf\n", time, xNew, yNew, v_xNew, v_xOld); count++; v_xOld = v_xNew; v_yOld = v_yNew; xOld = xNew; yOld = yNew; } for(i=0; i<10000; i++){ k_over_m = randu(0.1, 0.4); v_xOld = initVelocity*cos(angle); v_yOld = initVelocity*sin(angle); while(yNew>=0){ v_xNew = xVelocity(v_xOld, deltaT, k_over_m); v_yNew = yVelocity(v_yOld, deltaT, k_over_m); xNew = position(xOld, v_xOld, deltaT); yNew = position(yOld, v_yOld, deltaT); v_xOld = v_xNew; v_yOld = v_yNew; xOld = xNew; yOld = yNew; } h1fill(&hist, xOld, 1.0); xNew =0; xOld=0; yNew = 1e-8; yOld = 1e-8; } h1plot(&hist, ""); h1plot(&hist, "resist.pdf"); fclose(outp); return 0; }
/**
   Generate stars for nsky star fields from star catalog.

   Two modes, selected by the sign of catscl:
   - catscl > 0: each field draws its star count from randp() with mean
     navg0*catscl and fills magnitudes from random catalogue rows.
   - catscl <= 0: fields are binned by the number of stars with J <= 19;
     round(-catscl) is the number of bins and nsky must be divisible by it.

   Star coordinates are then drawn uniformly over a disk of diameter fov.

   \return a cell array of nskyx1, each cell contains (2+nwvl) x nstar array of
   location, and magnitudes. Cells for fields with zero stars are left empty.
*/
dcell *genstars(long nsky,         /**<number of star fields wanted*/
                double lat,        /**<galactic latitude.*/
                double lon,        /**<galactic longitude*/
                double catscl,     /**<Scale the catalog star count.*/
                double fov,        /**<diameter of the patrol field of view in arcsec.*/
                int nwvl,          /**<number of wavelength*/
                double *wvls,      /**<wavelength vector*/
                rand_t *rstat      /**<random stream*/
){
    char fn[80];
    double cat_fov=0;/*catalogue fov */
    int Jind=-1;
    if(nwvl==2 && fabs(wvls[0]-1.25e-6)<1.e-10 && fabs(wvls[1]-1.65e-6)<1.e-10){
        snprintf(fn,80,"besancon/JH_5sqdeg_lat%g_lon%g_besancon.bin", lat, lon);
        cat_fov=5.0;/*5 arc-degree squared. */
        Jind=0;
    }else if(nwvl==3 && fabs(wvls[0]-1.25e-6)<1.e-10 && fabs(wvls[1]-1.65e-6)<1.e-10 && fabs(wvls[2]-2.2e-6)<1.e-10){
        snprintf(fn,80,"besancon/JHK_5sqdeg_lat%g_lon%g_besancon.bin", lat, lon);
        cat_fov=5.0;/*5 arc-degree squared. */
        Jind=0;
    }else{
        Jind=-1;
        error("We only have stars for J+H and J+H+K band. Please fill this part\n");
    }
    info("Loading star catalogue from %s\n",fn);
    dmat *catalog=dread("%s",fn);
    if(catalog->ny!=nwvl){
        error("Catalogue and wanted doesn't match\n");
    }
    long ntot=catalog->nx;
    long nsky0=0;
    dcell *res=dcellnew(nsky,1);
    dmat* pcatalog=catalog;
    double fov22=pow(fov/2/206265,2);
    double navg0=M_PI*pow(fov/2./3600.,2)/cat_fov * ntot;
    if(catscl>0){//regular sky coverage sim
        double navg=navg0*catscl;
        info("Average number of stars: %g, after scaled by %g\n", navg, catscl);
        /*generate nstar && magnitude according to distribution.*/
        for(long isky=0; isky<nsky; isky++){
            long nstar=randp(rstat, navg);
            if(nstar==0) continue;
            res->p[isky]=dnew(nwvl+2, nstar);
            dmat* pres=res->p[isky];
            for(long istar=0; istar<nstar; istar++){
                /*Randomly draw a star index in the catalog. Scale by ntot-1
                  (as the other branch does): round(ntot*u) can reach ntot,
                  one row past the end of the catalogue.*/
                long ind=round((ntot-1)*randu(rstat));
                for(int iwvl=0; iwvl<nwvl; iwvl++){
                    P(pres,2+iwvl,istar)=P(pcatalog,ind,iwvl);
                }
            }
        }
    }else{
        /*instead of doing draws on nb of stars, we scan all possibilities and
          assemble the curve in postprocessing. catscl is negative, with
          absolute value indicating the max number of J<=19 stars to consider*/
        long nmax=round(-catscl);
        if(nmax<1){
            /*Guards the division below and the zero-length counti array.*/
            error("catscl=%g is invalid: max # of stars must be at least 1\n", catscl);
        }
        nsky0=nsky/nmax;
        if(nsky0*nmax!=nsky){
            error("nsky=%ld, has to be dividable by max # of stars=%ld", nsky, nmax);
        }
        int counti[nmax];//record count in each bin
        memset(counti, 0, sizeof(int)*nmax);
        int count=0;
        while(count<nsky){
            long nstar=randp(rstat, navg0);
            if(nstar==0) continue;
            dmat *tmp=dnew(nwvl+2, nstar);
            dmat* pres=tmp;
            int J19c=0;
            for(long istar=0; istar<nstar; istar++){
                long ind=round((ntot-1)*randu(rstat));
                for(int iwvl=0; iwvl<nwvl; iwvl++){
                    P(pres,2+iwvl,istar)=P(pcatalog,ind,iwvl);
                }
                if(P(pres,2+Jind,istar)<=19){
                    J19c++;
                }
            }
            //J19c=0 is ok. Do not skip.
            if(J19c<nmax && counti[J19c]<nsky0){
                /*Bin J19c owns the slots [J19c*nsky0, (J19c+1)*nsky0).*/
                int isky=counti[J19c]+(J19c)*nsky0;
                res->p[isky]=dref(tmp);
                count++;
                counti[J19c]++;
            }
            dfree(tmp);
        }
    }
    /*Fill in the coordinate*/
    for(long isky=0; isky<nsky; isky++){
        if(!res->p[isky]) continue;
        long nstar=res->p[isky]->ny;
        dmat* pres=res->p[isky];
        for(long istar=0; istar<nstar; istar++){
            /*randomly draw the star location. sqrt() of the uniform draw
              makes the points uniform in area over the disk.*/
            double r=sqrt(fov22*randu(rstat));
            double th=2*M_PI*randu(rstat);
            P(pres,0,istar)=r*cos(th);
            P(pres,1,istar)=r*sin(th);
        }
    }
    dfree(catalog);
    return res;
}
int main(int argc, char *argv[]) { struct RunArgs rargs; proc_inputs(argc, argv,&rargs); size_t iii,jjj; size_t nround = 12; size_t napprox = 13; double roundtol[12] = {5e-3,1e-3,5e-4,1e-4,5e-5, 1e-5,5e-6,1e-6,5e-7,1e-7, 5e-8,1e-8}; //double roundtol[1] = {1e-7}; //double roundtol[2] = {1e-9,1e-10}; //double roundtol[2] = {1e-4,1e-6}; //double approxtol[1] = {1e0}; //double roundtol[3] = {1e-2,1e-5,1e-8}; //double approxtol[3] = {1e-1,1e-3,1e-5}; double approxtol[13] = {1e-1,5e-2,1e-2,5e-3,1e-3,5e-4, 1e-4,5e-5,1e-5,5e-6,1e-6,5e-7, 1e-7}; //double approxtol[1] = {1e-6}; //double approxtol[3] = {1e-1,1e-2,1e-3}; double lb=0.05; double ub=0.95; for (iii = 0; iii < nround; iii++){ for (jjj = 0; jjj < napprox; jjj++){ printf("done prcessing\n"); size_t dim = rargs.dim; struct FunctionMonitor * fm = NULL; fm = function_monitor_initnd(solveBlackBoxUni,&rargs,dim,1000*dim); struct Fwrap * fw = fwrap_create(dim,"general"); fwrap_set_f(fw,function_monitor_eval,fm); struct OpeOpts * opts = ope_opts_alloc(LEGENDRE); ope_opts_set_start(opts,3); ope_opts_set_coeffs_check(opts,1); ope_opts_set_tol(opts,approxtol[jjj]); ope_opts_set_maxnum(opts,25); ope_opts_set_lb(opts,lb); ope_opts_set_ub(opts,ub); struct OneApproxOpts * qmopts = one_approx_opts_alloc(POLYNOMIAL,opts); struct C3Approx * c3a = c3approx_create(CROSS,dim); int verbose = 0; size_t init_rank = 5; double ** start = malloc_dd(dim); for (size_t ii = 0; ii < dim; ii++){ c3approx_set_approx_opts_dim(c3a,ii,qmopts); start[ii] = linspace(lb,ub,init_rank); } c3approx_init_cross(c3a,init_rank,verbose,start); c3approx_set_verbose(c3a,2); c3approx_set_adapt_kickrank(c3a,5); c3approx_set_adapt_maxrank_all(c3a,init_rank + 3*5); c3approx_set_cross_tol(c3a,roundtol[iii]); c3approx_set_round_tol(c3a,roundtol[iii]); // cross approximation with rounding struct FunctionTrain * ft = c3approx_do_cross(c3a,fw,1); char ftsave[255] = "ftrain.ft"; int success = function_train_save(ft,ftsave); assert (success == 1); //ft = 
function_train_load(ftsave); //double * pt = darray_val(rargs.dim,0.5); // this is the mean //double valft = function_train_eval(ft,pt); //double valtrue = solveBlackBoxUni(pt,&rargs); //free(pt); pt = NULL; //printf("valtrue=%G, valft=%G\n",valtrue,valft); if (rargs.dim == 2){ size_t N = 20; double * x = linspace(0.05,0.95,N); size_t kk,ll; double pt[2]; double val[400*4]; size_t index = 0; for (kk = 0; kk < N; kk++){ pt[0] = x[kk]; for (ll = 0; ll < N; ll++){ pt[1] = x[ll]; val[index] = pt[0]; val[index+400] = pt[1]; val[800+kk*N+ll] = solveBlackBoxUni(pt,&rargs);; val[1200+kk*N+ll] = function_train_eval(ft,pt);; index++; } } darray_save(N*N,4,val,"2dcontour.dat",1); free(x); x = NULL; } else{ size_t N = 10000; size_t kk,ll; double errnum = 0.0; double errden = 0.0; double * x = calloc_double(rargs.dim); for (kk = 0; kk < N; kk++){ for (ll = 0; ll < rargs.dim; ll++){ x[ll] = randu() * (0.95-0.05) + 0.05; } double diff = solveBlackBoxUni(x,&rargs) - function_train_eval(ft,x); //printf("\n"); //dprint(rargs.dim,x); //printf("diff = %G\n",pow(diff,2) / pow(solveBlackBoxUni(x,&rargs),2)); errden += pow(solveBlackBoxUni(x,&rargs),2); errnum += pow(diff,2); } double err = errnum/errden; printf("err = %G\n",err); size_t nvals = nstored_hashtable_cp(fm->evals); printf("number of evaluations = %zu \n", nvals); printf("ranks are "); iprint_sz(dim+1,ft->ranks); double * data = calloc_double((rargs.dim+1)*2); for (kk = 0; kk < rargs.dim+1; kk++){ data[kk] = (double) ft->ranks[kk]; } data[rargs.dim+1] = sqrt(err); data[rargs.dim+2] = (double) nvals; char fff[256]; sprintf(fff,"rtol=%G_apptol=%G",roundtol[iii],approxtol[jjj]); darray_save(rargs.dim+1,2,data,fff,1); } function_monitor_free(fm); fm = NULL; function_train_free(ft); ft = NULL; c3approx_destroy(c3a); fwrap_destroy(fw); one_approx_opts_free_deep(&qmopts); } } free(rargs.sqrt_cov); rargs.sqrt_cov = NULL; return 0; }
void proc_inputs(int argc, char * argv[], struct RunArgs * rargs) { size_t nfield = 0; int fin_exists = 0; int fout_exists = 0; char filename[255]; char filein[255]; char covfile[255] = "cov.dat"; double * sqrt_cov = NULL; size_t dim = 0; int ii; printf("Processing program inputs: \n"); for (ii = 1; ii < argc; ii++){ char * name = bite_string(argv[ii],'='); if (strcmp(name,"fileout") == 0){ fout_exists = 1; printf("........filename is %s\n",argv[ii]); strcpy(filename,argv[ii]); } else if (strcmp(name,"filein") == 0){ fin_exists = 1; printf("........Random samples loaded from %s\n",argv[ii]); strcpy(filein,argv[ii]); } else if (strcmp(name,"size") == 0){ printf("........Size of field is %s\n",argv[ii]); nfield = (size_t) atol(argv[ii]); } else if (strcmp(name,"sqrtcov") == 0){ printf("........Reading sqrt of covariance from %s\n", argv[ii]); strcpy(covfile,argv[ii]); sqrt_cov = darray_load(covfile,0); } else if (strcmp(name,"dim") == 0){ printf("........Number of dimensions to use %s\n", argv[ii]); dim = (size_t) atol(argv[ii]); } free(name); name = NULL; } if (nfield == 0){ printf("Correct func call = ./elliptic1d dim=<number of dims> size=<number of pts> filein=<filein> fileout=<fileout> sqrtcov=<filename>\n"); exit(1); } size_t output = (size_t) floor( 0.7 * nfield) ; double * xsol = linspace(0.0,1.0,nfield); if (sqrt_cov == NULL){ printf("........Building sqrt\n"); double * eigs = calloc_double(nfield); sqrt_cov = buildCovSqrt(nfield, xsol,eigs, sigsize, corrlength); printf("........Saving sqrt\n"); darray_save(nfield, nfield, sqrt_cov, covfile, 0); double sumtot = 0.0; size_t zz; for (zz = 0; zz < (size_t) nfield; zz++){ sumtot += pow(eigs[nfield-1-zz],2); } double sum = 0.0; for (zz = 0.0; zz < (size_t) nfield; zz++){ sum += pow(eigs[nfield-1-zz],2); if (sum > 0.99 * sumtot){ break; } } size_t dimmodel = zz; double * saveeigs = calloc_double(3*nfield); for (zz = 0; zz < (size_t)nfield; zz++){ saveeigs[zz] = (double) zz; saveeigs[zz+nfield] = eigs[nfield-1-zz]; 
saveeigs[zz+2*nfield] = eigs[nfield-1-dimmodel]; } printf("dimension = %zu \n",dimmodel); darray_save(nfield,3,saveeigs,"eigenvalues.txt",1); free(eigs); free(saveeigs); } double * perm = NULL; double * sol = NULL; if (fin_exists == 0){ perm = darray_val(nfield,1.0); double * permout = darray_val(nfield,1.0); sol = fullRun(nfield, nfield, perm, permout,NULL); free(permout); } else{ perm = calloc_double(dim); read_data(filein,dim,1,perm); dprint(dim, perm); double * permout = darray_val(nfield,1.0); sol = fullRun(nfield, dim, perm, permout,sqrt_cov); free(permout); } if (fout_exists == 1){ // simulate size_t kk,ll; size_t nsym = 20; double * pt = calloc_double(nfield); double * fields = calloc_double(nfield*(nsym+1)); double * x = linspace(0,1,nfield); for (kk = 0; kk < nfield; kk++){ fields[kk] = x[kk]; } free(x); double * sols = calloc_double(nsym); for (kk = 0; kk < nsym; kk++){ for (ll = 0; ll < nfield; ll++){ pt[ll] = icdf_normal(0.0,1.0,randu()); } //dprint(nfield, pt); sol = fullRun(nfield, nfield, pt,fields+(kk+1)*nfield, sqrt_cov); sols[kk] = sol[output]; } darray_save(nfield,15,fields,filename,1); darray_save(nsym,1,sols,"solshisttrue.txt",1); free(sols); free(pt); } rargs->nfield = nfield; rargs->dim = dim; rargs->sqrt_cov = calloc_double(nfield * nfield); rargs->output = output; printf("Evaluating at point %G\n",xsol[output]); memmove(rargs->sqrt_cov, sqrt_cov, nfield*nfield * sizeof(double)); /* size_t jj; for (jj = 0; jj < nfield; jj++){ perm[jj] = log(perm[jj]-offset); } double * p = dconcat_cols(nfield,1,1,xsol,sol); double * p2 = dconcat_cols(nfield,2,1,p,perm); int success = darray_save(nfield,3,p2,filename,1); assert (success == 1); free(p); p = NULL; free(p2); p2 = NULL; */ free(sqrt_cov); sqrt_cov = NULL; free(xsol); xsol = NULL; free(perm); perm = NULL; free(sol); sol = NULL; }
void GRASTA_training(const mat &D, mat &Uhat, struct STATUS &status, const struct GRASTA_OPT &options, mat &W, mat &Outlier ) { int rows, cols; rows = D.n_rows; cols = D.n_cols; if ( !status.init ){ status.init = 1; status.curr_iter = 0; status.last_mu = options.MIN_MU; status.level = 0; status.step_scale = 0.0; status.last_w = zeros(options.RANK, 1); status.last_gamma = zeros(options.DIM, 1); if (!Uhat.is_finite()){ Uhat = orth(randn(options.DIM, options.RANK)); } } Outlier = zeros<mat>(rows, cols); W = zeros<mat>(options.RANK, cols); mat U_Omega, y_Omega, y_t, s, w, dual, gt; uvec idx, col_order; ADMM_OPT admm_opt; double SCALE, t, rel; bool bRet; admm_opt.lambda = options.lambda; //if (!options.QUIET) int maxIter = options.maxCycles * cols; // 20 passes through the data set status.hist_rel.reserve( maxIter); // Order of examples to process arma_rng::set_seed_random(); col_order = conv_to<uvec>::from(floor(cols*randu(maxIter, 1))); for (int k=0; k<maxIter; k++){ int iCol = col_order(k); //PRINTF("%d / %d\n",iCol, cols); y_t = D.col(iCol); idx = find_finite(y_t); y_Omega = y_t.elem(idx); SCALE = norm(y_Omega); y_Omega = y_Omega/SCALE; // the following for-loop is for U_Omega = U(idx,:) in matlab U_Omega = zeros<mat>(idx.n_elem, Uhat.n_cols); for (int i=0; i<idx.n_elem; i++) U_Omega.row(i) = Uhat.row(idx(i)); // solve L-1 regression admm_opt.MAX_ITER = options.MAX_ITER; if (options.NORM_TYPE == L1_NORM) bRet = ADMM_L1(U_Omega, y_Omega, admm_opt, s, w, dual); else if (options.NORM_TYPE == L21_NORM){ w = solve(U_Omega, y_Omega); s = y_Omega - U_Omega*w; dual = -s/norm(s, 2); } else { PRINTF("Error: norm type does not support!\n"); return; } vec tmp_col = zeros<vec>(rows); tmp_col.elem(idx) = SCALE * s; Outlier.col(iCol) = tmp_col; W.col(iCol) = SCALE * w; // take gradient step over Grassmannian t = GRASTA_update(Uhat, status, w, dual, idx, options); if (!options.QUIET){ rel = subspace(options.GT_mat, Uhat); status.hist_rel.push_back(rel); if (rel < options.TOL){ 
PRINTF("%d/%d: subspace angle %.2e\n",k,maxIter, rel); break; } } if (k % cols ==0){ if (!options.QUIET) PRINTF("Pass %d/%d: step-size %.2e, level %d, last mu %.2f\n", k % cols, options.maxCycles, t, status.level, status.last_mu); } if (status.level >= options.convergeLevel){ // Must cycling around the dataset twice to get the correct regression weight W if (!options.QUIET) PRINTF("Converge at level %d, last mu %.2f\n",status.level,status.last_mu); break; } } }
// [[Rcpp::export]] Rcpp::List if2_s(Rcpp::NumericVector data, int T, int N, int NP, double coolrate, Rcpp::NumericVector params) { // params will be in the order R0, r, sigma, eta, berr, Sinit, Iinit, Rinit Rcpp::NumericMatrix statemeans(T, 3); Rcpp::NumericMatrix statedata(NP, 4); srand(time(NULL)); // Seed PRNG with system time double w[NP]; // particle weights Particle particles[NP]; // particle estimates for current step Particle particles_old[NP]; // intermediate particle states for resampling printf("Initializing particle states\n"); // Initialize particle parameter states (seeding) // Note that they will all be the same for this code as we are only filtering over the states for (int n = 0; n < NP; n++) { particles[n].R0 = params[0]; particles[n].r = params[1]; particles[n].sigma = params[2]; particles[n].eta = params[3]; particles[n].berr = params[4]; particles[n].Sinit = params[5]; particles[n].Iinit = params[6]; particles[n].Rinit = params[7]; } // START PASS THROUGH DATA printf("Starting filter\n"); printf("---------------\n"); // seed initial states for (int n = 0; n < NP; n++) { double Iinitcan; double i_infec = particles[n].Iinit; do { Iinitcan = i_infec + i_infec*randn(); } while (Iinitcan < 0 || N < Iinitcan); particles[n].S = N - Iinitcan; particles[n].I = Iinitcan; particles[n].R = 0.0; particles[n].B = (double) particles[n].R0 * particles[n].r / N; } State sMeans; getStateMeans(&sMeans, particles, NP); statemeans(0,0) = sMeans.S; statemeans(0,1) = sMeans.I; statemeans(0,2) = sMeans.R; // start run through data for (int t = 1; t < T; t++) { // generate individual predictions and weight for (int n = 0; n < NP; n++) { exp_euler_SIR(1.0/7.0, 0.0, 1.0, N, &particles[n]); double merr_par = particles[n].sigma; double y_diff = data[t] - particles[n].I; w[n] = 1.0/(merr_par*sqrt(2.0*M_PI)) * exp( - y_diff*y_diff / (2.0*merr_par*merr_par) ); } // cumulative sum for (int n = 1; n < NP; n++) { w[n] += w[n-1]; } // save particle states to resample from for 
(int n = 0; n < NP; n++){ copyParticle(&particles_old[n], &particles[n]); } // resampling for (int n = 0; n < NP; n++) { double w_r = randu() * w[NP-1]; int i = 0; while (w_r > w[i]) { i++; } // i is now the index to copy state from copyParticle(&particles[n], &particles_old[i]); } State sMeans; getStateMeans(&sMeans, particles, NP); statemeans(t,0) = sMeans.S; statemeans(t,1) = sMeans.I; statemeans(t,2) = sMeans.R; } // Get particle results to pass back to R for (int n = 0; n < NP; n++) { statedata(n, 0) = particles[n].S; statedata(n, 1) = particles[n].I; statedata(n, 2) = particles[n].R; statedata(n, 3) = particles[n].B; } return Rcpp::List::create( Rcpp::Named("statemeans") = statemeans, Rcpp::Named("statedata") = statedata ); }
/**
 * The implementation of the particle filter using OpenMP for many frames
 * @see http://openmp.org/wp/
 * @note This function is designed to work with a video of several frames. In addition, it references a provided MATLAB function which takes the video, the objxy matrix and the x and y arrays as arguments and returns the likelihoods
 * @note Per frame the filter: moves the particles with the random-walk motion
 * model, computes a likelihood for each particle from the pixels under a disk
 * template, reweights and normalizes, prints the weighted mean position, and
 * resamples. Timing for each phase is printed to stdout.
 * @param I The video to be run
 * @param IszX The x dimension of the video
 * @param IszY The y dimension of the video
 * @param Nfr The number of frames
 * @param seed The seed array used for random number generation
 * @param Nparticles The number of particles to be used
 */
void particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles){
    int max_size = IszX*IszY*Nfr;
    long long start = get_time();
    //original particle centroid
    double xe = roundDouble(IszY/2.0);
    double ye = roundDouble(IszX/2.0);

    //expected object locations, compared to center
    int radius = 5;
    int diameter = radius*2 - 1;
    int * disk = (int *)malloc(diameter*diameter*sizeof(int));
    strelDisk(disk, radius);
    int countOnes = 0;
    int x, y;
    for(x = 0; x < diameter; x++){
        for(y = 0; y < diameter; y++){
            if(disk[x*diameter + y] == 1)
                countOnes++;
        }
    }
    double * objxy = (double *)malloc(countOnes*2*sizeof(double));
    getneighbors(disk, countOnes, objxy, radius);

    long long get_neighbors = get_time();
    printf("TIME TO GET NEIGHBORS TOOK: %f\n", elapsed_time(start, get_neighbors));

    //initial weights are all equal (1/Nparticles)
    double * weights = (double *)malloc(sizeof(double)*Nparticles);
    #pragma omp parallel for shared(weights, Nparticles) private(x)
    for(x = 0; x < Nparticles; x++){
        weights[x] = 1/((double)(Nparticles));
    }
    long long get_weights = get_time();
    printf("TIME TO GET WEIGHTSTOOK: %f\n", elapsed_time(get_neighbors, get_weights));

    //initial likelihood to 0.0
    double * likelihood = (double *)malloc(sizeof(double)*Nparticles);
    double * arrayX = (double *)malloc(sizeof(double)*Nparticles);
    double * arrayY = (double *)malloc(sizeof(double)*Nparticles);
    double * xj = (double *)malloc(sizeof(double)*Nparticles);
    double * yj = (double *)malloc(sizeof(double)*Nparticles);
    double * CDF = (double *)malloc(sizeof(double)*Nparticles);
    double * u = (double *)malloc(sizeof(double)*Nparticles);
    int * ind = (int*)malloc(sizeof(int)*countOnes*Nparticles);
    #pragma omp parallel for shared(arrayX, arrayY, xe, ye) private(x)
    for(x = 0; x < Nparticles; x++){
        arrayX[x] = xe;
        arrayY[x] = ye;
    }
    int k;

    printf("TIME TO SET ARRAYS TOOK: %f\n", elapsed_time(get_weights, get_time()));
    int indX, indY;
    for(k = 1; k < Nfr; k++){
        long long set_arrays = get_time();
        //apply motion model
        //draws sample from motion model (random walk). The only prior information
        //is that the object moves 2x as fast as in the y direction
        #pragma omp parallel for shared(arrayX, arrayY, Nparticles, seed) private(x)
        for(x = 0; x < Nparticles; x++){
            arrayX[x] += 1 + 5*randn(seed, x);
            arrayY[x] += -2 + 2*randn(seed, x);
        }
        long long error = get_time();
        printf("TIME TO SET ERROR TOOK: %f\n", elapsed_time(set_arrays, error));

        //particle filter likelihood
        #pragma omp parallel for shared(likelihood, I, arrayX, arrayY, objxy, ind) private(x, y, indX, indY)
        for(x = 0; x < Nparticles; x++){
            //compute the likelihood: remember our assumption is that you know
            // foreground and the background image intensity distribution.
            // Notice that we consider here a likelihood ratio, instead of
            // p(z|x). It is possible in this case. why? a hometask for you.
            //calc ind
            for(y = 0; y < countOnes; y++){
                indX = roundDouble(arrayX[x]) + objxy[y*2 + 1];
                indY = roundDouble(arrayY[x]) + objxy[y*2];
                ind[x*countOnes + y] = fabs(indX*IszY*Nfr + indY*Nfr + k);
                //indices past the end of the video fall back to element 0
                if(ind[x*countOnes + y] >= max_size)
                    ind[x*countOnes + y] = 0;
            }
            likelihood[x] = 0;
            for(y = 0; y < countOnes; y++)
                likelihood[x] += (pow((I[ind[x*countOnes + y]] - 100),2) - pow((I[ind[x*countOnes + y]]-228),2))/50.0;
            likelihood[x] = likelihood[x]/((double) countOnes);
        }
        long long likelihood_time = get_time();
        printf("TIME TO GET LIKELIHOODS TOOK: %f\n", elapsed_time(error, likelihood_time));

        // update & normalize weights
        // using equation (63) of Arulampalam Tutorial
        #pragma omp parallel for shared(Nparticles, weights, likelihood) private(x)
        for(x = 0; x < Nparticles; x++){
            weights[x] = weights[x] * exp(likelihood[x]);
        }
        long long exponential = get_time();
        printf("TIME TO GET EXP TOOK: %f\n", elapsed_time(likelihood_time, exponential));

        double sumWeights = 0;
        #pragma omp parallel for private(x) reduction(+:sumWeights)
        for(x = 0; x < Nparticles; x++){
            sumWeights += weights[x];
        }
        long long sum_time = get_time();
        printf("TIME TO SUM WEIGHTS TOOK: %f\n", elapsed_time(exponential, sum_time));

        #pragma omp parallel for shared(sumWeights, weights) private(x)
        for(x = 0; x < Nparticles; x++){
            weights[x] = weights[x]/sumWeights;
        }
        long long normalize = get_time();
        printf("TIME TO NORMALIZE WEIGHTS TOOK: %f\n", elapsed_time(sum_time, normalize));

        xe = 0;
        ye = 0;
        // estimate the object location by expected values
        #pragma omp parallel for private(x) reduction(+:xe, ye)
        for(x = 0; x < Nparticles; x++){
            xe += arrayX[x] * weights[x];
            ye += arrayY[x] * weights[x];
        }
        long long move_time = get_time();
        printf("TIME TO MOVE OBJECT TOOK: %f\n", elapsed_time(normalize, move_time));
        printf("XE: %lf\n", xe);
        printf("YE: %lf\n", ye);
        double distance = sqrt( pow((double)(xe-(int)roundDouble(IszY/2.0)),2) + pow((double)(ye-(int)roundDouble(IszX/2.0)),2) );
        printf("%lf\n", distance);
        //display(hold off for now)

        //pause(hold off for now)

        //resampling
        CDF[0] = weights[0];
        for(x = 1; x < Nparticles; x++){
            CDF[x] = weights[x] + CDF[x-1];
        }
        long long cum_sum = get_time();
        printf("TIME TO CALC CUM SUM TOOK: %f\n", elapsed_time(move_time, cum_sum));

        double u1 = (1/((double)(Nparticles)))*randu(seed, 0);
        #pragma omp parallel for shared(u, u1, Nparticles) private(x)
        for(x = 0; x < Nparticles; x++){
            u[x] = u1 + x/((double)(Nparticles));
        }
        long long u_time = get_time();
        printf("TIME TO CALC U TOOK: %f\n", elapsed_time(cum_sum, u_time));

        int j, i;
        #pragma omp parallel for shared(CDF, Nparticles, xj, yj, u, arrayX, arrayY) private(i, j)
        for(j = 0; j < Nparticles; j++){
            i = findIndex(CDF, Nparticles, u[j]);
            if(i == -1)
                i = Nparticles-1;
            xj[j] = arrayX[i];
            yj[j] = arrayY[i];
        }
        long long xyj_time = get_time();
        printf("TIME TO CALC NEW ARRAY X AND Y TOOK: %f\n", elapsed_time(u_time, xyj_time));

        //reassign arrayX and arrayY
        //Swap the buffers instead of aliasing them. With `arrayX = xj` both
        //names pointed at one buffer from the second frame on, so the
        //resampling loop above read and wrote the same memory in parallel,
        //and the original arrayX/arrayY allocations were leaked.
        double * swapTmp = arrayX;
        arrayX = xj;
        xj = swapTmp;
        swapTmp = arrayY;
        arrayY = yj;
        yj = swapTmp;

        //#pragma omp parallel for shared(weights, Nparticles) private(x)
        for(x = 0; x < Nparticles; x++){
            weights[x] = 1/((double)(Nparticles));
        }
        long long reset = get_time();
        printf("TIME TO RESET WEIGHTS TOOK: %f\n", elapsed_time(xyj_time, reset));
    }
    free(disk);
    free(objxy);
    free(weights);
    free(likelihood);
    free(arrayX);
    free(arrayY);
    free(xj);
    free(yj);
    free(CDF);
    free(u);
    free(ind);
}
void SMaxReg::train(const Mat &data, const Mat &label, int batchSize) { assert(data.type() == CV_32FC1 && label.type() == CV_32FC1); assert(batchSize >= 1); srand((uchar)time(NULL)); Mat labelMatrix; realLabel2Matrix(label, labelMatrix); // initialize weight and bias int numData = data.rows; int dimData = data.cols; if (weight.empty() == true) { weight = Mat::zeros(dimData, numClasses, data.type()); randu(weight, 0, 1); weight *= 0.001; } else { if (weight.rows != dimData || weight.cols != numClasses) { printf("Initial weight dimension wrong: weight.rows == dimData && weight.cols == numClasses!\n"); abort(); } } Mat velocity = Mat::zeros(weight.size(), weight.type()); vector<int> index(numData, 0); for (int i = 0; i < numData; ++i) { index[i] = i; } Mat batchData, batchLabel; // batch data and label Mat rsp, maxRsp; // feed-forward response Mat prob, sumProb, logProb; // softmax prediction probability Mat gradient, ww; // update bool isConverge = false; double prevCost = 0; double currCost = 0; // cost double mom = 0.5; // moment int t = 0; // iteration int numBatches = floor(numData / batchSize); for (int ei = 0; ei < epochs; ++ei) { randperm(index); batchData = Mat::zeros(batchSize, dimData, data.type()); batchLabel = Mat::zeros(batchSize, numClasses, labelMatrix.type()); for (int batchIdx = 0; batchIdx < numBatches; ++batchIdx) { // batch SGD t++; if (t == 20) mom = moment; getBatchData(data, labelMatrix, index, batchSize, batchIdx, batchData, batchLabel); // if (batchIdx == (numBatches - 1)) { // int batchRange = data.rows - batchIdx*batchSize; // batchData = batchData.rowRange(0, batchRange); // batchLabel = batchLabel.rowRange(0, batchRange); // } rsp = batchData * weight; reduce(rsp, maxRsp, 1, REDUCE_MAX, rsp.type()); rsp -= repeat(maxRsp, 1, numClasses); exp(rsp, prob); reduce(prob, sumProb, 1, REDUCE_SUM, prob.type()); prob = prob / repeat(sumProb, 1, numClasses); // compute gradient gradient = batchLabel - prob; gradient = batchData.t() * gradient; 
gradient = -gradient / batchData.rows; gradient += regularizer * weight; // update weight and bias velocity = mom * velocity + learningRate * gradient; weight -= velocity; // compute objective cost log(prob, logProb); logProb = batchLabel.mul(logProb); currCost = -(sum(logProb)[0] / batchData.rows); pow(weight, 2, ww); currCost += sum(ww)[0] * 0.5 * regularizer; printf("epoch %d: processing batch %d / %d cost %f\n", ei + 1, batchIdx + 1, numBatches, currCost); if (abs(currCost - prevCost) < epsilon && ei != 0) { printf("objective cost variation less than pre-defined %f\n", epsilon); isConverge = true; break; } prevCost = currCost; } batchData.release(); batchLabel.release(); if (isConverge == true) break; } if (isConverge == false) { printf("stopped by reaching maximum number of iterations\n"); } // free and destroy space index.clear(); labelMatrix.release(); batchData.release(); batchLabel.release(); rsp.release(); maxRsp.release(); prob.release(); sumProb.release(); logProb.release(); gradient.release(); ww.release(); velocity.release(); }