static void test_stfun(){ rand_t rstat; int seed=4; double r0=0.2; double dx=1./16; long N=32; long nx=N; long ny=N; long nframe=500; seed_rand(&rstat, seed); if(L0<9000){ dmat *rr=dlinspace(0, N*dx, N); dmat *covvk=turbcov(rr, sqrt(2)*N*dx, r0, L0); writebin(covvk, "cov_vk"); dfree(rr); dfree(covvk); } /* return; */ { map_t *atm=mapnew(nx+1, ny+1, dx, dx,NULL); stfun_t *data=stfun_init(nx, ny, NULL); zfarr *save=zfarr_init(nframe, 1, "fractal_atm.bin"); for(long i=0; i<nframe; i++){ for(long j=0; j<(nx+1)*(ny+1); j++){ atm->p[j]=randn(&rstat); } fractal_do((dmat*)atm, dx, r0,L0,ninit); stfun_push(data, (dmat*)atm); zfarr_dmat(save, i, (dmat*)atm); if(i%100==0) info("%ld of %ld\n", i, nframe); } zfarr_close(save); dmat *st=stfun_finalize(data); writebin(st, "stfun_fractal.bin"); ddraw("fractal", st, NULL,NULL, "Atmosphere","x","y","stfun"); } /*exit(0); */ { stfun_t *data=stfun_init(nx, ny, NULL); dmat *spect=turbpsd(nx, ny, dx, r0, 100, 0, 0.5); cmat *atm=cnew(nx, ny); //cfft2plan(atm, -1); dmat *atmr=dnew(atm->nx, atm->ny); dmat *atmi=dnew(atm->nx, atm->ny); spect->p[0]=0; for(long ii=0; ii<nframe; ii+=2){ for(long i=0; i<atm->nx*atm->ny; i++){ atm->p[i]=COMPLEX(randn(&rstat), randn(&rstat))*spect->p[i]; } cfft2(atm, -1); for(long i=0; i<atm->nx*atm->ny; i++){ atmr->p[i]=creal(atm->p[i]); atmi->p[i]=cimag(atm->p[i]); } stfun_push(data, atmr); stfun_push(data, atmi); if(ii%100==0) info("%ld of %ld\n", ii, nframe); } dmat *st=stfun_finalize(data); writebin(st, "stfun_fft.bin"); ddraw("fft", st, NULL,NULL, "Atmosphere","x","y","stfun"); } }
int main(int argc, char** argv) { struct undistorter_ctx ctx; int i; int j; float complex dds_state; float complex dds_multiplier; float complex harmonics_in[ NUMBER_OF_HARMONICS]; float complex harmonics_out[ NUMBER_OF_HARMONICS]; float harmonic_power_in; float fundamental_power_in; float harmonic_power_out; float fundamental_power_out; float thd_in; float thd_out; undistorter_init(&ctx, 0.05, 2, 4, 1024, -1.5f, 1.5f, 1.0f); dds_state = 1.0f; dds_multiplier = cexpf(2.0f*I * M_PI * TEST_FREQUENCY/SAMPLING_FREQUENCY); for(i = 0; i < NUMBER_OF_HARMONICS; i++) { harmonics_in[i] = 0.0f; harmonics_out[i] = 0.0f; } /* * We produce an input sinusoid x[n] = exp(j*2*pi*f/f_s*n). The complex form * is convenient, because we can then calculate the power in each harmonic * by downconversion and time-averaging: * * P_y[i] = < (y[n] (x[n]^n)*)^2 > * * where < > denotes a time average and z* the complex conjugate. */ for(i = 0; i < NUMBER_OF_SAMPLES; i++) { float noise = randn()*1e-2f; /* 1.13 was chosen to produce 10% THD. */ float distorted_sinusoid = tanhf(1.13f*(crealf(dds_state)+ noise)); float undistorted_sinusoid = undistorter_compensate_sample(&ctx, distorted_sinusoid); float complex carrier = conjf(dds_state); if(i > NUMBER_OF_SAMPLES/4) { for(j = 0; j < NUMBER_OF_HARMONICS; j++) { harmonics_in[j] += carrier*distorted_sinusoid; harmonics_out[j] += carrier*undistorted_sinusoid; carrier *= conjf(dds_state); } } dds_state *= dds_multiplier; } fundamental_power_in = cabsf(harmonics_in[0] )*cabsf(harmonics_in[0] ); fundamental_power_out = cabsf(harmonics_out[0] )*cabsf(harmonics_out[0] ); harmonic_power_in = 0.0f; harmonic_power_out = 0.0f; for(i = 1; i < NUMBER_OF_HARMONICS; i++) { harmonic_power_in += cabsf(harmonics_in[i] )*cabsf(harmonics_in[i] ); harmonic_power_out += cabsf(harmonics_out[i] )*cabsf(harmonics_out[i] ); } thd_in = sqrtf((harmonic_power_in) / fundamental_power_in)*100.0f; thd_out = sqrtf((harmonic_power_out) / fundamental_power_out)*100.0f; if( thd_in/thd_out >= sqrtf(10.0f)) { printf("PASS: THDin = %f, THDout = %f\n", thd_in, thd_out); } else { printf("FAIL: THDin = %f, THDout = %f\n", thd_in, thd_out); } return 0; }
/** * The implementation of the particle filter using OpenMP for a single image * @see http://openmp.org/wp/ * @note This function is designed to work with a single image. In addition, it references a provided MATLAB function which takes the video, the objxy matrix and the x and y arrays as arguments and returns the likelihoods * @warning Use the other particle filter function for videos; the accuracy of this function decreases significantly as it is called repeatedly while processing video * @param I The image to be run * @param IszX The x dimension of the image * @param IszY The y dimension of the image * @param seed The seed array used for random number generation * @param Nparticles The number of particles to be used * @param x_loc The array that will store the x locations of the desired object * @param y_loc The array that will store the y locations of the desired object * @param prevX The starting x position of the object * @param prevY The starting y position of the object */ void particleFilter1F(int * I, int IszX, int IszY, int * seed, int Nparticles, double * x_loc, double * y_loc, double prevX, double prevY){ int max_size = IszX*IszY; /*original particle centroid*/ double xe = prevX; double ye = prevY; /*expected object locations, compared to center*/ int radius = 5; int diameter = radius*2 -1; int * disk = (int *)mxCalloc(diameter*diameter, sizeof(int)); strelDisk(disk, radius); int countOnes = 0; int x, y; for(x = 0; x < diameter; x++){ for(y = 0; y < diameter; y++){ if(disk[x*diameter + y] == 1) countOnes++; } } double * objxy = (double *)mxCalloc(countOnes*2, sizeof(double)); getneighbors(disk, countOnes, objxy, radius); /*initial weights are all equal (1/Nparticles)*/ double * weights = (double *)mxCalloc(Nparticles, sizeof(double)); #pragma omp parallel for shared(weights, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = 1/((double)(Nparticles)); } /*initial likelihood to 0.0*/ double * likelihood = (double *)mxCalloc(Nparticles, sizeof(double)); double * arrayX = (double *)mxCalloc(Nparticles, sizeof(double)); double * arrayY = (double *)mxCalloc(Nparticles, sizeof(double)); double * xj = (double *)mxCalloc(Nparticles, sizeof(double)); double * yj = (double *)mxCalloc(Nparticles, sizeof(double)); double * CDF = (double *)mxCalloc(Nparticles, sizeof(double)); double * u = (double *)mxCalloc(Nparticles, sizeof(double)); mxArray * arguments[4]; mxArray * mxIK = mxCreateDoubleMatrix(IszX, IszY, mxREAL); mxArray * mxObj = mxCreateDoubleMatrix(countOnes, 2, mxREAL); mxArray * mxX = mxCreateDoubleMatrix(1, Nparticles, mxREAL); mxArray * mxY = mxCreateDoubleMatrix(1, Nparticles, mxREAL); double * Ik = (double *)mxCalloc(IszX*IszY, sizeof(double)); mxArray * result = mxCreateDoubleMatrix(1, Nparticles, mxREAL); #pragma omp parallel for shared(arrayX, arrayY, xe, ye) private(x) for(x = 0; x < Nparticles; x++){ arrayX[x] = xe; arrayY[x] = ye; } int k; int indX, indY; /*apply motion model //draws sample from motion model (random walk). The only prior information //is that the object moves 2x as fast as in the y direction*/ #pragma omp parallel for shared(arrayX, arrayY, Nparticles, seed) private(x) for(x = 0; x < Nparticles; x++){ arrayX[x] += 1 + 5*randn(seed, x); arrayY[x] += -2 + 2*randn(seed, x); } /*particle filter likelihood*/ //get the current image for(x = 0; x < IszX; x++) { for(y = 0; y < IszY; y++) { Ik[x*IszX + y] = (double)I[x*IszY + y]; } } //copy arguments memcpy(mxGetPr(mxIK), Ik, sizeof(double)*IszX*IszY); memcpy(mxGetPr(mxObj), objxy, sizeof(double)*countOnes); memcpy(mxGetPr(mxX), arrayX, sizeof(double)*Nparticles); memcpy(mxGetPr(mxY), arrayY, sizeof(double)*Nparticles); arguments[0] = mxIK; arguments[1] = mxObj; arguments[2] = mxX; arguments[3] = mxY; mexCallMATLAB(1, &result, 4, arguments, "GetSimpleLikelihood"); memcpy(likelihood, result, sizeof(double)*Nparticles); /* update & normalize weights // using equation (63) of Arulampalam Tutorial*/ #pragma omp parallel for shared(Nparticles, weights, likelihood) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = weights[x] * exp(likelihood[x]); } double sumWeights = 0; #pragma omp parallel for private(x) reduction(+:sumWeights) for(x = 0; x < Nparticles; x++){ sumWeights += weights[x]; } #pragma omp parallel for shared(sumWeights, weights) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = weights[x]/sumWeights; } xe = 0; ye = 0; /* estimate the object location by expected values*/ #pragma omp parallel for private(x) reduction(+:xe, ye) for(x = 0; x < Nparticles; x++){ xe += arrayX[x] * weights[x]; ye += arrayY[x] * weights[x]; } x_loc[0] = xe+.5; y_loc[0] = ye+.5; /*display(hold off for now) //pause(hold off for now) //resampling*/ CDF[0] = weights[0]; for(x = 1; x < Nparticles; x++){ CDF[x] = weights[x] + CDF[x-1]; } double u1 = (1/((double)(Nparticles)))*randu(seed, 0); #pragma omp parallel for shared(u, u1, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ u[x] = u1 + x/((double)(Nparticles)); } int j, i; #pragma omp parallel for shared(CDF, Nparticles, xj, yj, u, arrayX, arrayY) private(i, j) for(j = 0; j < Nparticles; j++){ i = findIndex(CDF, Nparticles, u[j]); /*i = findIndexBin(CDF, 0, Nparticles, u[j]);*/ if(i == -1) i = Nparticles-1; xj[j] = arrayX[i]; yj[j] = arrayY[i]; } /*reassign arrayX and arrayY*/ #pragma omp parallel for shared(weights, arrayX, arrayY, xj, yj, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = 1/((double)(Nparticles)); arrayX[x] = xj[x]; arrayY[x] = yj[x]; } mxFree(disk); mxFree(weights); mxFree(objxy); mxFree(likelihood); mxFree(arrayX); mxFree(arrayY); mxFree(CDF); mxFree(u); mxFree(xj); mxFree(yj); mxFree(Ik); }
/** * Sets values of 3D matrix using randomly generated numbers from a normal distribution * @param array3D The video to be modified * @param dimX The x dimension of the frame * @param dimY The y dimension of the frame * @param dimZ The number of frames * @param seed The seed array */ void addNoise(int * array3D, int * dimX, int * dimY, int * dimZ, int * seed){ int x, y, z; for(x = 0; x < *dimX; x++){ for(y = 0; y < *dimY; y++){ for(z = 0; z < *dimZ; z++){ array3D[x * *dimY * *dimZ + y * *dimZ + z] = array3D[x * *dimY * *dimZ + y * *dimZ + z] + (int)(5*randn(seed, 0)); } } } }
/** * The implementation of the particle filter using OpenMP for many frames * @see http://openmp.org/wp/ * @note This function is designed to work with a video of several frames. In addition, it references a provided MATLAB function which takes the video, the objxy matrix and the x and y arrays as arguments and returns the likelihoods * @param I The video to be run * @param IszX The x dimension of the video * @param IszY The y dimension of the video * @param Nfr The number of frames * @param seed The seed array used for random number generation * @param Nparticles The number of particles to be used */ int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles){ int i, c; #ifdef HW XFcuda xcore; int Status; Status = XFcuda_Initialize(&xcore, 0); if (Status != XST_SUCCESS) { printf("Initialization failed\n"); return 1; // XST_FAILURE; } #endif int max_size = IszX*IszY*Nfr; //long long start = get_time(); //original particle centroid double xe = roundDouble(IszY/2.0); double ye = roundDouble(IszX/2.0); //expected object locations, compared to center int radius = 5; int diameter = radius*2 - 1; int * disk = (int *)malloc(diameter*diameter*sizeof(int)); strelDisk(disk, radius); int countOnes = 0; int x, y; for(x = 0; x < diameter; x++){ for(y = 0; y < diameter; y++){ if(disk[x*diameter + y] == 1) countOnes++; } } double * objxy = (double *)malloc(countOnes*2*sizeof(double)); getneighbors(disk, countOnes, objxy, radius); //long long get_neighbors = get_time(); //printf("TIME TO GET NEIGHBORS TOOK: %f\n", elapsed_time(start, get_neighbors)); //initial weights are all equal (1/Nparticles) double * weights = (double *)malloc(sizeof(double)*Nparticles); for(x = 0; x < Nparticles; x++){ weights[x] = 1/((double)(Nparticles)); } //long long get_weights = get_time(); //printf("TIME TO GET WEIGHTSTOOK: %f\n", elapsed_time(get_neighbors, get_weights)); //initial likelihood to 0.0 //printf("%d\n", Nparticles); double * likelihood = (double *)malloc(sizeof(double)*Nparticles); double * arrayX = (double *)malloc(sizeof(double)*Nparticles); double * arrayY = (double *)malloc(sizeof(double)*Nparticles); double * xj = (double *)malloc(sizeof(double)*Nparticles); double * yj = (double *)malloc(sizeof(double)*Nparticles); double * CDF = (double *)malloc(sizeof(double)*Nparticles); //GPU copies of arrays //double * arrayX_GPU; //double * arrayY_GPU; //double * xj_GPU; //double * yj_GPU; //double * CDF_GPU; int * ind = (int*)malloc(sizeof(int)*countOnes); double * u = (double *)malloc(sizeof(double)*Nparticles); //double * u_GPU; //CUDA memory allocation //check_error(cudaMalloc((void **) &arrayX_GPU, sizeof(double)*Nparticles)); //arrayX_GPU = (double*)malloc(sizeof(double)*Nparticles); //check_error(cudaMalloc((void **) &arrayY_GPU, sizeof(double)*Nparticles)); //arrayY_GPU = (double*)malloc(sizeof(double)*Nparticles); //check_error(cudaMalloc((void **) &xj_GPU, sizeof(double)*Nparticles)); //xj_GPU = (double*)malloc(sizeof(double)*Nparticles); //check_error(cudaMalloc((void **) &yj_GPU, sizeof(double)*Nparticles)); //yj_GPU = (double*)malloc(sizeof(double)*Nparticles); //check_error(cudaMalloc((void **) &CDF_GPU, sizeof(double)*Nparticles)); //CDF_GPU = (double*)malloc(sizeof(double)*Nparticles); //check_error(cudaMalloc((void **) &u_GPU, sizeof(double)*Nparticles)); //u_GPU = (double*)malloc(sizeof(double)*Nparticles); for(x = 0; x < Nparticles; x++){ arrayX[x] = xe; arrayY[x] = ye; } //Set number of threads int num_blocks = ceil((double) Nparticles/(double) threads_per_block); //printf("%d\n", num_blocks); dim3 grids, threads; grids.x = num_blocks; grids.y = 1; grids.z = 1; threads.x = threads_per_block; threads.y = 1; threads.z = 1; #ifdef HW XFcuda_SetNparticles(&xcore, Nparticles); XFcuda_SetGriddim_x(&xcore, grids.x); XFcuda_SetGriddim_y(&xcore, grids.y); //XFcuda_SetGriddim_z(&xcore, grids.z); XFcuda_SetBlockdim_x(&xcore, threads.x); //XFcuda_SetBlockdim_y(&xcore, threads.y); //XFcuda_SetBlockdim_z(&xcore, threads.z); XFcuda_SetArrayx_addr(&xcore, (u32)arrayX / sizeof(double)); XFcuda_SetArrayy_addr(&xcore, (u32)arrayY / sizeof(double)); XFcuda_SetCdf_addr(&xcore, (u32)CDF / sizeof(double)); XFcuda_SetU_addr(&xcore, (u32)u / sizeof(double)); XFcuda_SetXj_addr(&xcore, (u32)xj / sizeof(double)); XFcuda_SetYj_addr(&xcore, (u32)yj / sizeof(double)); #endif int k; //double * Ik = (double *)malloc(sizeof(double)*IszX*IszY); int indX, indY; double *result = (double *)malloc(3 * (Nfr - 1) * sizeof(double)); i = 0; for(k = 1; k < Nfr; k++){ //long long set_arrays = get_time(); //printf("TIME TO SET ARRAYS TOOK: %f\n", elapsed_time(get_weights, set_arrays)); //apply motion model //draws sample from motion model (random walk). The only prior information //is that the object moves 2x as fast as in the y direction for(x = 0; x < Nparticles; x++){ arrayX[x] = arrayX[x] + 1.0 + 5.0*randn(seed, x); arrayY[x] = arrayY[x] - 2.0 + 2.0*randn(seed, x); } //particle filter likelihood //long long error = get_time(); //printf("TIME TO SET ERROR TOOK: %f\n", elapsed_time(set_arrays, error)); for(x = 0; x < Nparticles; x++){ //compute the likelihood: remember our assumption is that you know // foreground and the background image intensity distribution. // Notice that we consider here a likelihood ratio, instead of // p(z|x). It is possible in this case. why? a hometask for you. //calc ind for(y = 0; y < countOnes; y++){ indX = roundDouble(arrayX[x]) + objxy[y*2 + 1]; indY = roundDouble(arrayY[x]) + objxy[y*2]; ind[y] = fabs(indX*IszY*Nfr + indY*Nfr + k); if(ind[y] >= max_size) ind[y] = 0; } likelihood[x] = calcLikelihoodSum(I, ind, countOnes); likelihood[x] = likelihood[x]/countOnes; } //long long likelihood_time = get_time(); //printf("TIME TO GET LIKELIHOODS TOOK: %f\n", elapsed_time(error, likelihood_time)); // update & normalize weights // using equation (63) of Arulampalam Tutorial for(x = 0; x < Nparticles; x++){ weights[x] = weights[x] * exp(likelihood[x]); } //long long exponential = get_time(); //printf("TIME TO GET EXP TOOK: %f\n", elapsed_time(likelihood_time, exponential)); double sumWeights = 0; for(x = 0; x < Nparticles; x++){ sumWeights += weights[x]; } //long long sum_time = get_time(); //printf("TIME TO SUM WEIGHTS TOOK: %f\n", elapsed_time(exponential, sum_time)); for(x = 0; x < Nparticles; x++){ weights[x] = weights[x]/sumWeights; } //long long normalize = get_time(); //printf("TIME TO NORMALIZE WEIGHTS TOOK: %f\n", elapsed_time(sum_time, normalize)); xe = 0; ye = 0; // estimate the object location by expected values for(x = 0; x < Nparticles; x++){ //printf("%f %f %f\n", arrayX[x], arrayY[x], weights[x]); xe += arrayX[x] * weights[x]; ye += arrayY[x] * weights[x]; } //long long move_time = get_time(); //printf("TIME TO MOVE OBJECT TOOK: %f\n", elapsed_time(normalize, move_time)); //printf("XE: %lf\n", xe); //printf("YE: %lf\n", ye); double distance = sqrt( pow((double)(xe-(int)roundDouble(IszY/2.0)),2) + pow((double)(ye-(int)roundDouble(IszX/2.0)),2) ); //printf("%lf\n", distance); result[i] = xe; result[i + 1] = ye; result[i + 2] = distance; i += 3; //display(hold off for now) //pause(hold off for now) //resampling CDF[0] = weights[0]; for(x = 1; x < Nparticles; x++){ CDF[x] = weights[x] + CDF[x-1]; } //long long cum_sum = get_time(); //printf("TIME TO CALC CUM SUM TOOK: %f\n", elapsed_time(move_time, cum_sum)); double u1 = (1/((double)(Nparticles)))*randu(seed, 0); for(x = 0; x < Nparticles; x++){ u[x] = u1 + x/((double)(Nparticles)); } //long long u_time = get_time(); //printf("TIME TO CALC U TOOK: %f\n", elapsed_time(cum_sum, u_time)); //long long start_copy = get_time(); //CUDA memory copying from CPU memory to GPU memory //cudaMemcpy(arrayX_GPU, arrayX, sizeof(double)*Nparticles, cudaMemcpyHostToDevice); //memcpy(arrayX_GPU, arrayX, sizeof(double)*Nparticles); //cudaMemcpy(arrayY_GPU, arrayY, sizeof(double)*Nparticles, cudaMemcpyHostToDevice); //memcpy(arrayY_GPU, arrayY, sizeof(double)*Nparticles); //cudaMemcpy(xj_GPU, xj, sizeof(double)*Nparticles, cudaMemcpyHostToDevice); //memcpy(xj_GPU, xj, sizeof(double)*Nparticles); //cudaMemcpy(yj_GPU, yj, sizeof(double)*Nparticles, cudaMemcpyHostToDevice); //memcpy(yj_GPU, yj, sizeof(double)*Nparticles); //cudaMemcpy(CDF_GPU, CDF, sizeof(double)*Nparticles, cudaMemcpyHostToDevice); //memcpy(CDF_GPU, CDF, sizeof(double)*Nparticles); //cudaMemcpy(u_GPU, u, sizeof(double)*Nparticles, cudaMemcpyHostToDevice); //memcpy(u_GPU, u, sizeof(double)*Nparticles); //long long end_copy = get_time(); //Xil_DCacheDisable(); #ifdef HW Xil_DCacheDisable(); XFcuda_SetEn_fcuda1(&xcore, 1); XFcuda_Start(&xcore); while (!XFcuda_IsDone(&xcore)); Xil_DCacheEnable(); #else //KERNEL FUNCTION CALL int j; for(j = 0; j < Nparticles; j++){ x = findIndex(CDF, Nparticles, u[j]); if(x == -1) x = Nparticles-1; xj[j] = arrayX[x]; yj[j] = arrayY[x]; } #endif //cudaThreadSynchronize(); //long long start_copy_back = get_time(); //CUDA memory copying back from GPU to CPU memory //cudaMemcpy(yj, yj_GPU, sizeof(double)*Nparticles, cudaMemcpyDeviceToHost); //memcpy(yj, yj_GPU, sizeof(double)*Nparticles); //cudaMemcpy(xj, xj_GPU, sizeof(double)*Nparticles, cudaMemcpyDeviceToHost); //memcpy(xj, xj_GPU, sizeof(double)*Nparticles); //long long end_copy_back = get_time(); //printf("SENDING TO GPU TOOK: %lf\n", elapsed_time(start_copy, end_copy)); //printf("CUDA EXEC TOOK: %lf\n", elapsed_time(end_copy, start_copy_back)); //printf("SENDING BACK FROM GPU TOOK: %lf\n", elapsed_time(start_copy_back, end_copy_back)); //long long xyj_time = get_time(); //printf("TIME TO CALC NEW ARRAY X AND Y TOOK: %f\n", elapsed_time(u_time, xyj_time)); for(x = 0; x < Nparticles; x++){ //reassign arrayX and arrayY arrayX[x] = xj[x]; arrayY[x] = yj[x]; weights[x] = 1/((double)(Nparticles)); } //long long reset = get_time(); //printf("TIME TO RESET WEIGHTS TOOK: %f\n", elapsed_time(xyj_time, reset)); } //CUDA freeing of memory /*cudaFree(u_GPU); cudaFree(CDF_GPU); cudaFree(yj_GPU); cudaFree(xj_GPU); cudaFree(arrayY_GPU); cudaFree(arrayX_GPU);*/ /* free(u_GPU); free(CDF_GPU); free(yj_GPU); free(xj_GPU); free(arrayY_GPU); free(arrayX_GPU); */ //free memory free(disk); free(objxy); free(weights); free(likelihood); free(arrayX); free(arrayY); free(xj); free(yj); free(CDF); free(u); free(ind); /* FILE *fp = fopen("cuda/gold_output_naive.txt", "r"); if (fp == NULL) { printf("Cannot open file.\n"); free(result); return 0; } char buffer[50]; double gold_val; for (i = 0; i < 3 * (Nfr - 1); i++) { if (feof(fp)) { printf("Unexpected end of file.\n"); free(result); return 0; } fgets(buffer, sizeof(buffer), fp); sscanf(buffer, "%lf\n", &gold_val); if (gold_val - result[i] < -EPSILON || gold_val - result[i] > EPSILON) { printf("Mismatch result at %i: gold = %f, result = %f\n", i, gold_val, result[i]); free(result); return 0; } } free(result); */ #ifdef VERIFY for (i = 0; i < 3 * (Nfr - 1); i++) { printf("index %d: %f\n", i, result[i]); } for (i = 0; i < 3 * (Nfr - 1); i++) { if (fabs(gold_output[i] - result[i]) > EPSILON) { printf("Mismatch result at %i: gold = %f, result = %f\n", i, gold_output[i], result[i]); free(result); return 0; } } #endif free(result); return 1; }
void multivar_normal(double *mean, double *var, double *ret, int dim) { for(int i=0; i < dim; i++) ret[i] = mean[i] + sqrt(var[i])*randn(); }
int main(int argc, char **argv) { ros::init(argc, argv, "updating_particles"); ros::NodeHandle n; PlotRayUtils plt; RayTracer rayt; std::random_device rd; std::normal_distribution<double> randn(0.0,0.000001); ROS_INFO("Running..."); ros::Publisher pub_init = n.advertise<particle_filter::PFilterInit>("/particle_filter_init", 5); ros::ServiceClient srv_add = n.serviceClient<particle_filter::AddObservation>("/particle_filter_add"); ros::Duration(1).sleep(); // pub_init.publish(getInitialPoints(plt)); geometry_msgs::Point obs; geometry_msgs::Point dir; double radius = 0.00; int i = 0; //for(int i=0; i<20; i++){ while (i < NUM_TOUCHES) { // ros::Duration(1).sleep(); //tf::Point start(0.95,0,-0.15); //tf::Point end(0.95,2,-0.15); tf::Point start, end; // randomSelection(plt, rayt, start, end); fixedSelection(plt, rayt, start, end); Ray measurement(start, end); double distToPart; if(!rayt.traceRay(measurement, distToPart)){ ROS_INFO("NO INTERSECTION, Skipping"); continue; } tf::Point intersection(start.getX(), start.getY(), start.getZ()); intersection = intersection + (end-start).normalize() * (distToPart - radius); std::cout << "Intersection at: " << intersection.getX() << " " << intersection.getY() << " " << intersection.getZ() << std::endl; tf::Point ray_dir(end.x()-start.x(),end.y()-start.y(),end.z()-start.z()); ray_dir = ray_dir.normalize(); obs.x=intersection.getX() + randn(rd); obs.y=intersection.getY() + randn(rd); obs.z=intersection.getZ() + randn(rd); dir.x=ray_dir.x(); dir.y=ray_dir.y(); dir.z=ray_dir.z(); // obs.x=intersection.getX(); // obs.y=intersection.getY(); // obs.z=intersection.getZ(); // pub_add.publish(obs); // plt.plotCylinder(start, end, 0.01, 0.002, true); plt.plotRay(Ray(start, end)); // ros::Duration(1).sleep(); particle_filter::AddObservation pfilter_obs; pfilter_obs.request.p = obs; pfilter_obs.request.dir = dir; if(!srv_add.call(pfilter_obs)){ ROS_INFO("Failed to call add observation"); } ros::spinOnce(); while(!rayt.particleHandler.newParticles){ ROS_INFO_THROTTLE(10, "Waiting for new particles..."); ros::spinOnce(); ros::Duration(.1).sleep(); } i ++; } // std::ofstream myfile; // myfile.open("/home/shiyuan/Documents/ros_marsarm/diff.csv", std::ios::out|std::ios::app); // myfile << "\n"; // myfile.close(); // myfile.open("/home/shiyuan/Documents/ros_marsarm/time.csv", std::ios::out|std::ios::app); // myfile << "\n"; // myfile.close(); // myfile.open("/home/shiyuan/Documents/ros_marsarm/diff_trans.csv", std::ios::out|std::ios::app); // myfile << "\n"; // myfile.close(); // myfile.open("/home/shiyuan/Documents/ros_marsarm/diff_rot.csv", std::ios::out|std::ios::app); // myfile << "\n"; // myfile.close(); // myfile.open("/home/shiyuan/Documents/ros_marsarm/workspace_max.csv", std::ios::out|std::ios::app); // myfile << "\n"; // myfile.close(); // myfile.open("/home/shiyuan/Documents/ros_marsarm/workspace_min.csv", std::ios::out|std::ios::app); // myfile << "\n"; // myfile.close(); ROS_INFO("Finished all action"); }
array randn(const dim_t d0, const dim_t d1, const dim_t d2, const dim_t d3, const af::dtype ty) { return randn(dim4(d0, d1, d2, d3), ty); }
//! Vector of x ~ N(0,1) size n vec norm(const size_t &n){ return randn(n); }
//! Single draw from N(0,1) double norm(){ vec n = randn(1); return n(0); }
//! Matrix of x ~ N(0,1) size (n_rows,n_cols) mat norm(const size_t & n_rows, const size_t & n_cols){ return randn(n_rows,n_cols); }
//! Matrix of x ~ N(mu,sigma) size (n_rows,n_cols) mat norm(const double mu, const double sigma, const int n_rows, int n_cols){ return mu + sigma*randn(n_rows,n_cols); }
GLFFTWater::GLFFTWater(GLFFTWaterParams ¶ms) { #if 0 #ifdef _WIN32 m_h = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4); m_dx = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4); m_dz = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4); m_w = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N)*(params.N)), 4); #else posix_memalign((void **)&m_h,4,sizeof(float)*(params.N+2)*(params.N)); posix_memalign((void **)&m_dx,4,sizeof(float)*(params.N+2)*(params.N)); posix_memalign((void **)&m_dz,4,sizeof(float)*(params.N+2)*(params.N)); posix_memalign((void **)&m_w,4,sizeof(float)*(params.N)*(params.N)); #endif #else m_h = new float[(params.N+2)*params.N]; m_dx = new float[(params.N+2)*(params.N)]; m_dz = new float[(params.N+2)*(params.N)]; m_w = new float[(params.N)*(params.N)]; #endif m_htilde0 = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex)*(params.N)*(params.N)); m_heightmap = new float3[(params.N)*(params.N)]; m_params = params; std::tr1::mt19937 prng(1337); std::tr1::normal_distribution<float> normal; std::tr1::uniform_real<float> uniform; std::tr1::variate_generator<std::tr1::mt19937, std::tr1::normal_distribution<float> > randn(prng,normal); std::tr1::variate_generator<std::tr1::mt19937, std::tr1::uniform_real<float> > randu(prng,uniform); for(int i=0, k=0; i<params.N; i++) { float k_x = (-(params.N-1)*0.5f+i)*(2.f*3.141592654f / params.L); for(int j=0; j<params.N; j++, k++) { float k_y = (-(params.N-1)*0.5f+j)*(2.f*3.141592654f / params.L); float A = randn(); float theta = randu()*2.f*3.141592654f; float P = (k_x==0.f && k_y==0.0f) ? 0.f : sqrtf(phillips(k_x,k_y,m_w[k])); m_htilde0[k][0] = m_htilde0[k][1] = P*A*sinf(theta); } } m_kz = new float[params.N*(params.N / 2 + 1)]; m_kx = new float[params.N*(params.N / 2 + 1)]; const int hN = m_params.N / 2; for(int y=0; y<m_params.N; y++) { float kz = (float) (y - hN); for(int x=0; x<=hN; x++) { float kx = (float) (x - hN); float k = 1.f/sqrtf(kx*kx+kz*kz); m_kz[y*(hN+1)+x] = kz*k; m_kx[y*(hN+1)+x] = kx*k; } } if(!fftwf_init_threads()) { cerr << "Error initializing multithreaded fft." << endl; } else { fftwf_plan_with_nthreads(2); } m_fftplan = fftwf_plan_dft_c2r_2d(m_params.N, m_params.N, (fftwf_complex *)m_h, m_h, FFTW_ESTIMATE); glGenTextures(1, &m_texId); glBindTexture(GL_TEXTURE_2D, m_texId); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB16F, params.N, params.N, 0, GL_RGB, GL_FLOAT, 0); glBindTexture(GL_TEXTURE_2D, 0); }
void sammon(const emxArray_real_T *x, emxArray_real_T *y) { emxArray_real_T *D; emxArray_real_T *delta; int32_T i0; int32_T b_D; int32_T br; emxArray_real_T *Dinv; emxArray_real_T *d; emxArray_real_T *dinv; emxArray_real_T *c_D; emxArray_real_T *b_delta; emxArray_real_T *b_x; real_T E; int32_T i; emxArray_real_T *deltaone; emxArray_real_T *g; emxArray_real_T *y2; emxArray_real_T *H; emxArray_real_T *s; emxArray_real_T *C; emxArray_real_T *b_C; emxArray_real_T *c_delta; emxArray_real_T *d_D; emxArray_real_T *b_deltaone; boolean_T exitg1; boolean_T guard2 = FALSE; uint32_T unnamed_idx_0; int32_T ic; int32_T ar; int32_T ib; int32_T ia; int32_T i1; boolean_T guard1 = FALSE; int32_T exitg3; boolean_T exitg2; int32_T b_y[2]; real_T E_new; real_T u1; emxInit_real_T(&D, 2); emxInit_real_T(&delta, 2); /* #codgen */ /* */ /* SAMMON - apply Sammon's nonlinear mapping */ /* */ /* Y = SAMMON(X) applies Sammon's nonlinear mapping procedure on */ /* multivariate data X, where each row represents a pattern and each column */ /* represents a feature. On completion, Y contains the corresponding */ /* co-ordinates of each point on the map. By default, a two-dimensional */ /* map is created. Note if X contains any duplicated rows, SAMMON will */ /* fail (ungracefully). */ /* */ /* [Y,E] = SAMMON(X) also returns the value of the cost function in E (i.e. */ /* the stress of the mapping). */ /* */ /* An N-dimensional output map is generated by Y = SAMMON(X,N) . */ /* */ /* A set of optimisation options can also be specified using a third */ /* argument, Y = SAMMON(X,N,OPTS) , where OPTS is a structure with fields: */ /* */ /* MaxIter - maximum number of iterations */ /* TolFun - relative tolerance on objective function */ /* MaxHalves - maximum number of step halvings */ /* Input - {'raw','distance'} if set to 'distance', X is */ /* interpreted as a matrix of pairwise distances. */ /* Display - {'off', 'on', 'iter'} */ /* Initialisation - {'pca', 'random'} */ /* */ /* The default options structure can be retrieved by calling SAMMON with */ /* no parameters. */ /* */ /* References : */ /* */ /* [1] Sammon, John W. Jr., "A Nonlinear Mapping for Data Structure */ /* Analysis", IEEE Transactions on Computers, vol. C-18, no. 5, */ /* pp 401-409, May 1969. */ /* */ /* See also : SAMMON_TEST */ /* */ /* File : sammon.m */ /* */ /* Date : Monday 12th November 2007. */ /* */ /* Author : Gavin C. Cawley and Nicola L. C. Talbot */ /* */ /* Description : Simple vectorised MATLAB implementation of Sammon's non-linear */ /* mapping algorithm [1]. */ /* */ /* References : [1] Sammon, John W. Jr., "A Nonlinear Mapping for Data */ /* Structure Analysis", IEEE Transactions on Computers, */ /* vol. C-18, no. 5, pp 401-409, May 1969. */ /* */ /* History : 10/08/2004 - v1.00 */ /* 11/08/2004 - v1.10 Hessian made positive semidefinite */ /* 13/08/2004 - v1.11 minor optimisation */ /* 12/11/2007 - v1.20 initialisation using the first n principal */ /* components. */ /* */ /* Thanks : Dr Nick Hamilton ([email protected]) for supplying the */ /* code for implementing initialisation using the first n */ /* principal components (introduced in v1.20). */ /* */ /* To do : The current version does not take advantage of the symmetry */ /* of the distance matrix in order to allow for easy */ /* vectorisation. This may not be a good choice for very large */ /* datasets, so perhaps one day I'll get around to doing a MEX */ /* version using the BLAS library etc. for very large datasets. */ /* */ /* Copyright : (c) Dr Gavin C. Cawley, November 2007. */ /* */ /* This program is free software; you can redistribute it and/or modify */ /* it under the terms of the GNU General Public License as published by */ /* the Free Software Foundation; either version 2 of the License, or */ /* (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* */ /* use the default options structure */ /* create distance matrix unless given by parameters */ euclid(x, x, D); /* remaining initialisation */ eye(x->size[0], delta); i0 = D->size[0] * D->size[1]; emxEnsureCapacity((emxArray__common *)D, i0, (int32_T)sizeof(real_T)); b_D = D->size[0]; br = D->size[1]; br *= b_D; for (i0 = 0; i0 < br; i0++) { D->data[i0] += delta->data[i0]; } emxInit_real_T(&Dinv, 2); emxInit_real_T(&d, 2); rdivide(D, Dinv); randn(x->size[0], y); b_euclid(y, y, d); eye(x->size[0], delta); i0 = d->size[0] * d->size[1]; emxEnsureCapacity((emxArray__common *)d, i0, (int32_T)sizeof(real_T)); b_D = d->size[0]; br = d->size[1]; br *= b_D; for (i0 = 0; i0 < br; i0++) { d->data[i0] += delta->data[i0]; } emxInit_real_T(&dinv, 2); emxInit_real_T(&c_D, 2); rdivide(d, dinv); i0 = c_D->size[0] * c_D->size[1]; c_D->size[0] = D->size[0]; c_D->size[1] = D->size[1]; emxEnsureCapacity((emxArray__common *)c_D, i0, (int32_T)sizeof(real_T)); br = D->size[0] * D->size[1]; for (i0 = 0; i0 < br; i0++) { c_D->data[i0] = D->data[i0] - d->data[i0]; } emxInit_real_T(&b_delta, 2); power(c_D, delta); i0 = b_delta->size[0] * b_delta->size[1]; b_delta->size[0] = delta->size[0]; b_delta->size[1] = delta->size[1]; emxEnsureCapacity((emxArray__common *)b_delta, i0, (int32_T)sizeof(real_T)); br = delta->size[0] * delta->size[1]; emxFree_real_T(&c_D); for (i0 = 0; i0 < br; i0++) { b_delta->data[i0] = delta->data[i0] * Dinv->data[i0]; } emxInit_real_T(&b_x, 2); c_sum(b_delta, b_x); E = d_sum(b_x); /* get on with it */ i = 0; emxFree_real_T(&b_delta); emxInit_real_T(&deltaone, 2); emxInit_real_T(&g, 2); emxInit_real_T(&y2, 2); emxInit_real_T(&H, 2); b_emxInit_real_T(&s, 1); emxInit_real_T(&C, 2); emxInit_real_T(&b_C, 2); emxInit_real_T(&c_delta, 2); emxInit_real_T(&d_D, 2); b_emxInit_real_T(&b_deltaone, 1); exitg1 = FALSE; while ((exitg1 == FALSE) && (i < 500)) { /* compute gradient, Hessian and search direction (note it is actually */ /* 1/4 of the gradient and Hessian, but the step size is just the ratio */ /* of the gradient and the diagonal of the Hessian so it doesn't matter). */ i0 = delta->size[0] * delta->size[1]; delta->size[0] = dinv->size[0]; delta->size[1] = dinv->size[1]; emxEnsureCapacity((emxArray__common *)delta, i0, (int32_T)sizeof(real_T)); br = dinv->size[0] * dinv->size[1]; for (i0 = 0; i0 < br; i0++) { delta->data[i0] = dinv->data[i0] - Dinv->data[i0]; } guard2 = FALSE; if (delta->size[1] == 1) { guard2 = TRUE; } else { b_D = x->size[0]; if (b_D == 1) { guard2 = TRUE; } else { unnamed_idx_0 = (uint32_T)delta->size[0]; i0 = deltaone->size[0] * deltaone->size[1]; deltaone->size[0] = (int32_T)unnamed_idx_0; deltaone->size[1] = 2; emxEnsureCapacity((emxArray__common *)deltaone, i0, (int32_T)sizeof (real_T)); br = (int32_T)unnamed_idx_0 << 1; for (i0 = 0; i0 < br; i0++) { deltaone->data[i0] = 0.0; } if (delta->size[0] == 0) { } else { b_D = 0; while ((delta->size[0] > 0) && (b_D <= delta->size[0])) { i0 = b_D + delta->size[0]; for (ic = b_D; ic + 1 <= i0; ic++) { deltaone->data[ic] = 0.0; } b_D += delta->size[0]; } br = 0; b_D = 0; while ((delta->size[0] > 0) && (b_D <= delta->size[0])) { ar = 0; i0 = br + delta->size[1]; for (ib = br + 1; ib <= i0; ib++) { ia = ar; i1 = b_D + delta->size[0]; for (ic = b_D; ic + 1 <= i1; ic++) { ia++; deltaone->data[ic] += delta->data[ia - 1]; } ar += delta->size[0]; } br += delta->size[1]; b_D += delta->size[0]; } } } } if (guard2 == TRUE) { i0 = deltaone->size[0] * deltaone->size[1]; deltaone->size[0] = delta->size[0]; deltaone->size[1] = 2; emxEnsureCapacity((emxArray__common *)deltaone, i0, (int32_T)sizeof(real_T)); br = delta->size[0]; for (i0 = 0; i0 < br; i0++) { for (i1 = 0; i1 < 2; i1++) { deltaone->data[i0 + deltaone->size[0] * i1] = 0.0; ar = delta->size[1]; for (b_D = 0; b_D < ar; b_D++) { deltaone->data[i0 + deltaone->size[0] * i1] += delta->data[i0 + delta->size[0] * b_D]; } } } } if ((delta->size[1] == 1) || (y->size[0] == 1)) { i0 = g->size[0] * g->size[1]; g->size[0] = delta->size[0]; g->size[1] = 2; emxEnsureCapacity((emxArray__common *)g, i0, (int32_T)sizeof(real_T)); br = delta->size[0]; for (i0 = 0; i0 < br; i0++) { for (i1 = 0; i1 < 2; i1++) { g->data[i0 + g->size[0] * i1] = 0.0; ar = delta->size[1]; for (b_D = 0; b_D < ar; b_D++) { g->data[i0 + g->size[0] * i1] += delta->data[i0 + delta->size[0] * b_D] * y->data[b_D + y->size[0] * i1]; } } } } else { unnamed_idx_0 = (uint32_T)delta->size[0]; i0 = g->size[0] * g->size[1]; g->size[0] = (int32_T)unnamed_idx_0; g->size[1] = 2; emxEnsureCapacity((emxArray__common *)g, i0, (int32_T)sizeof(real_T)); br = (int32_T)unnamed_idx_0 << 1; for (i0 = 0; i0 < br; i0++) { g->data[i0] = 0.0; } if (delta->size[0] == 0) { } else { b_D = 0; while ((delta->size[0] > 0) && (b_D <= delta->size[0])) { i0 = b_D + delta->size[0]; for (ic = b_D; ic + 1 <= i0; ic++) { g->data[ic] = 0.0; } b_D += delta->size[0]; } br = 0; b_D = 0; while ((delta->size[0] > 0) && (b_D <= delta->size[0])) { ar = 0; i0 = br + delta->size[1]; for (ib = br; ib + 1 <= i0; ib++) { if (y->data[ib] != 0.0) { ia = ar; i1 = b_D + delta->size[0]; for (ic = b_D; ic + 1 <= i1; ic++) { ia++; g->data[ic] += y->data[ib] * delta->data[ia - 1]; } } ar += delta->size[0]; } br += delta->size[1]; b_D += delta->size[0]; } } } i0 = g->size[0] * g->size[1]; g->size[1] = 2; emxEnsureCapacity((emxArray__common *)g, i0, (int32_T)sizeof(real_T)); b_D = g->size[0]; br = g->size[1]; br *= b_D; for (i0 = 0; i0 < br; i0++) { g->data[i0] -= y->data[i0] * deltaone->data[i0]; } c_power(dinv, delta); b_power(y, y2); if ((delta->size[1] == 1) || (y2->size[0] == 1)) { i0 = H->size[0] * H->size[1]; H->size[0] = delta->size[0]; H->size[1] = 2; emxEnsureCapacity((emxArray__common *)H, i0, (int32_T)sizeof(real_T)); br = delta->size[0]; for (i0 = 0; i0 < br; i0++) { for (i1 = 0; i1 < 2; i1++) { H->data[i0 + H->size[0] * i1] = 0.0; ar = delta->size[1]; for (b_D = 0; b_D < ar; b_D++) { H->data[i0 + H->size[0] * i1] += delta->data[i0 + delta->size[0] * b_D] * y2->data[b_D + y2->size[0] * i1]; } } } } else { unnamed_idx_0 = (uint32_T)delta->size[0]; i0 = H->size[0] * H->size[1]; H->size[0] = (int32_T)unnamed_idx_0; H->size[1] = 2; emxEnsureCapacity((emxArray__common *)H, i0, (int32_T)sizeof(real_T)); br = (int32_T)unnamed_idx_0 << 1; for (i0 = 0; i0 < br; i0++) { H->data[i0] = 0.0; } if (delta->size[0] == 0) { } else { b_D = 0; while ((delta->size[0] > 0) && (b_D <= delta->size[0])) { i0 = b_D + delta->size[0]; for (ic = b_D; ic + 1 <= i0; ic++) { H->data[ic] = 0.0; } b_D += delta->size[0]; } br = 0; b_D = 0; while ((delta->size[0] > 0) && (b_D <= delta->size[0])) { ar = 0; i0 = br + delta->size[1]; for (ib = br; ib + 1 <= i0; ib++) { if (y2->data[ib] != 0.0) { ia = ar; i1 = b_D + delta->size[0]; for (ic = b_D; ic + 1 <= i1; ic++) { ia++; H->data[ic] += y2->data[ib] * delta->data[ia - 1]; } } ar += delta->size[0]; } br += delta->size[1]; b_D += delta->size[0]; } } } if ((delta->size[1] == 1) || (y->size[0] == 1)) { i0 = C->size[0] * C->size[1]; C->size[0] = delta->size[0]; C->size[1] = 2; emxEnsureCapacity((emxArray__common *)C, i0, (int32_T)sizeof(real_T)); br = delta->size[0]; for (i0 = 0; i0 < br; i0++) { for (i1 = 0; i1 < 2; i1++) { C->data[i0 + C->size[0] * i1] = 0.0; ar = delta->size[1]; for (b_D = 0; b_D < ar; b_D++) { C->data[i0 + C->size[0] * i1] += delta->data[i0 + delta->size[0] * b_D] * y->data[b_D + y->size[0] * i1]; } } } } else { unnamed_idx_0 = (uint32_T)delta->size[0]; i0 = C->size[0] * C->size[1]; C->size[0] = (int32_T)unnamed_idx_0; C->size[1] = 2; emxEnsureCapacity((emxArray__common *)C, i0, (int32_T)sizeof(real_T)); br = (int32_T)unnamed_idx_0 << 1; for (i0 = 0; i0 < br; i0++) { C->data[i0] = 0.0; } if (delta->size[0] == 0) { } else { b_D = 0; while ((delta->size[0] > 0) && (b_D <= delta->size[0])) { i0 = b_D + delta->size[0]; for (ic = b_D; ic + 1 <= i0; ic++) { C->data[ic] = 0.0; } b_D += delta->size[0]; } br = 0; b_D = 0; while ((delta->size[0] > 0) && (b_D <= delta->size[0])) { ar = 0; i0 = br + delta->size[1]; for (ib = br; ib + 1 <= i0; ib++) { if (y->data[ib] != 0.0) { ia = ar; i1 = b_D + delta->size[0]; for (ic = b_D; ic + 1 <= i1; ic++) { ia++; C->data[ic] += y->data[ib] * delta->data[ia - 1]; } } ar += delta->size[0]; } br += delta->size[1]; b_D += delta->size[0]; } } } guard1 = FALSE; if (delta->size[1] == 1) { guard1 = TRUE; } else { b_D = x->size[0]; if (b_D == 1) { guard1 = TRUE; } else { unnamed_idx_0 = (uint32_T)delta->size[0]; i0 = b_C->size[0] * b_C->size[1]; b_C->size[0] = (int32_T)unnamed_idx_0; b_C->size[1] = 2; emxEnsureCapacity((emxArray__common *)b_C, i0, (int32_T)sizeof(real_T)); br = (int32_T)unnamed_idx_0 << 1; for (i0 = 0; i0 < br; i0++) { b_C->data[i0] = 0.0; } if (delta->size[0] == 0) { } else { b_D = 0; while ((delta->size[0] > 0) && (b_D <= delta->size[0])) { i0 = b_D + delta->size[0]; for (ic = b_D; ic + 1 <= i0; ic++) { b_C->data[ic] = 0.0; } b_D += delta->size[0]; } br = 0; b_D = 0; while ((delta->size[0] > 0) && (b_D <= delta->size[0])) { ar = 0; i0 = br + delta->size[1]; for (ib = br + 1; ib <= i0; ib++) { ia = ar; i1 = b_D + delta->size[0]; for (ic = b_D; ic + 1 <= i1; ic++) { ia++; b_C->data[ic] += delta->data[ia - 1]; } ar += delta->size[0]; } br += delta->size[1]; b_D += delta->size[0]; } } } } if (guard1 == TRUE) { i0 = b_C->size[0] * b_C->size[1]; b_C->size[0] = delta->size[0]; b_C->size[1] = 2; emxEnsureCapacity((emxArray__common *)b_C, i0, (int32_T)sizeof(real_T)); br = delta->size[0]; for (i0 = 0; i0 < br; i0++) { for (i1 = 0; i1 < 2; i1++) { b_C->data[i0 + b_C->size[0] * i1] = 0.0; ar = delta->size[1]; for (b_D = 0; b_D < ar; b_D++) { b_C->data[i0 + b_C->size[0] * i1] += delta->data[i0 + delta->size[0] * b_D]; } } } } i0 = H->size[0] * H->size[1]; H->size[1] = 2; emxEnsureCapacity((emxArray__common *)H, i0, (int32_T)sizeof(real_T)); b_D = H->size[0]; br = H->size[1]; br *= b_D; for (i0 = 0; i0 < br; i0++) { H->data[i0] = ((H->data[i0] - deltaone->data[i0]) - 2.0 * y->data[i0] * C->data[i0]) + y2->data[i0] * b_C->data[i0]; } b_D = H->size[0] << 1; i0 = s->size[0]; s->size[0] = b_D; emxEnsureCapacity((emxArray__common *)s, i0, (int32_T)sizeof(real_T)); br = 0; do { exitg3 = 0; b_D = H->size[0] << 1; if (br <= b_D - 1) { s->data[br] = fabs(H->data[br]); br++; } else { exitg3 = 1; } } while (exitg3 == 0); i0 = s->size[0]; s->size[0] = g->size[0] << 1; emxEnsureCapacity((emxArray__common *)s, i0, (int32_T)sizeof(real_T)); br = g->size[0] << 1; for (i0 = 0; i0 < br; i0++) { s->data[i0] = -g->data[i0] / s->data[i0]; } i0 = deltaone->size[0] * deltaone->size[1]; deltaone->size[0] = y->size[0]; deltaone->size[1] = 2; emxEnsureCapacity((emxArray__common *)deltaone, i0, (int32_T)sizeof(real_T)); br = y->size[0] * y->size[1]; for (i0 = 0; i0 < br; i0++) { deltaone->data[i0] = y->data[i0]; } /* use step-halving procedure to ensure progress is made */ ib = 1; ia = 0; exitg2 = FALSE; while ((exitg2 == FALSE) && (ia < 20)) { ib = ia + 1; b_D = deltaone->size[0] << 1; i0 = b_deltaone->size[0]; b_deltaone->size[0] = b_D; emxEnsureCapacity((emxArray__common *)b_deltaone, i0, (int32_T)sizeof (real_T)); for (i0 = 0; i0 < b_D; i0++) { b_deltaone->data[i0] = deltaone->data[i0] + s->data[i0]; } for (i0 = 0; i0 < 2; i0++) { b_y[i0] = y->size[i0]; } i0 = y->size[0] * y->size[1]; y->size[0] = b_y[0]; y->size[1] = b_y[1]; emxEnsureCapacity((emxArray__common *)y, i0, (int32_T)sizeof(real_T)); br = b_y[1]; for (i0 = 0; i0 < br; i0++) { ar = b_y[0]; for (i1 = 0; i1 < ar; i1++) { y->data[i1 + y->size[0] * i0] = b_deltaone->data[i1 + b_y[0] * i0]; } } b_euclid(y, y, d); b_D = x->size[0]; i0 = delta->size[0] * delta->size[1]; delta->size[0] = b_D; emxEnsureCapacity((emxArray__common *)delta, i0, (int32_T)sizeof(real_T)); b_D = x->size[0]; i0 = delta->size[0] * delta->size[1]; delta->size[1] = b_D; emxEnsureCapacity((emxArray__common *)delta, i0, (int32_T)sizeof(real_T)); br = x->size[0] * x->size[0]; for (i0 = 0; i0 < br; i0++) { delta->data[i0] = 0.0; } E_new = x->size[0]; u1 = x->size[0]; if (E_new <= u1) { } else { E_new = u1; } b_D = (int32_T)E_new; if (b_D > 0) { for (br = 0; br + 1 <= b_D; br++) { delta->data[br + delta->size[0] * br] = 1.0; } } i0 = d->size[0] * d->size[1]; emxEnsureCapacity((emxArray__common *)d, i0, (int32_T)sizeof(real_T)); b_D = d->size[0]; br = d->size[1]; br *= b_D; for (i0 = 0; i0 < br; i0++) { d->data[i0] += delta->data[i0]; } rdivide(d, dinv); i0 = d_D->size[0] * d_D->size[1]; d_D->size[0] = D->size[0]; d_D->size[1] = D->size[1]; emxEnsureCapacity((emxArray__common *)d_D, i0, (int32_T)sizeof(real_T)); br = D->size[0] * D->size[1]; for (i0 = 0; i0 < br; i0++) { d_D->data[i0] = D->data[i0] - d->data[i0]; } power(d_D, delta); i0 = c_delta->size[0] * c_delta->size[1]; c_delta->size[0] = delta->size[0]; c_delta->size[1] = delta->size[1]; emxEnsureCapacity((emxArray__common *)c_delta, i0, (int32_T)sizeof(real_T)); br = delta->size[0] * delta->size[1]; for (i0 = 0; i0 < br; i0++) { c_delta->data[i0] = delta->data[i0] * Dinv->data[i0]; } c_sum(c_delta, b_x); if (b_x->size[1] == 0) { E_new = 0.0; } else { E_new = b_x->data[0]; for (br = 2; br <= b_x->size[1]; br++) { E_new += b_x->data[br - 1]; } } if (E_new < E) { exitg2 = TRUE; } else { i0 = s->size[0]; emxEnsureCapacity((emxArray__common *)s, i0, (int32_T)sizeof(real_T)); br = s->size[0]; for (i0 = 0; i0 < br; i0++) { s->data[i0] *= 0.5; } ia++; } } /* bomb out if too many halving steps are required */ if ((ib == 20) || (fabs((E - E_new) / E) < 1.0E-9)) { exitg1 = TRUE; } else { /* evaluate termination criterion */ /* report progress */ E = E_new; i++; } } emxFree_real_T(&b_deltaone); emxFree_real_T(&d_D); emxFree_real_T(&c_delta); emxFree_real_T(&b_x); emxFree_real_T(&b_C); emxFree_real_T(&C); emxFree_real_T(&s); emxFree_real_T(&H); emxFree_real_T(&y2); emxFree_real_T(&g); emxFree_real_T(&deltaone); emxFree_real_T(&delta); emxFree_real_T(&dinv); emxFree_real_T(&d); emxFree_real_T(&Dinv); emxFree_real_T(&D); /* fiddle stress to match the original Sammon paper */ }
array randn(const dim_type d0, const dim_type d1, const dim_type d2, const dim_type d3, af_dtype ty) { return randn(dim4(d0, d1, d2, d3), ty); }
array randn(const dim_t d0, const af::dtype ty) { return randn(dim4(d0), ty); }
array randn(const dim_type d0, af_dtype ty) { return randn(dim4(d0), ty); }
int main(int argc, char *argv[]) { FILE *fp; char *temp_path = NULL; char *input_path = NULL; int input_len = 0; char *data_buf = NULL; unsigned int block_size = 0; /* int because the max block size is 16k */ unsigned int num_files = 0; unsigned int ops_per_comp = 0; unsigned int num_ops = 0; unsigned int rw = 0; unsigned int *op_array = NULL; unsigned int *temp_array = NULL; unsigned int j = 0; int opt; clock_t t; struct timeval tv1, tv2; double time_taken; srand (time(NULL)); while ((opt = getopt(argc, argv, "b:n:m:c:l:hrw")) != -1) { switch (opt) { case 'b': /* Block size per read/write */ block_size = atoi((char *)optarg); if (block_size <= 0 || block_size > 32 * 1024) { printf( "Invalid block size or it exceeds 32k\n"); exit(-1); } break; case 'n': /* * Total number of files, each file of the form * /mnt/test/abcd0, /mnt/test/abcd1, ..... * if the path is specified as "/mnt/test/abcd" */ num_files = atoi((char *)optarg); if (num_files <= 0 || num_files > 10000) { printf( "Number of files exceeds 10000 or invalid\n"); exit(-1); } break; case 'm': /* * Total number of reads/writes */ num_ops = atoi((char *)optarg); if (num_ops <= 0 || num_ops > 10000) { printf("Invalid total number of reads/writes " "or it exceeds 10000\n"); exit(-1); } break; case 'c': /* * Number of operations in a single compound, should not * matter in normal reads */ ops_per_comp = atoi((char *)optarg); if (ops_per_comp <= 0 || ops_per_comp > 10) { printf( "Invalid ops per comp or it exceeds 10\n"); exit(-1); } break; case 'l': /* * Path of the files * * If the files are of the form - * /mnt/test/abcd0, /mnt/test/abcd1, ...... * Path should be "/mnt/test/abcd" */ input_path = (char *)optarg; input_len = strlen(input_path); if (input_len <= 0 || input_len > 50) { printf("Name is invalid or is too long, max 50 chars \n"); exit(-1); } break; case 'r': /* Read */ rw = 0; break; case 'w': /* Write */ rw = 1; break; case 'h': default: printf("%s", usage); exit(-1); } } if (argc != 12) { printf("Wrong usage, use -h to get help\n"); exit(-1); } if (rw == 0) { printf("TC_TEST - READ\n"); } else { printf("TC_TEST - WRITE\n"); } printf("Block size - %d\n", block_size); printf("Path - %s\n", input_path); printf("Num_files - %d\n", num_files); printf("Total num of reads/writes - %d\n", num_ops); printf("Num of ops in a comp - %d\n", ops_per_comp); temp_path = malloc(input_len + 8); data_buf = malloc(block_size); j = 0; op_array = malloc(num_files * sizeof(double)); if (op_array == NULL) { printf("Cannot allocate memory for op_array\n"); goto main_exit; } while (j < num_ops) { temp_array = op_array + j; *temp_array = (unsigned int)randn(num_ops / 2, num_ops / 8); j++; } printf("Op_array allocated\n"); j = 0; // t = clock(); gettimeofday(&tv1, NULL); while (j < num_ops) { temp_array = op_array + j; snprintf(temp_path, input_len + 8, "%s%u", input_path, *temp_array); if (rw == 0) { /* Read */ fp = fopen(temp_path, "r"); } else { /* Write */ fp = fopen(temp_path, "w"); } if (fp == NULL) { printf("Error opening file - %s\n", temp_path); goto exit; } if (rw == 0) { /* Read */ fread(data_buf, block_size, 1, (FILE *)fp); } else { /* Write */ fwrite(data_buf, 1, block_size, (FILE *)fp); } fclose(fp); j++; } //t = clock() - t; gettimeofday(&tv2, NULL); // time_taken = ((double)t) / CLOCKS_PER_SEC; time_taken = ((double)(tv2.tv_usec - tv1.tv_usec) / 1000000) + (double)(tv2.tv_sec - tv1.tv_sec); printf("Took %f seconds to execute \n", time_taken); exit: free(op_array); main_exit: free(data_buf); free(temp_path); return 0; }
void Radical::CopyAndPerturb(mat& matXNew, const mat& matX) { matXNew = repmat(matX, nReplicates, 1) + noiseStdDev * randn(nReplicates * matX.n_rows, matX.n_cols); }
void sammon(sammonStackData *SD, const real_T x[1000000], real_T y[2000], real_T *E) { real_T B; int32_T i; real_T dv0[1000]; int32_T b_i; boolean_T exitg1; real_T alpha1; real_T beta1; char_T TRANSB; char_T TRANSA; ptrdiff_t m_t; ptrdiff_t n_t; ptrdiff_t k_t; ptrdiff_t lda_t; ptrdiff_t ldb_t; ptrdiff_t ldc_t; double * alpha1_t; double * Aia0_t; double * Bib0_t; double * beta1_t; double * Cic0_t; real_T g[2000]; real_T y2[2000]; real_T b_y[2000]; real_T c_y[2000]; real_T d_y[2000]; real_T e_y[2000]; real_T b_g[2000]; int32_T j; int32_T b_j; boolean_T exitg2; real_T dv1[1000]; real_T E_new; /* #codgen */ /* */ /* SAMMON - apply Sammon's nonlinear mapping */ /* */ /* Y = SAMMON(X) applies Sammon's nonlinear mapping procedure on */ /* multivariate data X, where each row represents a pattern and each column */ /* represents a feature. On completion, Y contains the corresponding */ /* co-ordinates of each point on the map. By default, a two-dimensional */ /* map is created. Note if X contains any duplicated rows, SAMMON will */ /* fail (ungracefully). */ /* */ /* [Y,E] = SAMMON(X) also returns the value of the cost function in E (i.e. */ /* the stress of the mapping). */ /* */ /* An N-dimensional output map is generated by Y = SAMMON(X,N) . */ /* */ /* A set of optimisation options can also be specified using a third */ /* argument, Y = SAMMON(X,N,OPTS) , where OPTS is a structure with fields: */ /* */ /* MaxIter - maximum number of iterations */ /* TolFun - relative tolerance on objective function */ /* MaxHalves - maximum number of step halvings */ /* Input - {'raw','distance'} if set to 'distance', X is */ /* interpreted as a matrix of pairwise distances. */ /* Display - {'off', 'on', 'iter'} */ /* Initialisation - {'pca', 'random'} */ /* */ /* The default options structure can be retrieved by calling SAMMON with */ /* no parameters. */ /* */ /* References : */ /* */ /* [1] Sammon, John W. Jr., "A Nonlinear Mapping for Data Structure */ /* Analysis", IEEE Transactions on Computers, vol. C-18, no. 5, */ /* pp 401-409, May 1969. */ /* */ /* See also : SAMMON_TEST */ /* */ /* File : sammon.m */ /* */ /* Date : Monday 12th November 2007. */ /* */ /* Author : Gavin C. Cawley and Nicola L. C. Talbot */ /* */ /* Description : Simple vectorised MATLAB implementation of Sammon's non-linear */ /* mapping algorithm [1]. */ /* */ /* References : [1] Sammon, John W. Jr., "A Nonlinear Mapping for Data */ /* Structure Analysis", IEEE Transactions on Computers, */ /* vol. C-18, no. 5, pp 401-409, May 1969. */ /* */ /* History : 10/08/2004 - v1.00 */ /* 11/08/2004 - v1.10 Hessian made positive semidefinite */ /* 13/08/2004 - v1.11 minor optimisation */ /* 12/11/2007 - v1.20 initialisation using the first n principal */ /* components. */ /* */ /* Thanks : Dr Nick Hamilton ([email protected]) for supplying the */ /* code for implementing initialisation using the first n */ /* principal components (introduced in v1.20). */ /* */ /* To do : The current version does not take advantage of the symmetry */ /* of the distance matrix in order to allow for easy */ /* vectorisation. This may not be a good choice for very large */ /* datasets, so perhaps one day I'll get around to doing a MEX */ /* version using the BLAS library etc. for very large datasets. */ /* */ /* Copyright : (c) Dr Gavin C. Cawley, November 2007. */ /* */ /* This program is free software; you can redistribute it and/or modify */ /* it under the terms of the GNU General Public License as published by */ /* the Free Software Foundation; either version 2 of the License, or */ /* (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* */ /* use the default options structure */ /* create distance matrix unless given by parameters */ emlrtPushRtStackR2012b(&emlrtRSI, emlrtRootTLSGlobal); euclid(SD, x, x, SD->f2.D); emlrtPopRtStackR2012b(&emlrtRSI, emlrtRootTLSGlobal); /* remaining initialisation */ B = b_sum(SD->f2.D); eye(SD->f2.delta); for (i = 0; i < 1000000; i++) { SD->f2.D[i] += SD->f2.delta[i]; } rdivide(1.0, SD->f2.D, SD->f2.Dinv); emlrtPushRtStackR2012b(&b_emlrtRSI, emlrtRootTLSGlobal); randn(y); emlrtPopRtStackR2012b(&b_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&c_emlrtRSI, emlrtRootTLSGlobal); b_euclid(SD, y, y, SD->f2.d); eye(SD->f2.delta); for (i = 0; i < 1000000; i++) { SD->f2.d[i] += SD->f2.delta[i]; } emlrtPopRtStackR2012b(&c_emlrtRSI, emlrtRootTLSGlobal); rdivide(1.0, SD->f2.d, SD->f2.dinv); for (i = 0; i < 1000000; i++) { SD->f2.b_D[i] = SD->f2.D[i] - SD->f2.d[i]; } power(SD->f2.b_D, SD->f2.delta); for (i = 0; i < 1000000; i++) { SD->f2.d[i] = SD->f2.delta[i] * SD->f2.Dinv[i]; } d_sum(SD->f2.d, dv0); *E = e_sum(dv0); /* get on with it */ b_i = 0; exitg1 = FALSE; while ((exitg1 == FALSE) && (b_i < 500)) { /* compute gradient, Hessian and search direction (note it is actually */ /* 1/4 of the gradient and Hessian, but the step size is just the ratio */ /* of the gradient and the diagonal of the Hessian so it doesn't matter). */ for (i = 0; i < 1000000; i++) { SD->f2.delta[i] = SD->f2.dinv[i] - SD->f2.Dinv[i]; } emlrtPushRtStackR2012b(&d_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal); alpha1 = 1.0; beta1 = 0.0; TRANSB = 'N'; TRANSA = 'N'; for (i = 0; i < 2000; i++) { SD->f2.y_old[i] = 1.0; SD->f2.deltaone[i] = 0.0; } emlrtPushRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); m_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); n_t = (ptrdiff_t)(2); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); k_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); lda_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); ldb_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); ldc_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal); alpha1_t = (double *)(&alpha1); emlrtPopRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal); Aia0_t = (double *)(&SD->f2.delta[0]); emlrtPopRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal); Bib0_t = (double *)(&SD->f2.y_old[0]); emlrtPopRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal); beta1_t = (double *)(&beta1); emlrtPopRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal); Cic0_t = (double *)(&SD->f2.deltaone[0]); emlrtPopRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal); dgemm(&TRANSA, &TRANSB, &m_t, &n_t, &k_t, alpha1_t, Aia0_t, &lda_t, Bib0_t, &ldb_t, beta1_t, Cic0_t, &ldc_t); emlrtPopRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&d_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&e_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal); alpha1 = 1.0; beta1 = 0.0; TRANSB = 'N'; TRANSA = 'N'; memset(&g[0], 0, 2000U * sizeof(real_T)); emlrtPushRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); m_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); n_t = (ptrdiff_t)(2); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); k_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); lda_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); ldb_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); ldc_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal); alpha1_t = (double *)(&alpha1); emlrtPopRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal); Aia0_t = (double *)(&SD->f2.delta[0]); emlrtPopRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal); Bib0_t = (double *)(&y[0]); emlrtPopRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal); beta1_t = (double *)(&beta1); emlrtPopRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal); Cic0_t = (double *)(&g[0]); emlrtPopRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal); dgemm(&TRANSA, &TRANSB, &m_t, &n_t, &k_t, alpha1_t, Aia0_t, &lda_t, Bib0_t, &ldb_t, beta1_t, Cic0_t, &ldc_t); emlrtPopRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal); for (i = 0; i < 2000; i++) { g[i] -= y[i] * SD->f2.deltaone[i]; } emlrtPopRtStackR2012b(&e_emlrtRSI, emlrtRootTLSGlobal); c_power(SD->f2.dinv, SD->f2.delta); b_power(y, y2); emlrtPushRtStackR2012b(&f_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal); alpha1 = 1.0; beta1 = 0.0; TRANSB = 'N'; TRANSA = 'N'; memset(&b_y[0], 0, 2000U * sizeof(real_T)); emlrtPushRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); m_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); n_t = (ptrdiff_t)(2); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); k_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); lda_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); ldb_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); ldc_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal); alpha1_t = (double *)(&alpha1); emlrtPopRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal); Aia0_t = (double *)(&SD->f2.delta[0]); emlrtPopRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal); Bib0_t = (double *)(&y2[0]); emlrtPopRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal); beta1_t = (double *)(&beta1); emlrtPopRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal); Cic0_t = (double *)(&b_y[0]); emlrtPopRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal); dgemm(&TRANSA, &TRANSB, &m_t, &n_t, &k_t, alpha1_t, Aia0_t, &lda_t, Bib0_t, &ldb_t, beta1_t, Cic0_t, &ldc_t); emlrtPopRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal); for (i = 0; i < 2000; i++) { c_y[i] = 2.0 * y[i]; } emlrtPushRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal); alpha1 = 1.0; beta1 = 0.0; TRANSB = 'N'; TRANSA = 'N'; memset(&d_y[0], 0, 2000U * sizeof(real_T)); emlrtPushRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); m_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); n_t = (ptrdiff_t)(2); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); k_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); lda_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); ldb_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); ldc_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal); alpha1_t = (double *)(&alpha1); emlrtPopRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal); Aia0_t = (double *)(&SD->f2.delta[0]); emlrtPopRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal); Bib0_t = (double *)(&y[0]); emlrtPopRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal); beta1_t = (double *)(&beta1); emlrtPopRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal); Cic0_t = (double *)(&d_y[0]); emlrtPopRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal); dgemm(&TRANSA, &TRANSB, &m_t, &n_t, &k_t, alpha1_t, Aia0_t, &lda_t, Bib0_t, &ldb_t, beta1_t, Cic0_t, &ldc_t); emlrtPopRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal); alpha1 = 1.0; beta1 = 0.0; TRANSB = 'N'; TRANSA = 'N'; for (i = 0; i < 2000; i++) { SD->f2.y_old[i] = 1.0; e_y[i] = 0.0; } emlrtPushRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); m_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); n_t = (ptrdiff_t)(2); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); k_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); lda_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); ldb_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); ldc_t = (ptrdiff_t)(1000); emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal); alpha1_t = (double *)(&alpha1); emlrtPopRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal); Aia0_t = (double *)(&SD->f2.delta[0]); emlrtPopRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal); Bib0_t = (double *)(&SD->f2.y_old[0]); emlrtPopRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal); beta1_t = (double *)(&beta1); emlrtPopRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal); Cic0_t = (double *)(&e_y[0]); emlrtPopRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal); dgemm(&TRANSA, &TRANSB, &m_t, &n_t, &k_t, alpha1_t, Aia0_t, &lda_t, Bib0_t, &ldb_t, beta1_t, Cic0_t, &ldc_t); emlrtPopRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&f_emlrtRSI, emlrtRootTLSGlobal); for (i = 0; i < 2000; i++) { SD->f2.y_old[i] = ((b_y[i] - SD->f2.deltaone[i]) - c_y[i] * d_y[i]) + y2[i] * e_y[i]; b_g[i] = -g[i]; } b_abs(SD->f2.y_old, y2); for (i = 0; i < 2000; i++) { SD->f2.deltaone[i] = y2[i]; SD->f2.y_old[i] = y[i]; } b_rdivide(b_g, SD->f2.deltaone, y2); /* use step-halving procedure to ensure progress is made */ j = 1; b_j = 0; exitg2 = FALSE; while ((exitg2 == FALSE) && (b_j < 20)) { j = b_j + 1; for (i = 0; i < 2000; i++) { y[i] = SD->f2.y_old[i] + y2[i]; } emlrtPushRtStackR2012b(&g_emlrtRSI, emlrtRootTLSGlobal); b_euclid(SD, y, y, SD->f2.d); eye(SD->f2.delta); for (i = 0; i < 1000000; i++) { SD->f2.d[i] += SD->f2.delta[i]; } emlrtPopRtStackR2012b(&g_emlrtRSI, emlrtRootTLSGlobal); rdivide(1.0, SD->f2.d, SD->f2.dinv); for (i = 0; i < 1000000; i++) { SD->f2.b_D[i] = SD->f2.D[i] - SD->f2.d[i]; } power(SD->f2.b_D, SD->f2.delta); for (i = 0; i < 1000000; i++) { SD->f2.d[i] = SD->f2.delta[i] * SD->f2.Dinv[i]; } d_sum(SD->f2.d, dv1); E_new = e_sum(dv1); if (E_new < *E) { exitg2 = TRUE; } else { for (i = 0; i < 2000; i++) { y2[i] *= 0.5; } b_j++; emlrtBreakCheckFastR2012b(emlrtBreakCheckR2012bFlagVar, emlrtRootTLSGlobal); } } /* bomb out if too many halving steps are required */ if ((j == 20) || (muDoubleScalarAbs((*E - E_new) / *E) < 1.0E-9)) { exitg1 = TRUE; } else { /* evaluate termination criterion */ /* report progress */ *E = E_new; b_i++; emlrtBreakCheckFastR2012b(emlrtBreakCheckR2012bFlagVar, emlrtRootTLSGlobal); } } /* fiddle stress to match the original Sammon paper */ *E *= 0.5 / B; }
/* generate simulated observation data ---------------------------------------*/ static int simobs(gtime_t ts, gtime_t te, double tint, const double *rr, nav_t *nav, obs_t *obs, int opt) { gtime_t time; obsd_t data[MAXSAT]={{{0}}}; double pos[3],rs[3*MAXSAT],dts[MAXSAT],r,e[3],azel[2]; double ecp[MAXSAT][NFREQ]={{0}},epr[MAXSAT][NFREQ]={{0}}; double snr[MAXSAT][NFREQ]={{0}},ers[MAXSAT][3]={{0}}; double iono,trop,fact,cp,pr,dtr=0.0,rref[3],bl; int i,j,k,n,ns,amb[MAXSAT][NFREQ]={{0}},sys,prn; char s[64]; double pref[]={36.106114294,140.087190410,70.3010}; /* ref station */ trace(3,"simobs:nnav=%d ngnav=%d\n",nav->n,nav->ng); for (i=0;i<2;i++) pref[i]*=D2R; pos2ecef(pref,rref); for (i=0;i<3;i++) rref[i]-=rr[i]; bl=norm(rref,3)/1E4; /* baseline (10km) */ srand(0); /* ephemeris error */ for (i=0;i<MAXSAT;i++) { data[i].sat=i+1; data[i].P[0]=2E7; for (j=0;j<3;j++) ers[i][j]=randn(0.0,erreph); } srand(tickget()); ecef2pos(rr,pos); n=(int)(timediff(te,ts)/tint+1.0); for (i=0;i<n;i++) { time=timeadd(ts,tint*i); time2str(time,s,0); for (j=0;j<MAXSAT;j++) data[j].time=time; for (j=0;j<3;j++) { /* iteration for pseudorange */ satpos(time,data,MAXSAT,nav,rs,dts); for (k=0;k<MAXSAT;k++) { if ((r=geodist(rs+k*3,rr,e))<=0.0) continue; data[k].P[0]=r+CLIGHT*(dtr-dts[k]); } } satpos(time,data,MAXSAT,nav,rs,dts); for (j=ns=0;j<MAXSAT;j++) { /* add ephemeris error */ for (k=0;k<3;k++) rs[k+j*3]+=ers[j][k]; if ((r=geodist(rs+j*3,rr,e))<=0.0) continue; satazel(pos,e,azel); if (azel[1]<minel*D2R) continue; iono=ionmodel(time,nav->ion,pos,azel); trop=tropmodel(pos,azel,0.3); /* add ionospheric error */ iono+=errion*bl*ionmapf(pos,azel); snrmodel(azel,snr[j]); errmodel(azel,snr[j],ecp[j],epr[j]); sys=satsys(data[j].sat,&prn); for (k=0;k<NFREQ;k++) { data[j].L[k]=data[j].P[k]=0.0; data[j].SNR[k]=0; data[j].LLI[k]=0; if (sys==SYS_GPS) { if (k>=3) continue; /* no L5a/L5b in gps */ if (k>=2&&!gpsblock[prn-1]) continue; /* no L5 in block II */ } else if (sys==SYS_GLO) { if (k>=3) continue; } else if (sys==SYS_GAL) { if (k==1) continue; /* no L2 in galileo */ } else continue; /* generate observation data */ fact=lam[k]*lam[k]/lam[0]/lam[0]; cp=r+CLIGHT*(dtr-dts[j])-fact*iono+trop+ecp[j][k]; pr=r+CLIGHT*(dtr-dts[j])+fact*iono+trop+epr[j][k]; if (amb[j][k]==0) amb[j][k]=(int)(-cp/lam[k]); data[j].L[k]=cp/lam[k]+amb[j][k]; data[j].P[k]=pr; data[j].SNR[k]=(unsigned char)snr[j][k]; data[j].LLI[k]=data[j].SNR[k]<slipthres?1:0; } if (obs->nmax<=obs->n) { if (obs->nmax==0) obs->nmax=65532; else obs->nmax+=65532; if (!(obs->data=(obsd_t *)realloc(obs->data,sizeof(obsd_t)*obs->nmax))) { fprintf(stderr,"malloc error\n"); return 0; } } obs->data[obs->n++]=data[j]; ns++; } fprintf(stderr,"time=%s nsat=%2d\r",s,ns); } fprintf(stderr,"\n"); return 1; }
// [[Rcpp::export]] Rcpp::List if2_s(Rcpp::NumericVector data, int T, int N, int NP, double coolrate, Rcpp::NumericVector params) { // params will be in the order R0, r, sigma, eta, berr, Sinit, Iinit, Rinit Rcpp::NumericMatrix statemeans(T, 3); Rcpp::NumericMatrix statedata(NP, 4); srand(time(NULL)); // Seed PRNG with system time double w[NP]; // particle weights Particle particles[NP]; // particle estimates for current step Particle particles_old[NP]; // intermediate particle states for resampling printf("Initializing particle states\n"); // Initialize particle parameter states (seeding) // Note that they will all be the same for this code as we are only filtering over the states for (int n = 0; n < NP; n++) { particles[n].R0 = params[0]; particles[n].r = params[1]; particles[n].sigma = params[2]; particles[n].eta = params[3]; particles[n].berr = params[4]; particles[n].Sinit = params[5]; particles[n].Iinit = params[6]; particles[n].Rinit = params[7]; } // START PASS THROUGH DATA printf("Starting filter\n"); printf("---------------\n"); // seed initial states for (int n = 0; n < NP; n++) { double Iinitcan; double i_infec = particles[n].Iinit; do { Iinitcan = i_infec + i_infec*randn(); } while (Iinitcan < 0 || N < Iinitcan); particles[n].S = N - Iinitcan; particles[n].I = Iinitcan; particles[n].R = 0.0; particles[n].B = (double) particles[n].R0 * particles[n].r / N; } State sMeans; getStateMeans(&sMeans, particles, NP); statemeans(0,0) = sMeans.S; statemeans(0,1) = sMeans.I; statemeans(0,2) = sMeans.R; // start run through data for (int t = 1; t < T; t++) { // generate individual predictions and weight for (int n = 0; n < NP; n++) { exp_euler_SIR(1.0/7.0, 0.0, 1.0, N, &particles[n]); double merr_par = particles[n].sigma; double y_diff = data[t] - particles[n].I; w[n] = 1.0/(merr_par*sqrt(2.0*M_PI)) * exp( - y_diff*y_diff / (2.0*merr_par*merr_par) ); } // cumulative sum for (int n = 1; n < NP; n++) { w[n] += w[n-1]; } // save particle states to resample from for (int n = 0; n < NP; n++){ copyParticle(&particles_old[n], &particles[n]); } // resampling for (int n = 0; n < NP; n++) { double w_r = randu() * w[NP-1]; int i = 0; while (w_r > w[i]) { i++; } // i is now the index to copy state from copyParticle(&particles[n], &particles_old[i]); } State sMeans; getStateMeans(&sMeans, particles, NP); statemeans(t,0) = sMeans.S; statemeans(t,1) = sMeans.I; statemeans(t,2) = sMeans.R; } // Get particle results to pass back to R for (int n = 0; n < NP; n++) { statedata(n, 0) = particles[n].S; statedata(n, 1) = particles[n].I; statedata(n, 2) = particles[n].R; statedata(n, 3) = particles[n].B; } return Rcpp::List::create( Rcpp::Named("statemeans") = statemeans, Rcpp::Named("statedata") = statedata ); }
/** Time domain physical simulation. noisy: - 0: no noise at all; - 1: poisson and read out noise. - 2: only poisson noise. */ dmat *skysim_sim(dmat **mresout, const dmat *mideal, const dmat *mideal_oa, double ngsol, ASTER_S *aster, const POWFS_S *powfs, const PARMS_S *parms, int idtratc, int noisy, int phystart){ int dtratc=0; if(!parms->skyc.multirate){ dtratc=parms->skyc.dtrats->p[idtratc]; } int hasphy; if(phystart>-1 && phystart<aster->nstep){ hasphy=1; }else{ hasphy=0; } const int nmod=mideal->nx; dmat *res=dnew(6,1);/*Results. 1-2: NGS and TT modes., 3-4:On axis NGS and TT modes, 4-6: On axis NGS and TT wihtout considering un-orthogonality.*/ dmat *mreal=NULL;/*modal correction at this step. */ dmat *merr=dnew(nmod,1);/*modal error */ dcell *merrm=dcellnew(1,1);dcell *pmerrm=NULL; const int nstep=aster->nstep?aster->nstep:parms->maos.nstep; dmat *mres=dnew(nmod,nstep); dmat* rnefs=parms->skyc.rnefs; dcell *zgradc=dcellnew3(aster->nwfs, 1, aster->ngs, 0); dcell *gradout=dcellnew3(aster->nwfs, 1, aster->ngs, 0); dmat *gradsave=0; if(parms->skyc.dbg){ gradsave=dnew(aster->tsa*2,nstep); } SERVO_T *st2t=0; kalman_t *kalman=0; dcell *mpsol=0; dmat *pgm=0; dmat *dtrats=0; int multirate=parms->skyc.multirate; if(multirate){ kalman=aster->kalman[0]; dtrats=aster->dtrats; }else{ if(parms->skyc.servo>0){ const double dtngs=parms->maos.dt*dtratc; st2t=servo_new(merrm, NULL, 0, dtngs, aster->gain->p[idtratc]); pgm=aster->pgm->p[idtratc]; }else{ kalman=aster->kalman[idtratc]; } } if(kalman){ kalman_init(kalman); mpsol=dcellnew(aster->nwfs, 1); //for psol grad. } const long nwvl=parms->maos.nwvl; dcell **psf=0, **mtche=0, **ints=0; ccell *wvf=0,*wvfc=0, *otf=0; if(hasphy){ psf=mycalloc(aster->nwfs,dcell*); wvf=ccellnew(aster->nwfs,1); wvfc=ccellnew(aster->nwfs,1); mtche=mycalloc(aster->nwfs,dcell*); ints=mycalloc(aster->nwfs,dcell*); otf=ccellnew(aster->nwfs,1); for(long iwfs=0; iwfs<aster->nwfs; iwfs++){ const int ipowfs=aster->wfs[iwfs].ipowfs; const long ncomp=parms->maos.ncomp[ipowfs]; const long nsa=parms->maos.nsa[ipowfs]; wvf->p[iwfs]=cnew(ncomp,ncomp); wvfc->p[iwfs]=NULL; psf[iwfs]=dcellnew(nsa,nwvl); //cfft2plan(wvf->p[iwfs], -1); if(parms->skyc.multirate){ mtche[iwfs]=aster->wfs[iwfs].pistat->mtche[(int)aster->idtrats->p[iwfs]]; }else{ mtche[iwfs]=aster->wfs[iwfs].pistat->mtche[idtratc]; } otf->p[iwfs]=cnew(ncomp,ncomp); //cfft2plan(otf->p[iwfs],-1); //cfft2plan(otf->p[iwfs],1); ints[iwfs]=dcellnew(nsa,1); int pixpsa=parms->skyc.pixpsa[ipowfs]; for(long isa=0; isa<nsa; isa++){ ints[iwfs]->p[isa]=dnew(pixpsa,pixpsa); } } } for(int irep=0; irep<parms->skyc.navg; irep++){ if(kalman){ kalman_init(kalman); }else{ servo_reset(st2t); } dcellzero(zgradc); dcellzero(gradout); if(ints){ for(int iwfs=0; iwfs<aster->nwfs; iwfs++){ dcellzero(ints[iwfs]); } } for(int istep=0; istep<nstep; istep++){ memcpy(merr->p, PCOL(mideal,istep), nmod*sizeof(double)); dadd(&merr, 1, mreal, -1);/*form NGS mode error; */ memcpy(PCOL(mres,istep),merr->p,sizeof(double)*nmod); if(mpsol){//collect averaged modes for PSOL. for(long iwfs=0; iwfs<aster->nwfs; iwfs++){ dadd(&mpsol->p[iwfs], 1, mreal, 1); } } pmerrm=0; if(istep>=parms->skyc.evlstart){/*performance evaluation*/ double res_ngs=dwdot(merr->p,parms->maos.mcc,merr->p); if(res_ngs>ngsol*100){ dfree(res); res=NULL; break; } { res->p[0]+=res_ngs; res->p[1]+=dwdot2(merr->p,parms->maos.mcc_tt,merr->p); double dot_oa=dwdot(merr->p, parms->maos.mcc_oa, merr->p); double dot_res_ideal=dwdot(merr->p, parms->maos.mcc_oa, PCOL(mideal,istep)); double dot_res_oa=0; for(int imod=0; imod<nmod; imod++){ dot_res_oa+=merr->p[imod]*IND(mideal_oa,imod,istep); } res->p[2]+=dot_oa-2*dot_res_ideal+2*dot_res_oa; res->p[4]+=dot_oa; } { double dot_oa_tt=dwdot2(merr->p, parms->maos.mcc_oa_tt, merr->p); /*Notice that mcc_oa_tt2 is 2x5 marix. */ double dot_res_ideal_tt=dwdot(merr->p, parms->maos.mcc_oa_tt2, PCOL(mideal,istep)); double dot_res_oa_tt=0; for(int imod=0; imod<2; imod++){ dot_res_oa_tt+=merr->p[imod]*IND(mideal_oa,imod,istep); } res->p[3]+=dot_oa_tt-2*dot_res_ideal_tt+2*dot_res_oa_tt; res->p[5]+=dot_oa_tt; } }//if evl if(istep<phystart || phystart<0){ /*Ztilt, noise free simulation for acquisition. */ dmm(&zgradc->m, 1, aster->gm, merr, "nn", 1);/*grad due to residual NGS mode. */ for(int iwfs=0; iwfs<aster->nwfs; iwfs++){ const int ipowfs=aster->wfs[iwfs].ipowfs; const long ng=parms->maos.nsa[ipowfs]*2; for(long ig=0; ig<ng; ig++){ zgradc->p[iwfs]->p[ig]+=aster->wfs[iwfs].ztiltout->p[istep*ng+ig]; } } for(int iwfs=0; iwfs<aster->nwfs; iwfs++){ int dtrati=(multirate?(int)dtrats->p[iwfs]:dtratc); if((istep+1) % dtrati==0){ dadd(&gradout->p[iwfs], 0, zgradc->p[iwfs], 1./dtrati); dzero(zgradc->p[iwfs]); if(noisy){ int idtrati=(multirate?(int)aster->idtrats->p[iwfs]:idtratc); dmat *nea=aster->wfs[iwfs].pistat->sanea->p[idtrati]; for(int i=0; i<nea->nx; i++){ gradout->p[iwfs]->p[i]+=nea->p[i]*randn(&aster->rand); } } pmerrm=merrm;//record output. } } }else{ /*Accumulate PSF intensities*/ for(long iwfs=0; iwfs<aster->nwfs; iwfs++){ const double thetax=aster->wfs[iwfs].thetax; const double thetay=aster->wfs[iwfs].thetay; const int ipowfs=aster->wfs[iwfs].ipowfs; const long nsa=parms->maos.nsa[ipowfs]; ccell* wvfout=aster->wfs[iwfs].wvfout[istep]; for(long iwvl=0; iwvl<nwvl; iwvl++){ double wvl=parms->maos.wvl[iwvl]; for(long isa=0; isa<nsa; isa++){ ccp(&wvfc->p[iwfs], IND(wvfout,isa,iwvl)); /*Apply NGS mode error to PSF. */ ngsmod2wvf(wvfc->p[iwfs], wvl, merr, powfs+ipowfs, isa, thetax, thetay, parms); cembedc(wvf->p[iwfs],wvfc->p[iwfs],0,C_FULL); cfft2(wvf->p[iwfs],-1); /*peak in corner. */ cabs22d(&psf[iwfs]->p[isa+nsa*iwvl], 1., wvf->p[iwfs], 1.); }/*isa */ }/*iwvl */ }/*iwfs */ /*Form detector image from accumulated PSFs*/ double igrad[2]; for(long iwfs=0; iwfs<aster->nwfs; iwfs++){ int dtrati=dtratc, idtrat=idtratc; if(multirate){//multirate idtrat=aster->idtrats->p[iwfs]; dtrati=dtrats->p[iwfs]; } if((istep+1) % dtrati == 0){/*has output */ dcellzero(ints[iwfs]); const int ipowfs=aster->wfs[iwfs].ipowfs; const long nsa=parms->maos.nsa[ipowfs]; for(long isa=0; isa<nsa; isa++){ for(long iwvl=0; iwvl<nwvl; iwvl++){ double siglev=aster->wfs[iwfs].siglev->p[iwvl]; ccpd(&otf->p[iwfs],psf[iwfs]->p[isa+nsa*iwvl]); cfft2i(otf->p[iwfs], 1); /*turn to OTF, peak in corner */ ccwm(otf->p[iwfs], powfs[ipowfs].dtf[iwvl].nominal); cfft2(otf->p[iwfs], -1); dspmulcreal(ints[iwfs]->p[isa]->p, powfs[ipowfs].dtf[iwvl].si, otf->p[iwfs]->p, siglev); } /*Add noise and apply matched filter. */ #if _OPENMP >= 200805 #pragma omp critical #endif switch(noisy){ case 0:/*no noise at all. */ break; case 1:/*both poisson and read out noise. */ { double bkgrnd=aster->wfs[iwfs].bkgrnd*dtrati; addnoise(ints[iwfs]->p[isa], &aster->rand, bkgrnd, bkgrnd, 0,0,IND(rnefs,idtrat,ipowfs)); } break; case 2:/*there is still poisson noise. */ addnoise(ints[iwfs]->p[isa], &aster->rand, 0, 0, 0,0,0); break; default: error("Invalid noisy\n"); } igrad[0]=0; igrad[1]=0; double pixtheta=parms->skyc.pixtheta[ipowfs]; if(parms->skyc.mtch){ dmulvec(igrad, mtche[iwfs]->p[isa], ints[iwfs]->p[isa]->p, 1); } if(!parms->skyc.mtch || fabs(igrad[0])>pixtheta || fabs(igrad[1])>pixtheta){ if(!parms->skyc.mtch){ warning2("fall back to cog\n"); }else{ warning_once("mtch is out of range\n"); } dcog(igrad, ints[iwfs]->p[isa], 0, 0, 0, 3*IND(rnefs,idtrat,ipowfs), 0); igrad[0]*=pixtheta; igrad[1]*=pixtheta; } gradout->p[iwfs]->p[isa]=igrad[0]; gradout->p[iwfs]->p[isa+nsa]=igrad[1]; }/*isa */ pmerrm=merrm; dcellzero(psf[iwfs]);/*reset accumulation.*/ }/*if iwfs has output*/ }/*for wfs*/ }/*if phystart */ //output to mreal after using it to ensure two cycle delay. if(st2t){//Type I or II control. if(st2t->mint->p[0]){//has output. dcp(&mreal, st2t->mint->p[0]->p[0]); } }else{//LQG control kalman_output(kalman, &mreal, 0, 1); } if(kalman){//LQG control int indk=0; //Form PSOL grads and obtain index to LQG M for(int iwfs=0; iwfs<aster->nwfs; iwfs++){ int dtrati=(multirate?(int)dtrats->p[iwfs]:dtratc); if((istep+1) % dtrati==0){ indk|=1<<iwfs; dmm(&gradout->p[iwfs], 1, aster->g->p[iwfs], mpsol->p[iwfs], "nn", 1./dtrati); dzero(mpsol->p[iwfs]); } } if(indk){ kalman_update(kalman, gradout->m, indk-1); } }else if(st2t){ if(pmerrm){ dmm(&merrm->p[0], 0, pgm, gradout->m, "nn", 1); } servo_filter(st2t, pmerrm);//do even if merrm is zero. to simulate additional latency } if(parms->skyc.dbg){ memcpy(PCOL(gradsave, istep), gradout->m->p, sizeof(double)*gradsave->nx); } }/*istep; */ } if(parms->skyc.dbg){ int dtrati=(multirate?(int)dtrats->p[0]:dtratc); writebin(gradsave,"%s/skysim_grads_aster%d_dtrat%d",dirsetup, aster->iaster,dtrati); writebin(mres,"%s/skysim_sim_mres_aster%d_dtrat%d",dirsetup,aster->iaster,dtrati); } dfree(mreal); dcellfree(mpsol); dfree(merr); dcellfree(merrm); dcellfree(zgradc); dcellfree(gradout); dfree(gradsave); if(hasphy){ dcellfreearr(psf, aster->nwfs); dcellfreearr(ints, aster->nwfs); ccellfree(wvf); ccellfree(wvfc); ccellfree(otf); free(mtche); } servo_free(st2t); /*dfree(mres); */ if(mresout) { *mresout=mres; }else{ dfree(mres); } dscale(res, 1./((nstep-parms->skyc.evlstart)*parms->skyc.navg)); return res; }
int doAllFeatures() { /* Initial weights */ int i, j, h; for (j=0; j<NMOVIES; j++) { for (i=0; i<TOTAL_FEATURES; i++) { vishid[j][0][i] = 0.02 * randn() - 0.01; // Normal Distribution vishid[j][1][i] = 0.02 * randn() - 0.01; // Normal Distribution vishid[j][2][i] = 0.02 * randn() - 0.01; // Normal Distribution vishid[j][3][i] = 0.02 * randn() - 0.01; // Normal Distribution vishid[j][4][i] = 0.02 * randn() - 0.01; // Normal Distribution } } /* Initial biases */ for(i=0;i<TOTAL_FEATURES;i++) { hidbiases[i]=0.0; } for (j=0; j<NMOVIES; j++) { unsigned int mtot = moviercount[j*SOFTMAX+0] + moviercount[j*SOFTMAX+1] + moviercount[j*SOFTMAX+2] + moviercount[j*SOFTMAX+3] + moviercount[j*SOFTMAX+4]; for (i=0; i<SOFTMAX; i++) { visbiases[j][i] = log( ((double)moviercount[j*SOFTMAX+i]) / ((double) mtot) ); } } /* Optimize current feature */ double nrmse=2., last_rmse=10.; double prmse = 0, last_prmse=0; double s; int n; int loopcount=0; double EpsilonW = epsilonw; double EpsilonVB = epsilonvb; double EpsilonHB = epsilonhb; double Momentum = momentum; ZERO(CDinc); ZERO(visbiasinc); ZERO(hidbiasinc); int tSteps = 1; // Iterate through the model while the RMSE is decreasing //while ( ((nrmse < (last_rmse-E) && prmse<last_prmse) || loopcount < 14) && loopcount < 80 ) { while ( ((nrmse < (last_rmse-E) ) || loopcount < 14) && loopcount < 80 ) { if ( loopcount >= 10 ) tSteps = 3 + (loopcount - 10)/5; last_rmse=nrmse; last_prmse=prmse; clock_t t0=clock(); loopcount++; int ntrain = 0; nrmse = 0.0; s = 0.0; n = 0; if ( loopcount > 5 ) Momentum = finalmomentum; //* CDpos =0, CDneg=0 (matrices) ZERO(CDpos); ZERO(CDneg); ZERO(poshidact); ZERO(neghidact); ZERO(posvisact); ZERO(negvisact); ZERO(moviecount); int u,m, f; for(u=0;u<NUSERS;u++) { //* Clear summations for probabilities ZERO(negvisprobs); ZERO(nvp2); //* perform steps 1 to 8 int base0=useridx[u][0]; int d0=UNTRAIN(u); int dall=UNALL(u); // For all rated movies, accumulate contributions to hidden units double sumW[TOTAL_FEATURES]; ZERO(sumW); for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; moviecount[m]++; // 1. get one data point from data set. // 2. use values of this data point to set state of visible neurons Si int r=(userent[base0+j]>>USER_LMOVIEMASK)&7; // Add to the bias contribution for set visible units posvisact[m][r] += 1.0; // for all hidden units h: for(h=0;h<TOTAL_FEATURES;h++) { // sum_j(W[i][j] * v[0][j])) sumW[h] += vishid[m][r][h]; } } // Sample the hidden units state after computing probabilities for(h=0;h<TOTAL_FEATURES;h++) { // 3. compute Sj for each hidden neuron based on formula above and states of visible neurons Si // poshidprobs[h] = 1./(1 + exp(-V*vishid - hidbiases); // compute Q(h[0][i] = 1 | v[0]) # for binomial units, sigmoid(b[i] + sum_j(W[i][j] * v[0][j])) poshidprobs[h] = 1.0/(1.0 + exp(-sumW[h] - hidbiases[h])); // sample h[0][i] from Q(h[0][i] = 1 | v[0]) if ( poshidprobs[h] > (rand()/(double)(RAND_MAX)) ) { poshidstates[h]=1; poshidact[h] += 1.0; } else { poshidstates[h]=0; } } // Load up a copy of poshidstates for use in loop for ( h=0; h < TOTAL_FEATURES; h++ ) curposhidstates[h] = poshidstates[h]; // Make T Contrastive Divergence steps int stepT = 0; do { // Determine if this is the last pass through this loop int finalTStep = (stepT+1 >= tSteps); // 5. on visible neurons compute Si using the Sj computed in step3. This is known as reconstruction // for all visible units j: int r; int count = d0; count += useridx[u][2]; // too compute probe errors for(j=0;j<count;j++) { int m=userent[base0+j]&USER_MOVIEMASK; for(h=0;h<TOTAL_FEATURES;h++) { // Accumulate Weight values for sampled hidden states == 1 if ( curposhidstates[h] == 1 ) { for(r=0;r<SOFTMAX;r++) { negvisprobs[m][r] += vishid[m][r][h]; } } // Compute more accurate probabilites for RMSE reporting if ( stepT == 0 ) { for(r=0;r<SOFTMAX;r++) nvp2[m][r] += poshidprobs[h] * vishid[m][r][h]; } } // compute P(v[1][j] = 1 | h[0]) # for binomial units, sigmoid(c[j] + sum_i(W[i][j] * h[0][i])) // Softmax elements are handled individually here negvisprobs[m][0] = 1./(1 + exp(-negvisprobs[m][0] - visbiases[m][0])); negvisprobs[m][1] = 1./(1 + exp(-negvisprobs[m][1] - visbiases[m][1])); negvisprobs[m][2] = 1./(1 + exp(-negvisprobs[m][2] - visbiases[m][2])); negvisprobs[m][3] = 1./(1 + exp(-negvisprobs[m][3] - visbiases[m][3])); negvisprobs[m][4] = 1./(1 + exp(-negvisprobs[m][4] - visbiases[m][4])); // Normalize probabilities double tsum = negvisprobs[m][0] + negvisprobs[m][1] + negvisprobs[m][2] + negvisprobs[m][3] + negvisprobs[m][4]; if ( tsum != 0 ) { negvisprobs[m][0] /= tsum; negvisprobs[m][1] /= tsum; negvisprobs[m][2] /= tsum; negvisprobs[m][3] /= tsum; negvisprobs[m][4] /= tsum; } // Compute and Normalize more accurate RMSE reporting probabilities if ( stepT == 0) { nvp2[m][0] = 1./(1 + exp(-nvp2[m][0] - visbiases[m][0])); nvp2[m][1] = 1./(1 + exp(-nvp2[m][1] - visbiases[m][1])); nvp2[m][2] = 1./(1 + exp(-nvp2[m][2] - visbiases[m][2])); nvp2[m][3] = 1./(1 + exp(-nvp2[m][3] - visbiases[m][3])); nvp2[m][4] = 1./(1 + exp(-nvp2[m][4] - visbiases[m][4])); double tsum2 = nvp2[m][0] + nvp2[m][1] + nvp2[m][2] + nvp2[m][3] + nvp2[m][4]; if ( tsum2 != 0 ) { nvp2[m][0] /= tsum2; nvp2[m][1] /= tsum2; nvp2[m][2] /= tsum2; nvp2[m][3] /= tsum2; nvp2[m][4] /= tsum2; } } // sample v[1][j] from P(v[1][j] = 1 | h[0]) double randval = (rand()/(double)(RAND_MAX)); if ( (randval -= negvisprobs[m][0]) <= 0.0 ) negvissoftmax[m] = 0; else if ( (randval -= negvisprobs[m][1]) <= 0.0 ) negvissoftmax[m] = 1; else if ( (randval -= negvisprobs[m][2]) <= 0.0 ) negvissoftmax[m] = 2; else if ( (randval -= negvisprobs[m][3]) <= 0.0 ) negvissoftmax[m] = 3; else //if ( (randval -= negvisprobs[m][4]) <= 0.0 ) negvissoftmax[m] = 4; // if in training data then train on it if ( j < d0 && finalTStep ) negvisact[m][negvissoftmax[m]] += 1.0; } // 6. compute state of hidden neurons Sj again using Si from 5 step. // For all rated movies accumulate contributions to hidden units from sampled visible units ZERO(sumW); for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; // for all hidden units h: for(h=0;h<TOTAL_FEATURES;h++) { sumW[h] += vishid[m][negvissoftmax[m]][h]; } } // for all hidden units h: for(h=0;h<TOTAL_FEATURES;h++) { // compute Q(h[1][i] = 1 | v[1]) # for binomial units, sigmoid(b[i] + sum_j(W[i][j] * v[1][j])) neghidprobs[h] = 1./(1 + exp(-sumW[h] - hidbiases[h])); // Sample the hidden units state again. if ( neghidprobs[h] > (rand()/(double)(RAND_MAX)) ) { neghidstates[h]=1; if ( finalTStep ) neghidact[h] += 1.0; } else { neghidstates[h]=0; } } // Compute error rmse and prmse before we start iterating on T if ( stepT == 0 ) { // Compute rmse on training data for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; int r=(userent[base0+j]>>USER_LMOVIEMASK)&7; //# Compute some error function like sum of squared difference between Si in 1) and Si in 5) double expectedV = nvp2[m][1] + 2.0 * nvp2[m][2] + 3.0 * nvp2[m][3] + 4.0 * nvp2[m][4]; double vdelta = (((double)r)-expectedV); nrmse += (vdelta * vdelta); } ntrain+=d0; // Sum up probe rmse int base=useridx[u][0]; for(i=1;i<2;i++) base+=useridx[u][i]; int d=useridx[u][2]; for(i=0; i<d;i++) { int m=userent[base+i]&USER_MOVIEMASK; int r=(userent[base+i]>>USER_LMOVIEMASK)&7; //# Compute some error function like sum of squared difference between Si in 1) and Si in 5) double expectedV = nvp2[m][1] + 2.0 * nvp2[m][2] + 3.0 * nvp2[m][3] + 4.0 * nvp2[m][4]; double vdelta = (((double)r)-expectedV); s+=vdelta*vdelta; } n+=d; } // If looping again, load the curposvisstates if ( !finalTStep ) { for ( h=0; h < TOTAL_FEATURES; h++ ) curposhidstates[h] = neghidstates[h]; ZERO(negvisprobs); } // 8. repeating multiple times steps 5,6 and 7 compute (Si.Sj)n. Where n is small number and can // increase with learning steps to achieve better accuracy. } while ( ++stepT < tSteps ); // Accumulate contrastive divergence contributions for (Si.Sj)0 and (Si.Sj)T for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; int r=(userent[base0+j]>>USER_LMOVIEMASK)&7; // for all hidden units h: for(h=0;h<TOTAL_FEATURES;h++) { if ( poshidstates[h] == 1 ) { // 4. now Si and Sj values can be used to compute (Si.Sj)0 here () means just values not average //* accumulate CDpos = CDpos + (Si.Sj)0 CDpos[m][r][h] += 1.0; } // 7. now use Si and Sj to compute (Si.Sj)1 (fig.3) CDneg[m][negvissoftmax[m]][h] += (double)neghidstates[h]; } } // Update weights and biases after batch // int bsize = 100; if ( ((u+1) % bsize) == 0 || (u+1) == NUSERS ) { int numcases = u % bsize; numcases++; // Update weights for(m=0;m<NMOVIES;m++) { if ( moviecount[m] == 0 ) continue; // for all hidden units h: for(h=0;h<TOTAL_FEATURES;h++) { // for all softmax int rr; for(rr=0;rr<SOFTMAX;rr++) { //# At the end compute average of CDpos and CDneg by dividing them by number of data points. //# Compute CD = < Si.Sj >0 < Si.Sj >n = CDpos CDneg double CDp = CDpos[m][rr][h]; double CDn = CDneg[m][rr][h]; if ( CDp != 0.0 || CDn != 0.0 ) { CDp /= ((double)moviecount[m]); CDn /= ((double)moviecount[m]); // W += epsilon * (h[0] * v[0]' - Q(h[1][.] = 1 | v[1]) * v[1]') //# Update weights and biases W = W + alpha*CD (biases are just weights to neurons that stay always 1.0) //e.g between data and reconstruction. CDinc[m][rr][h] = Momentum * CDinc[m][rr][h] + EpsilonW * ((CDp - CDn) - weightcost * vishid[m][rr][h]); vishid[m][rr][h] += CDinc[m][rr][h]; } } } // Update visible softmax biases // c += epsilon * (v[0] - v[1])$ // for all softmax int rr; for(rr=0;rr<SOFTMAX;rr++) { if ( posvisact[m][rr] != 0.0 || negvisact[m][rr] != 0.0 ) { posvisact[m][rr] /= ((double)moviecount[m]); negvisact[m][rr] /= ((double)moviecount[m]); visbiasinc[m][rr] = Momentum * visbiasinc[m][rr] + EpsilonVB * ((posvisact[m][rr] - negvisact[m][rr])); //visbiasinc[m][rr] = Momentum * visbiasinc[m][rr] + EpsilonVB * ((posvisact[m][rr] - negvisact[m][rr]) - weightcost * visbiases[m][rr]); visbiases[m][rr] += visbiasinc[m][rr]; } } } // Update hidden biases // b += epsilon * (h[0] - Q(h[1][.] = 1 | v[1])) for(h=0;h<TOTAL_FEATURES;h++) { if ( poshidact[h] != 0.0 || neghidact[h] != 0.0 ) { poshidact[h] /= ((double)(numcases)); neghidact[h] /= ((double)(numcases)); hidbiasinc[h] = Momentum * hidbiasinc[h] + EpsilonHB * ((poshidact[h] - neghidact[h])); //hidbiasinc[h] = Momentum * hidbiasinc[h] + EpsilonHB * ((poshidact[h] - neghidact[h]) - weightcost * hidbiases[h]); hidbiases[h] += hidbiasinc[h]; } } ZERO(CDpos); ZERO(CDneg); ZERO(poshidact); ZERO(neghidact); ZERO(posvisact); ZERO(negvisact); ZERO(moviecount); } } nrmse=sqrt(nrmse/ntrain); prmse = sqrt(s/n); printf("%f\t%f\t%f\n",nrmse,prmse,(clock()-t0)/(double)CLOCKS_PER_SEC); if ( TOTAL_FEATURES == 200 ) { if ( loopcount > 6 ) { EpsilonW *= 0.90; EpsilonVB *= 0.90; EpsilonHB *= 0.90; } else if ( loopcount > 5 ) { // With 200 hidden variables, you need to slow things down a little more EpsilonW *= 0.50; // This could probably use some more optimization EpsilonVB *= 0.50; EpsilonHB *= 0.50; } else if ( loopcount > 2 ) { EpsilonW *= 0.70; EpsilonVB *= 0.70; EpsilonHB *= 0.70; } } else { // The 100 hidden variable case if ( loopcount > 8 ) { EpsilonW *= 0.92; EpsilonVB *= 0.92; EpsilonHB *= 0.92; } else if ( loopcount > 6 ) { EpsilonW *= 0.90; EpsilonVB *= 0.90; EpsilonHB *= 0.90; } else if ( loopcount > 2 ) { EpsilonW *= 0.78; EpsilonVB *= 0.78; EpsilonHB *= 0.78; } } } /* Perform a final iteration in which the errors are clipped and stored */ recordErrors(); //if(save_model) { //dappend_bin(fnameV,sV,NMOVIES); //dappend_bin(fnameU,sU,NUSERS); //} return 1; }
/** * The implementation of the particle filter using OpenMP for many frames * @see http://openmp.org/wp/ * @note This function is designed to work with a video of several frames. In addition, it references a provided MATLAB function which takes the video, the target matrix and the x and y arrays as arguments and returns the likelihoods * @param I The video to be run * @param IszX The x dimension of the video * @param IszY The y dimension of the video * @param Nfr The number of frames * @param seed The seed array used for random number generation * @param Nparticles The number of particles to be used * @param x_loc The array that will store the x locations of the desired object * @param y_loc The array that will store the y locations of the desired object * @param xe The starting x position of the object * @param ye The starting y position of the object * @param target The matrix containing the offsets to be used in the likelihood function */ void particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles, double * x_loc, double * y_loc, double xe, double ye, mxArray * target){ int x, y, max_size; double * weights, *likelihood, *arrayX, *arrayY, *xj, *yj, *CDF, *u; max_size = IszX*IszY*Nfr; /*original particle centroid*/ x_loc[0] = xe; y_loc[0] = ye; /*initial weights are all equal (1/Nparticles)*/ weights = (double *)malloc(sizeof(double)*Nparticles); for(x = 0; x < Nparticles; x++){ weights[x] = 1/((double)(Nparticles)); } /*initial likelihood to 0.0*/ likelihood = (double *)mxCalloc(Nparticles, sizeof(double)); arrayX = (double *)mxCalloc(Nparticles, sizeof(double)); arrayY = (double *)mxCalloc(Nparticles, sizeof(double)); xj = (double *)mxCalloc(Nparticles, sizeof(double)); yj = (double *)mxCalloc(Nparticles, sizeof(double)); CDF = (double *)mxCalloc(Nparticles, sizeof(double)); u = (double *)mxCalloc(Nparticles, sizeof(double)); mxArray * arguments[4]; mxArray * mxIK = mxCreateDoubleMatrix(IszX, IszY, mxREAL); mxArray * mxX = mxCreateDoubleMatrix(1, Nparticles, mxREAL); mxArray * mxY = mxCreateDoubleMatrix(1, Nparticles, mxREAL); double * Ik = (double *)mxCalloc(IszX*IszY, sizeof(double)); mxArray * result = mxCreateDoubleMatrix(1, Nparticles, mxREAL); #pragma omp parallel for shared(arrayX, arrayY, xe, ye) private(x) for(x = 0; x < Nparticles; x++){ arrayX[x] = xe; arrayY[x] = ye; } int k; int indX, indY; //speed double speed = .01; for(k = 1; k < Nfr; k++){ /*apply motion model //draws sample from motion model (random walk). The only prior information //is that the object moves 2x as fast as in the y direction*/ #pragma omp parallel for shared(arrayX, arrayY, Nparticles, seed) private(x) for(x = 0; x < Nparticles; x++){ arrayX[x] += speed + 2*randn(seed, x); arrayY[x] += 2*randn(seed, x); if(arrayX[x] > IszX) arrayX = IszX; if(arrayY[x] > IszY) arrayY = IszY; if(arrayX[x] < 1) arrayX[x] = 1; if(arrayY[x] < 1) arrayY[x] = 1; } //get the current image for(x = 0; x < IszX; x++) { for(y = 0; y < IszY; y++) { Ik[x*IszX + y] = (double)I[k*IszX*IszY + x*IszY + y]; } } //copy arguments memcpy(mxGetPr(mxIK), Ik, sizeof(double)*IszX*IszY); memcpy(mxGetPr(mxX), arrayX, sizeof(double)*Nparticles); memcpy(mxGetPr(mxY), arrayY, sizeof(double)*Nparticles); arguments[0] = mxIK; arguments[1] = target; arguments[2] = mxX; arguments[3] = mxY; mexCallMATLAB(1, &result, 4, arguments, "GetLikelihood"); memcpy(likelihood, result, sizeof(double)*Nparticles); /* update & normalize weights // using equation (63) of Arulampalam Tutorial*/ #pragma omp parallel for shared(Nparticles, weights, likelihood) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = weights[x] * exp(likelihood[x]); } double sumWeights = 0; #pragma omp parallel for shared(weights) private(x) reduction(+:sumWeights) for(x = 0; x < Nparticles; x++){ sumWeights += weights[x]; } #pragma omp parallel for shared(sumWeights, weights) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = weights[x]/sumWeights; } xe = 0; ye = 0; /* estimate the object location by expected values*/ #pragma omp parallel for private(x) reduction(+:xe, ye) for(x = 0; x < Nparticles; x++){ xe += arrayX[x] * weights[x]; ye += arrayY[x] * weights[x]; } x_loc[k] = xe; y_loc[k] = ye; /*display(hold off for now) //pause(hold off for now) //resampling*/ CDF[0] = weights[0]; for(x = 1; x < Nparticles; x++){ CDF[x] = weights[x] + CDF[x-1]; } double u1 = (1/((double)(Nparticles)))*randu(seed, 0); #pragma omp parallel for shared(u, u1, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ u[x] = u1 + x/((double)(Nparticles)); } int j, i; #pragma omp parallel for shared(CDF, Nparticles, xj, yj, u, arrayX, arrayY) private(i, j) for(j = 0; j < Nparticles; j++){ i = findIndex(CDF, Nparticles, u[j]); /*i = findIndexBin(CDF, 0, Nparticles, u[j]);*/ if(i == -1) i = Nparticles-1; xj[j] = arrayX[i]; yj[j] = arrayY[i]; } /*reassign arrayX and arrayY*/ #pragma omp parallel for shared(weights, arrayX, arrayY, xj, yj, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = 1/((double)(Nparticles)); arrayX[x] = xj[x]; arrayY[x] = yj[x]; } } mxFree(weights); mxFree(likelihood); mxFree(arrayX); mxFree(arrayY); mxFree(CDF); mxFree(u); mxFree(xj); mxFree(yj); mxFree(Ik); }
/** * The implementation of the particle filter using OpenMP for many frames * @see http://openmp.org/wp/ * @note This function is designed to work with a video of several frames. In addition, it references a provided MATLAB function which takes the video, the objxy matrix and the x and y arrays as arguments and returns the likelihoods * @param I The video to be run * @param IszX The x dimension of the video * @param IszY The y dimension of the video * @param Nfr The number of frames * @param seed The seed array used for random number generation * @param Nparticles The number of particles to be used */ void particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles){ int max_size = IszX*IszY*Nfr; long long start = get_time(); //original particle centroid double xe = roundDouble(IszY/2.0); double ye = roundDouble(IszX/2.0); //expected object locations, compared to center int radius = 5; int diameter = radius*2 - 1; int * disk = (int *)malloc(diameter*diameter*sizeof(int)); strelDisk(disk, radius); int countOnes = 0; int x, y; for(x = 0; x < diameter; x++){ for(y = 0; y < diameter; y++){ if(disk[x*diameter + y] == 1) countOnes++; } } double * objxy = (double *)malloc(countOnes*2*sizeof(double)); getneighbors(disk, countOnes, objxy, radius); long long get_neighbors = get_time(); printf("TIME TO GET NEIGHBORS TOOK: %f\n", elapsed_time(start, get_neighbors)); //initial weights are all equal (1/Nparticles) double * weights = (double *)malloc(sizeof(double)*Nparticles); #pragma omp parallel for shared(weights, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = 1/((double)(Nparticles)); } long long get_weights = get_time(); printf("TIME TO GET WEIGHTSTOOK: %f\n", elapsed_time(get_neighbors, get_weights)); //initial likelihood to 0.0 double * likelihood = (double *)malloc(sizeof(double)*Nparticles); double * arrayX = (double *)malloc(sizeof(double)*Nparticles); double * arrayY = (double *)malloc(sizeof(double)*Nparticles); double * xj = (double *)malloc(sizeof(double)*Nparticles); double * yj = (double *)malloc(sizeof(double)*Nparticles); double * CDF = (double *)malloc(sizeof(double)*Nparticles); double * u = (double *)malloc(sizeof(double)*Nparticles); int * ind = (int*)malloc(sizeof(int)*countOnes*Nparticles); #pragma omp parallel for shared(arrayX, arrayY, xe, ye) private(x) for(x = 0; x < Nparticles; x++){ arrayX[x] = xe; arrayY[x] = ye; } int k; printf("TIME TO SET ARRAYS TOOK: %f\n", elapsed_time(get_weights, get_time())); int indX, indY; for(k = 1; k < Nfr; k++){ long long set_arrays = get_time(); //apply motion model //draws sample from motion model (random walk). The only prior information //is that the object moves 2x as fast as in the y direction #pragma omp parallel for shared(arrayX, arrayY, Nparticles, seed) private(x) for(x = 0; x < Nparticles; x++){ arrayX[x] += 1 + 5*randn(seed, x); arrayY[x] += -2 + 2*randn(seed, x); } long long error = get_time(); printf("TIME TO SET ERROR TOOK: %f\n", elapsed_time(set_arrays, error)); //particle filter likelihood #pragma omp parallel for shared(likelihood, I, arrayX, arrayY, objxy, ind) private(x, y, indX, indY) for(x = 0; x < Nparticles; x++){ //compute the likelihood: remember our assumption is that you know // foreground and the background image intensity distribution. // Notice that we consider here a likelihood ratio, instead of // p(z|x). It is possible in this case. why? a hometask for you. //calc ind for(y = 0; y < countOnes; y++){ indX = roundDouble(arrayX[x]) + objxy[y*2 + 1]; indY = roundDouble(arrayY[x]) + objxy[y*2]; ind[x*countOnes + y] = fabs(indX*IszY*Nfr + indY*Nfr + k); if(ind[x*countOnes + y] >= max_size) ind[x*countOnes + y] = 0; } likelihood[x] = 0; for(y = 0; y < countOnes; y++) likelihood[x] += (pow((I[ind[x*countOnes + y]] - 100),2) - pow((I[ind[x*countOnes + y]]-228),2))/50.0; likelihood[x] = likelihood[x]/((double) countOnes); } long long likelihood_time = get_time(); printf("TIME TO GET LIKELIHOODS TOOK: %f\n", elapsed_time(error, likelihood_time)); // update & normalize weights // using equation (63) of Arulampalam Tutorial #pragma omp parallel for shared(Nparticles, weights, likelihood) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = weights[x] * exp(likelihood[x]); } long long exponential = get_time(); printf("TIME TO GET EXP TOOK: %f\n", elapsed_time(likelihood_time, exponential)); double sumWeights = 0; #pragma omp parallel for private(x) reduction(+:sumWeights) for(x = 0; x < Nparticles; x++){ sumWeights += weights[x]; } long long sum_time = get_time(); printf("TIME TO SUM WEIGHTS TOOK: %f\n", elapsed_time(exponential, sum_time)); #pragma omp parallel for shared(sumWeights, weights) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = weights[x]/sumWeights; } long long normalize = get_time(); printf("TIME TO NORMALIZE WEIGHTS TOOK: %f\n", elapsed_time(sum_time, normalize)); xe = 0; ye = 0; // estimate the object location by expected values #pragma omp parallel for private(x) reduction(+:xe, ye) for(x = 0; x < Nparticles; x++){ xe += arrayX[x] * weights[x]; ye += arrayY[x] * weights[x]; } long long move_time = get_time(); printf("TIME TO MOVE OBJECT TOOK: %f\n", elapsed_time(normalize, move_time)); printf("XE: %lf\n", xe); printf("YE: %lf\n", ye); double distance = sqrt( pow((double)(xe-(int)roundDouble(IszY/2.0)),2) + pow((double)(ye-(int)roundDouble(IszX/2.0)),2) ); printf("%lf\n", distance); //display(hold off for now) //pause(hold off for now) //resampling CDF[0] = weights[0]; for(x = 1; x < Nparticles; x++){ CDF[x] = weights[x] + CDF[x-1]; } long long cum_sum = get_time(); printf("TIME TO CALC CUM SUM TOOK: %f\n", elapsed_time(move_time, cum_sum)); double u1 = (1/((double)(Nparticles)))*randu(seed, 0); #pragma omp parallel for shared(u, u1, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ u[x] = u1 + x/((double)(Nparticles)); } long long u_time = get_time(); printf("TIME TO CALC U TOOK: %f\n", elapsed_time(cum_sum, u_time)); int j, i; #pragma omp parallel for shared(CDF, Nparticles, xj, yj, u, arrayX, arrayY) private(i, j) for(j = 0; j < Nparticles; j++){ i = findIndex(CDF, Nparticles, u[j]); if(i == -1) i = Nparticles-1; xj[j] = arrayX[i]; yj[j] = arrayY[i]; } long long xyj_time = get_time(); printf("TIME TO CALC NEW ARRAY X AND Y TOOK: %f\n", elapsed_time(u_time, xyj_time)); //#pragma omp parallel for shared(weights, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ //reassign arrayX and arrayY arrayX[x] = xj[x]; arrayY[x] = yj[x]; weights[x] = 1/((double)(Nparticles)); } long long reset = get_time(); printf("TIME TO RESET WEIGHTS TOOK: %f\n", elapsed_time(xyj_time, reset)); } free(disk); free(objxy); free(weights); free(likelihood); free(xj); free(yj); free(arrayX); free(arrayY); free(CDF); free(u); free(ind); }
Particle2D VectorLocalization2D::createParticle(vector2f loc, float angle, float locationUncertainty, float angleUncertainty) { return Particle2D(randn(locationUncertainty,loc.x), randn(locationUncertainty,loc.y), randn(angleUncertainty,angle),1.0); }
static void test_psd(){ rand_t rstat; int seed=4; double r0=0.2; double dx=1./64; long N=1024; long nx=N; long ny=N; long ratio=1; long xskip=nx*(ratio-1)/2; long yskip=ny*(ratio-1)/2; long nframe=512; seed_rand(&rstat, seed); if(1){ map_t *atm=mapnew(nx+1, ny+1, dx,dx, NULL); cmat *hat=cnew(nx*ratio, ny*ratio); //cfft2plan(hat, -1); dmat *hattot=dnew(nx*ratio, ny*ratio); for(long i=0; i<nframe; i++){ info2("%ld of %ld\n", i, nframe); for(long j=0; j<(nx+1)*(ny+1); j++){ atm->p[j]=randn(&rstat); } fractal_do((dmat*)atm, dx, r0,L0,ninit); czero(hat); for(long iy=0; iy<ny; iy++){ for(long ix=0; ix<nx; ix++){ IND(hat,ix+xskip,iy+yskip)=IND(atm,ix,iy); } } cfftshift(hat); cfft2i(hat, -1); cabs22d(&hattot, 1, hat, 1); } dscale(hattot, 1./nframe); dfftshift(hattot); writebin(hattot, "PSD_fractal"); } { dmat *spect=turbpsd(nx, ny, dx, r0, 100, 0, 0.5); writebin(spect, "spect"); cmat *hat=cnew(nx*ratio, ny*ratio); //cfft2plan(hat, -1); dmat *hattot=dnew(nx*ratio, ny*ratio); cmat *atm=cnew(nx, ny); //cfft2plan(atm, -1); dmat *atmr=dnew(atm->nx, atm->ny); dmat *atmi=dnew(atm->nx, atm->ny); cmat* phat=hat; dmat* patmr=atmr; dmat* patmi=atmi; for(long ii=0; ii<nframe; ii+=2){ info2("%ld of %ld\n", ii, nframe); for(long i=0; i<atm->nx*atm->ny; i++){ atm->p[i]=COMPLEX(randn(&rstat), randn(&rstat))*spect->p[i]; } cfft2(atm, -1); for(long i=0; i<atm->nx*atm->ny; i++){ atmr->p[i]=creal(atm->p[i]); atmi->p[i]=cimag(atm->p[i]); } czero(hat); for(long iy=0; iy<ny; iy++){ for(long ix=0; ix<nx; ix++){ IND(phat,ix+xskip,iy+yskip)=IND(patmr,ix,iy); } } cfftshift(hat); cfft2i(hat, -1); cabs22d(&hattot, 1, hat, 1); czero(hat); for(long iy=0; iy<ny; iy++){ for(long ix=0; ix<nx; ix++){ IND(phat,ix+xskip,iy+yskip)=IND(patmi,ix,iy); } } cfftshift(hat); cfft2i(hat, -1); cabs22d(&hattot, 1, hat, 1); } dscale(hattot, 1./nframe); dfftshift(hattot); writebin(hattot, "PSD_fft"); } }
void kalman_dlqe3(real32_T dt, real32_T k1, real32_T k2, real32_T k3, const real32_T x_aposteriori_k[3], real32_T z, real32_T posUpdate, real32_T addNoise, real32_T sigma, real32_T x_aposteriori[3]) { real32_T A[9]; int32_T i0; static const int8_T iv0[3] = { 0, 0, 1 }; real_T b; real32_T y; real32_T b_y[3]; int32_T i1; static const int8_T iv1[3] = { 1, 0, 0 }; real32_T b_k1[3]; real32_T f0; A[0] = 1.0F; A[3] = dt; A[6] = 0.5F * rt_powf_snf(dt, 2.0F); A[1] = 0.0F; A[4] = 1.0F; A[7] = dt; for (i0 = 0; i0 < 3; i0++) { A[2 + 3 * i0] = (real32_T)iv0[i0]; } if (addNoise == 1.0F) { b = randn(); z += sigma * (real32_T)b; } if (posUpdate != 0.0F) { y = 0.0F; for (i0 = 0; i0 < 3; i0++) { b_y[i0] = 0.0F; for (i1 = 0; i1 < 3; i1++) { b_y[i0] += (real32_T)iv1[i1] * A[i1 + 3 * i0]; } y += b_y[i0] * x_aposteriori_k[i0]; } y = z - y; b_k1[0] = k1; b_k1[1] = k2; b_k1[2] = k3; for (i0 = 0; i0 < 3; i0++) { f0 = 0.0F; for (i1 = 0; i1 < 3; i1++) { f0 += A[i0 + 3 * i1] * x_aposteriori_k[i1]; } x_aposteriori[i0] = f0 + b_k1[i0] * y; } } else { for (i0 = 0; i0 < 3; i0++) { x_aposteriori[i0] = 0.0F; for (i1 = 0; i1 < 3; i1++) { x_aposteriori[i0] += A[i0 + 3 * i1] * x_aposteriori_k[i1]; } } } }