Exemple #1
0
static void test_stfun(){
    rand_t rstat;
    int seed=4;
    double r0=0.2;
    double dx=1./16;
    long N=32;
    long nx=N;
    long ny=N;
    long nframe=500;
    seed_rand(&rstat, seed);
    if(L0<9000){
	dmat *rr=dlinspace(0, N*dx, N);
	dmat *covvk=turbcov(rr, sqrt(2)*N*dx, r0, L0);
	writebin(covvk, "cov_vk");
	dfree(rr);
	dfree(covvk);
    }
    /*    return; */
    {
	map_t *atm=mapnew(nx+1, ny+1, dx, dx,NULL);
	stfun_t *data=stfun_init(nx, ny, NULL);
	zfarr *save=zfarr_init(nframe, 1, "fractal_atm.bin");
	for(long i=0; i<nframe; i++){
	    for(long j=0; j<(nx+1)*(ny+1); j++){
		atm->p[j]=randn(&rstat);
	    }
	    fractal_do((dmat*)atm, dx, r0,L0,ninit);
	    stfun_push(data, (dmat*)atm);
	    zfarr_dmat(save, i, (dmat*)atm);
	    if(i%100==0)
		info("%ld of %ld\n", i, nframe);
	}
	zfarr_close(save);
	dmat *st=stfun_finalize(data);
	writebin(st, "stfun_fractal.bin");
	ddraw("fractal", st, NULL,NULL, "Atmosphere","x","y","stfun");
    }
    /*exit(0); */
    {
	stfun_t *data=stfun_init(nx, ny, NULL);
	dmat *spect=turbpsd(nx, ny, dx, r0, 100, 0, 0.5);
	cmat *atm=cnew(nx, ny);
	//cfft2plan(atm, -1);
	dmat *atmr=dnew(atm->nx, atm->ny);
	dmat *atmi=dnew(atm->nx, atm->ny);
	spect->p[0]=0;
	for(long ii=0; ii<nframe; ii+=2){
	    for(long i=0; i<atm->nx*atm->ny; i++){
		atm->p[i]=COMPLEX(randn(&rstat), randn(&rstat))*spect->p[i];
	    }
	    cfft2(atm, -1);
	    for(long i=0; i<atm->nx*atm->ny; i++){
		atmr->p[i]=creal(atm->p[i]);
		atmi->p[i]=cimag(atm->p[i]);
	    }
	    stfun_push(data, atmr);
	    stfun_push(data, atmi);
	    if(ii%100==0)
		info("%ld of %ld\n", ii, nframe);
	}
	dmat *st=stfun_finalize(data);
	writebin(st, "stfun_fft.bin");
	ddraw("fft", st, NULL,NULL, "Atmosphere","x","y","stfun");
    }
	
}
int main(int argc, char** argv)
{
    struct undistorter_ctx ctx;
    int i;
    int j;

    float complex dds_state;
    float complex dds_multiplier;

    float complex harmonics_in[  NUMBER_OF_HARMONICS];
    float complex harmonics_out[ NUMBER_OF_HARMONICS];
    
    float harmonic_power_in;
    float fundamental_power_in;
    
    float harmonic_power_out;
    float fundamental_power_out;
    
    float thd_in;
    float thd_out;
   
    undistorter_init(&ctx, 0.05, 2, 4, 1024, -1.5f, 1.5f, 1.0f);
    
    dds_state = 1.0f;
    dds_multiplier = cexpf(2.0f*I * M_PI * TEST_FREQUENCY/SAMPLING_FREQUENCY);
        
    for(i = 0; i < NUMBER_OF_HARMONICS; i++)
    {
        harmonics_in[i]  = 0.0f;
        harmonics_out[i] = 0.0f;
    }
    
    /*
     * We produce an input sinusoid x[n] = exp(j*2*pi*f/f_s*n).  The complex form
     * is convenient, because we can then calculate the power in each harmonic
     * by downconversion and time-averaging:
     *
     *      P_y[i] = < (y[n] (x[n]^n)*)^2 >
     *
     * where < > denotes a time average and z* the complex conjugate.
     */
    for(i = 0; i < NUMBER_OF_SAMPLES; i++)
    {
        float noise = randn()*1e-2f;
        /* 1.13 was chosen to produce 10% THD. */
        float distorted_sinusoid = tanhf(1.13f*(crealf(dds_state)+ noise));
        float undistorted_sinusoid = undistorter_compensate_sample(&ctx, distorted_sinusoid);
        float complex carrier = conjf(dds_state);
        
        if(i > NUMBER_OF_SAMPLES/4)
        {
            for(j = 0; j < NUMBER_OF_HARMONICS; j++)
            {
                harmonics_in[j]  += carrier*distorted_sinusoid;
                harmonics_out[j] += carrier*undistorted_sinusoid;
                
                carrier *= conjf(dds_state);
            }
        }

        dds_state *= dds_multiplier;
    }
    
    fundamental_power_in   = cabsf(harmonics_in[0]  )*cabsf(harmonics_in[0]  );
    fundamental_power_out  = cabsf(harmonics_out[0] )*cabsf(harmonics_out[0] );
    
    harmonic_power_in = 0.0f;
    harmonic_power_out = 0.0f;
    
    for(i = 1; i < NUMBER_OF_HARMONICS; i++)
    {
        harmonic_power_in   += cabsf(harmonics_in[i]  )*cabsf(harmonics_in[i]  );
        harmonic_power_out  += cabsf(harmonics_out[i] )*cabsf(harmonics_out[i] );
    }
    
    thd_in = sqrtf((harmonic_power_in)  / fundamental_power_in)*100.0f;
    thd_out = sqrtf((harmonic_power_out) / fundamental_power_out)*100.0f;
    
    if( thd_in/thd_out  >= sqrtf(10.0f))
    {
        printf("PASS: THDin = %f, THDout = %f\n", thd_in, thd_out);
    }
    else
    {
        printf("FAIL: THDin = %f, THDout = %f\n", thd_in, thd_out);
    }

    return 0;
}
/**
* The implementation of the particle filter using OpenMP for a single image
* @see http://openmp.org/wp/
* @note This function is designed to work with a single image. In addition, it references a provided MATLAB function which takes the video, the objxy matrix and the x and y arrays as arguments and returns the likelihoods
* @warning Use the other particle filter function for videos; the accuracy of this function decreases significantly as it is called repeatedly while processing video
* @param I The image to be run
* @param IszX The x dimension of the image
* @param IszY The y dimension of the image
* @param seed The seed array used for random number generation
* @param Nparticles The number of particles to be used
* @param x_loc The array that will store the x locations of the desired object
* @param y_loc The array that will store the y locations of the desired object
* @param prevX The starting x position of the object
* @param prevY The starting y position of the object
*/
void particleFilter1F(int * I, int IszX, int IszY, int * seed, int Nparticles, double * x_loc, double * y_loc, double prevX, double prevY){
	int max_size = IszX*IszY;

	/*original particle centroid*/
	double xe = prevX;
	double ye = prevY;
	/*expected object locations, compared to center*/
	int radius = 5;
	int diameter = radius*2 -1;
	int * disk = (int *)mxCalloc(diameter*diameter, sizeof(int));
	strelDisk(disk, radius);
	int countOnes = 0;
	int x, y;
	for(x = 0; x < diameter; x++){
		for(y = 0; y < diameter; y++){
			if(disk[x*diameter + y] == 1)
			countOnes++;
		}
	}
	double * objxy = (double *)mxCalloc(countOnes*2, sizeof(double));
	getneighbors(disk, countOnes, objxy, radius);
	

	/*initial weights are all equal (1/Nparticles)*/
	double * weights = (double *)mxCalloc(Nparticles, sizeof(double));
	#pragma omp parallel for shared(weights, Nparticles) private(x)
	for(x = 0; x < Nparticles; x++){
		weights[x] = 1/((double)(Nparticles));
	}

	/*initial likelihood to 0.0*/
	double * likelihood = (double *)mxCalloc(Nparticles, sizeof(double));
	double * arrayX = (double *)mxCalloc(Nparticles, sizeof(double));
	double * arrayY = (double *)mxCalloc(Nparticles, sizeof(double));
	double * xj = (double *)mxCalloc(Nparticles, sizeof(double));
	double * yj = (double *)mxCalloc(Nparticles, sizeof(double));
	double * CDF = (double *)mxCalloc(Nparticles, sizeof(double));
	double * u = (double *)mxCalloc(Nparticles, sizeof(double));
	mxArray * arguments[4];
	mxArray * mxIK = mxCreateDoubleMatrix(IszX, IszY, mxREAL);
	mxArray * mxObj = mxCreateDoubleMatrix(countOnes, 2, mxREAL);
	mxArray * mxX = mxCreateDoubleMatrix(1, Nparticles, mxREAL);
	mxArray * mxY = mxCreateDoubleMatrix(1, Nparticles, mxREAL);
	double * Ik = (double *)mxCalloc(IszX*IszY, sizeof(double));
	mxArray * result = mxCreateDoubleMatrix(1, Nparticles, mxREAL);
	
	#pragma omp parallel for shared(arrayX, arrayY, xe, ye) private(x)
	for(x = 0; x < Nparticles; x++){
		arrayX[x] = xe;
		arrayY[x] = ye;
	}
	int k;
	int indX, indY;
	

	/*apply motion model
		//draws sample from motion model (random walk). The only prior information
		//is that the object moves 2x as fast as in the y direction*/
	#pragma omp parallel for shared(arrayX, arrayY, Nparticles, seed) private(x)
	for(x = 0; x < Nparticles; x++){
		arrayX[x] += 1 + 5*randn(seed, x);
		arrayY[x] += -2 + 2*randn(seed, x);
	}

	/*particle filter likelihood*/
	//get the current image
	for(x = 0; x < IszX; x++)
	{
		for(y = 0; y < IszY; y++)
		{
			Ik[x*IszX + y] = (double)I[x*IszY + y];
		}
	}
	//copy arguments
	memcpy(mxGetPr(mxIK), Ik, sizeof(double)*IszX*IszY);
	memcpy(mxGetPr(mxObj), objxy, sizeof(double)*countOnes);
	memcpy(mxGetPr(mxX), arrayX, sizeof(double)*Nparticles);
	memcpy(mxGetPr(mxY), arrayY, sizeof(double)*Nparticles);
	arguments[0] = mxIK;
	arguments[1] = mxObj;
	arguments[2] = mxX;
	arguments[3] = mxY;
	mexCallMATLAB(1, &result, 4, arguments, "GetSimpleLikelihood");
	memcpy(likelihood, result, sizeof(double)*Nparticles);

	/* update & normalize weights
		// using equation (63) of Arulampalam Tutorial*/
	#pragma omp parallel for shared(Nparticles, weights, likelihood) private(x)
	for(x = 0; x < Nparticles; x++){
		weights[x] = weights[x] * exp(likelihood[x]);
	}

	double sumWeights = 0;
	#pragma omp parallel for private(x) reduction(+:sumWeights)
	for(x = 0; x < Nparticles; x++){
		sumWeights += weights[x];
	}

	#pragma omp parallel for shared(sumWeights, weights) private(x)
	for(x = 0; x < Nparticles; x++){
		weights[x] = weights[x]/sumWeights;
	}

	xe = 0;
	ye = 0;
	/* estimate the object location by expected values*/
	#pragma omp parallel for private(x) reduction(+:xe, ye)
	for(x = 0; x < Nparticles; x++){
		xe += arrayX[x] * weights[x];
		ye += arrayY[x] * weights[x];
	}
	

	x_loc[0] = xe+.5;
	y_loc[0] = ye+.5;

	/*display(hold off for now)
		
		//pause(hold off for now)
		
		//resampling*/
	
	
	CDF[0] = weights[0];
	for(x = 1; x < Nparticles; x++){
		CDF[x] = weights[x] + CDF[x-1];
	}

	double u1 = (1/((double)(Nparticles)))*randu(seed, 0);
	#pragma omp parallel for shared(u, u1, Nparticles) private(x)
	for(x = 0; x < Nparticles; x++){
		u[x] = u1 + x/((double)(Nparticles));
	}

	int j, i;
	
	#pragma omp parallel for shared(CDF, Nparticles, xj, yj, u, arrayX, arrayY) private(i, j)
	for(j = 0; j < Nparticles; j++){
		i = findIndex(CDF, Nparticles, u[j]);
		/*i = findIndexBin(CDF, 0, Nparticles, u[j]);*/
		if(i == -1)
		i = Nparticles-1;
		xj[j] = arrayX[i];
		yj[j] = arrayY[i];
		
	}

	/*reassign arrayX and arrayY*/
	#pragma omp parallel for shared(weights, arrayX, arrayY, xj, yj, Nparticles) private(x)
	for(x = 0; x < Nparticles; x++){
		weights[x] = 1/((double)(Nparticles));
		arrayX[x] = xj[x];
		arrayY[x] = yj[x];
	}

	
	mxFree(disk);
	mxFree(weights);
	mxFree(objxy);	
	mxFree(likelihood);
	mxFree(arrayX);
	mxFree(arrayY);
	mxFree(CDF);
	mxFree(u);
	mxFree(xj);
	mxFree(yj);
	mxFree(Ik);
}
/**
* Sets values of 3D matrix using randomly generated numbers from a normal distribution
* @param array3D The video to be modified
* @param dimX The x dimension of the frame
* @param dimY The y dimension of the frame
* @param dimZ The number of frames
* @param seed The seed array
*/
void addNoise(int * array3D, int * dimX, int * dimY, int * dimZ, int * seed){
	int x, y, z;
	for(x = 0; x < *dimX; x++){
		for(y = 0; y < *dimY; y++){
			for(z = 0; z < *dimZ; z++){
				array3D[x * *dimY * *dimZ + y * *dimZ + z] = array3D[x * *dimY * *dimZ + y * *dimZ + z] + (int)(5*randn(seed, 0));
			}
		}
	}
}
/**
 * The implementation of the particle filter using OpenMP for many frames
 * @see http://openmp.org/wp/
 * @note This function is designed to work with a video of several frames. In addition, it references a provided MATLAB function which takes the video, the objxy matrix and the x and y arrays as arguments and returns the likelihoods
 * @param I The video to be run
 * @param IszX The x dimension of the video
 * @param IszY The y dimension of the video
 * @param Nfr The number of frames
 * @param seed The seed array used for random number generation
 * @param Nparticles The number of particles to be used
 */
int particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles){

  int i, c;

#ifdef HW
  XFcuda xcore;
  int Status;


  Status = XFcuda_Initialize(&xcore, 0);
  if (Status != XST_SUCCESS) {
    printf("Initialization failed\n");
    return 1; // XST_FAILURE;
  }
#endif
  int max_size = IszX*IszY*Nfr;
  //long long start = get_time();
  //original particle centroid
  double xe = roundDouble(IszY/2.0);
  double ye = roundDouble(IszX/2.0);

  //expected object locations, compared to center
  int radius = 5;
  int diameter = radius*2 - 1;
  int * disk = (int *)malloc(diameter*diameter*sizeof(int));
  strelDisk(disk, radius);
  int countOnes = 0;
  int x, y;
  for(x = 0; x < diameter; x++){
    for(y = 0; y < diameter; y++){
      if(disk[x*diameter + y] == 1)
        countOnes++;
    }
  }
  double * objxy = (double *)malloc(countOnes*2*sizeof(double));
  getneighbors(disk, countOnes, objxy, radius);

  //long long get_neighbors = get_time();
  //printf("TIME TO GET NEIGHBORS TOOK: %f\n", elapsed_time(start, get_neighbors));
  //initial weights are all equal (1/Nparticles)
  double * weights = (double *)malloc(sizeof(double)*Nparticles);
  for(x = 0; x < Nparticles; x++){
    weights[x] = 1/((double)(Nparticles));
  }
  //long long get_weights = get_time();
  //printf("TIME TO GET WEIGHTSTOOK: %f\n", elapsed_time(get_neighbors, get_weights));
  //initial likelihood to 0.0
  //printf("%d\n", Nparticles);
  double * likelihood = (double *)malloc(sizeof(double)*Nparticles);
  double * arrayX = (double *)malloc(sizeof(double)*Nparticles);
  double * arrayY = (double *)malloc(sizeof(double)*Nparticles);
  double * xj = (double *)malloc(sizeof(double)*Nparticles);
  double * yj = (double *)malloc(sizeof(double)*Nparticles);
  double * CDF = (double *)malloc(sizeof(double)*Nparticles);

  //GPU copies of arrays
  //double * arrayX_GPU;
  //double * arrayY_GPU;
  //double * xj_GPU;
  //double * yj_GPU;
  //double * CDF_GPU;

  int * ind = (int*)malloc(sizeof(int)*countOnes);
  double * u = (double *)malloc(sizeof(double)*Nparticles);
  //double * u_GPU;

  //CUDA memory allocation
  //check_error(cudaMalloc((void **) &arrayX_GPU, sizeof(double)*Nparticles));
  //arrayX_GPU = (double*)malloc(sizeof(double)*Nparticles);
  //check_error(cudaMalloc((void **) &arrayY_GPU, sizeof(double)*Nparticles));
  //arrayY_GPU = (double*)malloc(sizeof(double)*Nparticles);
  //check_error(cudaMalloc((void **) &xj_GPU, sizeof(double)*Nparticles));
  //xj_GPU = (double*)malloc(sizeof(double)*Nparticles);
  //check_error(cudaMalloc((void **) &yj_GPU, sizeof(double)*Nparticles));
  //yj_GPU = (double*)malloc(sizeof(double)*Nparticles);
  //check_error(cudaMalloc((void **) &CDF_GPU, sizeof(double)*Nparticles));
  //CDF_GPU = (double*)malloc(sizeof(double)*Nparticles);
  //check_error(cudaMalloc((void **) &u_GPU, sizeof(double)*Nparticles));
  //u_GPU = (double*)malloc(sizeof(double)*Nparticles);
  for(x = 0; x < Nparticles; x++){
    arrayX[x] = xe;
    arrayY[x] = ye;
  }

  //Set number of threads
  int num_blocks = ceil((double) Nparticles/(double) threads_per_block);
  //printf("%d\n", num_blocks);
  dim3 grids, threads;
  grids.x = num_blocks;
  grids.y = 1;
  grids.z = 1;
  threads.x = threads_per_block;
  threads.y = 1;
  threads.z = 1;

#ifdef HW
  XFcuda_SetNparticles(&xcore, Nparticles);
  XFcuda_SetGriddim_x(&xcore, grids.x);
  XFcuda_SetGriddim_y(&xcore, grids.y);
  //XFcuda_SetGriddim_z(&xcore, grids.z);
  XFcuda_SetBlockdim_x(&xcore, threads.x);
  //XFcuda_SetBlockdim_y(&xcore, threads.y);
  //XFcuda_SetBlockdim_z(&xcore, threads.z);
  XFcuda_SetArrayx_addr(&xcore, (u32)arrayX / sizeof(double));
  XFcuda_SetArrayy_addr(&xcore, (u32)arrayY / sizeof(double));
  XFcuda_SetCdf_addr(&xcore, (u32)CDF / sizeof(double));
  XFcuda_SetU_addr(&xcore, (u32)u / sizeof(double));
  XFcuda_SetXj_addr(&xcore, (u32)xj / sizeof(double));
  XFcuda_SetYj_addr(&xcore, (u32)yj / sizeof(double));

#endif

  int k;
  //double * Ik = (double *)malloc(sizeof(double)*IszX*IszY);
  int indX, indY;
  double *result = (double *)malloc(3 * (Nfr - 1) * sizeof(double));
  i = 0;
  for(k = 1; k < Nfr; k++){
    //long long set_arrays = get_time();
    //printf("TIME TO SET ARRAYS TOOK: %f\n", elapsed_time(get_weights, set_arrays));
    //apply motion model
    //draws sample from motion model (random walk). The only prior information
    //is that the object moves 2x as fast as in the y direction

    for(x = 0; x < Nparticles; x++){
      arrayX[x] = arrayX[x] + 1.0 + 5.0*randn(seed, x);
      arrayY[x] = arrayY[x] - 2.0 + 2.0*randn(seed, x);
    }

    //particle filter likelihood
    //long long error = get_time();
    //printf("TIME TO SET ERROR TOOK: %f\n", elapsed_time(set_arrays, error));
    for(x = 0; x < Nparticles; x++){

      //compute the likelihood: remember our assumption is that you know
      // foreground and the background image intensity distribution.
      // Notice that we consider here a likelihood ratio, instead of
      // p(z|x). It is possible in this case. why? a hometask for you.		
      //calc ind
      for(y = 0; y < countOnes; y++){
        indX = roundDouble(arrayX[x]) + objxy[y*2 + 1];
        indY = roundDouble(arrayY[x]) + objxy[y*2];
        ind[y] = fabs(indX*IszY*Nfr + indY*Nfr + k);
        if(ind[y] >= max_size)
          ind[y] = 0;
      }
      likelihood[x] = calcLikelihoodSum(I, ind, countOnes);
      likelihood[x] = likelihood[x]/countOnes;
    }
    //long long likelihood_time = get_time();
    //printf("TIME TO GET LIKELIHOODS TOOK: %f\n", elapsed_time(error, likelihood_time));
    // update & normalize weights
    // using equation (63) of Arulampalam Tutorial		
    for(x = 0; x < Nparticles; x++){
      weights[x] = weights[x] * exp(likelihood[x]);
    }
    //long long exponential = get_time();
    //printf("TIME TO GET EXP TOOK: %f\n", elapsed_time(likelihood_time, exponential));
    double sumWeights = 0;	
    for(x = 0; x < Nparticles; x++){
      sumWeights += weights[x];
    }
    //long long sum_time = get_time();
    //printf("TIME TO SUM WEIGHTS TOOK: %f\n", elapsed_time(exponential, sum_time));
    for(x = 0; x < Nparticles; x++){
      weights[x] = weights[x]/sumWeights;
    }
    //long long normalize = get_time();
    //printf("TIME TO NORMALIZE WEIGHTS TOOK: %f\n", elapsed_time(sum_time, normalize));
    xe = 0;
    ye = 0;
    // estimate the object location by expected values
    for(x = 0; x < Nparticles; x++){
      //printf("%f %f %f\n", arrayX[x], arrayY[x], weights[x]);
      xe += arrayX[x] * weights[x];
      ye += arrayY[x] * weights[x];
    }
    //long long move_time = get_time();
    //printf("TIME TO MOVE OBJECT TOOK: %f\n", elapsed_time(normalize, move_time));
    //printf("XE: %lf\n", xe);
    //printf("YE: %lf\n", ye);
    double distance = sqrt( pow((double)(xe-(int)roundDouble(IszY/2.0)),2) + pow((double)(ye-(int)roundDouble(IszX/2.0)),2) );
    //printf("%lf\n", distance);
    result[i] = xe;
    result[i + 1] = ye;
    result[i + 2] = distance;
    i += 3;
    //display(hold off for now)

    //pause(hold off for now)

    //resampling


    CDF[0] = weights[0];
    for(x = 1; x < Nparticles; x++){
      CDF[x] = weights[x] + CDF[x-1];
    }
    //long long cum_sum = get_time();
    //printf("TIME TO CALC CUM SUM TOOK: %f\n", elapsed_time(move_time, cum_sum));
    double u1 = (1/((double)(Nparticles)))*randu(seed, 0);
    for(x = 0; x < Nparticles; x++){
      u[x] = u1 + x/((double)(Nparticles));
    }
    //long long u_time = get_time();
    //printf("TIME TO CALC U TOOK: %f\n", elapsed_time(cum_sum, u_time));
    //long long start_copy = get_time();
    //CUDA memory copying from CPU memory to GPU memory
    //cudaMemcpy(arrayX_GPU, arrayX, sizeof(double)*Nparticles, cudaMemcpyHostToDevice);
    //memcpy(arrayX_GPU, arrayX, sizeof(double)*Nparticles);
    //cudaMemcpy(arrayY_GPU, arrayY, sizeof(double)*Nparticles, cudaMemcpyHostToDevice);
    //memcpy(arrayY_GPU, arrayY, sizeof(double)*Nparticles);
    //cudaMemcpy(xj_GPU, xj, sizeof(double)*Nparticles, cudaMemcpyHostToDevice);
    //memcpy(xj_GPU, xj, sizeof(double)*Nparticles);
    //cudaMemcpy(yj_GPU, yj, sizeof(double)*Nparticles, cudaMemcpyHostToDevice);
    //memcpy(yj_GPU, yj, sizeof(double)*Nparticles);
    //cudaMemcpy(CDF_GPU, CDF, sizeof(double)*Nparticles, cudaMemcpyHostToDevice);
    //memcpy(CDF_GPU, CDF, sizeof(double)*Nparticles);
    //cudaMemcpy(u_GPU, u, sizeof(double)*Nparticles, cudaMemcpyHostToDevice);
    //memcpy(u_GPU, u, sizeof(double)*Nparticles);
    //long long end_copy = get_time();
    //Xil_DCacheDisable();

#ifdef HW
    Xil_DCacheDisable();
    XFcuda_SetEn_fcuda1(&xcore, 1);
    XFcuda_Start(&xcore);
    while (!XFcuda_IsDone(&xcore));
    Xil_DCacheEnable();
#else
    //KERNEL FUNCTION CALL
    int j;
    for(j = 0; j < Nparticles; j++){
      x = findIndex(CDF, Nparticles, u[j]);
      if(x == -1)
        x = Nparticles-1;
      xj[j] = arrayX[x];
      yj[j] = arrayY[x];

    }
#endif
    //cudaThreadSynchronize();
    //long long start_copy_back = get_time();
    //CUDA memory copying back from GPU to CPU memory
    //cudaMemcpy(yj, yj_GPU, sizeof(double)*Nparticles, cudaMemcpyDeviceToHost);
    //memcpy(yj, yj_GPU, sizeof(double)*Nparticles);
    //cudaMemcpy(xj, xj_GPU, sizeof(double)*Nparticles, cudaMemcpyDeviceToHost);
    //memcpy(xj, xj_GPU, sizeof(double)*Nparticles);
    //long long end_copy_back = get_time();
    //printf("SENDING TO GPU TOOK: %lf\n", elapsed_time(start_copy, end_copy));
    //printf("CUDA EXEC TOOK: %lf\n", elapsed_time(end_copy, start_copy_back));
    //printf("SENDING BACK FROM GPU TOOK: %lf\n", elapsed_time(start_copy_back, end_copy_back));
    //long long xyj_time = get_time();
    //printf("TIME TO CALC NEW ARRAY X AND Y TOOK: %f\n", elapsed_time(u_time, xyj_time));

    for(x = 0; x < Nparticles; x++){
      //reassign arrayX and arrayY
      arrayX[x] = xj[x];
      arrayY[x] = yj[x];
      weights[x] = 1/((double)(Nparticles));
    }
    //long long reset = get_time();
    //printf("TIME TO RESET WEIGHTS TOOK: %f\n", elapsed_time(xyj_time, reset));
  }

  //CUDA freeing of memory
  /*cudaFree(u_GPU);
    cudaFree(CDF_GPU);
    cudaFree(yj_GPU);
    cudaFree(xj_GPU);
    cudaFree(arrayY_GPU);
    cudaFree(arrayX_GPU);*/
  /*
     free(u_GPU);
     free(CDF_GPU);
     free(yj_GPU);
     free(xj_GPU);
     free(arrayY_GPU);
     free(arrayX_GPU);
     */
  //free memory
  free(disk);
  free(objxy);
  free(weights);
  free(likelihood);
  free(arrayX);
  free(arrayY);
  free(xj);
  free(yj);
  free(CDF);
  free(u);
  free(ind);

  /*
     FILE *fp = fopen("cuda/gold_output_naive.txt", "r");
     if (fp == NULL) {
     printf("Cannot open file.\n");
     free(result);
     return 0;
     }
     char buffer[50];
     double gold_val;
     for (i = 0; i < 3 * (Nfr - 1); i++) {
     if (feof(fp)) {
     printf("Unexpected end of file.\n");
     free(result);
     return 0;
     }
     fgets(buffer, sizeof(buffer), fp);
     sscanf(buffer, "%lf\n", &gold_val);
     if (gold_val - result[i] < -EPSILON ||
     gold_val - result[i] > EPSILON) {

     printf("Mismatch result at %i: gold = %f, result = %f\n",
     i, gold_val, result[i]);
     free(result);
     return 0;
     }
     }
     free(result);
     */

#ifdef VERIFY
  for (i = 0; i < 3 * (Nfr - 1); i++) {
    printf("index %d: %f\n", i, result[i]);
  }

  for (i = 0; i < 3 * (Nfr - 1); i++) {
    if (fabs(gold_output[i] - result[i]) > EPSILON) {
      printf("Mismatch result at %i: gold = %f, result = %f\n",
          i, gold_output[i], result[i]);
      free(result);
      return 0;
    }
  }
#endif
  free(result);
  return 1;
}
Exemple #6
0
void multivar_normal(double *mean, double *var, double *ret, int dim)
{
    for(int i=0; i < dim; i++)
        ret[i] = mean[i] + sqrt(var[i])*randn();
}
int main(int argc, char **argv)
{
  ros::init(argc, argv, "updating_particles");
  ros::NodeHandle n;
  PlotRayUtils plt;
  RayTracer rayt;

  std::random_device rd;
  std::normal_distribution<double> randn(0.0,0.000001);

  ROS_INFO("Running...");

  ros::Publisher pub_init = 
    n.advertise<particle_filter::PFilterInit>("/particle_filter_init", 5);
  ros::ServiceClient srv_add = 
    n.serviceClient<particle_filter::AddObservation>("/particle_filter_add");


 
  ros::Duration(1).sleep();
  // pub_init.publish(getInitialPoints(plt));
 
  geometry_msgs::Point obs;
  geometry_msgs::Point dir;
  double radius = 0.00;

  int i = 0;
  //for(int i=0; i<20; i++){
  while (i < NUM_TOUCHES) {
    // ros::Duration(1).sleep();
    //tf::Point start(0.95,0,-0.15);
    //tf::Point end(0.95,2,-0.15);
    tf::Point start, end;
    // randomSelection(plt, rayt, start, end);
    fixedSelection(plt, rayt, start, end);

    Ray measurement(start, end);
    
    double distToPart;
    if(!rayt.traceRay(measurement, distToPart)){
      ROS_INFO("NO INTERSECTION, Skipping");
      continue;
    }
    tf::Point intersection(start.getX(), start.getY(), start.getZ());
    intersection = intersection + (end-start).normalize() * (distToPart - radius);
	std::cout << "Intersection at: " << intersection.getX() << "  " << intersection.getY() << "   " << intersection.getZ() << std::endl;
    tf::Point ray_dir(end.x()-start.x(),end.y()-start.y(),end.z()-start.z());
    ray_dir = ray_dir.normalize();
    obs.x=intersection.getX() + randn(rd); 
    obs.y=intersection.getY() + randn(rd); 
    obs.z=intersection.getZ() + randn(rd);
    dir.x=ray_dir.x();
    dir.y=ray_dir.y();
    dir.z=ray_dir.z();
    // obs.x=intersection.getX(); 
    // obs.y=intersection.getY(); 
    // obs.z=intersection.getZ();

    // pub_add.publish(obs);
    
    // plt.plotCylinder(start, end, 0.01, 0.002, true);
    plt.plotRay(Ray(start, end));
    // ros::Duration(1).sleep();

    particle_filter::AddObservation pfilter_obs;
    pfilter_obs.request.p = obs;
    pfilter_obs.request.dir = dir;
    if(!srv_add.call(pfilter_obs)){
      ROS_INFO("Failed to call add observation");
    }

    ros::spinOnce();
    while(!rayt.particleHandler.newParticles){
      ROS_INFO_THROTTLE(10, "Waiting for new particles...");
      ros::spinOnce();
      ros::Duration(.1).sleep();
    }
    i ++;
  }
  // std::ofstream myfile;
  // myfile.open("/home/shiyuan/Documents/ros_marsarm/diff.csv", std::ios::out|std::ios::app);
  // myfile << "\n";
  // myfile.close();
  // myfile.open("/home/shiyuan/Documents/ros_marsarm/time.csv", std::ios::out|std::ios::app);
  // myfile << "\n";
  // myfile.close();
  // myfile.open("/home/shiyuan/Documents/ros_marsarm/diff_trans.csv", std::ios::out|std::ios::app);
  // myfile << "\n";
  // myfile.close();
  // myfile.open("/home/shiyuan/Documents/ros_marsarm/diff_rot.csv", std::ios::out|std::ios::app);
  // myfile << "\n";
  // myfile.close();
  // myfile.open("/home/shiyuan/Documents/ros_marsarm/workspace_max.csv", std::ios::out|std::ios::app);
  // myfile << "\n";
  // myfile.close();
  // myfile.open("/home/shiyuan/Documents/ros_marsarm/workspace_min.csv", std::ios::out|std::ios::app);
  // myfile << "\n";
  // myfile.close();
  ROS_INFO("Finished all action");

}
Exemple #8
0
 array randn(const dim_t d0,
             const dim_t d1, const dim_t d2,
             const dim_t d3, const af::dtype ty)
 {
     return randn(dim4(d0, d1, d2, d3), ty);
 }
Exemple #9
0
 //! Vector of x ~ N(0,1) size n
 vec norm(const size_t &n){
   return randn(n);
 }
Exemple #10
0
    //! Single draw from N(0,1)
    double norm(){

      vec n = randn(1);
      return n(0);
    }
Exemple #11
0
 //! Matrix of x ~ N(0,1) size (n_rows,n_cols)
 mat norm(const size_t & n_rows, const size_t & n_cols){
   return randn(n_rows,n_cols);
 }
Exemple #12
0
 //! Matrix of x ~ N(mu,sigma) size (n_rows,n_cols)
 mat norm(const double mu, const double sigma, const int n_rows, int n_cols){
   return mu + sigma*randn(n_rows,n_cols);
 }
Exemple #13
0
GLFFTWater::GLFFTWater(GLFFTWaterParams &params) {
#if 0
#ifdef _WIN32
    m_h = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4);
    m_dx = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4);
    m_dz = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4);
    m_w = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N)*(params.N)), 4);
#else
    posix_memalign((void **)&m_h,4,sizeof(float)*(params.N+2)*(params.N));
    posix_memalign((void **)&m_dx,4,sizeof(float)*(params.N+2)*(params.N));
    posix_memalign((void **)&m_dz,4,sizeof(float)*(params.N+2)*(params.N));
    posix_memalign((void **)&m_w,4,sizeof(float)*(params.N)*(params.N));
#endif
#else
		m_h = new float[(params.N+2)*params.N];
		m_dx = new float[(params.N+2)*(params.N)];
		m_dz = new float[(params.N+2)*(params.N)];
		m_w = new float[(params.N)*(params.N)];
#endif
    m_htilde0 = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex)*(params.N)*(params.N));
    m_heightmap = new float3[(params.N)*(params.N)];
    m_params = params;

    std::tr1::mt19937 prng(1337);
    std::tr1::normal_distribution<float> normal;
    std::tr1::uniform_real<float> uniform;
    std::tr1::variate_generator<std::tr1::mt19937, std::tr1::normal_distribution<float> > randn(prng,normal);
    std::tr1::variate_generator<std::tr1::mt19937, std::tr1::uniform_real<float> > randu(prng,uniform);
    for(int i=0, k=0; i<params.N; i++) {
	    float k_x = (-(params.N-1)*0.5f+i)*(2.f*3.141592654f / params.L);
	    for(int j=0; j<params.N; j++, k++) {
		    float k_y = (-(params.N-1)*0.5f+j)*(2.f*3.141592654f / params.L);
		    float A = randn();
		    float theta = randu()*2.f*3.141592654f;
		    float P = (k_x==0.f && k_y==0.0f) ? 0.f : sqrtf(phillips(k_x,k_y,m_w[k]));
		    m_htilde0[k][0] = m_htilde0[k][1] = P*A*sinf(theta);
	    }
    }


    m_kz = new float[params.N*(params.N / 2 + 1)];
    m_kx = new float[params.N*(params.N / 2 + 1)];

    const int hN = m_params.N / 2;
    for(int y=0; y<m_params.N; y++) {
	float kz = (float) (y - hN);
	for(int x=0; x<=hN; x++) {
		float kx = (float) (x - hN);
		float k = 1.f/sqrtf(kx*kx+kz*kz);
		m_kz[y*(hN+1)+x] = kz*k;
		m_kx[y*(hN+1)+x] = kx*k;
	}
    }

    if(!fftwf_init_threads()) {
	cerr << "Error initializing multithreaded fft."  << endl;
    } else {
	fftwf_plan_with_nthreads(2);
    }
  
    m_fftplan = fftwf_plan_dft_c2r_2d(m_params.N, m_params.N, (fftwf_complex *)m_h, m_h, 
				      FFTW_ESTIMATE);

    glGenTextures(1, &m_texId);
    glBindTexture(GL_TEXTURE_2D, m_texId);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB16F, params.N, params.N, 0, GL_RGB, GL_FLOAT, 0);
    glBindTexture(GL_TEXTURE_2D, 0);
}
Exemple #14
0
void sammon(const emxArray_real_T *x, emxArray_real_T *y)
{
  emxArray_real_T *D;
  emxArray_real_T *delta;
  int32_T i0;
  int32_T b_D;
  int32_T br;
  emxArray_real_T *Dinv;
  emxArray_real_T *d;
  emxArray_real_T *dinv;
  emxArray_real_T *c_D;
  emxArray_real_T *b_delta;
  emxArray_real_T *b_x;
  real_T E;
  int32_T i;
  emxArray_real_T *deltaone;
  emxArray_real_T *g;
  emxArray_real_T *y2;
  emxArray_real_T *H;
  emxArray_real_T *s;
  emxArray_real_T *C;
  emxArray_real_T *b_C;
  emxArray_real_T *c_delta;
  emxArray_real_T *d_D;
  emxArray_real_T *b_deltaone;
  boolean_T exitg1;
  boolean_T guard2 = FALSE;
  uint32_T unnamed_idx_0;
  int32_T ic;
  int32_T ar;
  int32_T ib;
  int32_T ia;
  int32_T i1;
  boolean_T guard1 = FALSE;
  int32_T exitg3;
  boolean_T exitg2;
  int32_T b_y[2];
  real_T E_new;
  real_T u1;
  emxInit_real_T(&D, 2);
  emxInit_real_T(&delta, 2);

  /* #codgen */
  /*  */
  /*  SAMMON - apply Sammon's nonlinear mapping  */
  /*  */
  /*     Y = SAMMON(X) applies Sammon's nonlinear mapping procedure on */
  /*     multivariate data X, where each row represents a pattern and each column */
  /*     represents a feature.  On completion, Y contains the corresponding */
  /*     co-ordinates of each point on the map.  By default, a two-dimensional */
  /*     map is created.  Note if X contains any duplicated rows, SAMMON will */
  /*     fail (ungracefully).  */
  /*  */
  /*     [Y,E] = SAMMON(X) also returns the value of the cost function in E (i.e. */
  /*     the stress of the mapping). */
  /*  */
  /*     An N-dimensional output map is generated by Y = SAMMON(X,N) . */
  /*  */
  /*     A set of optimisation options can also be specified using a third */
  /*     argument, Y = SAMMON(X,N,OPTS) , where OPTS is a structure with fields: */
  /*  */
  /*        MaxIter        - maximum number of iterations */
  /*        TolFun         - relative tolerance on objective function */
  /*        MaxHalves      - maximum number of step halvings */
  /*        Input          - {'raw','distance'} if set to 'distance', X is  */
  /*                         interpreted as a matrix of pairwise distances. */
  /*        Display        - {'off', 'on', 'iter'} */
  /*        Initialisation - {'pca', 'random'} */
  /*  */
  /*     The default options structure can be retrieved by calling SAMMON with */
  /*     no parameters. */
  /*  */
  /*     References : */
  /*  */
  /*        [1] Sammon, John W. Jr., "A Nonlinear Mapping for Data Structure */
  /*            Analysis", IEEE Transactions on Computers, vol. C-18, no. 5, */
  /*            pp 401-409, May 1969. */
  /*  */
  /*     See also : SAMMON_TEST */
  /*  */
  /*  File        : sammon.m */
  /*  */
  /*  Date        : Monday 12th November 2007. */
  /*  */
  /*  Author      : Gavin C. Cawley and Nicola L. C. Talbot */
  /*  */
  /*  Description : Simple vectorised MATLAB implementation of Sammon's non-linear */
  /*                mapping algorithm [1]. */
  /*  */
  /*  References  : [1] Sammon, John W. Jr., "A Nonlinear Mapping for Data */
  /*                    Structure Analysis", IEEE Transactions on Computers, */
  /*                    vol. C-18, no. 5, pp 401-409, May 1969. */
  /*  */
  /*  History     : 10/08/2004 - v1.00 */
  /*                11/08/2004 - v1.10 Hessian made positive semidefinite */
  /*                13/08/2004 - v1.11 minor optimisation */
  /*                12/11/2007 - v1.20 initialisation using the first n principal */
  /*                                   components. */
  /*  */
  /*  Thanks      : Dr Nick Hamilton ([email protected]) for supplying the */
  /*                code for implementing initialisation using the first n */
  /*                principal components (introduced in v1.20). */
  /*  */
  /*  To do       : The current version does not take advantage of the symmetry */
  /*                of the distance matrix in order to allow for easy */
  /*                vectorisation.  This may not be a good choice for very large */
  /*                datasets, so perhaps one day I'll get around to doing a MEX */
  /*                version using the BLAS library etc. for very large datasets. */
  /*  */
  /*  Copyright   : (c) Dr Gavin C. Cawley, November 2007. */
  /*  */
  /*     This program is free software; you can redistribute it and/or modify */
  /*     it under the terms of the GNU General Public License as published by */
  /*     the Free Software Foundation; either version 2 of the License, or */
  /*     (at your option) any later version. */
  /*  */
  /*     This program is distributed in the hope that it will be useful, */
  /*     but WITHOUT ANY WARRANTY; without even the implied warranty of */
  /*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the */
  /*     GNU General Public License for more details. */
  /*  */
  /*     You should have received a copy of the GNU General Public License */
  /*     along with this program; if not, write to the Free Software */
  /*     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
  /*  */
  /*  use the default options structure */
  /*  create distance matrix unless given by parameters */
  euclid(x, x, D);

  /*  remaining initialisation */
  eye(x->size[0], delta);
  i0 = D->size[0] * D->size[1];
  emxEnsureCapacity((emxArray__common *)D, i0, (int32_T)sizeof(real_T));
  b_D = D->size[0];
  br = D->size[1];
  br *= b_D;
  for (i0 = 0; i0 < br; i0++) {
    D->data[i0] += delta->data[i0];
  }

  emxInit_real_T(&Dinv, 2);
  emxInit_real_T(&d, 2);
  rdivide(D, Dinv);
  randn(x->size[0], y);
  b_euclid(y, y, d);
  eye(x->size[0], delta);
  i0 = d->size[0] * d->size[1];
  emxEnsureCapacity((emxArray__common *)d, i0, (int32_T)sizeof(real_T));
  b_D = d->size[0];
  br = d->size[1];
  br *= b_D;
  for (i0 = 0; i0 < br; i0++) {
    d->data[i0] += delta->data[i0];
  }

  emxInit_real_T(&dinv, 2);
  emxInit_real_T(&c_D, 2);
  rdivide(d, dinv);
  i0 = c_D->size[0] * c_D->size[1];
  c_D->size[0] = D->size[0];
  c_D->size[1] = D->size[1];
  emxEnsureCapacity((emxArray__common *)c_D, i0, (int32_T)sizeof(real_T));
  br = D->size[0] * D->size[1];
  for (i0 = 0; i0 < br; i0++) {
    c_D->data[i0] = D->data[i0] - d->data[i0];
  }

  emxInit_real_T(&b_delta, 2);
  power(c_D, delta);
  i0 = b_delta->size[0] * b_delta->size[1];
  b_delta->size[0] = delta->size[0];
  b_delta->size[1] = delta->size[1];
  emxEnsureCapacity((emxArray__common *)b_delta, i0, (int32_T)sizeof(real_T));
  br = delta->size[0] * delta->size[1];
  emxFree_real_T(&c_D);
  for (i0 = 0; i0 < br; i0++) {
    b_delta->data[i0] = delta->data[i0] * Dinv->data[i0];
  }

  emxInit_real_T(&b_x, 2);
  c_sum(b_delta, b_x);
  E = d_sum(b_x);

  /*  get on with it */
  i = 0;
  emxFree_real_T(&b_delta);
  emxInit_real_T(&deltaone, 2);
  emxInit_real_T(&g, 2);
  emxInit_real_T(&y2, 2);
  emxInit_real_T(&H, 2);
  b_emxInit_real_T(&s, 1);
  emxInit_real_T(&C, 2);
  emxInit_real_T(&b_C, 2);
  emxInit_real_T(&c_delta, 2);
  emxInit_real_T(&d_D, 2);
  b_emxInit_real_T(&b_deltaone, 1);
  exitg1 = FALSE;
  while ((exitg1 == FALSE) && (i < 500)) {
    /*  compute gradient, Hessian and search direction (note it is actually */
    /*  1/4 of the gradient and Hessian, but the step size is just the ratio */
    /*  of the gradient and the diagonal of the Hessian so it doesn't matter). */
    i0 = delta->size[0] * delta->size[1];
    delta->size[0] = dinv->size[0];
    delta->size[1] = dinv->size[1];
    emxEnsureCapacity((emxArray__common *)delta, i0, (int32_T)sizeof(real_T));
    br = dinv->size[0] * dinv->size[1];
    for (i0 = 0; i0 < br; i0++) {
      delta->data[i0] = dinv->data[i0] - Dinv->data[i0];
    }

    guard2 = FALSE;
    if (delta->size[1] == 1) {
      guard2 = TRUE;
    } else {
      b_D = x->size[0];
      if (b_D == 1) {
        guard2 = TRUE;
      } else {
        unnamed_idx_0 = (uint32_T)delta->size[0];
        i0 = deltaone->size[0] * deltaone->size[1];
        deltaone->size[0] = (int32_T)unnamed_idx_0;
        deltaone->size[1] = 2;
        emxEnsureCapacity((emxArray__common *)deltaone, i0, (int32_T)sizeof
                          (real_T));
        br = (int32_T)unnamed_idx_0 << 1;
        for (i0 = 0; i0 < br; i0++) {
          deltaone->data[i0] = 0.0;
        }

        if (delta->size[0] == 0) {
        } else {
          b_D = 0;
          while ((delta->size[0] > 0) && (b_D <= delta->size[0])) {
            i0 = b_D + delta->size[0];
            for (ic = b_D; ic + 1 <= i0; ic++) {
              deltaone->data[ic] = 0.0;
            }

            b_D += delta->size[0];
          }

          br = 0;
          b_D = 0;
          while ((delta->size[0] > 0) && (b_D <= delta->size[0])) {
            ar = 0;
            i0 = br + delta->size[1];
            for (ib = br + 1; ib <= i0; ib++) {
              ia = ar;
              i1 = b_D + delta->size[0];
              for (ic = b_D; ic + 1 <= i1; ic++) {
                ia++;
                deltaone->data[ic] += delta->data[ia - 1];
              }

              ar += delta->size[0];
            }

            br += delta->size[1];
            b_D += delta->size[0];
          }
        }
      }
    }

    if (guard2 == TRUE) {
      i0 = deltaone->size[0] * deltaone->size[1];
      deltaone->size[0] = delta->size[0];
      deltaone->size[1] = 2;
      emxEnsureCapacity((emxArray__common *)deltaone, i0, (int32_T)sizeof(real_T));
      br = delta->size[0];
      for (i0 = 0; i0 < br; i0++) {
        for (i1 = 0; i1 < 2; i1++) {
          deltaone->data[i0 + deltaone->size[0] * i1] = 0.0;
          ar = delta->size[1];
          for (b_D = 0; b_D < ar; b_D++) {
            deltaone->data[i0 + deltaone->size[0] * i1] += delta->data[i0 +
              delta->size[0] * b_D];
          }
        }
      }
    }

    if ((delta->size[1] == 1) || (y->size[0] == 1)) {
      i0 = g->size[0] * g->size[1];
      g->size[0] = delta->size[0];
      g->size[1] = 2;
      emxEnsureCapacity((emxArray__common *)g, i0, (int32_T)sizeof(real_T));
      br = delta->size[0];
      for (i0 = 0; i0 < br; i0++) {
        for (i1 = 0; i1 < 2; i1++) {
          g->data[i0 + g->size[0] * i1] = 0.0;
          ar = delta->size[1];
          for (b_D = 0; b_D < ar; b_D++) {
            g->data[i0 + g->size[0] * i1] += delta->data[i0 + delta->size[0] *
              b_D] * y->data[b_D + y->size[0] * i1];
          }
        }
      }
    } else {
      unnamed_idx_0 = (uint32_T)delta->size[0];
      i0 = g->size[0] * g->size[1];
      g->size[0] = (int32_T)unnamed_idx_0;
      g->size[1] = 2;
      emxEnsureCapacity((emxArray__common *)g, i0, (int32_T)sizeof(real_T));
      br = (int32_T)unnamed_idx_0 << 1;
      for (i0 = 0; i0 < br; i0++) {
        g->data[i0] = 0.0;
      }

      if (delta->size[0] == 0) {
      } else {
        b_D = 0;
        while ((delta->size[0] > 0) && (b_D <= delta->size[0])) {
          i0 = b_D + delta->size[0];
          for (ic = b_D; ic + 1 <= i0; ic++) {
            g->data[ic] = 0.0;
          }

          b_D += delta->size[0];
        }

        br = 0;
        b_D = 0;
        while ((delta->size[0] > 0) && (b_D <= delta->size[0])) {
          ar = 0;
          i0 = br + delta->size[1];
          for (ib = br; ib + 1 <= i0; ib++) {
            if (y->data[ib] != 0.0) {
              ia = ar;
              i1 = b_D + delta->size[0];
              for (ic = b_D; ic + 1 <= i1; ic++) {
                ia++;
                g->data[ic] += y->data[ib] * delta->data[ia - 1];
              }
            }

            ar += delta->size[0];
          }

          br += delta->size[1];
          b_D += delta->size[0];
        }
      }
    }

    i0 = g->size[0] * g->size[1];
    g->size[1] = 2;
    emxEnsureCapacity((emxArray__common *)g, i0, (int32_T)sizeof(real_T));
    b_D = g->size[0];
    br = g->size[1];
    br *= b_D;
    for (i0 = 0; i0 < br; i0++) {
      g->data[i0] -= y->data[i0] * deltaone->data[i0];
    }

    c_power(dinv, delta);
    b_power(y, y2);
    if ((delta->size[1] == 1) || (y2->size[0] == 1)) {
      i0 = H->size[0] * H->size[1];
      H->size[0] = delta->size[0];
      H->size[1] = 2;
      emxEnsureCapacity((emxArray__common *)H, i0, (int32_T)sizeof(real_T));
      br = delta->size[0];
      for (i0 = 0; i0 < br; i0++) {
        for (i1 = 0; i1 < 2; i1++) {
          H->data[i0 + H->size[0] * i1] = 0.0;
          ar = delta->size[1];
          for (b_D = 0; b_D < ar; b_D++) {
            H->data[i0 + H->size[0] * i1] += delta->data[i0 + delta->size[0] *
              b_D] * y2->data[b_D + y2->size[0] * i1];
          }
        }
      }
    } else {
      unnamed_idx_0 = (uint32_T)delta->size[0];
      i0 = H->size[0] * H->size[1];
      H->size[0] = (int32_T)unnamed_idx_0;
      H->size[1] = 2;
      emxEnsureCapacity((emxArray__common *)H, i0, (int32_T)sizeof(real_T));
      br = (int32_T)unnamed_idx_0 << 1;
      for (i0 = 0; i0 < br; i0++) {
        H->data[i0] = 0.0;
      }

      if (delta->size[0] == 0) {
      } else {
        b_D = 0;
        while ((delta->size[0] > 0) && (b_D <= delta->size[0])) {
          i0 = b_D + delta->size[0];
          for (ic = b_D; ic + 1 <= i0; ic++) {
            H->data[ic] = 0.0;
          }

          b_D += delta->size[0];
        }

        br = 0;
        b_D = 0;
        while ((delta->size[0] > 0) && (b_D <= delta->size[0])) {
          ar = 0;
          i0 = br + delta->size[1];
          for (ib = br; ib + 1 <= i0; ib++) {
            if (y2->data[ib] != 0.0) {
              ia = ar;
              i1 = b_D + delta->size[0];
              for (ic = b_D; ic + 1 <= i1; ic++) {
                ia++;
                H->data[ic] += y2->data[ib] * delta->data[ia - 1];
              }
            }

            ar += delta->size[0];
          }

          br += delta->size[1];
          b_D += delta->size[0];
        }
      }
    }

    if ((delta->size[1] == 1) || (y->size[0] == 1)) {
      i0 = C->size[0] * C->size[1];
      C->size[0] = delta->size[0];
      C->size[1] = 2;
      emxEnsureCapacity((emxArray__common *)C, i0, (int32_T)sizeof(real_T));
      br = delta->size[0];
      for (i0 = 0; i0 < br; i0++) {
        for (i1 = 0; i1 < 2; i1++) {
          C->data[i0 + C->size[0] * i1] = 0.0;
          ar = delta->size[1];
          for (b_D = 0; b_D < ar; b_D++) {
            C->data[i0 + C->size[0] * i1] += delta->data[i0 + delta->size[0] *
              b_D] * y->data[b_D + y->size[0] * i1];
          }
        }
      }
    } else {
      unnamed_idx_0 = (uint32_T)delta->size[0];
      i0 = C->size[0] * C->size[1];
      C->size[0] = (int32_T)unnamed_idx_0;
      C->size[1] = 2;
      emxEnsureCapacity((emxArray__common *)C, i0, (int32_T)sizeof(real_T));
      br = (int32_T)unnamed_idx_0 << 1;
      for (i0 = 0; i0 < br; i0++) {
        C->data[i0] = 0.0;
      }

      if (delta->size[0] == 0) {
      } else {
        b_D = 0;
        while ((delta->size[0] > 0) && (b_D <= delta->size[0])) {
          i0 = b_D + delta->size[0];
          for (ic = b_D; ic + 1 <= i0; ic++) {
            C->data[ic] = 0.0;
          }

          b_D += delta->size[0];
        }

        br = 0;
        b_D = 0;
        while ((delta->size[0] > 0) && (b_D <= delta->size[0])) {
          ar = 0;
          i0 = br + delta->size[1];
          for (ib = br; ib + 1 <= i0; ib++) {
            if (y->data[ib] != 0.0) {
              ia = ar;
              i1 = b_D + delta->size[0];
              for (ic = b_D; ic + 1 <= i1; ic++) {
                ia++;
                C->data[ic] += y->data[ib] * delta->data[ia - 1];
              }
            }

            ar += delta->size[0];
          }

          br += delta->size[1];
          b_D += delta->size[0];
        }
      }
    }

    guard1 = FALSE;
    if (delta->size[1] == 1) {
      guard1 = TRUE;
    } else {
      b_D = x->size[0];
      if (b_D == 1) {
        guard1 = TRUE;
      } else {
        unnamed_idx_0 = (uint32_T)delta->size[0];
        i0 = b_C->size[0] * b_C->size[1];
        b_C->size[0] = (int32_T)unnamed_idx_0;
        b_C->size[1] = 2;
        emxEnsureCapacity((emxArray__common *)b_C, i0, (int32_T)sizeof(real_T));
        br = (int32_T)unnamed_idx_0 << 1;
        for (i0 = 0; i0 < br; i0++) {
          b_C->data[i0] = 0.0;
        }

        if (delta->size[0] == 0) {
        } else {
          b_D = 0;
          while ((delta->size[0] > 0) && (b_D <= delta->size[0])) {
            i0 = b_D + delta->size[0];
            for (ic = b_D; ic + 1 <= i0; ic++) {
              b_C->data[ic] = 0.0;
            }

            b_D += delta->size[0];
          }

          br = 0;
          b_D = 0;
          while ((delta->size[0] > 0) && (b_D <= delta->size[0])) {
            ar = 0;
            i0 = br + delta->size[1];
            for (ib = br + 1; ib <= i0; ib++) {
              ia = ar;
              i1 = b_D + delta->size[0];
              for (ic = b_D; ic + 1 <= i1; ic++) {
                ia++;
                b_C->data[ic] += delta->data[ia - 1];
              }

              ar += delta->size[0];
            }

            br += delta->size[1];
            b_D += delta->size[0];
          }
        }
      }
    }

    if (guard1 == TRUE) {
      i0 = b_C->size[0] * b_C->size[1];
      b_C->size[0] = delta->size[0];
      b_C->size[1] = 2;
      emxEnsureCapacity((emxArray__common *)b_C, i0, (int32_T)sizeof(real_T));
      br = delta->size[0];
      for (i0 = 0; i0 < br; i0++) {
        for (i1 = 0; i1 < 2; i1++) {
          b_C->data[i0 + b_C->size[0] * i1] = 0.0;
          ar = delta->size[1];
          for (b_D = 0; b_D < ar; b_D++) {
            b_C->data[i0 + b_C->size[0] * i1] += delta->data[i0 + delta->size[0]
              * b_D];
          }
        }
      }
    }

    i0 = H->size[0] * H->size[1];
    H->size[1] = 2;
    emxEnsureCapacity((emxArray__common *)H, i0, (int32_T)sizeof(real_T));
    b_D = H->size[0];
    br = H->size[1];
    br *= b_D;
    for (i0 = 0; i0 < br; i0++) {
      H->data[i0] = ((H->data[i0] - deltaone->data[i0]) - 2.0 * y->data[i0] *
                     C->data[i0]) + y2->data[i0] * b_C->data[i0];
    }

    b_D = H->size[0] << 1;
    i0 = s->size[0];
    s->size[0] = b_D;
    emxEnsureCapacity((emxArray__common *)s, i0, (int32_T)sizeof(real_T));
    br = 0;
    do {
      exitg3 = 0;
      b_D = H->size[0] << 1;
      if (br <= b_D - 1) {
        s->data[br] = fabs(H->data[br]);
        br++;
      } else {
        exitg3 = 1;
      }
    } while (exitg3 == 0);

    i0 = s->size[0];
    s->size[0] = g->size[0] << 1;
    emxEnsureCapacity((emxArray__common *)s, i0, (int32_T)sizeof(real_T));
    br = g->size[0] << 1;
    for (i0 = 0; i0 < br; i0++) {
      s->data[i0] = -g->data[i0] / s->data[i0];
    }

    i0 = deltaone->size[0] * deltaone->size[1];
    deltaone->size[0] = y->size[0];
    deltaone->size[1] = 2;
    emxEnsureCapacity((emxArray__common *)deltaone, i0, (int32_T)sizeof(real_T));
    br = y->size[0] * y->size[1];
    for (i0 = 0; i0 < br; i0++) {
      deltaone->data[i0] = y->data[i0];
    }

    /*  use step-halving procedure to ensure progress is made */
    ib = 1;
    ia = 0;
    exitg2 = FALSE;
    while ((exitg2 == FALSE) && (ia < 20)) {
      ib = ia + 1;
      b_D = deltaone->size[0] << 1;
      i0 = b_deltaone->size[0];
      b_deltaone->size[0] = b_D;
      emxEnsureCapacity((emxArray__common *)b_deltaone, i0, (int32_T)sizeof
                        (real_T));
      for (i0 = 0; i0 < b_D; i0++) {
        b_deltaone->data[i0] = deltaone->data[i0] + s->data[i0];
      }

      for (i0 = 0; i0 < 2; i0++) {
        b_y[i0] = y->size[i0];
      }

      i0 = y->size[0] * y->size[1];
      y->size[0] = b_y[0];
      y->size[1] = b_y[1];
      emxEnsureCapacity((emxArray__common *)y, i0, (int32_T)sizeof(real_T));
      br = b_y[1];
      for (i0 = 0; i0 < br; i0++) {
        ar = b_y[0];
        for (i1 = 0; i1 < ar; i1++) {
          y->data[i1 + y->size[0] * i0] = b_deltaone->data[i1 + b_y[0] * i0];
        }
      }

      b_euclid(y, y, d);
      b_D = x->size[0];
      i0 = delta->size[0] * delta->size[1];
      delta->size[0] = b_D;
      emxEnsureCapacity((emxArray__common *)delta, i0, (int32_T)sizeof(real_T));
      b_D = x->size[0];
      i0 = delta->size[0] * delta->size[1];
      delta->size[1] = b_D;
      emxEnsureCapacity((emxArray__common *)delta, i0, (int32_T)sizeof(real_T));
      br = x->size[0] * x->size[0];
      for (i0 = 0; i0 < br; i0++) {
        delta->data[i0] = 0.0;
      }

      E_new = x->size[0];
      u1 = x->size[0];
      if (E_new <= u1) {
      } else {
        E_new = u1;
      }

      b_D = (int32_T)E_new;
      if (b_D > 0) {
        for (br = 0; br + 1 <= b_D; br++) {
          delta->data[br + delta->size[0] * br] = 1.0;
        }
      }

      i0 = d->size[0] * d->size[1];
      emxEnsureCapacity((emxArray__common *)d, i0, (int32_T)sizeof(real_T));
      b_D = d->size[0];
      br = d->size[1];
      br *= b_D;
      for (i0 = 0; i0 < br; i0++) {
        d->data[i0] += delta->data[i0];
      }

      rdivide(d, dinv);
      i0 = d_D->size[0] * d_D->size[1];
      d_D->size[0] = D->size[0];
      d_D->size[1] = D->size[1];
      emxEnsureCapacity((emxArray__common *)d_D, i0, (int32_T)sizeof(real_T));
      br = D->size[0] * D->size[1];
      for (i0 = 0; i0 < br; i0++) {
        d_D->data[i0] = D->data[i0] - d->data[i0];
      }

      power(d_D, delta);
      i0 = c_delta->size[0] * c_delta->size[1];
      c_delta->size[0] = delta->size[0];
      c_delta->size[1] = delta->size[1];
      emxEnsureCapacity((emxArray__common *)c_delta, i0, (int32_T)sizeof(real_T));
      br = delta->size[0] * delta->size[1];
      for (i0 = 0; i0 < br; i0++) {
        c_delta->data[i0] = delta->data[i0] * Dinv->data[i0];
      }

      c_sum(c_delta, b_x);
      if (b_x->size[1] == 0) {
        E_new = 0.0;
      } else {
        E_new = b_x->data[0];
        for (br = 2; br <= b_x->size[1]; br++) {
          E_new += b_x->data[br - 1];
        }
      }

      if (E_new < E) {
        exitg2 = TRUE;
      } else {
        i0 = s->size[0];
        emxEnsureCapacity((emxArray__common *)s, i0, (int32_T)sizeof(real_T));
        br = s->size[0];
        for (i0 = 0; i0 < br; i0++) {
          s->data[i0] *= 0.5;
        }

        ia++;
      }
    }

    /*  bomb out if too many halving steps are required */
    if ((ib == 20) || (fabs((E - E_new) / E) < 1.0E-9)) {
      exitg1 = TRUE;
    } else {
      /*  evaluate termination criterion */
      /*  report progress */
      E = E_new;
      i++;
    }
  }

  emxFree_real_T(&b_deltaone);
  emxFree_real_T(&d_D);
  emxFree_real_T(&c_delta);
  emxFree_real_T(&b_x);
  emxFree_real_T(&b_C);
  emxFree_real_T(&C);
  emxFree_real_T(&s);
  emxFree_real_T(&H);
  emxFree_real_T(&y2);
  emxFree_real_T(&g);
  emxFree_real_T(&deltaone);
  emxFree_real_T(&delta);
  emxFree_real_T(&dinv);
  emxFree_real_T(&d);
  emxFree_real_T(&Dinv);
  emxFree_real_T(&D);

  /*  fiddle stress to match the original Sammon paper */
}
Exemple #15
0
 array randn(const dim_type d0,
             const dim_type d1, const dim_type d2,
             const dim_type d3, af_dtype ty)
 {
     return randn(dim4(d0, d1, d2, d3), ty);
 }
Exemple #16
0
 array randn(const dim_t d0, const af::dtype ty)
 {
     return randn(dim4(d0), ty);
 }
Exemple #17
0
 array randn(const dim_type d0, af_dtype ty)
 {
     return randn(dim4(d0), ty);
 }
int main(int argc, char *argv[])
{
	FILE *fp;
	char *temp_path = NULL;
	char *input_path = NULL;
	int input_len = 0;
	char *data_buf = NULL;
	unsigned int block_size = 0; /* int because the max block size is 16k */
	unsigned int num_files = 0;
	unsigned int ops_per_comp = 0;
	unsigned int num_ops = 0;
	unsigned int rw = 0;
	unsigned int *op_array = NULL;
	unsigned int *temp_array = NULL;
	unsigned int j = 0;
	int opt;
	clock_t t;
	struct timeval tv1, tv2;
	double time_taken;
	srand (time(NULL));

	while ((opt = getopt(argc, argv, "b:n:m:c:l:hrw")) != -1) {
		switch (opt) {
		case 'b':
			/* Block size per read/write */

			block_size = atoi((char *)optarg);

			if (block_size <= 0 || block_size > 32 * 1024) {
				printf(
				    "Invalid block size or it exceeds 32k\n");
				exit(-1);
			}

			break;
		case 'n':
			/*
			 * Total number of files, each file of the form
			 * /mnt/test/abcd0, /mnt/test/abcd1, .....
			 * if the path is specified as "/mnt/test/abcd"
			 */

			num_files = atoi((char *)optarg);

			if (num_files <= 0 || num_files > 10000) {
				printf(
				    "Number of files exceeds 10000 or invalid\n");
				exit(-1);
			}

			break;
		case 'm':
			/*
			 * Total number of reads/writes
			 */

			num_ops = atoi((char *)optarg);

			if (num_ops <= 0 || num_ops > 10000) {
				printf("Invalid total number of reads/writes "
				       "or it exceeds 10000\n");
				exit(-1);
			}

			break;
		case 'c':
			/*
			 * Number of operations in a single compound, should not
			 * matter in normal reads
			 */

			ops_per_comp = atoi((char *)optarg);

			if (ops_per_comp <= 0 || ops_per_comp > 10) {
				printf(
				    "Invalid ops per comp or it exceeds 10\n");
				exit(-1);
			}

			break;
		case 'l':
			/*
			 * Path of the files
			 *
			 * If the files are of the form -
			 * /mnt/test/abcd0, /mnt/test/abcd1, ......
			 * Path should be "/mnt/test/abcd"
			 */

			input_path = (char *)optarg;
			input_len = strlen(input_path);
			if (input_len <= 0 || input_len > 50) {
				printf("Name is invalid or is too long, max 50 chars \n");
				exit(-1);
			}

			break;
		case 'r':
			/* Read */

			rw = 0;

			break;
		case 'w':
			/* Write */

			rw = 1;

			break;
		case 'h':
		default:
			printf("%s", usage);
			exit(-1);
		}
	}

	if (argc != 12) {
		printf("Wrong usage, use -h to get help\n");
		exit(-1);
	}

	if (rw == 0) {
		printf("TC_TEST - READ\n");
	} else {
		printf("TC_TEST - WRITE\n");
	}
	printf("Block size - %d\n", block_size);
	printf("Path - %s\n", input_path);
	printf("Num_files - %d\n", num_files);
	printf("Total num of reads/writes - %d\n", num_ops);
	printf("Num of ops in a comp - %d\n", ops_per_comp);

	temp_path = malloc(input_len + 8);
	data_buf = malloc(block_size);
	j = 0;

	op_array = malloc(num_files * sizeof(double));

	if (op_array == NULL) {
		printf("Cannot allocate memory for op_array\n");
		goto main_exit;
	}

	while (j < num_ops) {
		temp_array = op_array + j;
		*temp_array = (unsigned int)randn(num_ops / 2, num_ops / 8);
		j++;
	}

	printf("Op_array allocated\n");

	j = 0;

	// t = clock();
	gettimeofday(&tv1, NULL);
	while (j < num_ops) {

		temp_array = op_array + j;

		snprintf(temp_path, input_len + 8, "%s%u", input_path, *temp_array);

		if (rw == 0) { /* Read */
			fp = fopen(temp_path, "r");
		} else { /* Write */
			fp = fopen(temp_path, "w");
		}

		if (fp == NULL) {
			printf("Error opening file - %s\n", temp_path);
			goto exit;
		}

		if (rw == 0) { /* Read */
			fread(data_buf, block_size, 1, (FILE *)fp);
		} else { /* Write */
			fwrite(data_buf, 1, block_size, (FILE *)fp);
		}

		fclose(fp);
                j++;
        }

	//t = clock() - t;
	gettimeofday(&tv2, NULL);
	// time_taken = ((double)t) / CLOCKS_PER_SEC;
	time_taken = ((double)(tv2.tv_usec - tv1.tv_usec) / 1000000) +
		     (double)(tv2.tv_sec - tv1.tv_sec);

	printf("Took %f seconds to execute \n", time_taken);

exit:
	free(op_array);
main_exit:
	free(data_buf);
        free(temp_path);

	return 0;
}
Exemple #19
0
void Radical::CopyAndPerturb(mat& matXNew, const mat& matX)
{
  matXNew = repmat(matX, nReplicates, 1) + noiseStdDev *
                  randn(nReplicates * matX.n_rows, matX.n_cols);
}
Exemple #20
0
void sammon(sammonStackData *SD, const real_T x[1000000], real_T y[2000], real_T
            *E)
{
  real_T B;
  int32_T i;
  real_T dv0[1000];
  int32_T b_i;
  boolean_T exitg1;
  real_T alpha1;
  real_T beta1;
  char_T TRANSB;
  char_T TRANSA;
  ptrdiff_t m_t;
  ptrdiff_t n_t;
  ptrdiff_t k_t;
  ptrdiff_t lda_t;
  ptrdiff_t ldb_t;
  ptrdiff_t ldc_t;
  double * alpha1_t;
  double * Aia0_t;
  double * Bib0_t;
  double * beta1_t;
  double * Cic0_t;
  real_T g[2000];
  real_T y2[2000];
  real_T b_y[2000];
  real_T c_y[2000];
  real_T d_y[2000];
  real_T e_y[2000];
  real_T b_g[2000];
  int32_T j;
  int32_T b_j;
  boolean_T exitg2;
  real_T dv1[1000];
  real_T E_new;

  /* #codgen */
  /*  */
  /*  SAMMON - apply Sammon's nonlinear mapping  */
  /*  */
  /*     Y = SAMMON(X) applies Sammon's nonlinear mapping procedure on */
  /*     multivariate data X, where each row represents a pattern and each column */
  /*     represents a feature.  On completion, Y contains the corresponding */
  /*     co-ordinates of each point on the map.  By default, a two-dimensional */
  /*     map is created.  Note if X contains any duplicated rows, SAMMON will */
  /*     fail (ungracefully).  */
  /*  */
  /*     [Y,E] = SAMMON(X) also returns the value of the cost function in E (i.e. */
  /*     the stress of the mapping). */
  /*  */
  /*     An N-dimensional output map is generated by Y = SAMMON(X,N) . */
  /*  */
  /*     A set of optimisation options can also be specified using a third */
  /*     argument, Y = SAMMON(X,N,OPTS) , where OPTS is a structure with fields: */
  /*  */
  /*        MaxIter        - maximum number of iterations */
  /*        TolFun         - relative tolerance on objective function */
  /*        MaxHalves      - maximum number of step halvings */
  /*        Input          - {'raw','distance'} if set to 'distance', X is  */
  /*                         interpreted as a matrix of pairwise distances. */
  /*        Display        - {'off', 'on', 'iter'} */
  /*        Initialisation - {'pca', 'random'} */
  /*  */
  /*     The default options structure can be retrieved by calling SAMMON with */
  /*     no parameters. */
  /*  */
  /*     References : */
  /*  */
  /*        [1] Sammon, John W. Jr., "A Nonlinear Mapping for Data Structure */
  /*            Analysis", IEEE Transactions on Computers, vol. C-18, no. 5, */
  /*            pp 401-409, May 1969. */
  /*  */
  /*     See also : SAMMON_TEST */
  /*  */
  /*  File        : sammon.m */
  /*  */
  /*  Date        : Monday 12th November 2007. */
  /*  */
  /*  Author      : Gavin C. Cawley and Nicola L. C. Talbot */
  /*  */
  /*  Description : Simple vectorised MATLAB implementation of Sammon's non-linear */
  /*                mapping algorithm [1]. */
  /*  */
  /*  References  : [1] Sammon, John W. Jr., "A Nonlinear Mapping for Data */
  /*                    Structure Analysis", IEEE Transactions on Computers, */
  /*                    vol. C-18, no. 5, pp 401-409, May 1969. */
  /*  */
  /*  History     : 10/08/2004 - v1.00 */
  /*                11/08/2004 - v1.10 Hessian made positive semidefinite */
  /*                13/08/2004 - v1.11 minor optimisation */
  /*                12/11/2007 - v1.20 initialisation using the first n principal */
  /*                                   components. */
  /*  */
  /*  Thanks      : Dr Nick Hamilton ([email protected]) for supplying the */
  /*                code for implementing initialisation using the first n */
  /*                principal components (introduced in v1.20). */
  /*  */
  /*  To do       : The current version does not take advantage of the symmetry */
  /*                of the distance matrix in order to allow for easy */
  /*                vectorisation.  This may not be a good choice for very large */
  /*                datasets, so perhaps one day I'll get around to doing a MEX */
  /*                version using the BLAS library etc. for very large datasets. */
  /*  */
  /*  Copyright   : (c) Dr Gavin C. Cawley, November 2007. */
  /*  */
  /*     This program is free software; you can redistribute it and/or modify */
  /*     it under the terms of the GNU General Public License as published by */
  /*     the Free Software Foundation; either version 2 of the License, or */
  /*     (at your option) any later version. */
  /*  */
  /*     This program is distributed in the hope that it will be useful, */
  /*     but WITHOUT ANY WARRANTY; without even the implied warranty of */
  /*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the */
  /*     GNU General Public License for more details. */
  /*  */
  /*     You should have received a copy of the GNU General Public License */
  /*     along with this program; if not, write to the Free Software */
  /*     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
  /*  */
  /*  use the default options structure */
  /*  create distance matrix unless given by parameters */
  emlrtPushRtStackR2012b(&emlrtRSI, emlrtRootTLSGlobal);
  euclid(SD, x, x, SD->f2.D);
  emlrtPopRtStackR2012b(&emlrtRSI, emlrtRootTLSGlobal);

  /*  remaining initialisation */
  B = b_sum(SD->f2.D);
  eye(SD->f2.delta);
  for (i = 0; i < 1000000; i++) {
    SD->f2.D[i] += SD->f2.delta[i];
  }

  rdivide(1.0, SD->f2.D, SD->f2.Dinv);
  emlrtPushRtStackR2012b(&b_emlrtRSI, emlrtRootTLSGlobal);
  randn(y);
  emlrtPopRtStackR2012b(&b_emlrtRSI, emlrtRootTLSGlobal);
  emlrtPushRtStackR2012b(&c_emlrtRSI, emlrtRootTLSGlobal);
  b_euclid(SD, y, y, SD->f2.d);
  eye(SD->f2.delta);
  for (i = 0; i < 1000000; i++) {
    SD->f2.d[i] += SD->f2.delta[i];
  }

  emlrtPopRtStackR2012b(&c_emlrtRSI, emlrtRootTLSGlobal);
  rdivide(1.0, SD->f2.d, SD->f2.dinv);
  for (i = 0; i < 1000000; i++) {
    SD->f2.b_D[i] = SD->f2.D[i] - SD->f2.d[i];
  }

  power(SD->f2.b_D, SD->f2.delta);
  for (i = 0; i < 1000000; i++) {
    SD->f2.d[i] = SD->f2.delta[i] * SD->f2.Dinv[i];
  }

  d_sum(SD->f2.d, dv0);
  *E = e_sum(dv0);

  /*  get on with it */
  b_i = 0;
  exitg1 = FALSE;
  while ((exitg1 == FALSE) && (b_i < 500)) {
    /*  compute gradient, Hessian and search direction (note it is actually */
    /*  1/4 of the gradient and Hessian, but the step size is just the ratio */
    /*  of the gradient and the diagonal of the Hessian so it doesn't matter). */
    for (i = 0; i < 1000000; i++) {
      SD->f2.delta[i] = SD->f2.dinv[i] - SD->f2.Dinv[i];
    }

    emlrtPushRtStackR2012b(&d_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal);
    alpha1 = 1.0;
    beta1 = 0.0;
    TRANSB = 'N';
    TRANSA = 'N';
    for (i = 0; i < 2000; i++) {
      SD->f2.y_old[i] = 1.0;
      SD->f2.deltaone[i] = 0.0;
    }

    emlrtPushRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    m_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    n_t = (ptrdiff_t)(2);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    k_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    lda_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    ldb_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    ldc_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal);
    alpha1_t = (double *)(&alpha1);
    emlrtPopRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal);
    Aia0_t = (double *)(&SD->f2.delta[0]);
    emlrtPopRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal);
    Bib0_t = (double *)(&SD->f2.y_old[0]);
    emlrtPopRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal);
    beta1_t = (double *)(&beta1);
    emlrtPopRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal);
    Cic0_t = (double *)(&SD->f2.deltaone[0]);
    emlrtPopRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal);
    dgemm(&TRANSA, &TRANSB, &m_t, &n_t, &k_t, alpha1_t, Aia0_t, &lda_t, Bib0_t,
          &ldb_t, beta1_t, Cic0_t, &ldc_t);
    emlrtPopRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&d_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&e_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal);
    alpha1 = 1.0;
    beta1 = 0.0;
    TRANSB = 'N';
    TRANSA = 'N';
    memset(&g[0], 0, 2000U * sizeof(real_T));
    emlrtPushRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    m_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    n_t = (ptrdiff_t)(2);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    k_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    lda_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    ldb_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    ldc_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal);
    alpha1_t = (double *)(&alpha1);
    emlrtPopRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal);
    Aia0_t = (double *)(&SD->f2.delta[0]);
    emlrtPopRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal);
    Bib0_t = (double *)(&y[0]);
    emlrtPopRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal);
    beta1_t = (double *)(&beta1);
    emlrtPopRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal);
    Cic0_t = (double *)(&g[0]);
    emlrtPopRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal);
    dgemm(&TRANSA, &TRANSB, &m_t, &n_t, &k_t, alpha1_t, Aia0_t, &lda_t, Bib0_t,
          &ldb_t, beta1_t, Cic0_t, &ldc_t);
    emlrtPopRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal);
    for (i = 0; i < 2000; i++) {
      g[i] -= y[i] * SD->f2.deltaone[i];
    }

    emlrtPopRtStackR2012b(&e_emlrtRSI, emlrtRootTLSGlobal);
    c_power(SD->f2.dinv, SD->f2.delta);
    b_power(y, y2);
    emlrtPushRtStackR2012b(&f_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal);
    alpha1 = 1.0;
    beta1 = 0.0;
    TRANSB = 'N';
    TRANSA = 'N';
    memset(&b_y[0], 0, 2000U * sizeof(real_T));
    emlrtPushRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    m_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    n_t = (ptrdiff_t)(2);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    k_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    lda_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    ldb_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    ldc_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal);
    alpha1_t = (double *)(&alpha1);
    emlrtPopRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal);
    Aia0_t = (double *)(&SD->f2.delta[0]);
    emlrtPopRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal);
    Bib0_t = (double *)(&y2[0]);
    emlrtPopRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal);
    beta1_t = (double *)(&beta1);
    emlrtPopRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal);
    Cic0_t = (double *)(&b_y[0]);
    emlrtPopRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal);
    dgemm(&TRANSA, &TRANSB, &m_t, &n_t, &k_t, alpha1_t, Aia0_t, &lda_t, Bib0_t,
          &ldb_t, beta1_t, Cic0_t, &ldc_t);
    emlrtPopRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal);
    for (i = 0; i < 2000; i++) {
      c_y[i] = 2.0 * y[i];
    }

    emlrtPushRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal);
    alpha1 = 1.0;
    beta1 = 0.0;
    TRANSB = 'N';
    TRANSA = 'N';
    memset(&d_y[0], 0, 2000U * sizeof(real_T));
    emlrtPushRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    m_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    n_t = (ptrdiff_t)(2);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    k_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    lda_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    ldb_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    ldc_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal);
    alpha1_t = (double *)(&alpha1);
    emlrtPopRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal);
    Aia0_t = (double *)(&SD->f2.delta[0]);
    emlrtPopRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal);
    Bib0_t = (double *)(&y[0]);
    emlrtPopRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal);
    beta1_t = (double *)(&beta1);
    emlrtPopRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal);
    Cic0_t = (double *)(&d_y[0]);
    emlrtPopRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal);
    dgemm(&TRANSA, &TRANSB, &m_t, &n_t, &k_t, alpha1_t, Aia0_t, &lda_t, Bib0_t,
          &ldb_t, beta1_t, Cic0_t, &ldc_t);
    emlrtPopRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal);
    alpha1 = 1.0;
    beta1 = 0.0;
    TRANSB = 'N';
    TRANSA = 'N';
    for (i = 0; i < 2000; i++) {
      SD->f2.y_old[i] = 1.0;
      e_y[i] = 0.0;
    }

    emlrtPushRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    m_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&l_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    n_t = (ptrdiff_t)(2);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&m_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    k_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&n_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    lda_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&o_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    ldb_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&p_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    ldc_t = (ptrdiff_t)(1000);
    emlrtPopRtStackR2012b(&x_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&q_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal);
    alpha1_t = (double *)(&alpha1);
    emlrtPopRtStackR2012b(&r_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal);
    Aia0_t = (double *)(&SD->f2.delta[0]);
    emlrtPopRtStackR2012b(&s_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal);
    Bib0_t = (double *)(&SD->f2.y_old[0]);
    emlrtPopRtStackR2012b(&t_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal);
    beta1_t = (double *)(&beta1);
    emlrtPopRtStackR2012b(&u_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal);
    Cic0_t = (double *)(&e_y[0]);
    emlrtPopRtStackR2012b(&v_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPushRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal);
    dgemm(&TRANSA, &TRANSB, &m_t, &n_t, &k_t, alpha1_t, Aia0_t, &lda_t, Bib0_t,
          &ldb_t, beta1_t, Cic0_t, &ldc_t);
    emlrtPopRtStackR2012b(&w_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&k_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&j_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&i_emlrtRSI, emlrtRootTLSGlobal);
    emlrtPopRtStackR2012b(&f_emlrtRSI, emlrtRootTLSGlobal);
    for (i = 0; i < 2000; i++) {
      SD->f2.y_old[i] = ((b_y[i] - SD->f2.deltaone[i]) - c_y[i] * d_y[i]) + y2[i]
        * e_y[i];
      b_g[i] = -g[i];
    }

    b_abs(SD->f2.y_old, y2);
    for (i = 0; i < 2000; i++) {
      SD->f2.deltaone[i] = y2[i];
      SD->f2.y_old[i] = y[i];
    }

    b_rdivide(b_g, SD->f2.deltaone, y2);

    /*  use step-halving procedure to ensure progress is made */
    j = 1;
    b_j = 0;
    exitg2 = FALSE;
    while ((exitg2 == FALSE) && (b_j < 20)) {
      j = b_j + 1;
      for (i = 0; i < 2000; i++) {
        y[i] = SD->f2.y_old[i] + y2[i];
      }

      emlrtPushRtStackR2012b(&g_emlrtRSI, emlrtRootTLSGlobal);
      b_euclid(SD, y, y, SD->f2.d);
      eye(SD->f2.delta);
      for (i = 0; i < 1000000; i++) {
        SD->f2.d[i] += SD->f2.delta[i];
      }

      emlrtPopRtStackR2012b(&g_emlrtRSI, emlrtRootTLSGlobal);
      rdivide(1.0, SD->f2.d, SD->f2.dinv);
      for (i = 0; i < 1000000; i++) {
        SD->f2.b_D[i] = SD->f2.D[i] - SD->f2.d[i];
      }

      power(SD->f2.b_D, SD->f2.delta);
      for (i = 0; i < 1000000; i++) {
        SD->f2.d[i] = SD->f2.delta[i] * SD->f2.Dinv[i];
      }

      d_sum(SD->f2.d, dv1);
      E_new = e_sum(dv1);
      if (E_new < *E) {
        exitg2 = TRUE;
      } else {
        for (i = 0; i < 2000; i++) {
          y2[i] *= 0.5;
        }

        b_j++;
        emlrtBreakCheckFastR2012b(emlrtBreakCheckR2012bFlagVar,
          emlrtRootTLSGlobal);
      }
    }

    /*  bomb out if too many halving steps are required */
    if ((j == 20) || (muDoubleScalarAbs((*E - E_new) / *E) < 1.0E-9)) {
      exitg1 = TRUE;
    } else {
      /*  evaluate termination criterion */
      /*  report progress */
      *E = E_new;
      b_i++;
      emlrtBreakCheckFastR2012b(emlrtBreakCheckR2012bFlagVar, emlrtRootTLSGlobal);
    }
  }

  /*  fiddle stress to match the original Sammon paper */
  *E *= 0.5 / B;
}
Exemple #21
0
/* generate simulated observation data ---------------------------------------*/
static int simobs(gtime_t ts, gtime_t te, double tint, const double *rr,
                  nav_t *nav, obs_t *obs, int opt)
{
    gtime_t time;
    obsd_t data[MAXSAT]={{{0}}};
    double pos[3],rs[3*MAXSAT],dts[MAXSAT],r,e[3],azel[2];
    double ecp[MAXSAT][NFREQ]={{0}},epr[MAXSAT][NFREQ]={{0}};
    double snr[MAXSAT][NFREQ]={{0}},ers[MAXSAT][3]={{0}};
    double iono,trop,fact,cp,pr,dtr=0.0,rref[3],bl;
    int i,j,k,n,ns,amb[MAXSAT][NFREQ]={{0}},sys,prn;
    char s[64];
    
    double pref[]={36.106114294,140.087190410,70.3010}; /* ref station */
    
    trace(3,"simobs:nnav=%d ngnav=%d\n",nav->n,nav->ng);
    
    for (i=0;i<2;i++) pref[i]*=D2R;
    pos2ecef(pref,rref);
    for (i=0;i<3;i++) rref[i]-=rr[i];
    bl=norm(rref,3)/1E4; /* baseline (10km) */
    srand(0);
    
    /* ephemeris error */
    for (i=0;i<MAXSAT;i++) {
        data[i].sat=i+1;
        data[i].P[0]=2E7;
        for (j=0;j<3;j++) ers[i][j]=randn(0.0,erreph);
    }
    srand(tickget());
    
    ecef2pos(rr,pos);
    n=(int)(timediff(te,ts)/tint+1.0);
    
    for (i=0;i<n;i++) {
        time=timeadd(ts,tint*i);
        time2str(time,s,0);
        
        for (j=0;j<MAXSAT;j++) data[j].time=time;
        
        for (j=0;j<3;j++) { /* iteration for pseudorange */
            satpos(time,data,MAXSAT,nav,rs,dts);
            for (k=0;k<MAXSAT;k++) {
                if ((r=geodist(rs+k*3,rr,e))<=0.0) continue;
                data[k].P[0]=r+CLIGHT*(dtr-dts[k]);
            }
        }
        satpos(time,data,MAXSAT,nav,rs,dts);
        for (j=ns=0;j<MAXSAT;j++) {
            
            /* add ephemeris error */
            for (k=0;k<3;k++) rs[k+j*3]+=ers[j][k];
            
            if ((r=geodist(rs+j*3,rr,e))<=0.0) continue;
            satazel(pos,e,azel);
            if (azel[1]<minel*D2R) continue;
            
            iono=ionmodel(time,nav->ion,pos,azel);
            trop=tropmodel(pos,azel,0.3);
            
            /* add ionospheric error */
            iono+=errion*bl*ionmapf(pos,azel);
            
            snrmodel(azel,snr[j]);
            errmodel(azel,snr[j],ecp[j],epr[j]);
            sys=satsys(data[j].sat,&prn);
            
            for (k=0;k<NFREQ;k++) {
                data[j].L[k]=data[j].P[k]=0.0;
                data[j].SNR[k]=0;
                data[j].LLI[k]=0;
                
                if (sys==SYS_GPS) {
                    if (k>=3) continue; /* no L5a/L5b in gps */
                    if (k>=2&&!gpsblock[prn-1]) continue; /* no L5 in block II */
                }
                else if (sys==SYS_GLO) {
                    if (k>=3) continue;
                }
                else if (sys==SYS_GAL) {
                    if (k==1) continue; /* no L2 in galileo */
                }
                else continue;
                
                /* generate observation data */
                fact=lam[k]*lam[k]/lam[0]/lam[0];
                cp=r+CLIGHT*(dtr-dts[j])-fact*iono+trop+ecp[j][k];
                pr=r+CLIGHT*(dtr-dts[j])+fact*iono+trop+epr[j][k];
                
                if (amb[j][k]==0) amb[j][k]=(int)(-cp/lam[k]);
                data[j].L[k]=cp/lam[k]+amb[j][k];
                data[j].P[k]=pr;
                data[j].SNR[k]=(unsigned char)snr[j][k];
                data[j].LLI[k]=data[j].SNR[k]<slipthres?1:0;
            }
            if (obs->nmax<=obs->n) {
                if (obs->nmax==0) obs->nmax=65532; else obs->nmax+=65532;
                if (!(obs->data=(obsd_t *)realloc(obs->data,sizeof(obsd_t)*obs->nmax))) {
                    fprintf(stderr,"malloc error\n");
                    return 0;
                }
            }
            obs->data[obs->n++]=data[j];
            ns++;
        }
        fprintf(stderr,"time=%s nsat=%2d\r",s,ns);
    }
    fprintf(stderr,"\n");
    return 1;
}
Exemple #22
0
// [[Rcpp::export]]
Rcpp::List if2_s(Rcpp::NumericVector data, int T, int N, int NP, double coolrate, Rcpp::NumericVector params) {

	// params will be in the order R0, r, sigma, eta, berr, Sinit, Iinit, Rinit

	Rcpp::NumericMatrix statemeans(T, 3);
	Rcpp::NumericMatrix statedata(NP, 4);

	srand(time(NULL));		// Seed PRNG with system time

	double w[NP]; 			// particle weights

	Particle particles[NP]; 	// particle estimates for current step
	Particle particles_old[NP]; // intermediate particle states for resampling

	printf("Initializing particle states\n");

	// Initialize particle parameter states (seeding)
	// Note that they will all be the same for this code as we are only filtering over the states
	for (int n = 0; n < NP; n++) {

		particles[n].R0 	= params[0];
		particles[n].r 		= params[1];
		particles[n].sigma 	= params[2];
		particles[n].eta 	= params[3];
		particles[n].berr 	= params[4];
		particles[n].Sinit 	= params[5];
		particles[n].Iinit 	= params[6];
		particles[n].Rinit 	= params[7];

	}

	// START PASS THROUGH DATA

	printf("Starting filter\n");
	printf("---------------\n");

	// seed initial states
	for (int n = 0; n < NP; n++) {

		double Iinitcan;

		double i_infec = particles[n].Iinit;

		do {
			Iinitcan = i_infec + i_infec*randn();
		} while (Iinitcan < 0 || N < Iinitcan);

		particles[n].S = N - Iinitcan;
		particles[n].I = Iinitcan;
		particles[n].R = 0.0;

		particles[n].B = (double) particles[n].R0 * particles[n].r / N;

	}

	State sMeans;
	getStateMeans(&sMeans, particles, NP);
	statemeans(0,0) = sMeans.S;
	statemeans(0,1) = sMeans.I;
	statemeans(0,2) = sMeans.R;

	// start run through data
	for (int t = 1; t < T; t++) {

		// generate individual predictions and weight
		for (int n = 0; n < NP; n++) {

			exp_euler_SIR(1.0/7.0, 0.0, 1.0, N, &particles[n]);

			double merr_par = particles[n].sigma;
			double y_diff 	= data[t] - particles[n].I;
			
			w[n] = 1.0/(merr_par*sqrt(2.0*M_PI)) * exp( - y_diff*y_diff / (2.0*merr_par*merr_par) );

		}

		// cumulative sum
		for (int n = 1; n < NP; n++) {
			w[n] += w[n-1];
		}

		// save particle states to resample from
		for (int n = 0; n < NP; n++){
			copyParticle(&particles_old[n], &particles[n]);
		}

		// resampling
		for (int n = 0; n < NP; n++) {

			double w_r = randu() * w[NP-1];
			int i = 0;
			while (w_r > w[i]) {
				i++;
			}

			// i is now the index to copy state from
			copyParticle(&particles[n], &particles_old[i]);

		}
		
		State sMeans;
		getStateMeans(&sMeans, particles, NP);
		statemeans(t,0) = sMeans.S;
		statemeans(t,1) = sMeans.I;
		statemeans(t,2) = sMeans.R;

	}

	// Get particle results to pass back to R
	for (int n = 0; n < NP; n++) {
		statedata(n, 0) = particles[n].S;
		statedata(n, 1) = particles[n].I;
		statedata(n, 2) = particles[n].R;
		statedata(n, 3) = particles[n].B;
	}



	return Rcpp::List::create(	Rcpp::Named("statemeans") = statemeans,
                             	Rcpp::Named("statedata") = statedata 	);

}
Exemple #23
0
/**
   Time domain physical simulation.
   
   noisy: 
   - 0: no noise at all; 
   - 1: poisson and read out noise. 
   - 2: only poisson noise.   
*/
dmat *skysim_sim(dmat **mresout, const dmat *mideal, const dmat *mideal_oa, double ngsol, 
		 ASTER_S *aster, const POWFS_S *powfs, 
		 const PARMS_S *parms, int idtratc, int noisy, int phystart){
    int dtratc=0;
    if(!parms->skyc.multirate){
	dtratc=parms->skyc.dtrats->p[idtratc];
    }
    int hasphy;
    if(phystart>-1 && phystart<aster->nstep){
	hasphy=1;
    }else{
	hasphy=0;
    }
    const int nmod=mideal->nx;
    dmat *res=dnew(6,1);/*Results. 1-2: NGS and TT modes., 
			  3-4:On axis NGS and TT modes,
			  4-6: On axis NGS and TT wihtout considering un-orthogonality.*/
    dmat *mreal=NULL;/*modal correction at this step. */
    dmat *merr=dnew(nmod,1);/*modal error */
    dcell *merrm=dcellnew(1,1);dcell *pmerrm=NULL;
    const int nstep=aster->nstep?aster->nstep:parms->maos.nstep;
    dmat *mres=dnew(nmod,nstep);
    dmat* rnefs=parms->skyc.rnefs;
    dcell *zgradc=dcellnew3(aster->nwfs, 1, aster->ngs, 0);
    dcell *gradout=dcellnew3(aster->nwfs, 1, aster->ngs, 0);
    dmat *gradsave=0;
    if(parms->skyc.dbg){
	gradsave=dnew(aster->tsa*2,nstep);
    }
   
    
    SERVO_T *st2t=0;
    kalman_t *kalman=0;
    dcell *mpsol=0;
    dmat *pgm=0;
    dmat *dtrats=0;
    int multirate=parms->skyc.multirate;
    if(multirate){
	kalman=aster->kalman[0];
	dtrats=aster->dtrats;
    }else{
	if(parms->skyc.servo>0){
	    const double dtngs=parms->maos.dt*dtratc;
	    st2t=servo_new(merrm, NULL, 0, dtngs, aster->gain->p[idtratc]);
	    pgm=aster->pgm->p[idtratc];
	}else{
	    kalman=aster->kalman[idtratc];
	}
    }
    if(kalman){
	kalman_init(kalman);
	mpsol=dcellnew(aster->nwfs, 1); //for psol grad.
    }
    const long nwvl=parms->maos.nwvl;
    dcell **psf=0, **mtche=0, **ints=0;
    ccell *wvf=0,*wvfc=0, *otf=0;
    if(hasphy){
	psf=mycalloc(aster->nwfs,dcell*);
	wvf=ccellnew(aster->nwfs,1);
	wvfc=ccellnew(aster->nwfs,1);
	mtche=mycalloc(aster->nwfs,dcell*);
	ints=mycalloc(aster->nwfs,dcell*);
	otf=ccellnew(aster->nwfs,1);
    
	for(long iwfs=0; iwfs<aster->nwfs; iwfs++){
	    const int ipowfs=aster->wfs[iwfs].ipowfs;
	    const long ncomp=parms->maos.ncomp[ipowfs];
	    const long nsa=parms->maos.nsa[ipowfs];
	    wvf->p[iwfs]=cnew(ncomp,ncomp);
	    wvfc->p[iwfs]=NULL;
	    psf[iwfs]=dcellnew(nsa,nwvl);
	    //cfft2plan(wvf->p[iwfs], -1);
	    if(parms->skyc.multirate){
		mtche[iwfs]=aster->wfs[iwfs].pistat->mtche[(int)aster->idtrats->p[iwfs]];
	    }else{
		mtche[iwfs]=aster->wfs[iwfs].pistat->mtche[idtratc];
	    }
	    otf->p[iwfs]=cnew(ncomp,ncomp);
	    //cfft2plan(otf->p[iwfs],-1);
	    //cfft2plan(otf->p[iwfs],1);
	    ints[iwfs]=dcellnew(nsa,1);
	    int pixpsa=parms->skyc.pixpsa[ipowfs];
	    for(long isa=0; isa<nsa; isa++){
		ints[iwfs]->p[isa]=dnew(pixpsa,pixpsa);
	    }
	}
    }
    for(int irep=0; irep<parms->skyc.navg; irep++){
	if(kalman){
	    kalman_init(kalman);
	}else{
	    servo_reset(st2t);
	}
	dcellzero(zgradc);
	dcellzero(gradout);
	if(ints){
	    for(int iwfs=0; iwfs<aster->nwfs; iwfs++){
		dcellzero(ints[iwfs]);
	    }
	}
	for(int istep=0; istep<nstep; istep++){
	    memcpy(merr->p, PCOL(mideal,istep), nmod*sizeof(double));
	    dadd(&merr, 1, mreal, -1);/*form NGS mode error; */
	    memcpy(PCOL(mres,istep),merr->p,sizeof(double)*nmod);
	    if(mpsol){//collect averaged modes for PSOL.
		for(long iwfs=0; iwfs<aster->nwfs; iwfs++){
		    dadd(&mpsol->p[iwfs], 1, mreal, 1);
		}
	    }
	    pmerrm=0;
	    if(istep>=parms->skyc.evlstart){/*performance evaluation*/
		double res_ngs=dwdot(merr->p,parms->maos.mcc,merr->p);
		if(res_ngs>ngsol*100){
		    dfree(res); res=NULL;
		    break;
		}
		{
		    res->p[0]+=res_ngs;
		    res->p[1]+=dwdot2(merr->p,parms->maos.mcc_tt,merr->p);
		    double dot_oa=dwdot(merr->p, parms->maos.mcc_oa, merr->p);
		    double dot_res_ideal=dwdot(merr->p, parms->maos.mcc_oa, PCOL(mideal,istep));
		    double dot_res_oa=0;
		    for(int imod=0; imod<nmod; imod++){
			dot_res_oa+=merr->p[imod]*IND(mideal_oa,imod,istep);
		    }
		    res->p[2]+=dot_oa-2*dot_res_ideal+2*dot_res_oa;
		    res->p[4]+=dot_oa;
		}
		{
		    double dot_oa_tt=dwdot2(merr->p, parms->maos.mcc_oa_tt, merr->p);
		    /*Notice that mcc_oa_tt2 is 2x5 marix. */
		    double dot_res_ideal_tt=dwdot(merr->p, parms->maos.mcc_oa_tt2, PCOL(mideal,istep));
		    double dot_res_oa_tt=0;
		    for(int imod=0; imod<2; imod++){
			dot_res_oa_tt+=merr->p[imod]*IND(mideal_oa,imod,istep);
		    }
		    res->p[3]+=dot_oa_tt-2*dot_res_ideal_tt+2*dot_res_oa_tt;
		    res->p[5]+=dot_oa_tt;
		}
	    }//if evl

	    if(istep<phystart || phystart<0){
		/*Ztilt, noise free simulation for acquisition. */
		dmm(&zgradc->m, 1, aster->gm, merr, "nn", 1);/*grad due to residual NGS mode. */
		for(int iwfs=0; iwfs<aster->nwfs; iwfs++){
		    const int ipowfs=aster->wfs[iwfs].ipowfs;
		    const long ng=parms->maos.nsa[ipowfs]*2;
		    for(long ig=0; ig<ng; ig++){
			zgradc->p[iwfs]->p[ig]+=aster->wfs[iwfs].ztiltout->p[istep*ng+ig];
		    }
		}
	
		for(int iwfs=0; iwfs<aster->nwfs; iwfs++){
		    int dtrati=(multirate?(int)dtrats->p[iwfs]:dtratc);
		    if((istep+1) % dtrati==0){
			dadd(&gradout->p[iwfs], 0, zgradc->p[iwfs], 1./dtrati);
			dzero(zgradc->p[iwfs]);
			if(noisy){
			    int idtrati=(multirate?(int)aster->idtrats->p[iwfs]:idtratc);
			    dmat *nea=aster->wfs[iwfs].pistat->sanea->p[idtrati];
			    for(int i=0; i<nea->nx; i++){
				gradout->p[iwfs]->p[i]+=nea->p[i]*randn(&aster->rand);
			    }
			}
			pmerrm=merrm;//record output.
		    }
		}
	    }else{
		/*Accumulate PSF intensities*/
		for(long iwfs=0; iwfs<aster->nwfs; iwfs++){
		    const double thetax=aster->wfs[iwfs].thetax;
		    const double thetay=aster->wfs[iwfs].thetay;
		    const int ipowfs=aster->wfs[iwfs].ipowfs;
		    const long nsa=parms->maos.nsa[ipowfs];
		    ccell* wvfout=aster->wfs[iwfs].wvfout[istep];
		    for(long iwvl=0; iwvl<nwvl; iwvl++){
			double wvl=parms->maos.wvl[iwvl];
			for(long isa=0; isa<nsa; isa++){
			    ccp(&wvfc->p[iwfs], IND(wvfout,isa,iwvl));
			    /*Apply NGS mode error to PSF. */
			    ngsmod2wvf(wvfc->p[iwfs], wvl, merr, powfs+ipowfs, isa,
				       thetax, thetay, parms);
			    cembedc(wvf->p[iwfs],wvfc->p[iwfs],0,C_FULL);
			    cfft2(wvf->p[iwfs],-1);
			    /*peak in corner. */
			    cabs22d(&psf[iwfs]->p[isa+nsa*iwvl], 1., wvf->p[iwfs], 1.);
			}/*isa */
		    }/*iwvl */
		}/*iwfs */
	
		/*Form detector image from accumulated PSFs*/
		double igrad[2];
		for(long iwfs=0; iwfs<aster->nwfs; iwfs++){
		    int dtrati=dtratc, idtrat=idtratc;
		    if(multirate){//multirate
			idtrat=aster->idtrats->p[iwfs];
			dtrati=dtrats->p[iwfs];
		    }
		    if((istep+1) % dtrati == 0){/*has output */
			dcellzero(ints[iwfs]);
			const int ipowfs=aster->wfs[iwfs].ipowfs;
			const long nsa=parms->maos.nsa[ipowfs];
			for(long isa=0; isa<nsa; isa++){
			    for(long iwvl=0; iwvl<nwvl; iwvl++){
				double siglev=aster->wfs[iwfs].siglev->p[iwvl];
				ccpd(&otf->p[iwfs],psf[iwfs]->p[isa+nsa*iwvl]);
				cfft2i(otf->p[iwfs], 1); /*turn to OTF, peak in corner */
				ccwm(otf->p[iwfs], powfs[ipowfs].dtf[iwvl].nominal);
				cfft2(otf->p[iwfs], -1);
				dspmulcreal(ints[iwfs]->p[isa]->p, powfs[ipowfs].dtf[iwvl].si, 
					   otf->p[iwfs]->p, siglev);
			    }
		
			    /*Add noise and apply matched filter. */
#if _OPENMP >= 200805 
#pragma omp critical 
#endif
			    switch(noisy){
			    case 0:/*no noise at all. */
				break;
			    case 1:/*both poisson and read out noise. */
				{
				    double bkgrnd=aster->wfs[iwfs].bkgrnd*dtrati;
				    addnoise(ints[iwfs]->p[isa], &aster->rand, bkgrnd, bkgrnd, 0,0,IND(rnefs,idtrat,ipowfs));
				}
				break;
			    case 2:/*there is still poisson noise. */
				addnoise(ints[iwfs]->p[isa], &aster->rand, 0, 0, 0,0,0);
				break;
			    default:
				error("Invalid noisy\n");
			    }
		
			    igrad[0]=0;
			    igrad[1]=0;
			    double pixtheta=parms->skyc.pixtheta[ipowfs];
			    if(parms->skyc.mtch){
				dmulvec(igrad, mtche[iwfs]->p[isa], ints[iwfs]->p[isa]->p, 1);
			    }
			    if(!parms->skyc.mtch || fabs(igrad[0])>pixtheta || fabs(igrad[1])>pixtheta){
				if(!parms->skyc.mtch){
				    warning2("fall back to cog\n");
				}else{
				    warning_once("mtch is out of range\n");
				}
				dcog(igrad, ints[iwfs]->p[isa], 0, 0, 0, 3*IND(rnefs,idtrat,ipowfs), 0); 
				igrad[0]*=pixtheta;
				igrad[1]*=pixtheta;
			    }
			    gradout->p[iwfs]->p[isa]=igrad[0];
			    gradout->p[iwfs]->p[isa+nsa]=igrad[1];
			}/*isa */
			pmerrm=merrm;
			dcellzero(psf[iwfs]);/*reset accumulation.*/
		    }/*if iwfs has output*/
		}/*for wfs*/
	    }/*if phystart */
	    //output to mreal after using it to ensure two cycle delay.
	    if(st2t){//Type I or II control.
		if(st2t->mint->p[0]){//has output.
		    dcp(&mreal, st2t->mint->p[0]->p[0]);
		}
	    }else{//LQG control
		kalman_output(kalman, &mreal, 0, 1);
	    }
	    if(kalman){//LQG control
		int indk=0;
		//Form PSOL grads and obtain index to LQG M
		for(int iwfs=0; iwfs<aster->nwfs; iwfs++){
		    int dtrati=(multirate?(int)dtrats->p[iwfs]:dtratc);
		    if((istep+1) % dtrati==0){
			indk|=1<<iwfs;
			dmm(&gradout->p[iwfs], 1, aster->g->p[iwfs], mpsol->p[iwfs], "nn", 1./dtrati);
			dzero(mpsol->p[iwfs]);
		    }
		}
		if(indk){
		    kalman_update(kalman, gradout->m, indk-1);
		}
	    }else if(st2t){
		if(pmerrm){
		    dmm(&merrm->p[0], 0, pgm, gradout->m, "nn", 1);	
		}
		servo_filter(st2t, pmerrm);//do even if merrm is zero. to simulate additional latency
	    }
	    if(parms->skyc.dbg){
		memcpy(PCOL(gradsave, istep), gradout->m->p, sizeof(double)*gradsave->nx);
	    }
	}/*istep; */
    }
    if(parms->skyc.dbg){
	int dtrati=(multirate?(int)dtrats->p[0]:dtratc);
	writebin(gradsave,"%s/skysim_grads_aster%d_dtrat%d",dirsetup, aster->iaster,dtrati);
	writebin(mres,"%s/skysim_sim_mres_aster%d_dtrat%d",dirsetup,aster->iaster,dtrati);
    }
  
    dfree(mreal);
    dcellfree(mpsol);
    dfree(merr);
    dcellfree(merrm);
    dcellfree(zgradc);
    dcellfree(gradout);
    dfree(gradsave);
    if(hasphy){
	dcellfreearr(psf, aster->nwfs);
	dcellfreearr(ints, aster->nwfs);
        ccellfree(wvf);
	ccellfree(wvfc);
	ccellfree(otf);
	free(mtche);
    }
    servo_free(st2t);
    /*dfree(mres); */
    if(mresout) {
	*mresout=mres;
    }else{
	dfree(mres);
    }
    dscale(res, 1./((nstep-parms->skyc.evlstart)*parms->skyc.navg));
    return res;
}
Exemple #24
0
int doAllFeatures() {
    /* Initial weights */
    int i, j, h;
    for (j=0; j<NMOVIES; j++) {
        for (i=0; i<TOTAL_FEATURES; i++) {
            vishid[j][0][i] = 0.02 * randn() - 0.01; // Normal Distribution
            vishid[j][1][i] = 0.02 * randn() - 0.01; // Normal Distribution
            vishid[j][2][i] = 0.02 * randn() - 0.01; // Normal Distribution
            vishid[j][3][i] = 0.02 * randn() - 0.01; // Normal Distribution
            vishid[j][4][i] = 0.02 * randn() - 0.01; // Normal Distribution
        }
    }

    /* Initial biases */
    for(i=0;i<TOTAL_FEATURES;i++) {
        hidbiases[i]=0.0;
    }
    for (j=0; j<NMOVIES; j++) {
        unsigned int mtot = moviercount[j*SOFTMAX+0] + moviercount[j*SOFTMAX+1] + moviercount[j*SOFTMAX+2] + moviercount[j*SOFTMAX+3] + moviercount[j*SOFTMAX+4];
        for (i=0; i<SOFTMAX; i++) {
            visbiases[j][i] = log( ((double)moviercount[j*SOFTMAX+i]) / ((double) mtot) );
        }
    }

    /* Optimize current feature */
    double nrmse=2., last_rmse=10.;
    double prmse = 0, last_prmse=0;
    double s;
    int n;
    int loopcount=0;
    double EpsilonW  = epsilonw;
    double EpsilonVB = epsilonvb;
    double EpsilonHB = epsilonhb;
    double Momentum  = momentum;
    ZERO(CDinc);
    ZERO(visbiasinc);
    ZERO(hidbiasinc);
    int tSteps = 1;

    // Iterate through the model while the RMSE is decreasing 
    //while ( ((nrmse < (last_rmse-E) && prmse<last_prmse) || loopcount < 14) && loopcount < 80  )  {
    while ( ((nrmse < (last_rmse-E) ) || loopcount < 14) && loopcount < 80  )  {

        if ( loopcount >= 10 )
            tSteps = 3 + (loopcount - 10)/5;

        last_rmse=nrmse;
        last_prmse=prmse;
        clock_t t0=clock();
        loopcount++;
        int ntrain = 0;
        nrmse = 0.0;
        s  = 0.0;
        n = 0;

        if ( loopcount > 5 )
            Momentum = finalmomentum;

        //* CDpos =0, CDneg=0 (matrices)
        ZERO(CDpos);
        ZERO(CDneg);
        ZERO(poshidact);
        ZERO(neghidact);
        ZERO(posvisact);
        ZERO(negvisact);
        ZERO(moviecount);

        int u,m, f;
        for(u=0;u<NUSERS;u++) {

            //* Clear summations for probabilities
            ZERO(negvisprobs);
            ZERO(nvp2);

            //* perform steps 1 to 8
            int base0=useridx[u][0];
            int d0=UNTRAIN(u);
            int dall=UNALL(u);

            // For all rated movies, accumulate contributions to hidden units
            double sumW[TOTAL_FEATURES];
            ZERO(sumW);
            for(j=0;j<d0;j++) {
                int m=userent[base0+j]&USER_MOVIEMASK;
                moviecount[m]++;

                // 1. get one data point from data set.
                // 2. use values of this data point to set state of visible neurons Si
                int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;

                // Add to the bias contribution for set visible units
                posvisact[m][r] += 1.0;
 
                // for all hidden units h:
                for(h=0;h<TOTAL_FEATURES;h++) {
                    // sum_j(W[i][j] * v[0][j]))
                    sumW[h]  += vishid[m][r][h];
                }
            }

            // Sample the hidden units state after computing probabilities
            for(h=0;h<TOTAL_FEATURES;h++) {

                // 3. compute Sj for each hidden neuron based on formula above and states of visible neurons Si
                // poshidprobs[h] = 1./(1 + exp(-V*vishid - hidbiases);
                // compute Q(h[0][i] = 1 | v[0]) # for binomial units, sigmoid(b[i] + sum_j(W[i][j] * v[0][j]))
                poshidprobs[h]  = 1.0/(1.0 + exp(-sumW[h] - hidbiases[h]));

                // sample h[0][i] from Q(h[0][i] = 1 | v[0])
                if  ( poshidprobs[h] >  (rand()/(double)(RAND_MAX)) ) {
                    poshidstates[h]=1;
                    poshidact[h] += 1.0;
                } else {
                    poshidstates[h]=0;
                }
            }

            // Load up a copy of poshidstates for use in loop
            for ( h=0; h < TOTAL_FEATURES; h++ ) 
                curposhidstates[h] = poshidstates[h];

            // Make T Contrastive Divergence steps
            int stepT = 0;
            do {
                // Determine if this is the last pass through this loop
                int finalTStep = (stepT+1 >= tSteps);
                
                // 5. on visible neurons compute Si using the Sj computed in step3. This is known as reconstruction
                // for all visible units j:
                int r;
                int count = d0;
                count += useridx[u][2];  // too compute probe errors
                for(j=0;j<count;j++) {
                    int m=userent[base0+j]&USER_MOVIEMASK;
                    for(h=0;h<TOTAL_FEATURES;h++) {
                        // Accumulate Weight values for sampled hidden states == 1
                        if ( curposhidstates[h] == 1 ) {
                            for(r=0;r<SOFTMAX;r++) {
                                negvisprobs[m][r]  += vishid[m][r][h];
                            }
                        }

                        // Compute more accurate probabilites for RMSE reporting
                        if ( stepT == 0 ) {  
                            for(r=0;r<SOFTMAX;r++) 
                                nvp2[m][r] += poshidprobs[h] * vishid[m][r][h];
                        }
                    }

                    // compute P(v[1][j] = 1 | h[0]) # for binomial units, sigmoid(c[j] + sum_i(W[i][j] * h[0][i]))
                    // Softmax elements are handled individually here
                    negvisprobs[m][0]  = 1./(1 + exp(-negvisprobs[m][0] - visbiases[m][0]));
                    negvisprobs[m][1]  = 1./(1 + exp(-negvisprobs[m][1] - visbiases[m][1]));
                    negvisprobs[m][2]  = 1./(1 + exp(-negvisprobs[m][2] - visbiases[m][2]));
                    negvisprobs[m][3]  = 1./(1 + exp(-negvisprobs[m][3] - visbiases[m][3]));
                    negvisprobs[m][4]  = 1./(1 + exp(-negvisprobs[m][4] - visbiases[m][4]));

                    // Normalize probabilities
                    double tsum  = 
                      negvisprobs[m][0] +
                      negvisprobs[m][1] +
                      negvisprobs[m][2] +
                      negvisprobs[m][3] +
                      negvisprobs[m][4];
                    if ( tsum != 0 ) {
                        negvisprobs[m][0]  /= tsum;
                        negvisprobs[m][1]  /= tsum;
                        negvisprobs[m][2]  /= tsum;
                        negvisprobs[m][3]  /= tsum;
                        negvisprobs[m][4]  /= tsum;
                    }
                    // Compute and Normalize more accurate RMSE reporting probabilities
                    if ( stepT == 0) {
                        nvp2[m][0]  = 1./(1 + exp(-nvp2[m][0] - visbiases[m][0]));
                        nvp2[m][1]  = 1./(1 + exp(-nvp2[m][1] - visbiases[m][1]));
                        nvp2[m][2]  = 1./(1 + exp(-nvp2[m][2] - visbiases[m][2]));
                        nvp2[m][3]  = 1./(1 + exp(-nvp2[m][3] - visbiases[m][3]));
                        nvp2[m][4]  = 1./(1 + exp(-nvp2[m][4] - visbiases[m][4]));
                        double tsum2  = 
                          nvp2[m][0] +
                          nvp2[m][1] +
                          nvp2[m][2] +
                          nvp2[m][3] +
                          nvp2[m][4];
                        if ( tsum2 != 0 ) {
                            nvp2[m][0]  /= tsum2;
                            nvp2[m][1]  /= tsum2;
                            nvp2[m][2]  /= tsum2;
                            nvp2[m][3]  /= tsum2;
                            nvp2[m][4]  /= tsum2;
                        }
                    }

                    // sample v[1][j] from P(v[1][j] = 1 | h[0])
                    double randval = (rand()/(double)(RAND_MAX));
                    if ( (randval -= negvisprobs[m][0]) <= 0.0 )
                        negvissoftmax[m] = 0;
                    else if ( (randval -= negvisprobs[m][1]) <= 0.0 )
                        negvissoftmax[m] = 1;
                    else if ( (randval -= negvisprobs[m][2]) <= 0.0 )
                        negvissoftmax[m] = 2;
                    else if ( (randval -= negvisprobs[m][3]) <= 0.0 )
                        negvissoftmax[m] = 3;
                    else //if ( (randval -= negvisprobs[m][4]) <= 0.0 )
                        negvissoftmax[m] = 4;

                    // if in training data then train on it
                    if ( j < d0 && finalTStep )  
                        negvisact[m][negvissoftmax[m]] += 1.0;
                }


                // 6. compute state of hidden neurons Sj again using Si from 5 step.
                // For all rated movies accumulate contributions to hidden units from sampled visible units
                ZERO(sumW);
                for(j=0;j<d0;j++) {
                    int m=userent[base0+j]&USER_MOVIEMASK;
     
                    // for all hidden units h:
                    for(h=0;h<TOTAL_FEATURES;h++) {
                        sumW[h]  += vishid[m][negvissoftmax[m]][h];
                    }
                }
                // for all hidden units h:
                for(h=0;h<TOTAL_FEATURES;h++) {
                    // compute Q(h[1][i] = 1 | v[1]) # for binomial units, sigmoid(b[i] + sum_j(W[i][j] * v[1][j]))
                    neghidprobs[h]  = 1./(1 + exp(-sumW[h] - hidbiases[h]));

                    // Sample the hidden units state again.
                    if  ( neghidprobs[h] >  (rand()/(double)(RAND_MAX)) ) {
                        neghidstates[h]=1;
                        if ( finalTStep )
                            neghidact[h] += 1.0;
                    } else {
                        neghidstates[h]=0;
                    }
                }

                // Compute error rmse and prmse before we start iterating on T
                if ( stepT == 0 ) {

                    // Compute rmse on training data
                    for(j=0;j<d0;j++) {
                        int m=userent[base0+j]&USER_MOVIEMASK;
                        int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;
         
                        //# Compute some error function like sum of squared difference between Si in 1) and Si in 5)
                        double expectedV = nvp2[m][1] + 2.0 * nvp2[m][2] + 3.0 * nvp2[m][3] + 4.0 * nvp2[m][4];
                        double vdelta = (((double)r)-expectedV);
                        nrmse += (vdelta * vdelta);
                    }
                    ntrain+=d0;

                    // Sum up probe rmse
                    int base=useridx[u][0];
                    for(i=1;i<2;i++) base+=useridx[u][i];
                    int d=useridx[u][2];
                    for(i=0; i<d;i++) {
                        int m=userent[base+i]&USER_MOVIEMASK;
                        int r=(userent[base+i]>>USER_LMOVIEMASK)&7;
                        //# Compute some error function like sum of squared difference between Si in 1) and Si in 5)
                        double expectedV = nvp2[m][1] + 2.0 * nvp2[m][2] + 3.0 * nvp2[m][3] + 4.0 * nvp2[m][4];
                        double vdelta = (((double)r)-expectedV);
                        s+=vdelta*vdelta;
                    }
                    n+=d;
                }

                // If looping again, load the curposvisstates
                if ( !finalTStep ) {
                    for ( h=0; h < TOTAL_FEATURES; h++ ) 
                        curposhidstates[h] = neghidstates[h];
                    ZERO(negvisprobs);
                }

              // 8. repeating multiple times steps 5,6 and 7 compute (Si.Sj)n. Where n is small number and can 
              //    increase with learning steps to achieve better accuracy.

            } while ( ++stepT < tSteps );

            // Accumulate contrastive divergence contributions for (Si.Sj)0 and (Si.Sj)T
            for(j=0;j<d0;j++) {
                int m=userent[base0+j]&USER_MOVIEMASK;
                int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;
 
                // for all hidden units h:
                for(h=0;h<TOTAL_FEATURES;h++) {
                    if ( poshidstates[h] == 1 ) {
                        // 4. now Si and Sj values can be used to compute (Si.Sj)0  here () means just values not average
                        //* accumulate CDpos = CDpos + (Si.Sj)0
                        CDpos[m][r][h] += 1.0;
                    }

                    // 7. now use Si and Sj to compute (Si.Sj)1 (fig.3)
                    CDneg[m][negvissoftmax[m]][h] += (double)neghidstates[h];
                }
            }

            // Update weights and biases after batch
            //
            int bsize = 100;
            if ( ((u+1) % bsize) == 0 || (u+1) == NUSERS ) {
                int numcases = u % bsize;
                numcases++;

                // Update weights
                for(m=0;m<NMOVIES;m++) {
                    if ( moviecount[m] == 0 ) continue;

                    // for all hidden units h:
                    for(h=0;h<TOTAL_FEATURES;h++) {
                        // for all softmax
                        int rr;
                        for(rr=0;rr<SOFTMAX;rr++) {
                            //# At the end compute average of CDpos and CDneg by dividing them by number of data points.
                            //# Compute CD = < Si.Sj >0  < Si.Sj >n = CDpos  CDneg
                            double CDp = CDpos[m][rr][h];
                            double CDn = CDneg[m][rr][h];
                            if ( CDp != 0.0 || CDn != 0.0 ) {
                                CDp /= ((double)moviecount[m]);
                                CDn /= ((double)moviecount[m]);

                                // W += epsilon * (h[0] * v[0]' - Q(h[1][.] = 1 | v[1]) * v[1]')
                                //# Update weights and biases W = W + alpha*CD (biases are just weights to neurons that stay always 1.0)
                                //e.g between data and reconstruction.
                                CDinc[m][rr][h] = Momentum * CDinc[m][rr][h] + EpsilonW * ((CDp - CDn) - weightcost * vishid[m][rr][h]);
                                vishid[m][rr][h] += CDinc[m][rr][h];
                            } 
                        }
                    }

                    // Update visible softmax biases
                    // c += epsilon * (v[0] - v[1])$
                    // for all softmax
                    int rr;
                    for(rr=0;rr<SOFTMAX;rr++) {
                        if ( posvisact[m][rr] != 0.0 || negvisact[m][rr] != 0.0 ) {
                            posvisact[m][rr] /= ((double)moviecount[m]);
                            negvisact[m][rr] /= ((double)moviecount[m]);
                            visbiasinc[m][rr] = Momentum * visbiasinc[m][rr] + EpsilonVB * ((posvisact[m][rr] - negvisact[m][rr]));
                            //visbiasinc[m][rr] = Momentum * visbiasinc[m][rr] + EpsilonVB * ((posvisact[m][rr] - negvisact[m][rr]) - weightcost * visbiases[m][rr]);
                            visbiases[m][rr]  += visbiasinc[m][rr];
                        }
                    }
                }

                
                // Update hidden biases
                // b += epsilon * (h[0] - Q(h[1][.] = 1 | v[1]))
                for(h=0;h<TOTAL_FEATURES;h++) {
                    if ( poshidact[h]  != 0.0 || neghidact[h]  != 0.0 ) {
                        poshidact[h]  /= ((double)(numcases));
                        neghidact[h]  /= ((double)(numcases));
                        hidbiasinc[h] = Momentum * hidbiasinc[h] + EpsilonHB * ((poshidact[h] - neghidact[h]));
                        //hidbiasinc[h] = Momentum * hidbiasinc[h] + EpsilonHB * ((poshidact[h] - neghidact[h]) - weightcost * hidbiases[h]);
                        hidbiases[h]  += hidbiasinc[h];
                    }
                }
                ZERO(CDpos);
                ZERO(CDneg);
                ZERO(poshidact);
                ZERO(neghidact);
                ZERO(posvisact);
                ZERO(negvisact);
                ZERO(moviecount);
            }
        }

        nrmse=sqrt(nrmse/ntrain);
        prmse = sqrt(s/n);
        
        printf("%f\t%f\t%f\n",nrmse,prmse,(clock()-t0)/(double)CLOCKS_PER_SEC);

        if ( TOTAL_FEATURES == 200 ) {
            if ( loopcount > 6 ) {
                EpsilonW  *= 0.90;
                EpsilonVB *= 0.90;
                EpsilonHB *= 0.90;
            } else if ( loopcount > 5 ) {  // With 200 hidden variables, you need to slow things down a little more
                EpsilonW  *= 0.50;         // This could probably use some more optimization
                EpsilonVB *= 0.50;
                EpsilonHB *= 0.50;
            } else if ( loopcount > 2 ) {
                EpsilonW  *= 0.70;
                EpsilonVB *= 0.70;
                EpsilonHB *= 0.70;
            }
        } else {  // The 100 hidden variable case
            if ( loopcount > 8 ) {
                EpsilonW  *= 0.92;
                EpsilonVB *= 0.92;
                EpsilonHB *= 0.92;
            } else if ( loopcount > 6 ) {
                EpsilonW  *= 0.90;
                EpsilonVB *= 0.90;
                EpsilonHB *= 0.90;
            } else if ( loopcount > 2 ) {
                EpsilonW  *= 0.78;
                EpsilonVB *= 0.78;
                EpsilonHB *= 0.78;
            }
        }
    }
    
    /* Perform a final iteration in which the errors are clipped and stored */
    recordErrors();
    
    //if(save_model) {
        //dappend_bin(fnameV,sV,NMOVIES);
        //dappend_bin(fnameU,sU,NUSERS);
    //}
    
    return 1;
}
/**
* The implementation of the particle filter using OpenMP for many frames
* @see http://openmp.org/wp/
* @note This function is designed to work with a video of several frames. In addition, it references a provided MATLAB function which takes the video, the target matrix and the x and y arrays as arguments and returns the likelihoods
* @param I The video to be run
* @param IszX The x dimension of the video
* @param IszY The y dimension of the video
* @param Nfr The number of frames
* @param seed The seed array used for random number generation
* @param Nparticles The number of particles to be used
* @param x_loc The array that will store the x locations of the desired object
* @param y_loc The array that will store the y locations of the desired object
* @param xe The starting x position of the object
* @param ye The starting y position of the object
* @param target The matrix containing the offsets to be used in the likelihood function
*/
void particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles, double * x_loc, double * y_loc, double xe, double ye, mxArray * target){
	int x, y, max_size;
	double * weights, *likelihood, *arrayX, *arrayY, *xj, *yj, *CDF, *u;
	max_size = IszX*IszY*Nfr;
	
	/*original particle centroid*/
	x_loc[0] = xe;
	y_loc[0] = ye;

	/*initial weights are all equal (1/Nparticles)*/
	weights = (double *)malloc(sizeof(double)*Nparticles);
	for(x = 0; x < Nparticles; x++){
		weights[x] = 1/((double)(Nparticles));
	}
	
	/*initial likelihood to 0.0*/
	likelihood = (double *)mxCalloc(Nparticles, sizeof(double));
	arrayX = (double *)mxCalloc(Nparticles, sizeof(double));
	arrayY = (double *)mxCalloc(Nparticles, sizeof(double));
	xj = (double *)mxCalloc(Nparticles, sizeof(double));
	yj = (double *)mxCalloc(Nparticles, sizeof(double));
	CDF = (double *)mxCalloc(Nparticles, sizeof(double));
	u = (double *)mxCalloc(Nparticles, sizeof(double));
	mxArray * arguments[4];
	mxArray * mxIK = mxCreateDoubleMatrix(IszX, IszY, mxREAL);
	mxArray * mxX = mxCreateDoubleMatrix(1, Nparticles, mxREAL);
	mxArray * mxY = mxCreateDoubleMatrix(1, Nparticles, mxREAL);
	double * Ik = (double *)mxCalloc(IszX*IszY, sizeof(double));
	mxArray * result = mxCreateDoubleMatrix(1, Nparticles, mxREAL);

	#pragma omp parallel for shared(arrayX, arrayY, xe, ye) private(x)
	for(x = 0; x < Nparticles; x++){
		arrayX[x] = xe;
		arrayY[x] = ye;
	}
	int k;	
	int indX, indY;
	//speed
	double speed = .01;
	
	for(k = 1; k < Nfr; k++){
		/*apply motion model
		//draws sample from motion model (random walk). The only prior information
		//is that the object moves 2x as fast as in the y direction*/
		#pragma omp parallel for shared(arrayX, arrayY, Nparticles, seed) private(x)
		for(x = 0; x < Nparticles; x++){
			arrayX[x] += speed + 2*randn(seed, x);
			arrayY[x] += 2*randn(seed, x);
			if(arrayX[x] > IszX)
				arrayX = IszX;
			if(arrayY[x] > IszY)
				arrayY = IszY;
			if(arrayX[x] < 1)
				arrayX[x] = 1;
			if(arrayY[x] < 1)
				arrayY[x] = 1;
		}	

		//get the current image
		for(x = 0; x < IszX; x++)
		{
			for(y = 0; y < IszY; y++)
			{
				Ik[x*IszX + y] = (double)I[k*IszX*IszY + x*IszY + y];
			}
		}
		//copy arguments
		memcpy(mxGetPr(mxIK), Ik, sizeof(double)*IszX*IszY);
		memcpy(mxGetPr(mxX), arrayX, sizeof(double)*Nparticles);
		memcpy(mxGetPr(mxY), arrayY, sizeof(double)*Nparticles);
		arguments[0] = mxIK;
		arguments[1] = target;
		arguments[2] = mxX;
		arguments[3] = mxY;
		mexCallMATLAB(1, &result, 4, arguments, "GetLikelihood");
		memcpy(likelihood, result, sizeof(double)*Nparticles);

		/* update & normalize weights
		// using equation (63) of Arulampalam Tutorial*/
		#pragma omp parallel for shared(Nparticles, weights, likelihood) private(x)
		for(x = 0; x < Nparticles; x++){
			weights[x] = weights[x] * exp(likelihood[x]);
		}

		double sumWeights = 0;
		#pragma omp parallel for shared(weights) private(x) reduction(+:sumWeights)
		for(x = 0; x < Nparticles; x++){
			sumWeights += weights[x];
		}
		
		#pragma omp parallel for shared(sumWeights, weights) private(x)
		for(x = 0; x < Nparticles; x++){
			weights[x] = weights[x]/sumWeights;
		}

		xe = 0;
		ye = 0;
		/* estimate the object location by expected values*/
		#pragma omp parallel for private(x) reduction(+:xe, ye)
		for(x = 0; x < Nparticles; x++){
			xe += arrayX[x] * weights[x];
			ye += arrayY[x] * weights[x];
		}
		x_loc[k] = xe;
		y_loc[k] = ye;
		/*display(hold off for now)
		
		//pause(hold off for now)
		
		//resampling*/
		
		
		CDF[0] = weights[0];
		for(x = 1; x < Nparticles; x++){
			CDF[x] = weights[x] + CDF[x-1];
		}

		double u1 = (1/((double)(Nparticles)))*randu(seed, 0);
		
		#pragma omp parallel for shared(u, u1, Nparticles) private(x)
		for(x = 0; x < Nparticles; x++){
			u[x] = u1 + x/((double)(Nparticles));
		}

		int j, i;
		#pragma omp parallel for shared(CDF, Nparticles, xj, yj, u, arrayX, arrayY) private(i, j)
		for(j = 0; j < Nparticles; j++){
			i = findIndex(CDF, Nparticles, u[j]);
			/*i = findIndexBin(CDF, 0, Nparticles, u[j]);*/
			if(i == -1)
			i = Nparticles-1;
			xj[j] = arrayX[i];
			yj[j] = arrayY[i];
			
		}

		/*reassign arrayX and arrayY*/
		#pragma omp parallel for shared(weights, arrayX, arrayY, xj, yj, Nparticles) private(x)
		for(x = 0; x < Nparticles; x++){
			weights[x] = 1/((double)(Nparticles));
			arrayX[x] = xj[x];
			arrayY[x] = yj[x];
		}
	}
	mxFree(weights);	
	mxFree(likelihood);
	mxFree(arrayX);
	mxFree(arrayY);
	mxFree(CDF);
	mxFree(u);
	mxFree(xj);
	mxFree(yj);
	mxFree(Ik);
}
/**
* The implementation of the particle filter using OpenMP for many frames
* @see http://openmp.org/wp/
* @note This function is designed to work with a video of several frames. In addition, it references a provided MATLAB function which takes the video, the objxy matrix and the x and y arrays as arguments and returns the likelihoods
* @param I The video to be run
* @param IszX The x dimension of the video
* @param IszY The y dimension of the video
* @param Nfr The number of frames
* @param seed The seed array used for random number generation
* @param Nparticles The number of particles to be used
*/
void particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles){
	
	int max_size = IszX*IszY*Nfr;
	long long start = get_time();
	//original particle centroid
	double xe = roundDouble(IszY/2.0);
	double ye = roundDouble(IszX/2.0);
	
	//expected object locations, compared to center
	int radius = 5;
	int diameter = radius*2 - 1;
	int * disk = (int *)malloc(diameter*diameter*sizeof(int));
	strelDisk(disk, radius);
	int countOnes = 0;
	int x, y;
	for(x = 0; x < diameter; x++){
		for(y = 0; y < diameter; y++){
			if(disk[x*diameter + y] == 1)
				countOnes++;
		}
	}
	double * objxy = (double *)malloc(countOnes*2*sizeof(double));
	getneighbors(disk, countOnes, objxy, radius);
	
	long long get_neighbors = get_time();
	printf("TIME TO GET NEIGHBORS TOOK: %f\n", elapsed_time(start, get_neighbors));
	//initial weights are all equal (1/Nparticles)
	double * weights = (double *)malloc(sizeof(double)*Nparticles);
	#pragma omp parallel for shared(weights, Nparticles) private(x)
	for(x = 0; x < Nparticles; x++){
		weights[x] = 1/((double)(Nparticles));
	}
	long long get_weights = get_time();
	printf("TIME TO GET WEIGHTSTOOK: %f\n", elapsed_time(get_neighbors, get_weights));
	//initial likelihood to 0.0
	double * likelihood = (double *)malloc(sizeof(double)*Nparticles);
	double * arrayX = (double *)malloc(sizeof(double)*Nparticles);
	double * arrayY = (double *)malloc(sizeof(double)*Nparticles);
	double * xj = (double *)malloc(sizeof(double)*Nparticles);
	double * yj = (double *)malloc(sizeof(double)*Nparticles);
	double * CDF = (double *)malloc(sizeof(double)*Nparticles);
	double * u = (double *)malloc(sizeof(double)*Nparticles);
	int * ind = (int*)malloc(sizeof(int)*countOnes*Nparticles);
	#pragma omp parallel for shared(arrayX, arrayY, xe, ye) private(x)
	for(x = 0; x < Nparticles; x++){
		arrayX[x] = xe;
		arrayY[x] = ye;
	}
	int k;
	
	printf("TIME TO SET ARRAYS TOOK: %f\n", elapsed_time(get_weights, get_time()));
	int indX, indY;
	for(k = 1; k < Nfr; k++){
		long long set_arrays = get_time();
		//apply motion model
		//draws sample from motion model (random walk). The only prior information
		//is that the object moves 2x as fast as in the y direction
		#pragma omp parallel for shared(arrayX, arrayY, Nparticles, seed) private(x)
		for(x = 0; x < Nparticles; x++){
			arrayX[x] += 1 + 5*randn(seed, x);
			arrayY[x] += -2 + 2*randn(seed, x);
		}
		long long error = get_time();
		printf("TIME TO SET ERROR TOOK: %f\n", elapsed_time(set_arrays, error));
		//particle filter likelihood
		#pragma omp parallel for shared(likelihood, I, arrayX, arrayY, objxy, ind) private(x, y, indX, indY)
		for(x = 0; x < Nparticles; x++){
			//compute the likelihood: remember our assumption is that you know
			// foreground and the background image intensity distribution.
			// Notice that we consider here a likelihood ratio, instead of
			// p(z|x). It is possible in this case. why? a hometask for you.		
			//calc ind
			for(y = 0; y < countOnes; y++){
				indX = roundDouble(arrayX[x]) + objxy[y*2 + 1];
				indY = roundDouble(arrayY[x]) + objxy[y*2];
				ind[x*countOnes + y] = fabs(indX*IszY*Nfr + indY*Nfr + k);
				if(ind[x*countOnes + y] >= max_size)
					ind[x*countOnes + y] = 0;
			}
			likelihood[x] = 0;
			for(y = 0; y < countOnes; y++)
				likelihood[x] += (pow((I[ind[x*countOnes + y]] - 100),2) - pow((I[ind[x*countOnes + y]]-228),2))/50.0;
			likelihood[x] = likelihood[x]/((double) countOnes);
		}
		long long likelihood_time = get_time();
		printf("TIME TO GET LIKELIHOODS TOOK: %f\n", elapsed_time(error, likelihood_time));
		// update & normalize weights
		// using equation (63) of Arulampalam Tutorial
		#pragma omp parallel for shared(Nparticles, weights, likelihood) private(x)
		for(x = 0; x < Nparticles; x++){
			weights[x] = weights[x] * exp(likelihood[x]);
		}
		long long exponential = get_time();
		printf("TIME TO GET EXP TOOK: %f\n", elapsed_time(likelihood_time, exponential));
		double sumWeights = 0;
		#pragma omp parallel for private(x) reduction(+:sumWeights)
		for(x = 0; x < Nparticles; x++){
			sumWeights += weights[x];
		}
		long long sum_time = get_time();
		printf("TIME TO SUM WEIGHTS TOOK: %f\n", elapsed_time(exponential, sum_time));
		#pragma omp parallel for shared(sumWeights, weights) private(x)
		for(x = 0; x < Nparticles; x++){
			weights[x] = weights[x]/sumWeights;
		}
		long long normalize = get_time();
		printf("TIME TO NORMALIZE WEIGHTS TOOK: %f\n", elapsed_time(sum_time, normalize));
		xe = 0;
		ye = 0;
		// estimate the object location by expected values
		#pragma omp parallel for private(x) reduction(+:xe, ye)
		for(x = 0; x < Nparticles; x++){
			xe += arrayX[x] * weights[x];
			ye += arrayY[x] * weights[x];
		}
		long long move_time = get_time();
		printf("TIME TO MOVE OBJECT TOOK: %f\n", elapsed_time(normalize, move_time));
		printf("XE: %lf\n", xe);
		printf("YE: %lf\n", ye);
		double distance = sqrt( pow((double)(xe-(int)roundDouble(IszY/2.0)),2) + pow((double)(ye-(int)roundDouble(IszX/2.0)),2) );
		printf("%lf\n", distance);
		//display(hold off for now)
		
		//pause(hold off for now)
		
		//resampling
		
		
		CDF[0] = weights[0];
		for(x = 1; x < Nparticles; x++){
			CDF[x] = weights[x] + CDF[x-1];
		}
		long long cum_sum = get_time();
		printf("TIME TO CALC CUM SUM TOOK: %f\n", elapsed_time(move_time, cum_sum));
		double u1 = (1/((double)(Nparticles)))*randu(seed, 0);
		#pragma omp parallel for shared(u, u1, Nparticles) private(x)
		for(x = 0; x < Nparticles; x++){
			u[x] = u1 + x/((double)(Nparticles));
		}
		long long u_time = get_time();
		printf("TIME TO CALC U TOOK: %f\n", elapsed_time(cum_sum, u_time));
		int j, i;
		
		#pragma omp parallel for shared(CDF, Nparticles, xj, yj, u, arrayX, arrayY) private(i, j)
		for(j = 0; j < Nparticles; j++){
			i = findIndex(CDF, Nparticles, u[j]);
			if(i == -1)
				i = Nparticles-1;
			xj[j] = arrayX[i];
			yj[j] = arrayY[i];
			
		}
		long long xyj_time = get_time();
		printf("TIME TO CALC NEW ARRAY X AND Y TOOK: %f\n", elapsed_time(u_time, xyj_time));
		
		//#pragma omp parallel for shared(weights, Nparticles) private(x)
		for(x = 0; x < Nparticles; x++){
			//reassign arrayX and arrayY
			arrayX[x] = xj[x];
			arrayY[x] = yj[x];
			weights[x] = 1/((double)(Nparticles));
		}
		long long reset = get_time();
		printf("TIME TO RESET WEIGHTS TOOK: %f\n", elapsed_time(xyj_time, reset));
	}
	free(disk);
	free(objxy);
	free(weights);
	free(likelihood);
	free(xj);
	free(yj);
	free(arrayX);
	free(arrayY);
	free(CDF);
	free(u);
	free(ind);
}
Particle2D VectorLocalization2D::createParticle(vector2f loc, float angle, float locationUncertainty, float angleUncertainty)
{
  return Particle2D(randn(locationUncertainty,loc.x), randn(locationUncertainty,loc.y), randn(angleUncertainty,angle),1.0);
}
Exemple #28
0
static void test_psd(){
    rand_t rstat;
    int seed=4;
    double r0=0.2;
    double dx=1./64;
    long N=1024;
    long nx=N;
    long ny=N;
    long ratio=1;
    long xskip=nx*(ratio-1)/2;
    long yskip=ny*(ratio-1)/2;
    long nframe=512;
    seed_rand(&rstat, seed);
    if(1){
	map_t *atm=mapnew(nx+1, ny+1, dx,dx, NULL);
	cmat *hat=cnew(nx*ratio, ny*ratio);
	//cfft2plan(hat, -1);
	dmat *hattot=dnew(nx*ratio, ny*ratio);

	for(long i=0; i<nframe; i++){
	    info2("%ld of %ld\n", i, nframe);
	    for(long j=0; j<(nx+1)*(ny+1); j++){
		atm->p[j]=randn(&rstat);
	    }
	    fractal_do((dmat*)atm, dx, r0,L0,ninit);
	    czero(hat);
	    for(long iy=0; iy<ny; iy++){
		for(long ix=0; ix<nx; ix++){
		    IND(hat,ix+xskip,iy+yskip)=IND(atm,ix,iy);
		}
	    }
	    cfftshift(hat);
	    cfft2i(hat, -1);
	    cabs22d(&hattot, 1, hat, 1);
	}
	dscale(hattot, 1./nframe);
	dfftshift(hattot);
	writebin(hattot, "PSD_fractal");
    }
    {
	dmat *spect=turbpsd(nx, ny, dx, r0, 100, 0, 0.5);
	writebin(spect, "spect");
	cmat *hat=cnew(nx*ratio, ny*ratio);
	//cfft2plan(hat, -1);
	dmat *hattot=dnew(nx*ratio, ny*ratio);
	cmat *atm=cnew(nx, ny);
	//cfft2plan(atm, -1);
	dmat *atmr=dnew(atm->nx, atm->ny);
	dmat *atmi=dnew(atm->nx, atm->ny);
	cmat*  phat=hat;
	dmat*  patmr=atmr;
	dmat*  patmi=atmi;
	for(long ii=0; ii<nframe; ii+=2){
	    info2("%ld of %ld\n", ii, nframe);
	    for(long i=0; i<atm->nx*atm->ny; i++){
		atm->p[i]=COMPLEX(randn(&rstat), randn(&rstat))*spect->p[i];
	    }
	    cfft2(atm, -1);
	    for(long i=0; i<atm->nx*atm->ny; i++){
		atmr->p[i]=creal(atm->p[i]);
		atmi->p[i]=cimag(atm->p[i]);
	    }
	    czero(hat);
	    for(long iy=0; iy<ny; iy++){
		for(long ix=0; ix<nx; ix++){
		    IND(phat,ix+xskip,iy+yskip)=IND(patmr,ix,iy);
		}
	    }
	    cfftshift(hat);
	    cfft2i(hat, -1);
	    cabs22d(&hattot, 1, hat, 1);
	    czero(hat);
	    for(long iy=0; iy<ny; iy++){
		for(long ix=0; ix<nx; ix++){
		    IND(phat,ix+xskip,iy+yskip)=IND(patmi,ix,iy);
		}
	    }
	    cfftshift(hat);
	    cfft2i(hat, -1);
	    cabs22d(&hattot, 1, hat, 1);
	}
	dscale(hattot, 1./nframe);
	dfftshift(hattot);
	writebin(hattot, "PSD_fft");
    }
}
Exemple #29
0
void kalman_dlqe3(real32_T dt, real32_T k1, real32_T k2, real32_T k3, const
                  real32_T x_aposteriori_k[3], real32_T z, real32_T posUpdate,
                  real32_T addNoise, real32_T sigma, real32_T x_aposteriori[3])
{
  real32_T A[9];
  int32_T i0;
  static const int8_T iv0[3] = { 0, 0, 1 };

  real_T b;
  real32_T y;
  real32_T b_y[3];
  int32_T i1;
  static const int8_T iv1[3] = { 1, 0, 0 };

  real32_T b_k1[3];
  real32_T f0;
  A[0] = 1.0F;
  A[3] = dt;
  A[6] = 0.5F * rt_powf_snf(dt, 2.0F);
  A[1] = 0.0F;
  A[4] = 1.0F;
  A[7] = dt;
  for (i0 = 0; i0 < 3; i0++) {
    A[2 + 3 * i0] = (real32_T)iv0[i0];
  }

  if (addNoise == 1.0F) {
    b = randn();
    z += sigma * (real32_T)b;
  }

  if (posUpdate != 0.0F) {
    y = 0.0F;
    for (i0 = 0; i0 < 3; i0++) {
      b_y[i0] = 0.0F;
      for (i1 = 0; i1 < 3; i1++) {
        b_y[i0] += (real32_T)iv1[i1] * A[i1 + 3 * i0];
      }

      y += b_y[i0] * x_aposteriori_k[i0];
    }

    y = z - y;
    b_k1[0] = k1;
    b_k1[1] = k2;
    b_k1[2] = k3;
    for (i0 = 0; i0 < 3; i0++) {
      f0 = 0.0F;
      for (i1 = 0; i1 < 3; i1++) {
        f0 += A[i0 + 3 * i1] * x_aposteriori_k[i1];
      }

      x_aposteriori[i0] = f0 + b_k1[i0] * y;
    }
  } else {
    for (i0 = 0; i0 < 3; i0++) {
      x_aposteriori[i0] = 0.0F;
      for (i1 = 0; i1 < 3; i1++) {
        x_aposteriori[i0] += A[i0 + 3 * i1] * x_aposteriori_k[i1];
      }
    }
  }
}