Exemplo n.º 1
0
void
corStruct_recalc(double *Xy, longint *pdims, longint *ZXcol, double *Factor)
{
    longint N = pdims[0], M = pdims[1], *len = pdims + 4, *start = len + M, i;
    for(i = 0; i < M;  i++) {
	mult_mat(Xy + start[i], N, Factor, len[i], len[i], len[i],
		 Xy + start[i], N, *ZXcol);
	Factor += (len[i] * len[i]);
    }
}
Exemplo n.º 2
0
int main(){
	int n = 750;
	int* a = (int*) malloc(sizeof(int) * n*n);
	int* b = (int*) malloc(sizeof(int) * n*n);
	int* c = (int*) malloc(sizeof(int) * n*n);

	for(int i = 0; i < n*n; i++) {
		a[i] = i;
		b[i] = i/2;
	}

	mult_mat(a, b, c, n);
	return 0;
}
Exemplo n.º 3
0
void
AR1_recalc(double *Xy, longint *pdims, longint *ZXcol, double *par,
	   double *logdet)
{
    longint N = pdims[0], M = pdims[1], *len = pdims + 4,  *start = len + M, i;
    double *Factor;
    /* par assumed in unconstrained form */
    *par = safe_phi( *par );
    for(i = 0; i < M;  i++) {
	Factor = Calloc(len[i] * len[i], double);
	AR1_fact(par, &len[i], Factor, logdet);
	mult_mat(Xy + start[i], N, Factor, len[i], len[i], len[i],
		 Xy + start[i], N, *ZXcol);
	Free(Factor);
    }
}
Exemplo n.º 4
0
void
CAR1_recalc(double *Xy, longint *pdims, longint *ZXcol,
	    double *par, double *time, double *logdet)
{
    longint N = pdims[0], M = pdims[1], *len = pdims + 4, *start = len + M, i;
    double aux = exp(*par);
    /* parameter assumed in unconstrained form */
    *par = aux / (1.0 + aux);
    for(i = 0; i < M;  i++) {
	double *Factor = Calloc(len[i] * len[i], double);
	CAR1_fact(par, time + start[i], &len[i], Factor, logdet);
	mult_mat(Xy + start[i], N, Factor, len[i], len[i], len[i],
		 Xy + start[i], N, *ZXcol);
	Free(Factor);
    }
}
Exemplo n.º 5
0
void
compSymm_recalc(double *Xy, longint *pdims, longint *ZXcol, double *par,
		double *inf, double *logdet)
{
    longint N = pdims[0], M = pdims[1], *len = pdims + 4, *start = len + M, i;
    double aux = exp(*par);
    /* parameter assumed in unconstrained form */
    *par = (aux + *inf)/(aux + 1.0);
    for(i = 0; i < M;  i++) {
	double *Factor = Calloc(len[i] * len[i], double);
	compSymm_fact(par, &len[i], Factor, logdet);
	mult_mat(Xy + start[i], N, Factor, len[i], len[i], len[i],
		 Xy + start[i], N, *ZXcol);
	Free(Factor);
    }
}
Exemplo n.º 6
0
void
nat_recalc(double *Xy, longint *pdims, longint *ZXcol, double *pars,
	   longint *time, longint *maxC, double *logdet)
{
    longint N = pdims[0], M = pdims[1], *len = pdims + 4, *start = len + M, i;
    double *crr = Calloc(*maxC * (*maxC - 1) / 2, double);
    /* parameters assumed in unconstrained form */
    nat_fullCorr(pars, maxC, crr);
    for(i = 0; i < M;  i++) {
	double *Factor = Calloc((len[i] * len[i]), double);
	symm_fact(crr, time + start[i], &len[i], maxC, Factor, logdet);
	mult_mat(Xy + start[i], N, Factor, len[i], len[i], len[i],
		 Xy + start[i], N, *ZXcol);
	Free(Factor);
    }
    Free(crr);
}
Exemplo n.º 7
0
void
spatial_recalc(double *Xy, longint *pdims, longint *ZXcol, double *par,
	       double *dist, double *minD, longint *nug, double *logdet)
{
    longint N = pdims[0], M = pdims[1], spClass = pdims[2],
	*len = pdims + 4, *start = len + M, i;
    double aux, (*corr)(double ) = dummy_corr, *sXy;
    /* parameter assumed in unconstrained form */
    par[0] = exp(par[0]);
    if (*nug == 1) {
	aux = exp(par[1]);
	par[1] = 1 / (1.0 + aux); /* 1 - nugget */
    }

    switch(spClass) {
    case 1:			/* spherical */
	corr = spher_corr;
	par[0] += *minD;
	break;
    case 2:			/* exponential */
	corr = exp_corr;
	break;
    case 3:			/* Gaussian */
	corr = Gaus_corr;
	break;
    case 4:			/* linear */
	corr = lin_corr;
	par[0] +=  *minD;
	break;
    case 5:			/* rational quadratic */
	corr = ratio_corr;
	break;
    default:
	error(_("Unknown spatial correlation class"));
	break;
    }

    for(i = 0, sXy = Xy; i < M;  i++) {
	double *Factor = Calloc(len[i] * len[i], double);
	spatial_fact(par, dist + start[i], &len[i], nug, corr, Factor, logdet);
	mult_mat(sXy, N, Factor, len[i], len[i], len[i], sXy, N, *ZXcol);
	sXy += len[i];
	Free(Factor);
    }
}
Exemplo n.º 8
0
void
ARMA_recalc(double *Xy, longint *pdims, longint *ZXcol, double *pars,
	    longint *p, longint *q, longint *time, longint *maxlag,
	    double *logdet)
{
    longint N = pdims[0], M = pdims[1], *len = pdims + 4, *start = len + M, i;
    double *crr = Calloc(*maxlag + 1L, double);
    /* parameters assumed in unconstrained form */
    ARMA_constCoef(p, q, pars);
    ARMA_fullCorr(p, q, maxlag, pars, crr);
    for(i = 0; i < M;  i++) {
	double *Factor = Calloc(len[i] * len[i], double);
	ARMA_fact(crr, time + start[i], &len[i], Factor, logdet);
	mult_mat(Xy + start[i], N, Factor, len[i], len[i], len[i],
		 Xy + start[i], N, *ZXcol);
	Free(Factor);
    }
}
Exemplo n.º 9
0
void
HF_recalc(double *Xy, longint *pdims, longint *ZXcol, double *par,
	  longint *time, longint *maxC, double *logdet)
{
    longint N = pdims[0], M = pdims[1], *len = pdims + 4,  *start = len + M, i;
    double inf = -1.0/(2.0 * ((double) *maxC));
    /* parameter assumed in unconstrained form */
    for(i = 0; i < *maxC; i++) {
	par[i] = 2.0 * (exp(par[i]) + inf) + 1.0;
    }
    for(i = 0; i < M;  i++) {
	double *Factor = Calloc(len[i] * len[i], double);
	HF_fact(par, time + start[i], &len[i], Factor, logdet);
	mult_mat(Xy + start[i], N, Factor, len[i], len[i], len[i],
		 Xy + start[i], N, *ZXcol);
	Free(Factor);
    }
}
Exemplo n.º 10
0
double	*my_rotation(double *resultat, double *matrice, char **argv, int i)
{
  double	matrice_rotation[9];
  double	a;
  double	Rx;
  double	Ry;
  double	stock;

  a = atof(argv[i + 1]);
  a = (a * M_PI) / 180;
  Rx = cos(a);
  Ry = sin(a);
  fill_mat_rot(&matrice_rotation[0], Rx, Ry);
  mult_mat(&matrice[0], &matrice_rotation[0]);
  printf("\033[01;37mRotation par rapport à un angle de %s degré(s)\n", argv[i + 1]);
  stock = resultat[0];
  resultat[0] = stock * matrice_rotation[0] + resultat[1] * matrice_rotation[1];
  resultat[1] = stock * matrice_rotation[3] + resultat[1] * matrice_rotation[4];
  return (resultat);
}
Exemplo n.º 11
0
double	*my_symetrie(double *resultat, double *matrice, char **argv, int i)
{
  double	matrice_symetrie[9];
  double	a;
  double	Sx;
  double	Sy;
  double	stock;

  a = atof(argv[i + 1]);
  a = (a * M_PI) / 180;
  Sx = cos(2 * a);
  Sy = sin(2 * a);
  fill_mat_sym(&matrice_symetrie[0], Sx, Sy);
  mult_mat(&matrice[0], &matrice_symetrie[0]);
  printf("\033[01;37mSymétrie par rapport à un axe de %s degré(s)\n", argv[i + 1]);
  stock = resultat[0];
  resultat[0] = stock * matrice_symetrie[0] + resultat[1] * matrice_symetrie[1];
  resultat[1] = stock * matrice_symetrie[3] + resultat[1] * matrice_symetrie[4];
  return (resultat);
}
Exemplo n.º 12
0
/* ベクトル×行列の計算を行う */
double *vecxmat(double *v, int size, double **mat, int nrow, int ncol)
{
	int    i;
	double *vec = NULL;
	double **tmp1 = NULL, **tmp2 = NULL;
 
	/* ベクトルを1×sizeの2次元の配列にする */
	tmp1 = new_double_matrix(1, size);
	for(i = 0; i < size; i++)
		tmp1[0][i] = v[i];

	/* 行列同士の掛け算を行う */
	tmp2 = mult_mat(tmp1, 1, size, mat, nrow, ncol);

	/* 計算結果を代入する */
	vec = new_double_vector(ncol);
	for(i = 0; i < ncol; i++)
		vec[i] = tmp2[0][i];

	free_double_matrix(tmp1);
	free_double_matrix(tmp2);

	return vec;
}
Exemplo n.º 13
0
/* 行列×ベクトルの計算を行う */
double *matxvec(double **mat, int nrow, int ncol, double *v, int size)
{
	int    i;
	double *vec = NULL;
	double **tmp1 = NULL, **tmp2 = NULL;
 
	/* ベクトルを1×sizeの2次元の配列にする */
	tmp1 = new_double_matrix(size, 1);
	for(i = 0; i < size; i++)
		tmp1[i][0] = v[i];

	/* 行列同士の掛け算を行う */
	tmp2 = mult_mat(mat, nrow, ncol, tmp1, size, 1);

	/* 計算結果を代入する */
	vec = new_double_vector(nrow);
	for(i = 0; i < nrow; i++)
		vec[i] = tmp2[i][0];

	free_double_matrix(tmp1);
	free_double_matrix(tmp2);

	return vec;
}
Exemplo n.º 14
0
void Calculate_HF(int *tlist,double *vlist,int nfac,int nvert,double *angles,double *Eo,double *E0o,double *up,double TIME,double dist,double Gamma,double A,double Hdist,int N,double WL,double *freqx,double *freqy,int nfreq,double *offset,double *Fr,double *Fi)
{
    double complex *F=calloc(nfreq,sizeof(double complex)); 
 double complex *F0;
 F0=(double complex*)calloc(nfreq,sizeof(double complex));
 double *Flux,*Fldx,*Fldy,*Fldz,*FldA;
 Flux=(double*)calloc(nfac,sizeof(double));
 double M[3][3],dMb[3][3],dMo[3][3],dMl[3][3],Mt[3][3];
 double R[3][3],Rdb[3][3],Rdl[3][3],Rdo[3][3],RT[3][3];
 double E[3],E0[3];
 double normalr[3],side1[3],side2[3];
 double dechdx[3],dechdy[3],dechdz[3],dechdA[3];
 double n[3],*nb,*cent;
 double *vb1,*vb2,*vb3;
 double vr1[3],vr2[3],vr3[3];
 double *v1,*v2,*v3;
 double scale;
 double complex tscale,FTC;
 double dp;
 double B,TB=0;
 double norm;
 int t1,t2,t3,blocker,sign;
int j1,j2,j3;
 double mu,mu0,area,mub,ech,rexp;
 double *normal,*centroid;
 int *visible;
 int tb1,tb2,tb3; //Indices to the vertices of possible blocker facet
 int blocked=0;
 //Distance km->arcsec
 dp=1/(dist*149597871.0)*180.0/PI*3600.0;
 visible=calloc(nfac,sizeof(int));
  //Allocate for memory
// normal=(double*)malloc(3*nfac*sizeof(double));
 // centroid=(double*)mxCalloc(3*nfac,sizeof(double));
 // IndexofBlocks=(int*)mxCalloc(nfac,sizeof(int));
// NumofBlocks=(int*)mxCalloc(nfac,sizeof(int));
  //Calculate frame change matrix
  Calculate_Frame_Matrix(Eo,up,R);
 
 //FacetsOverHorizon(tlist,vlist,nfac,nvert,normal,centroid,NumofBlocks,IndexofBlocks);
 
 rotate(angles[0],angles[1],angles[2],0.0,TIME,M,dMb,dMl,dMo);
 //Construct asteroid->Camera frame matrix, which is
 //asteroid->world frame->camera frame
 transpose(M,Mt); //Transpose, since we rotate the model, not view directions
 mult_mat(R,Mt,RT); 
 mult_vector(M,Eo,E);
 mult_vector(M,E0o,E0);

 /*For each facet,
  * 1)Check if facet is visible
  * 2) Calculate echo
  * 3) Convert triangle to range-Doppler frame
  * 4) Calculate FT
  */
//Find actual blockers
FindActualBlockers(tlist,vlist,nfac,nvert,E,E,1,visible);
Calculate_Radiance(tlist,vlist,nfac,nvert,angles,Eo,E0o,TIME,Gamma, A,Hdist,WL,N,Flux,Fldx,Fldy,Fldz,FldA,0);
//for(int i=27;i<nfac;i++)
 // mexPrintf("fl%d: %.10e\n",i+1, Flux[i]);

 //visible is nfac vector, visible[j]=1 if facet (j+1)th facet is visible
//NOTE INDEXING
//mexPrintf("%f %f %f\n",vlist[0],vlist[1],vlist[2]);
 for(int j=0;j<nfac;j++)
 {
   if(visible[j]==0)
     continue;
  //Calculate normal from facet vertices
   //Vertex indices of the current facet
   //Note that C indices from 0, matlab from 1
   j1=tlist[j*3]-1;
   j2=tlist[j*3+1]-1;
   j3=tlist[j*3+2]-1;
   //Current vertices
   
   v1=vlist+j1*3;
   v2=vlist+j2*3;
   v3=vlist+j3*3;
   
   
   //Calculate normals and centroids
   for(int i=0;i<3;i++)
   {
     
     side1[i]=dp*(v2[i]-v1[i]); //Convert km->arcsec
     side2[i]=dp*(v3[i]-v1[i]);
    
   }
   cross(side1,side2,n);
   norm=NORM(n);
   n[0]=n[0]/norm;
   n[1]=n[1]/norm;
   n[2]=n[2]/norm;
   
   mu=DOT(E,n);
   mu0=DOT(E0,n);
  //Convert to camera frame
   mult_vector(RT,v1,vr1);
   mult_vector(RT,v2,vr2);
   mult_vector(RT,v3,vr3);
   for(int i=0;i<3;i++)
   {
     vr1[i]=dp*vr1[i];
     vr2[i]=dp*vr2[i];
     vr3[i]=dp*vr3[i];
   }
    
     //Now we should convert to frequency domain, ie calculate the contribution of each facet
     Calc_FTC(freqx,freqy,nfreq,vr1[0],vr1[1],vr2[0],vr2[1],vr3[0],vr3[1],F0);
    // printf("Fdd: %f %f\n",creal(FTdd[0]),cimag(FTdd[0]));
     //Note that we sum to F at each round, does not work, we need to multiply with the echo
     //Derivatives wrt angles
   area=0.5*norm;
  //if(j==0)
   //{
    // mexPrintf("area: %f mu: %f F0: %f\n",area,mu,F0[0]);
   //}
   //mexPrintf("area: %f normal: %f %f %f mu: %f mu0: %f\n",area,n[0],n[1],n[2],mu,mu0); 
     
   B=Flux[j];
     for(int jf=0;jf<nfreq;jf++)
     {
       //This should be taken outside of the loop
       
    //   mexPrintf("scale:%f offset: %f %f\n",scale,creal(cexp(2*PI*I*(offset[0]*freqx[jf]+offset[1]*freqy[jf]))),cimag(cexp(2*PI*I*(offset[0]*freqx[jf]+offset[1]*freqy[jf]))));
       //FTC=tscale*F0[jf];
       F[jf]+=B*F0[jf];
      }
      TB=TB+B*area*mu;
//  printf("Flux: %f area: %f mu: %f\n",B,area,mu);

}
//printf("Total brightness: %f\n",TB);  
//Normalize with total brightness
double complex temp;
for(int j=0;j<nfreq;j++)
{
    temp=cexp(2.0*PI*I*(offset[0]*freqx[j]+offset[1]*freqy[j]))*F[j]/TB;
  Fr[j]=creal(temp);
  Fi[j]=cimag(temp);
}

free(visible);
free(Flux);
free(F0);
free(F);
}
Exemplo n.º 15
0
int main(int argc, char** argv)
{
	double serial_time, openCL_time, start_time;
	cl_int err;
	cl_platform_id* platforms = NULL;
	char platform_name[1024];
	cl_device_id device_id = NULL;
	cl_uint	num_of_platforms = 0;
	cl_uint num_of_devices = 0;
	cl_context context;
	cl_kernel kernel;
	cl_command_queue command_queue;
	cl_program program;
	cl_mem input1, input2, input3, output;
	float **A, **B, **C, **serialC;	// matrices
	int d1, d2, d3;         // dimensions of matrices

							/* print user instruction */
	if (argc != 4)
	{
		printf("Matrix multiplication: C = A x B\n");
		printf("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]);
		return 0;
	}

	/* read user input */
	d1 = 1000;		// rows of A and C
	d2 = 1000;     // cols of A and rows of B
	d3 = 1000;     // cols of B and C
	int d[4] = { 0, d1, d2, d3 };
	size_t global[1] = { (size_t)d1*d3 };

	printf("Matrix sizes C[%d][%d] = A[%d][%d] x B[%d][%d]\n", d1, d3, d1, d2, d2, d3);

	/* prepare matrices */
	A = alloc_mat(d1, d2);
	init_mat(A, d1, d2);
	B = alloc_mat(d2, d3);
	init_mat(B, d2, d3);
	C = alloc_mat(d1, d3);
	serialC = alloc_mat(d1, d3);

	err = clGetPlatformIDs(0, NULL, &num_of_platforms);
	if (err != CL_SUCCESS) {
		printf("No platforms found. Error: %d\n", err);
		return 0;
	}

	platforms = (cl_platform_id *)malloc(num_of_platforms);
	err = clGetPlatformIDs(num_of_platforms, platforms, NULL);
	if (err != CL_SUCCESS) {
		printf("No platforms found. Error: %d\n", err);
		return 0;
	}
	else {
		int nvidia_platform = 0;
		for (unsigned int i = 0; i<num_of_platforms; i++) {
			clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL);
			if (err != CL_SUCCESS) {
				printf("Could not get information about platform. Error: %d\n", err);
				return 0;
			}
			if (strstr(platform_name, "NVIDIA") != NULL) {
				nvidia_platform = i;
				break;
			}
		}
		err = clGetDeviceIDs(platforms[nvidia_platform], CL_DEVICE_TYPE_GPU, 1, &device_id, &num_of_devices);
		if (err != CL_SUCCESS) {
			printf("Could not get device in platform. Error: %d\n", err);
			return 0;
		}
	}

	context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
	if (err != CL_SUCCESS) {
		printf("Unable to create context. Error: %d\n", err);
		return 0;
	}

	command_queue = clCreateCommandQueue(context, device_id, 0, &err);
	if (err != CL_SUCCESS) {
		printf("Unable to create command queue. Error: %d\n", err);
		return 0;
	}

	program = clCreateProgramWithSource(context, 1, (const char **)&KernelSource, NULL, &err);
	if (err != CL_SUCCESS) {
		printf("Unable to create program. Error: %d\n", err);
		return 0;
	}

	if (clBuildProgram(program, 0, NULL, NULL, NULL, NULL) != CL_SUCCESS) {
		char *log;
		size_t size;
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size); // 1. Länge des Logbuches?
		log = (char *)malloc(size + 1);
		if (log) {
			clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL); // 2. Hole das Logbuch ab
			log[size] = '\0';
			printf("%s", log);
			free(log);
		}
		return 1;
	}


	err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	if (err != CL_SUCCESS) {
		printf("Error building program. Error: %d\n", err);
		return 0;
	}


	kernel = clCreateKernel(program, "matmult_ocl", &err);
	if (err != CL_SUCCESS) {
		printf("Error setting kernel. Error: %d\n", err);
		return 0;
	}

	input1 = clCreateBuffer(context, CL_MEM_READ_ONLY, d1*d2*sizeof(float), NULL, &err);
	input2 = clCreateBuffer(context, CL_MEM_READ_ONLY, d2*d3*sizeof(float), NULL, &err);
	input3 = clCreateBuffer(context, CL_MEM_READ_ONLY, 4 * sizeof(int), NULL, &err);

	output = clCreateBuffer(context, CL_MEM_READ_WRITE, d1*d3*sizeof(float), NULL, &err);

	start_time = omp_get_wtime();

	clEnqueueWriteBuffer(command_queue, input1, CL_TRUE, 0, d1*d2*sizeof(float), *A, 0, NULL, NULL);
	clEnqueueWriteBuffer(command_queue, input2, CL_TRUE, 0, d2*d3*sizeof(float), *B, 0, NULL, NULL);
	clEnqueueWriteBuffer(command_queue, input3, CL_TRUE, 0, 4 * sizeof(int), d, 0, NULL, NULL);

	clSetKernelArg(kernel, 0, sizeof(cl_mem), &input1);
	clSetKernelArg(kernel, 1, sizeof(cl_mem), &input2);
	clSetKernelArg(kernel, 2, sizeof(cl_mem), &input3);
	clSetKernelArg(kernel, 3, sizeof(cl_mem), &output);

	clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);

	clFinish(command_queue);

	clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, d1*d3*sizeof(float), *C, 0, NULL, NULL);
	// for (unsigned int i = 0; i < (unsigned int) d1*d3; i++)
	//	printf("%f\n", C[0][i]);

	openCL_time = omp_get_wtime() - start_time;

	clReleaseMemObject(input1);
	clReleaseMemObject(input2);
	clReleaseMemObject(input3);
	clReleaseMemObject(output);
	clReleaseProgram(program);
	clReleaseKernel(kernel);
	clReleaseCommandQueue(command_queue);
	clReleaseContext(context);

	printf("Running serial algorithm...\n");
	start_time = omp_get_wtime();
	serialC = mult_mat(A, B, d1, d2, d3);
	serial_time = omp_get_wtime() - start_time;

	printf("Checking results... ");
	is_correct(C, serialC, d1, d3);

	printf("Showing stats...\n");
	printf("   serial runtime = %f\n", serial_time);
	printf("   OpenCL runtime = %f\n", openCL_time);
	printf("   Speedup = %f\n", serial_time / openCL_time);
	return 0;
}
int main(int argc, char *argv[])
{
	FILE	*mat;
	FILE	*vec;
	FILE	*end;

	double	main_buf[MAXSIZE];
	double  main_vec_buf[MAXSIZE];
	Cnum	mat_buf[MAXSIZE];
	Cnum	vec_buf[MAXSIZE];
	Cnum	result[MAXSIZE];
	

	double	Row=0, Col=0;
	double	vec_row;

	double  mat_RC_data[6];

	long	mat_in=0;

	int		j=0;
	int		thread=atoi(argv[3]);
	
	double  final_result[2];

	mat = fopen("matrix.dat","rb");
	vec = fopen("vector_input.dat","rb");
	end = fopen("vector_output.dat","wb");


	// exe input number 검사
	if (argc == 4 && atoi(argv[1]) && atoi(argv[2]))
		printf("input = %d x %d, Thread = %d \n",atoi(argv[1]), atoi(argv[2]),atoi(argv[3]));
	else
	{
		fputs("[Error] Not a correct input", stderr);
		exit(1);
	}


	// 파일 유무 검사

	if(mat==NULL)
	{
		fputs("Matrix File error", stderr);
		exit(1);
	}

	if(vec==NULL)
	{
		fputs("Vector File error", stderr);
		exit(1);
	}

	start_time_measurement(); //시간측정 시작

	// 매트릭스 크기

	int point;

	for(int i=3;i>0;i--)
	{
		point = i*4*sizeof(double);
		point = -point;
		if(fseek(mat, point, SEEK_END)==-1)
		{
			printf("error\n");
		}
		fread(&mat_RC_data[j],sizeof(double),2,mat);
		Row=mat_RC_data[j]+1;
		j++;
		Col=mat_RC_data[j]+1;
		j++;
		//printf("row : %f, col : %f\n",Row,Col);
	}

	// matrix.dat 행 열값 부분 오류 검사
	if(mat_RC_data[4]-mat_RC_data[2]>1)
	{
		fputs("[ERROR] Matrix File ROW_DATA error", stderr);
		exit(1);
	}
	if(mat_RC_data[2]-mat_RC_data[0]>1)
	{
		fputs("[ERROR] Matrix File ROW_DATA error", stderr);
		exit(1);
	}
	if(mat_RC_data[5]-mat_RC_data[3]>2)
	{
		fputs("[ERROR] Matrix File ROW_DATA error", stderr);
		exit(1);
	}
	if(mat_RC_data[3]-mat_RC_data[1]>2)
	{
		fputs("[ERROR] Matrix File ROW_DATA error", stderr);
		exit(1);
	}

	//커맨드 입력 행열 과 입력받은 매트릭스 비교

	if(atoi(argv[1])!=Row)
	{
		fputs("[ERROR] 입력한 행값과 매트릭스의 행값이 다릅니다.", stderr);
		exit(1);
	}

	if(atoi(argv[2])!=Col)
	{
		fputs("[ERROR] 입력한 열값과 매트릭스의 열값이 다릅니다.", stderr);
		exit(1);
	}

	printf("전체 열의 값 = %f\n전체 행의 값 = %f \n", Row, Col);

	//vec 행렬 행 크기
	fseek(vec,0,SEEK_END);
	vec_row = ftell(vec)/(sizeof(double)*2);
	printf("벡터행렬 행 값 = %f\n",vec_row);

	//행렬 크기가 다를 시 오류 메세지 출력
	if(Col!=vec_row)
	{
		fputs("[ERROR] Matrix and Vector are not same", stderr);
		exit(1);
	}

	//행렬값 꺼내서 연산
	j=0;
	int button=0;

	fseek(mat,0,SEEK_SET); //파일 포인터 위치 초기화
	fseek(vec,0,SEEK_SET); //파일 포인터 위치 초기화
	fseek(end,0,SEEK_SET); //파일 포인터 위치 초기화

	for(int k=0; k<Row; k++)
	{
		//인덱스 초기화
		button =0;
		j=0;
		fseek(vec,0,SEEK_SET); //파일 포인터 위치 초기화

		//mat 행렬 가져와서 mat_buf[]에 저장 100개씩 -> ////vec 행의 갯수에 따라로 수정해야함 //// 일단은 100개단위로
		for(int i=0; i<Col*4; i++)
		{
			fread(&main_buf[i], sizeof(double), 1, mat);
			//printf("값 : %f ", main_buf[i]);

			if(i>3 && i%4==0)
				j++;

			// 구조체에 값저장
			if(button==2)
				mat_buf[j].realnum=main_buf[i];
			else if(button==3)
			{
				mat_buf[j].imanum=main_buf[i];
			}
			button++;
			if(button==4)
				button=0;
		}

		//vec 행렬 가져와서 vec_buf[]에 저장
		j=0;

		for(int y=0; y<Col*2; y++)
		{
			fread(&main_vec_buf[y],sizeof(double),1,vec);
			if(y>1&&y%2==0)
				j++;
			if(y%2==0)
				vec_buf[j].realnum = main_vec_buf[y];
			else
			{
				vec_buf[j].imanum = main_vec_buf[y];
			}			

		}




		// mat * vec
		/*
		for(int i=0; i<Col; i++)
		result[i]=mult(mat_buf[i],vec_buf[i]); 
		*/

		mult_mat(mat_buf,vec_buf,result,(int)Col,thread);
		result[0]=sum_mat(result,(int)Col);

		/*
		for(int i=1; i<Col; i++)
		{
		result[0]=sum(result[0],result[i]);			
		}
		*/

		fwrite(&result[0].realnum,sizeof(double),1,end);
		fwrite(&result[0].imanum,sizeof(double),1,end);

		//printf("%f, %f \n",final_result[0].realnum,final_result[0].imanum);

	}
	printf("Calculation Complete ");
	end_time_measurement(); //시간 측정 끝

	fclose(mat);
	fclose(vec);
	fclose(end);


	return 0;
}
Exemplo n.º 17
0
int main (void){
	//Creación de variables del sistema
	int *a, *b, *c, N;
	int i,j;
	int iteraciones=100,ind=0;
	
	printf("Ingrese el tamano deseado para las matrices:\n");
	scanf("%d",&N);
	
	printf("Creando espacio e inicializando matrices...\n");
	
	//Asignación e inicialización de memoria
	a=(int*)malloc(N*N*sizeof(int));
	b=(int*)malloc(N*N*sizeof(int));
	c=(int*)malloc(N*N*sizeof(int));
	
	for(i=0;i<N;i++)
	{
		for(j=0;j<N;j++)
		{
			a[i*N+j]=i*j;
			b[i*N+j]=i*j;
			c[i*N+j]=0;
		}
	}
	
	//Cálculo de bloques e hilos
	
	printf("Se va a realizar %d iteraciones de matrices %dx%d\n",iteraciones,N,N);
	
	//Ejecución de kernel
	clock_t start = clock();
	while(ind<iteraciones)
	{
		mult_mat(a,b,c,N);
		ind++;
	}
	clock_t end = clock();
	float seconds = (float)(end - start) / CLOCKS_PER_SEC;
	printf("El tiempo tomado para %d iteraciones fue de %3.5f ms\n",iteraciones,seconds*10);

	/*
	for(i=0;i<N;i++)
	{	
		printf("\n");
		for(j=0;j<N;j++)
		{
			printf("\t%d",a[i*N+j]);
		}
		//printf("\t");
		for(j=0;j<N;j++)
		{
			printf("\t%d",b[i*N+j]);
		}
		//printf("\t");
		for(j=0;j<N;j++)
		{
			printf("\t%d",c[i*N+j]);
		}
	}*/
	
	
	free(a);
	free(b);
	free(c);
	
	return 0;
}
Exemplo n.º 18
0
/** **/
int main (int argc, char* argv[])
{
  int WORK_DIM = 2; // Wie viele Dimensionen hat der Indexraum?
  std::chrono::time_point<std::chrono::system_clock> s_start, s_end, p_start, p_end;


  // Lese den Kernel dynamisch ein: (uebernommen von Foliensatz 9, Folie 20)
  FILE *fp;
  const char *FileName = "matmult.cl";
  char *KernelSource;
  fp = fopen(FileName, "r");
  if (!fp) {
    printf("Can't open kernel source: %s", FileName); exit(1);
  }
  KernelSource = (char *)malloc(MAX_SOURCE_SIZE);
  size_t kernel_s_size = fread(KernelSource, 1, MAX_SOURCE_SIZE, fp);
  fclose(fp);

  cl_int            err;
  cl_platform_id*   platforms = NULL;
  char              platform_name[1024];
  cl_device_id      device_id = NULL;
  cl_uint           num_of_platforms = 0,
                    num_of_devices = 0;
  cl_context        context;
  cl_kernel         kernel;
  cl_command_queue  command_queue;
  cl_program        program;


  err = clGetPlatformIDs(0, NULL, &num_of_platforms);
  if (err != CL_SUCCESS) {
    printf("No platforms found. Error: %d\n", err);
    return 0;
  }

  // Liefert Plattformen
  platforms = (cl_platform_id *)malloc(num_of_platforms);
  err = clGetPlatformIDs(num_of_platforms, platforms, NULL);
  if (err != CL_SUCCESS) {
    printf("No platforms found. Error: %d\n", err);
    return 0;
  } else {
    int nvidia_platform = 0;  // Speichert den Rang der letzten NVIDIA-Plattform
    for (unsigned int i=0; i<num_of_platforms; i++) // Fuer jede Plattform:
    {
      clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL);
      if (err != CL_SUCCESS) {
        printf("Could not get information about platform. Error: %d\n", err);
        return 0;
      }

      if (strstr(platform_name, "NVIDIA") != NULL) { // Falls die Plattform eine NVIDIA-Plattform ist: Speichere ihren Rang
        nvidia_platform = i;
        break;
      }
    }
    // Gibt die ID des Devices der NVIDIA-Plattform zurueck
    err = clGetDeviceIDs(platforms[nvidia_platform], CL_DEVICE_TYPE_GPU, 1, &device_id, &num_of_devices);
    if (err != CL_SUCCESS) {
      printf("Could not get device in platform. Error: %d\n", err);
      return 0;
    }
  }

  // Erschaffe einen OpenCl-context, in dem OpenCl-Datenobjekte verwaltet werden koennen
  context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
  if (err != CL_SUCCESS) {
    printf("Unable to create context. Error: %d\n", err);
    return 0;
  }

  // Initialisiere eine Befehlswarteschleife, die Befehle fuer OpenCl-Objekte speichern kann
  command_queue = clCreateCommandQueue(context, device_id, 0, &err);
  if (err != CL_SUCCESS) {
    printf("Unable to create command queue. Error: %d\n", err);
    return 0;
  }

  // Initialisiere ein Programm und spezifiziere, aus welchem Code dieses kompiliert werden soll
  program = clCreateProgramWithSource(context, 1, (const char **)&KernelSource, (const size_t *)& kernel_s_size, &err);
  if (err != CL_SUCCESS) {
    printf("Unable to create program. Error: %d\n", err);
    return 0;
  }

  // Kompiliere das Programm zur Laufzeit
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  if (err != CL_SUCCESS) {
    // Zeige Compilermeldungen an: (uebernommen von Foliensatz 9, Folie 23)
    char *log;
    size_t size;
    clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size);
    log = (char *)malloc(size+1);
    if (log) {
      clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL);
      log[size] = '\0';
      printf("%s", log);
      free(log);
    }

    printf("Error building program. Error: %d\n", err);
    return 0;
  }

  // Erschaffe einen Kernel und lade oben kompiliertes Programm ein
  kernel = clCreateKernel(program, "matmult", &err);
  if (err != CL_SUCCESS) {
    printf("Error setting kernel. Error: %d\n", err);
    return 0;
  }

  float **A, **B, **C; // Matrizen
  int dim1, dim2, dim3; // Matrixdimensionen
  dim1 = D1; // Zeilen von A, Zeilen von C
  dim2 = D2; // Spalten von A, Zeilen von B
  dim3 = D3; // Spalten von B, Spalten von C

  A = alloc_mat(dim1, dim2);
  B = alloc_mat(dim2, dim3);
  C = alloc_mat(dim1, dim3);

  init_mat(A, dim1, dim2);
  init_mat(B, dim2, dim3);

  cl_mem            in_A, in_B, output;
  // float             data[DATA_SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  size_t global[1] = {dim1*dim3}; // Dimensionen von C
  size_t global_two[2] = {dim1, dim3};

  in_A  = clCreateBuffer (context, CL_MEM_READ_ONLY,  sizeof(float)*dim1*dim2, NULL, &err);
  in_B  = clCreateBuffer (context, CL_MEM_READ_ONLY,  sizeof(float)*dim2*dim3, NULL, &err);
  output = clCreateBuffer (context, CL_MEM_WRITE_ONLY, sizeof(float)*dim1*dim3, NULL, &err);

  clEnqueueWriteBuffer(command_queue, in_A, CL_TRUE, 0, sizeof(float)*dim1*dim2, *A, 0, NULL, NULL);
  clEnqueueWriteBuffer(command_queue, in_B, CL_TRUE, 0, sizeof(float)*dim2*dim3, *B, 0, NULL, NULL);

  clSetKernelArg(kernel, 0, sizeof(cl_mem), &in_A);
  clSetKernelArg(kernel, 1, sizeof(cl_mem), &in_B);
  clSetKernelArg(kernel, 2, sizeof(cl_mem), &output);
  // clSetKernelArg(kernel, 3, sizeof(int), &dim2);
  // clSetKernelArg(kernel, 4, sizeof(int), &dim3);

  clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
  if (WORK_DIM == 2) {
    clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, global_two, NULL, 0, NULL, NULL);
  }

  // Zeitmessung fuer parallele Version
  p_start = std::chrono::system_clock::now();
  err = clFinish(command_queue);
  p_end = std::chrono::system_clock::now();
  std::chrono::duration<double> p_duration = p_end - p_start;


  if (err == CL_INVALID_COMMAND_QUEUE ) {
    printf("CL_INVALID_COMMAND_QUEUE: %d\n", err);
    return 0;
  }

  clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, sizeof(float)*dim1*dim3, *C, 0, NULL, NULL);

  // Ueberpruefe, ob serielle Version und parallele gleich sind:
  float **correct_matrix;
  correct_matrix = alloc_mat(dim1, dim3);

  s_start = std::chrono::system_clock::now(); // Zeitmessung fuer serielle Version
  correct_matrix = mult_mat(A, B, dim1, dim2, dim3);
  s_end = std::chrono::system_clock::now();
  std::chrono::duration<double> s_duration = s_end - s_start;

  is_correct(C, correct_matrix, dim1, dim3); // Numerischer Korrektheitsbeweis

  print_mat(C, dim1, dim3, "C = ");
  print_mat(correct_matrix, dim1, dim3, "correct_matrix = ");
  // printf("Kernel execution time: %f\n", t_end-t_start);

  clReleaseMemObject(in_A);
  clReleaseMemObject(in_B);
  clReleaseMemObject(output);
  clReleaseProgram(program);
  clReleaseKernel(kernel);
  err = clReleaseCommandQueue(command_queue); //!!
  if (err != CL_SUCCESS) {
    printf("Error releasing command queue: %d\n", err);
    return 0;
  }
  clReleaseContext(context);

  printf("Dauer der seriellen Version: %.2f Millisekunden\n", s_duration.count() * 1000);
  printf("Dauer der parallelen Version: %.2f Millisekunden\n", p_duration.count() * 1000);
  printf("Erhaltenes Speed Up: %.2f \n", p_duration.count() / p_duration.count());


  return 0;
}
Exemplo n.º 19
0
void mult_mat4(mat4 a, mat4 b, mat4 c)
{
    mult_mat(a,b,c,4);
}
Exemplo n.º 20
0
void Calculate_HF_deriv(int *tlist,double *vlist,int nfac,int nvert,double *angles,double *Eo,double *E0o,double *up,double TIME,double dist,double Gamma,double A,double Hdist,int N,double WL,double *freqx,double *freqy,int nfreq,double *offset,double *Fr,double *Fi,double *dFdxr,double *dFdxi,double  *dFdyr,double  *dFdyi,double *dFdzr,double *dFdzi,double *dFdAr,double *dFdAi,double *dFdoffr,double *dFdoffi)
{
    double complex *F=calloc(nfreq,sizeof(double complex)); 
 double complex *F0,*FTda,*FTdb,*FTdc,*FTdd,*FTdh,*FTdg,*FTdx,*FTdy,*FTdz,*FTdA;
 FTdx=calloc(nfreq*nvert,sizeof(double complex));
 FTdy=calloc(nfreq*nvert,sizeof(double complex));
 FTdz=calloc(nfreq*nvert,sizeof(double complex));
 FTdA=calloc(nfreq*3,sizeof(double complex));
 F0=calloc(nfreq,sizeof(double complex));
 FTda=malloc(nfreq*sizeof(double complex));
 FTdb=malloc(nfreq*sizeof(double complex));
 FTdc=malloc(nfreq*sizeof(double complex));
 FTdd=malloc(nfreq*sizeof(double complex));
 FTdh=malloc(nfreq*sizeof(double complex));
 FTdg=malloc(nfreq*sizeof(double complex));
 
 double M[3][3],dMb[3][3],dMo[3][3],dMl[3][3],Mt[3][3],dMbT[3][3],dMoT[3][3],dMlT[3][3]; //Rotation matrices and their derivatives and transposes
 double R[3][3],Rdb[3][3],Rdl[3][3],Rdo[3][3],RT[3][3]; //Projection matrix, and derivatives of combined projection+rotation matrix
 double dEdb[3],dEdl[3],dEdo[3],dE0db[3],dE0dl[3],dE0do[3];
 double dndx1[3],dndx2[3],dndx3[3],dndy1[3],dndy2[3],dndy3[3],dndz1[3],dndz2[3],dndz3[3]; //Derivatives of the facet normal vector
 double dBdx1,dBdy1,dBdz1,dBdx2,dBdy2,dBdz2,dBdx3,dBdy3,dBdz3,dBdb,dBdl,dBdo; //Derivatives of facet brightness
 double *dTBdx,*dTBdy,*dTBdz; //Derivatives of total brightness, allocating memory
 double *Flux,*Fldx,*Fldy,*Fldz,*FldA;
 Flux=calloc(nfac,sizeof(double));
 Fldx=calloc(nfac*nvert,sizeof(double));
 Fldy=calloc(nfac*nvert,sizeof(double));
 Fldz=calloc(nfac*nvert,sizeof(double));
 FldA=calloc(nfac*3,sizeof(double));
 double dTBdA[3]={0};
 dTBdx=calloc(nvert,sizeof(double));
 dTBdy=calloc(nvert,sizeof(double));
 dTBdz=calloc(nvert,sizeof(double));
 double dmudx1,dmudy1,dmudz1,dmudx2,dmudy2,dmudz2,dmudx3,dmudy3,dmudz3;
 double dmu0dx1,dmu0dy1,dmu0dz1,dmu0dx2,dmu0dy2,dmu0dz2,dmu0dx3,dmu0dy3,dmu0dz3;
 double dmudl,dmudb,dmudo,dmu0dl,dmu0db,dmu0do; //Derivatives of mu and mu0
 double dAdx[3],dAdy[3],dAdz[3]; //Facet area derivatives
 double dadx,dady,dadz,dbdx,dbdy,dbdz; //Derivatives of projected vertices
 double E[3],E0[3]; //Earth and Sun direction, rotated
 double side1[3],side2[3];
 double n[3];
 double v1db[3],v2db[3],v3db[3],v1dl[3],v2dl[3],v3dl[3],v1do[3],v2do[3],v3do[3]; //Derivatives of 2d vertices wrt angles
 double vr1[3],vr2[3],vr3[3];
 double v1[3],v2[3],v3[3];
 double complex scale;

 double dp;
 double B,TB=0.0;
 double norm;
 double mut,mu0t;
 int t1,t2,t3;
int j1,j2,j3;
 double mu,mu0,area;
 double *normal;
 int *visible;
 int tb1,tb2,tb3; //Indices to the vertices of possible blocker facet
 int blocked=0;
 //Distance km->arcsec
 dp=1/(dist*149597871.0)*180.0/PI*3600.0;
 visible=calloc(nfac,sizeof(int));
 //Calculate_Frame_Matrix_Derivatives(Eo,angles,TIME,rfreq,R,Rdb,Rdl,Rdo);
  Calculate_Frame_Matrix(Eo,up,R);
  
  
  //Calculate frame change matrix
 
 
 //FacetsOverHorizon(tlist,vlist,nfac,nvert,normal,centroid,NumofBlocks,IndexofBlocks);
 
 rotate(angles[0],angles[1],angles[2],0.0,TIME,M,dMb,dMl,dMo);
 //Construct asteroid->Camera frame matrix, which is
 //asteroid->world frame->camera frame
 transpose(M,Mt); //Transpose, since we rotate the model, not view directions
 transpose(dMb,dMbT);
 transpose(dMl,dMlT);
 transpose(dMo,dMoT);
 mult_mat(R,Mt,RT); 
 mult_vector(M,Eo,E);
 mult_vector(M,E0o,E0);
 
 mult_mat(R,dMbT,Rdb);
 mult_mat(R,dMlT,Rdl);
 mult_mat(R,dMoT,Rdo);
//Derivatives of E,E0 wrt beta,lambda,omega
 mult_vector(dMb,Eo,dEdb);
 mult_vector(dMl,Eo,dEdl);
 mult_vector(dMo,Eo,dEdo);
 mult_vector(dMb,E0o,dE0db);
 mult_vector(dMl,E0o,dE0dl);
 mult_vector(dMo,E0o,dE0do);
 dadx=RT[0][0];
  dady=RT[0][1];
  dadz=RT[0][2];
  dbdx=RT[1][0];
  dbdy=RT[1][1];
  dbdz=RT[1][2];
 /*For each facet,
  * 1)Check if facet is visible
  * 2) Calculate echo
  * 3) Convert triangle to range-Doppler frame
  * 4) Calculate FT
  */
//Find actual blockers
FindActualBlockers(tlist,vlist,nfac,nvert,E,E,1,visible);
 //visible is nfac vector, visible[j]=1 if facet (j+1)th facet is visible
//NOTE INDEXING
Calculate_Radiance(tlist,vlist,nfac,nvert,angles,Eo,E0o,TIME,Gamma, A,Hdist,WL,N,Flux,Fldx,Fldy,Fldz,FldA,1);

 //for(int j=0;j<nfac;j++)
for(int j=0;j<nfac;j++) 
{
   if(visible[j]==0)
     continue;
  //Calculate normal from facet vertices
   //Vertex indices of the current facet
   //Note that C indices from 0, matlab from 1
   j1=tlist[j*3]-1;
   j2=tlist[j*3+1]-1;
   j3=tlist[j*3+2]-1;
   //Current vertices
  
   
   for(int i=0;i<3;i++)
   {
   v1[i]=*(vlist+j1*3+i)*dp; //convert km->arcsec
   v2[i]=*(vlist+j2*3+i)*dp;
   v3[i]=*(vlist+j3*3+i)*dp;
   }
   
    //Calculate Normal derivatives (in the original frame)
    Calculate_Area_and_Normal_Derivative(v1,v2,v3,n,dndx1,dndx2,dndx3,dndy1,dndy2,dndy3,dndz1,dndz2,dndz3,&area,dAdx,dAdy,dAdz);
    
   //Calculate normals and centroids
   
   mu=DOT(E,n);
   
  //Convert to camera frame
   mult_vector(RT,v1,vr1);
   mult_vector(RT,v2,vr2);
   mult_vector(RT,v3,vr3);
  
    
     //Now we should convert to frequency domain, ie calculate the contribution of each facet
  //   Calc_FTC(freqx,freqy,nfreq,vr1[0],vr1[1],vr2[0],vr2[1],vr3[0],vr3[1],F0);
     Calc_FTC_deriv(freqx,freqy,nfreq,vr1[0],vr1[1],vr2[0],vr2[1],vr3[0],vr3[1],F0,FTda,FTdb,FTdc,FTdd,FTdg,FTdh);
  
    //Derivatives wrt angles
     mult_vector(Rdb,v1,v1db);
     mult_vector(Rdb,v2,v2db);
     mult_vector(Rdb,v3,v3db);
     mult_vector(Rdl,v1,v1dl);
     mult_vector(Rdl,v2,v2dl);
     mult_vector(Rdl,v3,v3dl);
     mult_vector(Rdo,v1,v1do);
     mult_vector(Rdo,v2,v2do);
     mult_vector(Rdo,v3,v3do);
     //Derivatives of mu,mu0
     dmudx1=DOT(E,dndx1);
     dmudx2=DOT(E,dndx2);
     dmudx3=DOT(E,dndx3);
     dmudy1=DOT(E,dndy1);
     dmudy2=DOT(E,dndy2);
     dmudy3=DOT(E,dndy3);
     dmudz1=DOT(E,dndz1);
     dmudz2=DOT(E,dndz2);
     dmudz3=DOT(E,dndz3);
     dmudb=DOT(dEdb,n);
     dmudl=DOT(dEdl,n);
     dmudo=DOT(dEdo,n);
     B=Flux[j];
   //Derivatives of B
  
   dBdx1=Fldx[j*nvert+j1]/dp;
   dBdx2=Fldx[j*nvert+j2]/dp;
   dBdx3=Fldx[j*nvert+j3]/dp;
   
   dBdy1=Fldy[j*nvert+j1]/dp;
   dBdy2=Fldy[j*nvert+j2]/dp;
   dBdy3=Fldy[j*nvert+j3]/dp;
   
   dBdz1=Fldz[j*nvert+j1]/dp;
   dBdz2=Fldz[j*nvert+j2]/dp;
   dBdz3=Fldz[j*nvert+j3]/dp;
   dBdb=FldA[3*j];
   dBdl=FldA[3*j+1];
   dBdo=FldA[3*j+2];
   //Derivative of total brightness
   dTBdx[j1]+=dBdx1*area*mu+B*dAdx[0]*mu+B*area*dmudx1;
   dTBdx[j2]+=dBdx2*area*mu+B*dAdx[1]*mu+B*area*dmudx2;
   dTBdx[j3]+=dBdx3*area*mu+B*dAdx[2]*mu+B*area*dmudx3;
   dTBdy[j1]+=dBdy1*area*mu+B*dAdy[0]*mu+B*area*dmudy1;
   dTBdy[j2]+=dBdy2*area*mu+B*dAdy[1]*mu+B*area*dmudy2;
   dTBdy[j3]+=dBdy3*area*mu+B*dAdy[2]*mu+B*area*dmudy3;
   dTBdz[j1]+=dBdz1*area*mu+B*dAdz[0]*mu+B*area*dmudz1;
   dTBdz[j2]+=dBdz2*area*mu+B*dAdz[1]*mu+B*area*dmudz2;
   dTBdz[j3]+=dBdz3*area*mu+B*dAdz[2]*mu+B*area*dmudz3;
   dTBdA[0]+=dBdb*area*mu+B*area*dmudb;
   dTBdA[1]+=dBdl*area*mu+B*area*dmudl;
   dTBdA[2]+=dBdo*area*mu+B*area*dmudo;
   
     for(int jf=0;jf<nfreq;jf++)
     {
     F[jf]+=B*F0[jf];
     
       
       FTdx[jf*nvert+j1]+=dBdx1*F0[jf]+B*(FTda[jf]*dadx+FTdb[jf]*dbdx);
       FTdx[jf*nvert+j2]+=dBdx2*F0[jf]+B*(FTdc[jf]*dadx+FTdd[jf]*dbdx);
       FTdx[jf*nvert+j3]+=dBdx3*F0[jf]+B*(FTdg[jf]*dadx+FTdh[jf]*dbdx);
       
       FTdy[jf*nvert+j1]+=dBdy1*F0[jf]+B*(FTda[jf]*dady+FTdb[jf]*dbdy);
       FTdy[jf*nvert+j2]+=dBdy2*F0[jf]+B*(FTdc[jf]*dady+FTdd[jf]*dbdy);
       FTdy[jf*nvert+j3]+=dBdy3*F0[jf]+B*(FTdg[jf]*dady+FTdh[jf]*dbdy);
       
       FTdz[jf*nvert+j1]+=dBdz1*F0[jf]+B*(FTda[jf]*dadz+FTdb[jf]*dbdz);
       FTdz[jf*nvert+j2]+=dBdz2*F0[jf]+B*(FTdc[jf]*dadz+FTdd[jf]*dbdz);
       FTdz[jf*nvert+j3]+=dBdz3*F0[jf]+B*(FTdg[jf]*dadz+FTdh[jf]*dbdz);
        //angle derivatives
       
       FTdA[jf*3+0]+=dBdb*F0[jf]+B*(FTda[jf]*v1db[0]+FTdb[jf]*v1db[1]+FTdc[jf]*v2db[0]+FTdd[jf]*v2db[1]+FTdg[jf]*v3db[0]+FTdh[jf]*v3db[1]);
       FTdA[jf*3+1]+=dBdl*F0[jf]+B*(FTda[jf]*v1dl[0]+FTdb[jf]*v1dl[1]+FTdc[jf]*v2dl[0]+FTdd[jf]*v2dl[1]+FTdg[jf]*v3dl[0]+FTdh[jf]*v3dl[1]);
       FTdA[jf*3+2]+=dBdo*F0[jf]+B*(FTda[jf]*v1do[0]+FTdb[jf]*v1do[1]+FTdc[jf]*v2do[0]+FTdd[jf]*v2do[1]+FTdg[jf]*v3do[0]+FTdh[jf]*v3do[1]);
     }
    
      TB=TB+B*area*mu;
 
}

//Normalize with total brightness
double complex temp;
for(int j=0;j<nfreq;j++)
{
  scale=cexp(2.0*PI*I*(offset[0]*freqx[j]+offset[1]*freqy[j]));
  for(int k=0;k<nvert;k++)
  {
      temp=dp*scale*(FTdx[j*nvert+k]*TB-F[j]*dTBdx[k])/pow(TB,2);
    dFdxr[j*nvert+k]=creal(temp);
    dFdxi[j*nvert+k]=cimag(temp);
    temp=dp*scale*(FTdy[j*nvert+k]*TB-F[j]*dTBdy[k])/pow(TB,2);
    dFdyr[j*nvert+k]=creal(temp);
    dFdyi[j*nvert+k]=cimag(temp);
    temp=dp*scale*(FTdz[j*nvert+k]*TB-F[j]*dTBdz[k])/pow(TB,2);
    dFdzr[j*nvert+k]=creal(temp);
    dFdzi[j*nvert+k]=cimag(temp);
  }
  temp=scale*(FTdA[j*3+0]*TB-F[j]*dTBdA[0])/pow(TB,2);
  dFdAr[j*3+0]=creal(temp);
  dFdAi[j*3+0]=cimag(temp);
  temp=scale*(FTdA[j*3+1]*TB-F[j]*dTBdA[1])/pow(TB,2);
  dFdAr[j*3+1]=creal(temp);
  dFdAi[j*3+1]=cimag(temp);
  temp=scale*(FTdA[j*3+2]*TB-F[j]*dTBdA[2])/pow(TB,2);
  dFdAr[j*3+2]=creal(temp);
  dFdAi[j*3+2]=cimag(temp);
  temp=cexp(2.0*PI*I*(offset[0]*freqx[j]+offset[1]*freqy[j]))*F[j]/TB;
  Fr[j]=creal(temp);
  Fi[j]=cimag(temp);
  temp=2.0*PI*I*freqx[j]*F[j];
  dFdoffr[j*2+0]=creal(temp);
  dFdoffi[j*2+0]=cimag(temp);
  temp=2.0*PI*I*freqy[j]*F[j];
  dFdoffr[j*2+1]=creal(temp);
  dFdoffi[j*2+1]=cimag(temp);
}


free(FTdx);
free(FTdy);
free(FTdz);
free(FTdA);
free(dTBdx);
free(dTBdy);
free(dTBdz);

free(FTda);
free(FTdb);
free(FTdc);
free(FTdd);
free(FTdg);
free(FTdh);
free(F0);
free(visible);
free(Flux);
free(Fldx);
free(Fldy);
free(Fldz);
free(FldA);
free(F);
}