/*
 * corStruct_recalc - apply precomputed per-group factors to Xy in place.
 *
 * pdims[0] is handed to mult_mat next to every Xy pointer (presumably the
 * leading dimension of Xy -- confirm against mult_mat), pdims[1] is the
 * number of groups, the group lengths begin at pdims[4] and the group
 * start offsets follow directly after the lengths.
 *
 * Factor stores one len x len block per group, back to back.  For each
 * group the matching slice of Xy is passed to mult_mat both as the first
 * argument and as the last matrix argument, so the slice is updated in
 * place.  *ZXcol is forwarded unchanged as the final argument.
 */
void corStruct_recalc(double *Xy, longint *pdims, longint *ZXcol, double *Factor)
{
    longint ldXy = pdims[0];
    longint ngroups = pdims[1];
    longint *grpLen = pdims + 4;
    longint *grpStart = grpLen + ngroups;
    double *block = Factor;
    longint g = 0;

    while (g < ngroups) {
        longint *np = grpLen + g;
        double *slice = Xy + grpStart[g];

        mult_mat(slice, ldXy, block, *np, *np, *np, slice, ldXy, *ZXcol);

        /* step over the block that was just consumed */
        block += (*np) * (*np);
        g++;
    }
}
/*
 * Driver: builds two n x n integer matrices stored as flat arrays
 * (a[i] = i, b[i] = i / 2) and passes them, together with an output
 * buffer, to mult_mat.
 *
 * Returns 0 on success and 1 when a buffer cannot be allocated.
 */
int main(void)
{
    int n = 750;
    /* do the size arithmetic in size_t so it cannot overflow int */
    size_t count = (size_t) n * (size_t) n;
    int *a = (int *) malloc(sizeof(int) * count);
    int *b = (int *) malloc(sizeof(int) * count);
    int *c = (int *) malloc(sizeof(int) * count);

    if (a == NULL || b == NULL || c == NULL) {
        /* free(NULL) is a no-op, so releasing all three is safe */
        free(a);
        free(b);
        free(c);
        return 1;
    }

    for (int i = 0; i < n * n; i++) {
        a[i] = i;
        b[i] = i / 2;
    }

    mult_mat(a, b, c, n);

    free(a);
    free(b);
    free(c);
    return 0;
}
/*
 * AR1_recalc - build an AR(1) factor for every group and apply it to Xy.
 *
 * *par is expected in unconstrained form; it is overwritten with
 * safe_phi(*par) before any factor is built.
 *
 * pdims layout as used here: pdims[0] is passed to mult_mat with each Xy
 * pointer, pdims[1] is the number of groups, group lengths start at
 * pdims[4] and the group start offsets follow them.
 *
 * For each group a zero-filled len x len scratch matrix is allocated,
 * filled by AR1_fact (which also receives logdet), applied to the group's
 * slice of Xy in place through mult_mat, and released again.
 */
void AR1_recalc(double *Xy, longint *pdims, longint *ZXcol, double *par, double *logdet)
{
    longint ldXy = pdims[0];
    longint ngroups = pdims[1];
    longint *grpLen = pdims + 4;
    longint *grpStart = grpLen + ngroups;
    longint g;

    *par = safe_phi(*par);

    for (g = 0; g < ngroups; g++) {
        longint *np = grpLen + g;
        double *slice = Xy + grpStart[g];
        double *work = Calloc((*np) * (*np), double);

        AR1_fact(par, np, work, logdet);
        mult_mat(slice, ldXy, work, *np, *np, *np, slice, ldXy, *ZXcol);
        Free(work);
    }
}
/*
 * CAR1_recalc - build a continuous-time AR(1) factor for every group and
 * apply it to Xy.
 *
 * *par is expected in unconstrained form; it is overwritten with
 * exp(p) / (1 + exp(p)) before any factor is built.
 *
 * pdims layout as used here: pdims[0] is passed to mult_mat with each Xy
 * pointer, pdims[1] is the number of groups, group lengths start at
 * pdims[4] and the group start offsets follow them.  The same start
 * offsets index into time.
 *
 * For each group a zero-filled len x len scratch matrix is allocated,
 * filled by CAR1_fact (which also receives logdet), applied to the
 * group's slice of Xy in place through mult_mat, and released again.
 */
void CAR1_recalc(double *Xy, longint *pdims, longint *ZXcol, double *par, double *time, double *logdet)
{
    longint ldXy = pdims[0];
    longint ngroups = pdims[1];
    longint *grpLen = pdims + 4;
    longint *grpStart = grpLen + ngroups;
    longint g;
    double expPar = exp(*par);

    *par = expPar / (1.0 + expPar);

    for (g = 0; g < ngroups; g++) {
        longint *np = grpLen + g;
        double *slice = Xy + grpStart[g];
        double *work = Calloc((*np) * (*np), double);

        CAR1_fact(par, time + grpStart[g], np, work, logdet);
        mult_mat(slice, ldXy, work, *np, *np, *np, slice, ldXy, *ZXcol);
        Free(work);
    }
}
/*
 * compSymm_recalc - build a compound-symmetry factor for every group and
 * apply it to Xy.
 *
 * *par is expected in unconstrained form; it is overwritten with
 * (exp(p) + *inf) / (exp(p) + 1) before any factor is built.
 *
 * pdims layout as used here: pdims[0] is passed to mult_mat with each Xy
 * pointer, pdims[1] is the number of groups, group lengths start at
 * pdims[4] and the group start offsets follow them.
 *
 * For each group a zero-filled len x len scratch matrix is allocated,
 * filled by compSymm_fact (which also receives logdet), applied to the
 * group's slice of Xy in place through mult_mat, and released again.
 */
void compSymm_recalc(double *Xy, longint *pdims, longint *ZXcol, double *par, double *inf, double *logdet)
{
    longint ldXy = pdims[0];
    longint ngroups = pdims[1];
    longint *grpLen = pdims + 4;
    longint *grpStart = grpLen + ngroups;
    longint g;
    double expPar = exp(*par);

    *par = (expPar + *inf) / (expPar + 1.0);

    for (g = 0; g < ngroups; g++) {
        longint *np = grpLen + g;
        double *slice = Xy + grpStart[g];
        double *work = Calloc((*np) * (*np), double);

        compSymm_fact(par, np, work, logdet);
        mult_mat(slice, ldXy, work, *np, *np, *np, slice, ldXy, *ZXcol);
        Free(work);
    }
}
/*
 * nat_recalc - expand the parameters into a full correlation vector with
 * nat_fullCorr, then build a factor for every group with symm_fact and
 * apply it to Xy.
 *
 * pars is expected in unconstrained form.  The correlation vector has
 * maxC * (maxC - 1) / 2 entries and lives only for the duration of the
 * call.
 *
 * pdims layout as used here: pdims[0] is passed to mult_mat with each Xy
 * pointer, pdims[1] is the number of groups, group lengths start at
 * pdims[4] and the group start offsets follow them.  The same start
 * offsets index into time.
 */
void nat_recalc(double *Xy, longint *pdims, longint *ZXcol, double *pars, longint *time, longint *maxC, double *logdet)
{
    longint ldXy = pdims[0];
    longint ngroups = pdims[1];
    longint *grpLen = pdims + 4;
    longint *grpStart = grpLen + ngroups;
    longint g;
    double *fullCorr = Calloc(*maxC * (*maxC - 1) / 2, double);

    nat_fullCorr(pars, maxC, fullCorr);

    for (g = 0; g < ngroups; g++) {
        longint *np = grpLen + g;
        double *slice = Xy + grpStart[g];
        double *work = Calloc((*np) * (*np), double);

        symm_fact(fullCorr, time + grpStart[g], np, maxC, work, logdet);
        mult_mat(slice, ldXy, work, *np, *np, *np, slice, ldXy, *ZXcol);
        Free(work);
    }

    Free(fullCorr);
}
/*
 * spatial_recalc - build a spatial-correlation factor for every group and
 * apply it to Xy.
 *
 * par is expected in unconstrained form and is rewritten in place:
 *   par[0] <- exp(par[0]), and for the spherical (1) and linear (4)
 *             classes *minD is added afterwards;
 *   par[1] <- 1 / (1 + exp(par[1]))   ("1 - nugget"), only when *nug == 1.
 *
 * pdims[2] selects the correlation function:
 *   1 spherical, 2 exponential, 3 Gaussian, 4 linear, 5 rational
 *   quadratic.  Any other value is reported through error().
 *
 * pdims[0] is passed to mult_mat with each Xy pointer, pdims[1] is the
 * number of groups and the group lengths start at pdims[4].  The offsets
 * stored after the lengths index into dist only; the position inside Xy
 * is advanced by the group length after each group.
 */
void spatial_recalc(double *Xy, longint *pdims, longint *ZXcol, double *par, double *dist, double *minD, longint *nug, double *logdet)
{
    longint ldXy = pdims[0];
    longint ngroups = pdims[1];
    longint spClass = pdims[2];
    longint *grpLen = pdims + 4;
    longint *distStart = grpLen + ngroups;
    longint g;
    double (*corr)(double) = dummy_corr;
    double *slice = Xy;

    par[0] = exp(par[0]);
    if (*nug == 1) {
        double expNug = exp(par[1]);
        par[1] = 1 / (1.0 + expNug);    /* 1 - nugget */
    }

    if (spClass == 1) {                 /* spherical */
        corr = spher_corr;
        par[0] += *minD;
    } else if (spClass == 2) {          /* exponential */
        corr = exp_corr;
    } else if (spClass == 3) {          /* Gaussian */
        corr = Gaus_corr;
    } else if (spClass == 4) {          /* linear */
        corr = lin_corr;
        par[0] += *minD;
    } else if (spClass == 5) {          /* rational quadratic */
        corr = ratio_corr;
    } else {
        error(_("Unknown spatial correlation class"));
    }

    for (g = 0; g < ngroups; g++) {
        longint *np = grpLen + g;
        double *work = Calloc((*np) * (*np), double);

        spatial_fact(par, dist + distStart[g], np, nug, corr, work, logdet);
        mult_mat(slice, ldXy, work, *np, *np, *np, slice, ldXy, *ZXcol);
        slice += *np;
        Free(work);
    }
}
/*
 * ARMA_recalc - build an ARMA factor for every group and apply it to Xy.
 *
 * pars is expected in unconstrained form; ARMA_constCoef is called on it
 * first and ARMA_fullCorr then fills a correlation vector of
 * *maxlag + 1 entries from it.
 *
 * pdims layout as used here: pdims[0] is passed to mult_mat with each Xy
 * pointer, pdims[1] is the number of groups, group lengths start at
 * pdims[4] and the group start offsets follow them.  The same start
 * offsets index into time.
 *
 * For each group a zero-filled len x len scratch matrix is allocated,
 * filled by ARMA_fact (which also receives logdet), applied to the
 * group's slice of Xy in place through mult_mat, and released again.
 */
void ARMA_recalc(double *Xy, longint *pdims, longint *ZXcol, double *pars, longint *p, longint *q, longint *time, longint *maxlag, double *logdet)
{
    longint N = pdims[0], M = pdims[1], *len = pdims + 4, *start = len + M, i;
    double *crr = Calloc(*maxlag + 1L, double);

    /* parameters assumed in unconstrained form */
    ARMA_constCoef(p, q, pars);
    ARMA_fullCorr(p, q, maxlag, pars, crr);

    for (i = 0; i < M; i++) {
        double *Factor = Calloc(len[i] * len[i], double);

        ARMA_fact(crr, time + start[i], &len[i], Factor, logdet);
        mult_mat(Xy + start[i], N, Factor, len[i], len[i], len[i],
                 Xy + start[i], N, *ZXcol);
        Free(Factor);
    }

    /* the correlation vector was previously leaked on every call */
    Free(crr);
}
/*
 * HF_recalc - transform the parameters, then build a factor for every
 * group with HF_fact and apply it to Xy.
 *
 * par is expected in unconstrained form.  Its first *maxC entries are
 * overwritten with 2 * (exp(par[k]) + lower) + 1, where
 * lower = -1 / (2 * maxC).
 *
 * pdims layout as used here: pdims[0] is passed to mult_mat with each Xy
 * pointer, pdims[1] is the number of groups, group lengths start at
 * pdims[4] and the group start offsets follow them.  The same start
 * offsets index into time.
 */
void HF_recalc(double *Xy, longint *pdims, longint *ZXcol, double *par, longint *time, longint *maxC, double *logdet)
{
    longint ldXy = pdims[0];
    longint ngroups = pdims[1];
    longint *grpLen = pdims + 4;
    longint *grpStart = grpLen + ngroups;
    longint k, g;
    double lower = -1.0 / (2.0 * ((double) *maxC));

    for (k = 0; k < *maxC; k++) {
        par[k] = 2.0 * (exp(par[k]) + lower) + 1.0;
    }

    for (g = 0; g < ngroups; g++) {
        longint *np = grpLen + g;
        double *slice = Xy + grpStart[g];
        double *work = Calloc((*np) * (*np), double);

        HF_fact(par, time + grpStart[g], np, work, logdet);
        mult_mat(slice, ldXy, work, *np, *np, *np, slice, ldXy, *ZXcol);
        Free(work);
    }
}
/*
 * my_rotation - apply a rotation step whose angle, in degrees, is read
 * from argv[i + 1].
 *
 * The angle is converted to radians and its cosine and sine are handed to
 * fill_mat_rot, which fills a local 9-entry matrix.  That matrix is then
 * passed with matrice to mult_mat (presumably accumulating the transform
 * into matrice -- confirm against mult_mat), a message is printed, and
 * the first two entries of resultat are recombined using entries 0, 1, 3
 * and 4 of the local matrix.
 *
 * Returns resultat.
 */
double *my_rotation(double *resultat, double *matrice, char **argv, int i)
{
    double rot[9];
    double degrees = atof(argv[i + 1]);
    double radians = (degrees * M_PI) / 180;
    double cos_a = cos(radians);
    double sin_a = sin(radians);
    double x;
    double y;

    fill_mat_rot(rot, cos_a, sin_a);
    mult_mat(matrice, rot);
    printf("\033[01;37mRotation par rapport à un angle de %s degré(s)\n", argv[i + 1]);

    /* both outputs are computed from the coordinates as they were on entry */
    x = resultat[0];
    y = resultat[1];
    resultat[0] = x * rot[0] + y * rot[1];
    resultat[1] = x * rot[3] + y * rot[4];
    return resultat;
}
/*
 * my_symetrie - apply a reflection step about an axis whose angle, in
 * degrees, is read from argv[i + 1].
 *
 * The angle is converted to radians and the cosine and sine of twice that
 * angle are handed to fill_mat_sym, which fills a local 9-entry matrix.
 * That matrix is then passed with matrice to mult_mat (presumably
 * accumulating the transform into matrice -- confirm against mult_mat),
 * a message is printed, and the first two entries of resultat are
 * recombined using entries 0, 1, 3 and 4 of the local matrix.
 *
 * Returns resultat.
 */
double *my_symetrie(double *resultat, double *matrice, char **argv, int i)
{
    double sym[9];
    double degrees = atof(argv[i + 1]);
    double radians = (degrees * M_PI) / 180;
    double cos_2a = cos(2 * radians);
    double sin_2a = sin(2 * radians);
    double x;
    double y;

    fill_mat_sym(sym, cos_2a, sin_2a);
    mult_mat(matrice, sym);
    printf("\033[01;37mSymétrie par rapport à un axe de %s degré(s)\n", argv[i + 1]);

    /* both outputs are computed from the coordinates as they were on entry */
    x = resultat[0];
    y = resultat[1];
    resultat[0] = x * sym[0] + y * sym[1];
    resultat[1] = x * sym[3] + y * sym[4];
    return resultat;
}
/*
 * vecxmat - vector times matrix.
 *
 * v is copied into a temporary 1 x size matrix, which is multiplied by
 * mat (nrow x ncol) through mult_mat.  The first row of the product is
 * copied into a vector of ncol entries obtained from new_double_vector,
 * and that vector is returned (presumably owned by the caller -- confirm).
 * Both temporary matrices are released before returning.
 */
double *vecxmat(double *v, int size, double **mat, int nrow, int ncol)
{
    double **row_mat = NULL;
    double **product = NULL;
    double *out = NULL;
    int k;

    /* wrap the vector in a 1 x size two-dimensional array */
    row_mat = new_double_matrix(1, size);
    k = 0;
    while (k < size) {
        row_mat[0][k] = v[k];
        k++;
    }

    /* matrix-by-matrix product */
    product = mult_mat(row_mat, 1, size, mat, nrow, ncol);

    /* copy the single result row out */
    out = new_double_vector(ncol);
    k = 0;
    while (k < ncol) {
        out[k] = product[0][k];
        k++;
    }

    free_double_matrix(row_mat);
    free_double_matrix(product);

    return out;
}
/*
 * matxvec - matrix times vector.
 *
 * v is copied into a temporary size x 1 matrix, and mat (nrow x ncol) is
 * multiplied by it through mult_mat.  The first column of the product is
 * copied into a vector of nrow entries obtained from new_double_vector,
 * and that vector is returned (presumably owned by the caller -- confirm).
 * Both temporary matrices are released before returning.
 */
double *matxvec(double **mat, int nrow, int ncol, double *v, int size)
{
    double **col_mat = NULL;
    double **product = NULL;
    double *out = NULL;
    int k;

    /* wrap the vector in a size x 1 two-dimensional array */
    col_mat = new_double_matrix(size, 1);
    k = 0;
    while (k < size) {
        col_mat[k][0] = v[k];
        k++;
    }

    /* matrix-by-matrix product */
    product = mult_mat(mat, nrow, ncol, col_mat, size, 1);

    /* copy the single result column out */
    out = new_double_vector(nrow);
    k = 0;
    while (k < nrow) {
        out[k] = product[k][0];
        k++;
    }

    free_double_matrix(col_mat);
    free_double_matrix(product);

    return out;
}
void Calculate_HF(int *tlist,double *vlist,int nfac,int nvert,double *angles,double *Eo,double *E0o,double *up,double TIME,double dist,double Gamma,double A,double Hdist,int N,double WL,double *freqx,double *freqy,int nfreq,double *offset,double *Fr,double *Fi) { double complex *F=calloc(nfreq,sizeof(double complex)); double complex *F0; F0=(double complex*)calloc(nfreq,sizeof(double complex)); double *Flux,*Fldx,*Fldy,*Fldz,*FldA; Flux=(double*)calloc(nfac,sizeof(double)); double M[3][3],dMb[3][3],dMo[3][3],dMl[3][3],Mt[3][3]; double R[3][3],Rdb[3][3],Rdl[3][3],Rdo[3][3],RT[3][3]; double E[3],E0[3]; double normalr[3],side1[3],side2[3]; double dechdx[3],dechdy[3],dechdz[3],dechdA[3]; double n[3],*nb,*cent; double *vb1,*vb2,*vb3; double vr1[3],vr2[3],vr3[3]; double *v1,*v2,*v3; double scale; double complex tscale,FTC; double dp; double B,TB=0; double norm; int t1,t2,t3,blocker,sign; int j1,j2,j3; double mu,mu0,area,mub,ech,rexp; double *normal,*centroid; int *visible; int tb1,tb2,tb3; //Indices to the vertices of possible blocker facet int blocked=0; //Distance km->arcsec dp=1/(dist*149597871.0)*180.0/PI*3600.0; visible=calloc(nfac,sizeof(int)); //Allocate for memory // normal=(double*)malloc(3*nfac*sizeof(double)); // centroid=(double*)mxCalloc(3*nfac,sizeof(double)); // IndexofBlocks=(int*)mxCalloc(nfac,sizeof(int)); // NumofBlocks=(int*)mxCalloc(nfac,sizeof(int)); //Calculate frame change matrix Calculate_Frame_Matrix(Eo,up,R); //FacetsOverHorizon(tlist,vlist,nfac,nvert,normal,centroid,NumofBlocks,IndexofBlocks); rotate(angles[0],angles[1],angles[2],0.0,TIME,M,dMb,dMl,dMo); //Construct asteroid->Camera frame matrix, which is //asteroid->world frame->camera frame transpose(M,Mt); //Transpose, since we rotate the model, not view directions mult_mat(R,Mt,RT); mult_vector(M,Eo,E); mult_vector(M,E0o,E0); /*For each facet, * 1)Check if facet is visible * 2) Calculate echo * 3) Convert triangle to range-Doppler frame * 4) Calculate FT */ //Find actual blockers 
FindActualBlockers(tlist,vlist,nfac,nvert,E,E,1,visible); Calculate_Radiance(tlist,vlist,nfac,nvert,angles,Eo,E0o,TIME,Gamma, A,Hdist,WL,N,Flux,Fldx,Fldy,Fldz,FldA,0); //for(int i=27;i<nfac;i++) // mexPrintf("fl%d: %.10e\n",i+1, Flux[i]); //visible is nfac vector, visible[j]=1 if facet (j+1)th facet is visible //NOTE INDEXING //mexPrintf("%f %f %f\n",vlist[0],vlist[1],vlist[2]); for(int j=0;j<nfac;j++) { if(visible[j]==0) continue; //Calculate normal from facet vertices //Vertex indices of the current facet //Note that C indices from 0, matlab from 1 j1=tlist[j*3]-1; j2=tlist[j*3+1]-1; j3=tlist[j*3+2]-1; //Current vertices v1=vlist+j1*3; v2=vlist+j2*3; v3=vlist+j3*3; //Calculate normals and centroids for(int i=0;i<3;i++) { side1[i]=dp*(v2[i]-v1[i]); //Convert km->arcsec side2[i]=dp*(v3[i]-v1[i]); } cross(side1,side2,n); norm=NORM(n); n[0]=n[0]/norm; n[1]=n[1]/norm; n[2]=n[2]/norm; mu=DOT(E,n); mu0=DOT(E0,n); //Convert to camera frame mult_vector(RT,v1,vr1); mult_vector(RT,v2,vr2); mult_vector(RT,v3,vr3); for(int i=0;i<3;i++) { vr1[i]=dp*vr1[i]; vr2[i]=dp*vr2[i]; vr3[i]=dp*vr3[i]; } //Now we should convert to frequency domain, ie calculate the contribution of each facet Calc_FTC(freqx,freqy,nfreq,vr1[0],vr1[1],vr2[0],vr2[1],vr3[0],vr3[1],F0); // printf("Fdd: %f %f\n",creal(FTdd[0]),cimag(FTdd[0])); //Note that we sum to F at each round, does not work, we need to multiply with the echo //Derivatives wrt angles area=0.5*norm; //if(j==0) //{ // mexPrintf("area: %f mu: %f F0: %f\n",area,mu,F0[0]); //} //mexPrintf("area: %f normal: %f %f %f mu: %f mu0: %f\n",area,n[0],n[1],n[2],mu,mu0); B=Flux[j]; for(int jf=0;jf<nfreq;jf++) { //This should be taken outside of the loop // mexPrintf("scale:%f offset: %f %f\n",scale,creal(cexp(2*PI*I*(offset[0]*freqx[jf]+offset[1]*freqy[jf]))),cimag(cexp(2*PI*I*(offset[0]*freqx[jf]+offset[1]*freqy[jf])))); //FTC=tscale*F0[jf]; F[jf]+=B*F0[jf]; } TB=TB+B*area*mu; // printf("Flux: %f area: %f mu: %f\n",B,area,mu); } //printf("Total 
brightness: %f\n",TB); //Normalize with total brightness double complex temp; for(int j=0;j<nfreq;j++) { temp=cexp(2.0*PI*I*(offset[0]*freqx[j]+offset[1]*freqy[j]))*F[j]/TB; Fr[j]=creal(temp); Fi[j]=cimag(temp); } free(visible); free(Flux); free(F0); free(F); }
int main(int argc, char** argv) { double serial_time, openCL_time, start_time; cl_int err; cl_platform_id* platforms = NULL; char platform_name[1024]; cl_device_id device_id = NULL; cl_uint num_of_platforms = 0; cl_uint num_of_devices = 0; cl_context context; cl_kernel kernel; cl_command_queue command_queue; cl_program program; cl_mem input1, input2, input3, output; float **A, **B, **C, **serialC; // matrices int d1, d2, d3; // dimensions of matrices /* print user instruction */ if (argc != 4) { printf("Matrix multiplication: C = A x B\n"); printf("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]); return 0; } /* read user input */ d1 = 1000; // rows of A and C d2 = 1000; // cols of A and rows of B d3 = 1000; // cols of B and C int d[4] = { 0, d1, d2, d3 }; size_t global[1] = { (size_t)d1*d3 }; printf("Matrix sizes C[%d][%d] = A[%d][%d] x B[%d][%d]\n", d1, d3, d1, d2, d2, d3); /* prepare matrices */ A = alloc_mat(d1, d2); init_mat(A, d1, d2); B = alloc_mat(d2, d3); init_mat(B, d2, d3); C = alloc_mat(d1, d3); serialC = alloc_mat(d1, d3); err = clGetPlatformIDs(0, NULL, &num_of_platforms); if (err != CL_SUCCESS) { printf("No platforms found. Error: %d\n", err); return 0; } platforms = (cl_platform_id *)malloc(num_of_platforms); err = clGetPlatformIDs(num_of_platforms, platforms, NULL); if (err != CL_SUCCESS) { printf("No platforms found. Error: %d\n", err); return 0; } else { int nvidia_platform = 0; for (unsigned int i = 0; i<num_of_platforms; i++) { clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL); if (err != CL_SUCCESS) { printf("Could not get information about platform. Error: %d\n", err); return 0; } if (strstr(platform_name, "NVIDIA") != NULL) { nvidia_platform = i; break; } } err = clGetDeviceIDs(platforms[nvidia_platform], CL_DEVICE_TYPE_GPU, 1, &device_id, &num_of_devices); if (err != CL_SUCCESS) { printf("Could not get device in platform. 
Error: %d\n", err); return 0; } } context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); if (err != CL_SUCCESS) { printf("Unable to create context. Error: %d\n", err); return 0; } command_queue = clCreateCommandQueue(context, device_id, 0, &err); if (err != CL_SUCCESS) { printf("Unable to create command queue. Error: %d\n", err); return 0; } program = clCreateProgramWithSource(context, 1, (const char **)&KernelSource, NULL, &err); if (err != CL_SUCCESS) { printf("Unable to create program. Error: %d\n", err); return 0; } if (clBuildProgram(program, 0, NULL, NULL, NULL, NULL) != CL_SUCCESS) { char *log; size_t size; clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size); // 1. Länge des Logbuches? log = (char *)malloc(size + 1); if (log) { clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL); // 2. Hole das Logbuch ab log[size] = '\0'; printf("%s", log); free(log); } return 1; } err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { printf("Error building program. Error: %d\n", err); return 0; } kernel = clCreateKernel(program, "matmult_ocl", &err); if (err != CL_SUCCESS) { printf("Error setting kernel. 
Error: %d\n", err); return 0; } input1 = clCreateBuffer(context, CL_MEM_READ_ONLY, d1*d2*sizeof(float), NULL, &err); input2 = clCreateBuffer(context, CL_MEM_READ_ONLY, d2*d3*sizeof(float), NULL, &err); input3 = clCreateBuffer(context, CL_MEM_READ_ONLY, 4 * sizeof(int), NULL, &err); output = clCreateBuffer(context, CL_MEM_READ_WRITE, d1*d3*sizeof(float), NULL, &err); start_time = omp_get_wtime(); clEnqueueWriteBuffer(command_queue, input1, CL_TRUE, 0, d1*d2*sizeof(float), *A, 0, NULL, NULL); clEnqueueWriteBuffer(command_queue, input2, CL_TRUE, 0, d2*d3*sizeof(float), *B, 0, NULL, NULL); clEnqueueWriteBuffer(command_queue, input3, CL_TRUE, 0, 4 * sizeof(int), d, 0, NULL, NULL); clSetKernelArg(kernel, 0, sizeof(cl_mem), &input1); clSetKernelArg(kernel, 1, sizeof(cl_mem), &input2); clSetKernelArg(kernel, 2, sizeof(cl_mem), &input3); clSetKernelArg(kernel, 3, sizeof(cl_mem), &output); clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); clFinish(command_queue); clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, d1*d3*sizeof(float), *C, 0, NULL, NULL); // for (unsigned int i = 0; i < (unsigned int) d1*d3; i++) // printf("%f\n", C[0][i]); openCL_time = omp_get_wtime() - start_time; clReleaseMemObject(input1); clReleaseMemObject(input2); clReleaseMemObject(input3); clReleaseMemObject(output); clReleaseProgram(program); clReleaseKernel(kernel); clReleaseCommandQueue(command_queue); clReleaseContext(context); printf("Running serial algorithm...\n"); start_time = omp_get_wtime(); serialC = mult_mat(A, B, d1, d2, d3); serial_time = omp_get_wtime() - start_time; printf("Checking results... "); is_correct(C, serialC, d1, d3); printf("Showing stats...\n"); printf(" serial runtime = %f\n", serial_time); printf(" OpenCL runtime = %f\n", openCL_time); printf(" Speedup = %f\n", serial_time / openCL_time); return 0; }
/*
 * Driver: multiplies the complex matrix stored in "matrix.dat" by the
 * complex vector stored in "vector_input.dat" and writes the resulting
 * vector to "vector_output.dat".
 *
 * Usage: <prog> <rows> <cols> <threads>
 *
 * As read here, each matrix record is four doubles (the last three
 * records are read two doubles at a time to obtain the largest row and
 * column index; entries 2 and 3 of every record are used as real and
 * imaginary part) and each vector record is two doubles (real,
 * imaginary).  The row/column counts given on the command line must
 * match the ones found in the matrix file.
 *
 * Exits with status 1 on any validation error.
 */
int main(int argc, char *argv[])
{
    FILE *mat;
    FILE *vec;
    FILE *end;
    double main_buf[MAXSIZE];
    double main_vec_buf[MAXSIZE];
    Cnum mat_buf[MAXSIZE];
    Cnum vec_buf[MAXSIZE];
    Cnum result[MAXSIZE];
    double Row=0, Col=0;
    double vec_row;
    double mat_RC_data[6];
    int j=0;
    int thread;

    /* validate the command line before touching argv[1..3] or any file */
    if (argc == 4 && atoi(argv[1]) && atoi(argv[2]))
        printf("input = %d x %d, Thread = %d \n",atoi(argv[1]), atoi(argv[2]),atoi(argv[3]));
    else
    {
        fputs("[Error] Not a correct input", stderr);
        exit(1);
    }
    thread = atoi(argv[3]);

    mat = fopen("matrix.dat","rb");
    vec = fopen("vector_input.dat","rb");
    end = fopen("vector_output.dat","wb");

    /* make sure every file could be opened */
    if(mat==NULL)
    {
        fputs("Matrix File error", stderr);
        exit(1);
    }
    if(vec==NULL)
    {
        fputs("Vector File error", stderr);
        exit(1);
    }
    if(end==NULL)
    {
        fputs("Output File error", stderr);
        exit(1);
    }

    start_time_measurement();   /* start timing */

    /* matrix size: read the index pair of each of the last three records */
    for(int i=3;i>0;i--)
    {
        long offset = -(long)(i*4*sizeof(double));

        if(fseek(mat, offset, SEEK_END)==-1)
        {
            printf("error\n");
        }
        if(fread(&mat_RC_data[j],sizeof(double),2,mat)!=2)
        {
            fputs("[ERROR] Matrix File ROW_DATA error", stderr);
            exit(1);
        }
        Row=mat_RC_data[j]+1;
        j++;
        Col=mat_RC_data[j]+1;
        j++;
    }

    /* sanity-check the row/column indices of the trailing records */
    if(mat_RC_data[4]-mat_RC_data[2]>1)
    {
        fputs("[ERROR] Matrix File ROW_DATA error", stderr);
        exit(1);
    }
    if(mat_RC_data[2]-mat_RC_data[0]>1)
    {
        fputs("[ERROR] Matrix File ROW_DATA error", stderr);
        exit(1);
    }
    if(mat_RC_data[5]-mat_RC_data[3]>2)
    {
        fputs("[ERROR] Matrix File ROW_DATA error", stderr);
        exit(1);
    }
    if(mat_RC_data[3]-mat_RC_data[1]>2)
    {
        fputs("[ERROR] Matrix File ROW_DATA error", stderr);
        exit(1);
    }

    /* compare the command-line dimensions with the ones in the file */
    if(atoi(argv[1])!=Row)
    {
        fputs("[ERROR] 입력한 행값과 매트릭스의 행값이 다릅니다.", stderr);
        exit(1);
    }
    if(atoi(argv[2])!=Col)
    {
        fputs("[ERROR] 입력한 열값과 매트릭스의 열값이 다릅니다.", stderr);
        exit(1);
    }
    /* first label is the column count, second the row count; the
     * arguments used to be passed in the opposite order */
    printf("전체 열의 값 = %f\n전체 행의 값 = %f \n", Col, Row);

    /* one row of the matrix needs Col*4 doubles in main_buf */
    if(Col*4>MAXSIZE)
    {
        fputs("[ERROR] Matrix is too large for the buffers", stderr);
        exit(1);
    }

    /* number of entries in the vector file */
    fseek(vec,0,SEEK_END);
    vec_row = ftell(vec)/(sizeof(double)*2);
    printf("벡터행렬 행 값 = %f\n",vec_row);

    /* the vector length must match the number of matrix columns */
    if(Col!=vec_row)
    {
        fputs("[ERROR] Matrix and Vector are not same", stderr);
        exit(1);
    }

    /* rewind all files and process the matrix one row at a time */
    fseek(mat,0,SEEK_SET);
    fseek(vec,0,SEEK_SET);
    fseek(end,0,SEEK_SET);

    for(int k=0; k<Row; k++)
    {
        int button=0;   /* position inside the current 4-double record */

        j=0;
        fseek(vec,0,SEEK_SET);  /* the whole vector is re-read per row */

        /* read one matrix row into mat_buf[] */
        for(int i=0; i<Col*4; i++)
        {
            fread(&main_buf[i], sizeof(double), 1, mat);
            if(i>3 && i%4==0)
                j++;    /* a new record starts every four doubles */
            if(button==2)
                mat_buf[j].realnum=main_buf[i];
            else if(button==3)
            {
                mat_buf[j].imanum=main_buf[i];
            }
            button++;
            if(button==4)
                button=0;
        }

        /* read the vector into vec_buf[] */
        j=0;
        for(int y=0; y<Col*2; y++)
        {
            fread(&main_vec_buf[y],sizeof(double),1,vec);
            if(y>1&&y%2==0)
                j++;    /* a new entry starts every two doubles */
            if(y%2==0)
                vec_buf[j].realnum = main_vec_buf[y];
            else
            {
                vec_buf[j].imanum = main_vec_buf[y];
            }
        }

        /* row * vector: element-wise products, then their sum */
        mult_mat(mat_buf,vec_buf,result,(int)Col,thread);
        result[0]=sum_mat(result,(int)Col);

        fwrite(&result[0].realnum,sizeof(double),1,end);
        fwrite(&result[0].imanum,sizeof(double),1,end);
    }

    printf("Calculation Complete ");
    end_time_measurement();     /* stop timing */

    fclose(mat);
    fclose(vec);
    fclose(end);
    return 0;
}
/*
 * Driver: reads a matrix size N from standard input, fills two N x N
 * integer matrices with a[i][j] = b[i][j] = i * j, runs mult_mat on them
 * 100 times and prints the total elapsed CPU time in milliseconds.
 *
 * Returns 0 on success and 1 on invalid input or allocation failure.
 */
int main (void)
{
    int *a, *b, *c, N;
    int i, j;
    int iteraciones = 100, ind = 0;
    size_t count;

    printf("Ingrese el tamano deseado para las matrices:\n");
    if (scanf("%d", &N) != 1 || N <= 0) {
        fprintf(stderr, "Tamano de matriz invalido\n");
        return 1;
    }

    printf("Creando espacio e inicializando matrices...\n");

    /* allocate and initialise; sizes are computed in size_t so that
     * N * N cannot overflow int */
    count = (size_t)N * (size_t)N;
    a = (int*)malloc(count * sizeof(int));
    b = (int*)malloc(count * sizeof(int));
    c = (int*)malloc(count * sizeof(int));
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "No se pudo reservar memoria\n");
        free(a);
        free(b);
        free(c);
        return 1;
    }

    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            size_t idx = (size_t)i * (size_t)N + (size_t)j;

            a[idx] = i * j;
            b[idx] = i * j;
            c[idx] = 0;
        }
    }

    printf("Se va a realizar %d iteraciones de matrices %dx%d\n", iteraciones, N, N);

    /* timed section */
    clock_t start = clock();
    while (ind < iteraciones) {
        mult_mat(a, b, c, N);
        ind++;
    }
    clock_t end = clock();

    float seconds = (float)(end - start) / CLOCKS_PER_SEC;
    /* the message reports the total for all iterations in ms, so the
     * factor is 1000 (it used to be 10, i.e. 100 times too small) */
    printf("El tiempo tomado para %d iteraciones fue de %3.5f ms\n", iteraciones, seconds * 1000);

    free(a);
    free(b);
    free(c);
    return 0;
}
/** **/ int main (int argc, char* argv[]) { int WORK_DIM = 2; // Wie viele Dimensionen hat der Indexraum? std::chrono::time_point<std::chrono::system_clock> s_start, s_end, p_start, p_end; // Lese den Kernel dynamisch ein: (uebernommen von Foliensatz 9, Folie 20) FILE *fp; const char *FileName = "matmult.cl"; char *KernelSource; fp = fopen(FileName, "r"); if (!fp) { printf("Can't open kernel source: %s", FileName); exit(1); } KernelSource = (char *)malloc(MAX_SOURCE_SIZE); size_t kernel_s_size = fread(KernelSource, 1, MAX_SOURCE_SIZE, fp); fclose(fp); cl_int err; cl_platform_id* platforms = NULL; char platform_name[1024]; cl_device_id device_id = NULL; cl_uint num_of_platforms = 0, num_of_devices = 0; cl_context context; cl_kernel kernel; cl_command_queue command_queue; cl_program program; err = clGetPlatformIDs(0, NULL, &num_of_platforms); if (err != CL_SUCCESS) { printf("No platforms found. Error: %d\n", err); return 0; } // Liefert Plattformen platforms = (cl_platform_id *)malloc(num_of_platforms); err = clGetPlatformIDs(num_of_platforms, platforms, NULL); if (err != CL_SUCCESS) { printf("No platforms found. Error: %d\n", err); return 0; } else { int nvidia_platform = 0; // Speichert den Rang der letzten NVIDIA-Plattform for (unsigned int i=0; i<num_of_platforms; i++) // Fuer jede Plattform: { clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL); if (err != CL_SUCCESS) { printf("Could not get information about platform. Error: %d\n", err); return 0; } if (strstr(platform_name, "NVIDIA") != NULL) { // Falls die Plattform eine NVIDIA-Plattform ist: Speichere ihren Rang nvidia_platform = i; break; } } // Gibt die ID des Devices der NVIDIA-Plattform zurueck err = clGetDeviceIDs(platforms[nvidia_platform], CL_DEVICE_TYPE_GPU, 1, &device_id, &num_of_devices); if (err != CL_SUCCESS) { printf("Could not get device in platform. 
Error: %d\n", err); return 0; } } // Erschaffe einen OpenCl-context, in dem OpenCl-Datenobjekte verwaltet werden koennen context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); if (err != CL_SUCCESS) { printf("Unable to create context. Error: %d\n", err); return 0; } // Initialisiere eine Befehlswarteschleife, die Befehle fuer OpenCl-Objekte speichern kann command_queue = clCreateCommandQueue(context, device_id, 0, &err); if (err != CL_SUCCESS) { printf("Unable to create command queue. Error: %d\n", err); return 0; } // Initialisiere ein Programm und spezifiziere, aus welchem Code dieses kompiliert werden soll program = clCreateProgramWithSource(context, 1, (const char **)&KernelSource, (const size_t *)& kernel_s_size, &err); if (err != CL_SUCCESS) { printf("Unable to create program. Error: %d\n", err); return 0; } // Kompiliere das Programm zur Laufzeit err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { // Zeige Compilermeldungen an: (uebernommen von Foliensatz 9, Folie 23) char *log; size_t size; clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size); log = (char *)malloc(size+1); if (log) { clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL); log[size] = '\0'; printf("%s", log); free(log); } printf("Error building program. Error: %d\n", err); return 0; } // Erschaffe einen Kernel und lade oben kompiliertes Programm ein kernel = clCreateKernel(program, "matmult", &err); if (err != CL_SUCCESS) { printf("Error setting kernel. 
Error: %d\n", err); return 0; } float **A, **B, **C; // Matrizen int dim1, dim2, dim3; // Matrixdimensionen dim1 = D1; // Zeilen von A, Zeilen von C dim2 = D2; // Spalten von A, Zeilen von B dim3 = D3; // Spalten von B, Spalten von C A = alloc_mat(dim1, dim2); B = alloc_mat(dim2, dim3); C = alloc_mat(dim1, dim3); init_mat(A, dim1, dim2); init_mat(B, dim2, dim3); cl_mem in_A, in_B, output; // float data[DATA_SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; size_t global[1] = {dim1*dim3}; // Dimensionen von C size_t global_two[2] = {dim1, dim3}; in_A = clCreateBuffer (context, CL_MEM_READ_ONLY, sizeof(float)*dim1*dim2, NULL, &err); in_B = clCreateBuffer (context, CL_MEM_READ_ONLY, sizeof(float)*dim2*dim3, NULL, &err); output = clCreateBuffer (context, CL_MEM_WRITE_ONLY, sizeof(float)*dim1*dim3, NULL, &err); clEnqueueWriteBuffer(command_queue, in_A, CL_TRUE, 0, sizeof(float)*dim1*dim2, *A, 0, NULL, NULL); clEnqueueWriteBuffer(command_queue, in_B, CL_TRUE, 0, sizeof(float)*dim2*dim3, *B, 0, NULL, NULL); clSetKernelArg(kernel, 0, sizeof(cl_mem), &in_A); clSetKernelArg(kernel, 1, sizeof(cl_mem), &in_B); clSetKernelArg(kernel, 2, sizeof(cl_mem), &output); // clSetKernelArg(kernel, 3, sizeof(int), &dim2); // clSetKernelArg(kernel, 4, sizeof(int), &dim3); clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); if (WORK_DIM == 2) { clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, global_two, NULL, 0, NULL, NULL); } // Zeitmessung fuer parallele Version p_start = std::chrono::system_clock::now(); err = clFinish(command_queue); p_end = std::chrono::system_clock::now(); std::chrono::duration<double> p_duration = p_end - p_start; if (err == CL_INVALID_COMMAND_QUEUE ) { printf("CL_INVALID_COMMAND_QUEUE: %d\n", err); return 0; } clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, sizeof(float)*dim1*dim3, *C, 0, NULL, NULL); // Ueberpruefe, ob serielle Version und parallele gleich sind: float **correct_matrix; correct_matrix = alloc_mat(dim1, dim3); 
s_start = std::chrono::system_clock::now(); // Zeitmessung fuer serielle Version correct_matrix = mult_mat(A, B, dim1, dim2, dim3); s_end = std::chrono::system_clock::now(); std::chrono::duration<double> s_duration = s_end - s_start; is_correct(C, correct_matrix, dim1, dim3); // Numerischer Korrektheitsbeweis print_mat(C, dim1, dim3, "C = "); print_mat(correct_matrix, dim1, dim3, "correct_matrix = "); // printf("Kernel execution time: %f\n", t_end-t_start); clReleaseMemObject(in_A); clReleaseMemObject(in_B); clReleaseMemObject(output); clReleaseProgram(program); clReleaseKernel(kernel); err = clReleaseCommandQueue(command_queue); //!! if (err != CL_SUCCESS) { printf("Error releasing command queue: %d\n", err); return 0; } clReleaseContext(context); printf("Dauer der seriellen Version: %.2f Millisekunden\n", s_duration.count() * 1000); printf("Dauer der parallelen Version: %.2f Millisekunden\n", p_duration.count() * 1000); printf("Erhaltenes Speed Up: %.2f \n", p_duration.count() / p_duration.count()); return 0; }
/*
 * mult_mat4 - convenience wrapper that forwards three mat4 operands to
 * the generic mult_mat with the dimension fixed at 4 (presumably
 * c = a * b; confirm the operand roles against mult_mat).
 */
void mult_mat4(mat4 a, mat4 b, mat4 c)
{
    enum { MAT4_ORDER = 4 };

    mult_mat(a, b, c, MAT4_ORDER);
}
void Calculate_HF_deriv(int *tlist,double *vlist,int nfac,int nvert,double *angles,double *Eo,double *E0o,double *up,double TIME,double dist,double Gamma,double A,double Hdist,int N,double WL,double *freqx,double *freqy,int nfreq,double *offset,double *Fr,double *Fi,double *dFdxr,double *dFdxi,double *dFdyr,double *dFdyi,double *dFdzr,double *dFdzi,double *dFdAr,double *dFdAi,double *dFdoffr,double *dFdoffi) { double complex *F=calloc(nfreq,sizeof(double complex)); double complex *F0,*FTda,*FTdb,*FTdc,*FTdd,*FTdh,*FTdg,*FTdx,*FTdy,*FTdz,*FTdA; FTdx=calloc(nfreq*nvert,sizeof(double complex)); FTdy=calloc(nfreq*nvert,sizeof(double complex)); FTdz=calloc(nfreq*nvert,sizeof(double complex)); FTdA=calloc(nfreq*3,sizeof(double complex)); F0=calloc(nfreq,sizeof(double complex)); FTda=malloc(nfreq*sizeof(double complex)); FTdb=malloc(nfreq*sizeof(double complex)); FTdc=malloc(nfreq*sizeof(double complex)); FTdd=malloc(nfreq*sizeof(double complex)); FTdh=malloc(nfreq*sizeof(double complex)); FTdg=malloc(nfreq*sizeof(double complex)); double M[3][3],dMb[3][3],dMo[3][3],dMl[3][3],Mt[3][3],dMbT[3][3],dMoT[3][3],dMlT[3][3]; //Rotation matrices and their derivatives and transposes double R[3][3],Rdb[3][3],Rdl[3][3],Rdo[3][3],RT[3][3]; //Projection matrix, and derivatives of combined projection+rotation matrix double dEdb[3],dEdl[3],dEdo[3],dE0db[3],dE0dl[3],dE0do[3]; double dndx1[3],dndx2[3],dndx3[3],dndy1[3],dndy2[3],dndy3[3],dndz1[3],dndz2[3],dndz3[3]; //Derivatives of the facet normal vector double dBdx1,dBdy1,dBdz1,dBdx2,dBdy2,dBdz2,dBdx3,dBdy3,dBdz3,dBdb,dBdl,dBdo; //Derivatives of facet brightness double *dTBdx,*dTBdy,*dTBdz; //Derivatives of total brightness, allocating memory double *Flux,*Fldx,*Fldy,*Fldz,*FldA; Flux=calloc(nfac,sizeof(double)); Fldx=calloc(nfac*nvert,sizeof(double)); Fldy=calloc(nfac*nvert,sizeof(double)); Fldz=calloc(nfac*nvert,sizeof(double)); FldA=calloc(nfac*3,sizeof(double)); double dTBdA[3]={0}; dTBdx=calloc(nvert,sizeof(double)); 
dTBdy=calloc(nvert,sizeof(double)); dTBdz=calloc(nvert,sizeof(double)); double dmudx1,dmudy1,dmudz1,dmudx2,dmudy2,dmudz2,dmudx3,dmudy3,dmudz3; double dmu0dx1,dmu0dy1,dmu0dz1,dmu0dx2,dmu0dy2,dmu0dz2,dmu0dx3,dmu0dy3,dmu0dz3; double dmudl,dmudb,dmudo,dmu0dl,dmu0db,dmu0do; //Derivatives of mu and mu0 double dAdx[3],dAdy[3],dAdz[3]; //Facet area derivatives double dadx,dady,dadz,dbdx,dbdy,dbdz; //Derivatives of projected vertices double E[3],E0[3]; //Earth and Sun direction, rotated double side1[3],side2[3]; double n[3]; double v1db[3],v2db[3],v3db[3],v1dl[3],v2dl[3],v3dl[3],v1do[3],v2do[3],v3do[3]; //Derivatives of 2d vertices wrt angles double vr1[3],vr2[3],vr3[3]; double v1[3],v2[3],v3[3]; double complex scale; double dp; double B,TB=0.0; double norm; double mut,mu0t; int t1,t2,t3; int j1,j2,j3; double mu,mu0,area; double *normal; int *visible; int tb1,tb2,tb3; //Indices to the vertices of possible blocker facet int blocked=0; //Distance km->arcsec dp=1/(dist*149597871.0)*180.0/PI*3600.0; visible=calloc(nfac,sizeof(int)); //Calculate_Frame_Matrix_Derivatives(Eo,angles,TIME,rfreq,R,Rdb,Rdl,Rdo); Calculate_Frame_Matrix(Eo,up,R); //Calculate frame change matrix //FacetsOverHorizon(tlist,vlist,nfac,nvert,normal,centroid,NumofBlocks,IndexofBlocks); rotate(angles[0],angles[1],angles[2],0.0,TIME,M,dMb,dMl,dMo); //Construct asteroid->Camera frame matrix, which is //asteroid->world frame->camera frame transpose(M,Mt); //Transpose, since we rotate the model, not view directions transpose(dMb,dMbT); transpose(dMl,dMlT); transpose(dMo,dMoT); mult_mat(R,Mt,RT); mult_vector(M,Eo,E); mult_vector(M,E0o,E0); mult_mat(R,dMbT,Rdb); mult_mat(R,dMlT,Rdl); mult_mat(R,dMoT,Rdo); //Derivatives of E,E0 wrt beta,lambda,omega mult_vector(dMb,Eo,dEdb); mult_vector(dMl,Eo,dEdl); mult_vector(dMo,Eo,dEdo); mult_vector(dMb,E0o,dE0db); mult_vector(dMl,E0o,dE0dl); mult_vector(dMo,E0o,dE0do); dadx=RT[0][0]; dady=RT[0][1]; dadz=RT[0][2]; dbdx=RT[1][0]; dbdy=RT[1][1]; dbdz=RT[1][2]; /*For each facet, * 
1)Check if facet is visible * 2) Calculate echo * 3) Convert triangle to range-Doppler frame * 4) Calculate FT */ //Find actual blockers FindActualBlockers(tlist,vlist,nfac,nvert,E,E,1,visible); //visible is nfac vector, visible[j]=1 if facet (j+1)th facet is visible //NOTE INDEXING Calculate_Radiance(tlist,vlist,nfac,nvert,angles,Eo,E0o,TIME,Gamma, A,Hdist,WL,N,Flux,Fldx,Fldy,Fldz,FldA,1); //for(int j=0;j<nfac;j++) for(int j=0;j<nfac;j++) { if(visible[j]==0) continue; //Calculate normal from facet vertices //Vertex indices of the current facet //Note that C indices from 0, matlab from 1 j1=tlist[j*3]-1; j2=tlist[j*3+1]-1; j3=tlist[j*3+2]-1; //Current vertices for(int i=0;i<3;i++) { v1[i]=*(vlist+j1*3+i)*dp; //convert km->arcsec v2[i]=*(vlist+j2*3+i)*dp; v3[i]=*(vlist+j3*3+i)*dp; } //Calculate Normal derivatives (in the original frame) Calculate_Area_and_Normal_Derivative(v1,v2,v3,n,dndx1,dndx2,dndx3,dndy1,dndy2,dndy3,dndz1,dndz2,dndz3,&area,dAdx,dAdy,dAdz); //Calculate normals and centroids mu=DOT(E,n); //Convert to camera frame mult_vector(RT,v1,vr1); mult_vector(RT,v2,vr2); mult_vector(RT,v3,vr3); //Now we should convert to frequency domain, ie calculate the contribution of each facet // Calc_FTC(freqx,freqy,nfreq,vr1[0],vr1[1],vr2[0],vr2[1],vr3[0],vr3[1],F0); Calc_FTC_deriv(freqx,freqy,nfreq,vr1[0],vr1[1],vr2[0],vr2[1],vr3[0],vr3[1],F0,FTda,FTdb,FTdc,FTdd,FTdg,FTdh); //Derivatives wrt angles mult_vector(Rdb,v1,v1db); mult_vector(Rdb,v2,v2db); mult_vector(Rdb,v3,v3db); mult_vector(Rdl,v1,v1dl); mult_vector(Rdl,v2,v2dl); mult_vector(Rdl,v3,v3dl); mult_vector(Rdo,v1,v1do); mult_vector(Rdo,v2,v2do); mult_vector(Rdo,v3,v3do); //Derivatives of mu,mu0 dmudx1=DOT(E,dndx1); dmudx2=DOT(E,dndx2); dmudx3=DOT(E,dndx3); dmudy1=DOT(E,dndy1); dmudy2=DOT(E,dndy2); dmudy3=DOT(E,dndy3); dmudz1=DOT(E,dndz1); dmudz2=DOT(E,dndz2); dmudz3=DOT(E,dndz3); dmudb=DOT(dEdb,n); dmudl=DOT(dEdl,n); dmudo=DOT(dEdo,n); B=Flux[j]; //Derivatives of B dBdx1=Fldx[j*nvert+j1]/dp; 
dBdx2=Fldx[j*nvert+j2]/dp; dBdx3=Fldx[j*nvert+j3]/dp; dBdy1=Fldy[j*nvert+j1]/dp; dBdy2=Fldy[j*nvert+j2]/dp; dBdy3=Fldy[j*nvert+j3]/dp; dBdz1=Fldz[j*nvert+j1]/dp; dBdz2=Fldz[j*nvert+j2]/dp; dBdz3=Fldz[j*nvert+j3]/dp; dBdb=FldA[3*j]; dBdl=FldA[3*j+1]; dBdo=FldA[3*j+2]; //Derivative of total brightness dTBdx[j1]+=dBdx1*area*mu+B*dAdx[0]*mu+B*area*dmudx1; dTBdx[j2]+=dBdx2*area*mu+B*dAdx[1]*mu+B*area*dmudx2; dTBdx[j3]+=dBdx3*area*mu+B*dAdx[2]*mu+B*area*dmudx3; dTBdy[j1]+=dBdy1*area*mu+B*dAdy[0]*mu+B*area*dmudy1; dTBdy[j2]+=dBdy2*area*mu+B*dAdy[1]*mu+B*area*dmudy2; dTBdy[j3]+=dBdy3*area*mu+B*dAdy[2]*mu+B*area*dmudy3; dTBdz[j1]+=dBdz1*area*mu+B*dAdz[0]*mu+B*area*dmudz1; dTBdz[j2]+=dBdz2*area*mu+B*dAdz[1]*mu+B*area*dmudz2; dTBdz[j3]+=dBdz3*area*mu+B*dAdz[2]*mu+B*area*dmudz3; dTBdA[0]+=dBdb*area*mu+B*area*dmudb; dTBdA[1]+=dBdl*area*mu+B*area*dmudl; dTBdA[2]+=dBdo*area*mu+B*area*dmudo; for(int jf=0;jf<nfreq;jf++) { F[jf]+=B*F0[jf]; FTdx[jf*nvert+j1]+=dBdx1*F0[jf]+B*(FTda[jf]*dadx+FTdb[jf]*dbdx); FTdx[jf*nvert+j2]+=dBdx2*F0[jf]+B*(FTdc[jf]*dadx+FTdd[jf]*dbdx); FTdx[jf*nvert+j3]+=dBdx3*F0[jf]+B*(FTdg[jf]*dadx+FTdh[jf]*dbdx); FTdy[jf*nvert+j1]+=dBdy1*F0[jf]+B*(FTda[jf]*dady+FTdb[jf]*dbdy); FTdy[jf*nvert+j2]+=dBdy2*F0[jf]+B*(FTdc[jf]*dady+FTdd[jf]*dbdy); FTdy[jf*nvert+j3]+=dBdy3*F0[jf]+B*(FTdg[jf]*dady+FTdh[jf]*dbdy); FTdz[jf*nvert+j1]+=dBdz1*F0[jf]+B*(FTda[jf]*dadz+FTdb[jf]*dbdz); FTdz[jf*nvert+j2]+=dBdz2*F0[jf]+B*(FTdc[jf]*dadz+FTdd[jf]*dbdz); FTdz[jf*nvert+j3]+=dBdz3*F0[jf]+B*(FTdg[jf]*dadz+FTdh[jf]*dbdz); //angle derivatives FTdA[jf*3+0]+=dBdb*F0[jf]+B*(FTda[jf]*v1db[0]+FTdb[jf]*v1db[1]+FTdc[jf]*v2db[0]+FTdd[jf]*v2db[1]+FTdg[jf]*v3db[0]+FTdh[jf]*v3db[1]); FTdA[jf*3+1]+=dBdl*F0[jf]+B*(FTda[jf]*v1dl[0]+FTdb[jf]*v1dl[1]+FTdc[jf]*v2dl[0]+FTdd[jf]*v2dl[1]+FTdg[jf]*v3dl[0]+FTdh[jf]*v3dl[1]); FTdA[jf*3+2]+=dBdo*F0[jf]+B*(FTda[jf]*v1do[0]+FTdb[jf]*v1do[1]+FTdc[jf]*v2do[0]+FTdd[jf]*v2do[1]+FTdg[jf]*v3do[0]+FTdh[jf]*v3do[1]); } TB=TB+B*area*mu; } //Normalize with total brightness 
double complex temp; for(int j=0;j<nfreq;j++) { scale=cexp(2.0*PI*I*(offset[0]*freqx[j]+offset[1]*freqy[j])); for(int k=0;k<nvert;k++) { temp=dp*scale*(FTdx[j*nvert+k]*TB-F[j]*dTBdx[k])/pow(TB,2); dFdxr[j*nvert+k]=creal(temp); dFdxi[j*nvert+k]=cimag(temp); temp=dp*scale*(FTdy[j*nvert+k]*TB-F[j]*dTBdy[k])/pow(TB,2); dFdyr[j*nvert+k]=creal(temp); dFdyi[j*nvert+k]=cimag(temp); temp=dp*scale*(FTdz[j*nvert+k]*TB-F[j]*dTBdz[k])/pow(TB,2); dFdzr[j*nvert+k]=creal(temp); dFdzi[j*nvert+k]=cimag(temp); } temp=scale*(FTdA[j*3+0]*TB-F[j]*dTBdA[0])/pow(TB,2); dFdAr[j*3+0]=creal(temp); dFdAi[j*3+0]=cimag(temp); temp=scale*(FTdA[j*3+1]*TB-F[j]*dTBdA[1])/pow(TB,2); dFdAr[j*3+1]=creal(temp); dFdAi[j*3+1]=cimag(temp); temp=scale*(FTdA[j*3+2]*TB-F[j]*dTBdA[2])/pow(TB,2); dFdAr[j*3+2]=creal(temp); dFdAi[j*3+2]=cimag(temp); temp=cexp(2.0*PI*I*(offset[0]*freqx[j]+offset[1]*freqy[j]))*F[j]/TB; Fr[j]=creal(temp); Fi[j]=cimag(temp); temp=2.0*PI*I*freqx[j]*F[j]; dFdoffr[j*2+0]=creal(temp); dFdoffi[j*2+0]=cimag(temp); temp=2.0*PI*I*freqy[j]*F[j]; dFdoffr[j*2+1]=creal(temp); dFdoffi[j*2+1]=cimag(temp); } free(FTdx); free(FTdy); free(FTdz); free(FTdA); free(dTBdx); free(dTBdy); free(dTBdz); free(FTda); free(FTdb); free(FTdc); free(FTdd); free(FTdg); free(FTdh); free(F0); free(visible); free(Flux); free(Fldx); free(Fldy); free(Fldz); free(FldA); free(F); }