/*
 * Fits a multiple linear regression by building and solving the normal
 * equations  (XT.X) x = XT.y.
 *
 * Parameters:
 *   X  design matrix, passed to trasposeMatrix() and multiplyMatrix().
 *   y  observation vector; it is wrapped as an m x 1 matrix.
 *   n  size of the linear system handed to gaussianElimination().
 *   m  number of entries read from y.
 *
 * Returns whatever gaussianElimination(n, A, b) returns (the solution vector).
 *
 * NOTE(review): the intermediates Y, XT, A, B and b are not released here.
 * Whether they can be freed (and with which routine) depends on how the
 * matrix helpers allocate and on whether gaussianElimination() returns b
 * itself -- confirm against those helpers before adding cleanup.
 */
double * multipleLinearRegression(double **X, double *y, int n, int m){
    double **A, **XT, **B, **Y;
    double *b;

    Y  = vectorToMatrix(y, m, 1);
    XT = trasposeMatrix(X, n, m);

    /* XT.X.x = XT.b */
    A = multiplyMatrix(XT, X, m, n, n);
    B = multiplyMatrix(XT, Y, m, n, 1);
    b = matrixToVector(B, n, 1);

    /*
     * The solver's return value is the result. The original allocated a
     * separate n-vector first and then overwrote the pointer with this
     * return value, which leaked that vector on every call.
     */
    return gaussianElimination(n, A, b);
}
//----------------------------------------------------------------------------- // Returns the eigendecomposition A = X*D*X^-1. // d is the diagonal entries of D. // X and d must be preallocated: X should be n x n and d should be length n. //----------------------------------------------------------------------------- void eigendecomp (double **A, double complex **X, double complex *d, int n) { double *a; double complex **Xtmp; double complex *dtmp; gsl_matrix_view m; gsl_vector_complex *eval; gsl_matrix_complex *evec; gsl_eigen_nonsymmv_workspace *w; // Use GSL routine to compute eigenvalues and eigenvectors a = matrixToVector (A, n, n); m = gsl_matrix_view_array (a, n, n); eval = gsl_vector_complex_alloc (n); evec = gsl_matrix_complex_alloc (n, n); w = gsl_eigen_nonsymmv_alloc (n); gsl_eigen_nonsymmv (&m.matrix, eval, evec, w); gsl_eigen_nonsymmv_free (w); // Convert from GSL to intrinsic types Xtmp = gslmToCx (evec, n, n); dtmp = gslvToCx (eval, n); copycm_inplace (X, Xtmp, n, n); copycv_inplace (d, dtmp, n); freecmatrix(Xtmp, n); free(a); free(dtmp); gsl_vector_complex_free(eval); gsl_matrix_complex_free(evec); }
/*
 * Exchanges blocks of the distributed matrix b between all ranks of
 * MPI_COMM_WORLD with a single MPI_Alltoallv.
 *
 * Parameters:
 *   b     local part of the matrix; packed into the send buffer before the
 *         exchange and refilled from the receive buffer afterwards.
 *   size  number of ranks (length of len; also passed to the pack/unpack
 *         helpers together with disp).
 *   len   per-rank block lengths; len[rank] is this rank's own length.
 *   disp  per-rank displacements, forwarded to the pack/unpack helpers.
 *   rank  index of the calling rank.
 *   m     buffer sizing factor: both buffers hold m * len[rank] elements.
 *
 * NOTE(review): the buffers are Real but are sent as MPI_DOUBLE, so this
 * presumably requires Real to be double -- confirm against the typedef.
 */
void transpose (Real **b, int size, int *len, int *disp, int rank, int m){
    int *sendcounts, *rdispls;
    Real *sendbuf, *recvbuf;
    int index = 0;

    sendbuf = createRealArray (m * len[rank]);
    recvbuf = createRealArray (m * len[rank]);
    sendcounts = calloc(size, sizeof(int));
    rdispls = calloc(size, sizeof(int));

    matrixToVector(b, sendbuf, len, disp, size, rank);

    /*
     * The block exchanged with rank i has len[rank] * len[i] elements and the
     * blocks are laid out back to back. The same counts and displacements are
     * used for the send and the receive side.
     */
    for (int i = 0; i < size; ++i) {
        sendcounts[i] = len[rank] * len[i];
        rdispls[i] = index;
        index = index + sendcounts[i];
    }

    MPI_Alltoallv(sendbuf, sendcounts, rdispls, MPI_DOUBLE,
                  recvbuf, sendcounts, rdispls, MPI_DOUBLE, MPI_COMM_WORLD);

    vectorToMatrix(b, recvbuf, len, disp, size, rank);

    /* These were leaked on every call in the original. */
    free(sendcounts);
    free(rdispls);

    /*
     * NOTE(review): sendbuf and recvbuf are never released either. They come
     * from createRealArray(), whose allocator is not visible here; release
     * them with the matching deallocator (free() if it wraps malloc).
     */
}
int main(int argc, char *argv[]){ char *ruta_db, *ruta_queries; double **db, **queries, *db_vector, *queries_vector; int num_db, num_queries, dim, k, i, j; int num_threads, thread_num; Elem *answer; //variables para medir el tiempo struct rusage r1, r2; double user_time, sys_time, real_time; struct timeval t1, t2; if (argc != 8){ printf("Error :: Ejecutar como : main.out archivo_BD Num_elem archivo_queries Num_queries dim k nombre_usuario\n"); return 1; } ruta_db = (char *)malloc(sizeof(char)*(strlen(argv[1])+1)); strcpy(ruta_db, argv[1]); num_db = atoi(argv[2]); ruta_queries = (char *)malloc(sizeof(char)*(strlen(argv[3])+1)); strcpy(ruta_queries, argv[3]); num_queries = atoi(argv[4]); int dimaux=0,add=0; dim = atoi(argv[5]); k = atoi(argv[6]); char path[256]; sprintf(path, "/home/%s/Salida.txt",argv[7]); printf("%s\n",path ); int validamod=dim%16; if(validamod!=0){ if (dim<16){ dimaux=16; }else{ add = 16-validamod; dimaux=dim+add; } }else{ dimaux=0; } fflush(stdout); db= (double **)malloc(sizeof(double *)*num_db); for (i=0; i<num_db; i++) db[i] = (double *)malloc(sizeof(double)*dimaux); queries = (double **)malloc(sizeof(double *)*num_queries); for (i=0; i<num_queries; i++) queries[i] = (double *)malloc(sizeof(double)*dimaux); answer = (Elem *)malloc(sizeof(Elem)*num_queries*k); //scan DB scanFile(ruta_db, db, dim, num_db,dimaux); scanFile(ruta_queries, queries, dim, num_queries,dimaux); //Se transfieren datos de una matriz a un vector db_vector = (double *)_mm_malloc(sizeof(double)*dimaux*num_db, 64); for (i=0; i < dimaux*num_db; i++) db_vector[i] = 0.0; if (sizeof(double)*dimaux*num_queries < 64) { queries_vector = (double *)_mm_malloc(sizeof(double)*16, 64); for (i=0; i < 16; i++) queries_vector[i] = 0.0; } else { queries_vector = (double *)_mm_malloc(sizeof(double)*dimaux*num_queries, 64); for (i=0; i < dimaux*num_queries; i++) queries_vector[i] = 0.0; } matrixToVector(db, dimaux, num_db, db_vector); matrixToVector(queries, dimaux, num_queries, 
queries_vector); fflush(stdout); //inicio de la medida de tiempo getrusage(RUSAGE_SELF, &r1); gettimeofday(&t1, 0); #pragma offload target(mic:0) in(dim) in(db_vector:length(num_db*dimaux)) in(queries_vector:length(num_queries*dimaux)) out(answer:length(k*num_queries)) { #pragma omp parallel private(i, j, thread_num) shared(db_vector, num_db, queries_vector, num_queries, dimaux, k, answer, num_threads) { Elem *heap; heap = (Elem *)malloc(sizeof(Elem)*k); #pragma omp master { num_threads = omp_get_num_threads(); printf("run with %d threads\n", num_threads); } #pragma omp barrier thread_num = omp_get_thread_num(); int n_elem; Elem e_temp; double d; for(i=thread_num*dimaux; i<num_queries*dimaux; i+=num_threads*dimaux){ n_elem = 0; for(j=0; j<k; j++){ e_temp.dist = distancia(&(queries_vector[i]), &(db_vector[j*dimaux]), dimaux); e_temp.ind = j; inserta2(heap, &e_temp, &n_elem); } for(j=k; j<num_db; j++){ d = distancia(&(queries_vector[i]), &(db_vector[j*dimaux]), dimaux); if(d < topH(heap, &n_elem)) { e_temp.dist = d; e_temp.ind = j; popush2(heap, &n_elem, &e_temp); } } for(j=0; j<k; j++){ extrae2(heap, &n_elem, &e_temp); answer[(i/dimaux)*k+j].ind = e_temp.ind; answer[(i/dimaux)*k+j].dist = e_temp.dist; } } free(heap); } } gettimeofday(&t2, 0); getrusage(RUSAGE_SELF, &r2); Salida = fopen(path, "w"); for (i = 0; i < num_queries; ++i){ fprintf(Salida, "Consulta id:: %d\n",i); for (j = 0; j < k; ++j) { fprintf(Salida,"ind = %d :: dist = %f\n",answer[(i*k)+j].ind,answer[(i*k)+j].dist); } fprintf(Salida, "---------------------------------\n"); } fclose(Salida); user_time = (r2.ru_utime.tv_sec - r1.ru_utime.tv_sec) + (r2.ru_utime.tv_usec - r1.ru_utime.tv_usec)/1000000.0; sys_time = (r2.ru_stime.tv_sec - r1.ru_stime.tv_sec) + (r2.ru_stime.tv_usec - r1.ru_stime.tv_usec)/1000000.0; real_time = (t2.tv_sec - t1.tv_sec) + (double)(t2.tv_usec - t1.tv_usec)/1000000; printf("\nCPU Time = %lf", sys_time + user_time); printf("\nReal Time = %lf\n", real_time); return 0; }