/**
 * Compute the extrapolation weights for the current iteration.
 *
 * Depending on the useadiis / usediis flags the result is the ADIIS
 * weights, the DIIS weights, or a blend of the two controlled by the
 * size of the latest DIIS error and by the cool-off counter.
 *
 * @return weight vector
 * @throws std::runtime_error if only DIIS is enabled and the error
 *         exceeds diisthr, or if neither scheme is enabled.
 */
arma::vec DIIS::get_w() {
  // Largest absolute element in the last column of the DIIS error matrix
  const arma::mat errmat=get_diis_error();
  const double maxerr=arma::max(arma::abs(errmat.col(errmat.n_cols-1)));

  // Neither scheme enabled: nothing to do
  if(!useadiis && !usediis)
    throw std::runtime_error("Nor DIIS or ADIIS has been turned on.\n");

  // ADIIS only
  if(!usediis) {
    arma::vec weights=get_w_adiis();
    if(verbose) {
      printf("ADIIS weights\n");
      print_mat(weights.t(),"% .2e ");
    }
    return weights;
  }

  // DIIS only: refuse to run if the error is above the threshold
  if(!useadiis) {
    if(maxerr>diisthr)
      throw std::runtime_error("DIIS error too large.\n");

    arma::vec weights=get_w_diis();
    if(verbose) {
      printf("DIIS weights\n");
      print_mat(weights.t(),"% .2e ");
    }
    return weights;
  }

  // Both enabled: DIIS fraction on a sliding scale, clamped to [0,1]
  double mix=std::max(std::min(1.0 - (maxerr-diisthr)/(diiseps-diisthr), 1.0), 0.0);

  if(cooloff>0) {
    // Still cooling off: pure ADIIS for this step
    mix=0.0;
    cooloff--;
  } else {
    // Start a cool-off period if the last energy rose by more than COOLTHR
    const arma::vec energies=get_energies();
    const bool energy_rose = energies.n_elem>1 &&
      energies(energies.n_elem-1)-energies(energies.n_elem-2) > COOLTHR;
    if(energy_rose) {
      cooloff=2;
      mix=0.0;
    }
  }

  const arma::vec adiis_w=get_w_adiis();
  const arma::vec cdiis_w=get_w_diis();
  arma::vec weights=mix*cdiis_w + (1.0-mix)*adiis_w;

  if(verbose) {
    printf("ADIIS weights\n");
    print_mat(adiis_w.t(),"% .2e ");
    printf("CDIIS weights\n");
    print_mat(cdiis_w.t(),"% .2e ");
    printf(" DIIS weights\n");
    print_mat(weights.t(),"% .2e ");
  }

  return weights;
}
int main(int argc, char *argv[]) { // export OMP_NUM_THREADS=1 float **A, **B, **C; // matrices int d1, d2, d3; // dimensions of matrices int i, j, k; // loop variables double start, end; start = omp_get_wtime(); /* print user instruction */ if (argc != 4) { printf ("Matrix multiplication: C = A x B\n"); printf ("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]); return 0; } /* read user input */ d1 = atoi(argv[1]); // rows of A and C d2 = atoi(argv[2]); // cols of A and rows of B d3 = atoi(argv[3]); // cols of B and C printf("Matrix sizes C[%d][%d] = A[%d][%d] x B[%d][%d]\n", d1, d3, d1, d2, d2, d3); /* prepare matrices */ A = alloc_mat(d1, d2); init_mat(A, d1, d2); B = alloc_mat(d2, d3); init_mat(B, d2, d3); C = alloc_mat(d1, d3); // no initialisation of C, because it gets filled by matmult /* serial version of matmult */ printf("Perform matrix multiplication...\n"); /* spezielle collapse-Schleife, um über ein mehrdimensionales Array zu iterieren Schleifen müssen sehr einfach gehalten sein, damit Parallelisierung erfolgen kann Alle Schleifenvariablen müssen völlig unabhängig voneinander sein. Sind es aber nicht, die Ausgabe war Fehlerhaft. */ double sum; // #pragma omp parallel for collapse(3) schedule(dynamik) for (i = 0; i < d1; i++) for (j = 0; j < d3; j++) #pragma omp parallel for private(sum)// Rechenintensive Operation wird parallelisiert. for (k = 0; k < d2; k++) { // Nur hier darf beliebiger Code stehen! wenn collaps verwendet würde sum = A[i][k] * B[k][j]; #pragma omp atomic C[i][j] += sum; } /* test output */ print_mat(A, d1, d2, "A"); print_mat(B, d2, d3, "B"); print_mat(C, d1, d3, "C"); printf ("\nDone.\n"); end = omp_get_wtime(); printf("This task took %f seconds\n", end-start); return 0; }
/**
 * Create an initial model and write it into output_dir.
 *
 * Writes four files:
 *   init.theta  - num_labels x num_topics, log of a random distribution
 *                 over topics per label (each row normalised by its sum)
 *   init.phi    - num_topics x num_words, log(1/num_words) everywhere
 *   init.pi     - num_labels x 1, values myrandom() * 0.5 + 1
 *   info.txt    - the three dimensions in text form
 *
 * @param output_dir  directory the files are written into
 * @param num_words   number of words
 * @param num_labels  number of labels
 * @param num_topics  number of topics
 *
 * On allocation failure nothing is written; if info.txt cannot be opened
 * the matrices are still written and an error is reported on stderr.
 */
void init_model(char* output_dir, int num_words, int num_labels, int num_topics){
    char log_theta_file[1000];
    char log_phi_file[1000];
    char pi_file[1000];
    char info_file[1000];

    /* snprintf bounds the write so a long output_dir cannot overflow the buffers */
    snprintf(log_theta_file, sizeof(log_theta_file), "%s/init.theta", output_dir);
    snprintf(log_phi_file, sizeof(log_phi_file), "%s/init.phi", output_dir);
    snprintf(pi_file, sizeof(pi_file), "%s/init.pi", output_dir);
    snprintf(info_file, sizeof(info_file), "%s/info.txt", output_dir);

    /* element counts computed in size_t to avoid int overflow in the product */
    const size_t n_theta = (size_t) num_labels * (size_t) num_topics;
    const size_t n_phi = (size_t) num_topics * (size_t) num_words;
    const size_t n_pi = (size_t) num_labels;

    double* log_theta = (double*) calloc(n_theta, sizeof(double));
    double* log_phi = (double*) calloc(n_phi, sizeof(double));
    double* pi = (double*) calloc(n_pi, sizeof(double));

    /* calloc may legitimately return NULL for a zero-sized request */
    if ((n_theta > 0 && log_theta == NULL) ||
        (n_phi > 0 && log_phi == NULL) ||
        (n_pi > 0 && pi == NULL)) {
        fprintf(stderr, "init_model: out of memory\n");
        free(log_theta);
        free(log_phi);
        free(pi);
        return;
    }

    for (int i = 0; i < num_labels; i++) {
        pi[i] = myrandom() * 0.5 + 1;

        /* draw raw weights, then normalise by their sum and take the log */
        double temp = 0;
        for (int k = 0; k < num_topics; k++) {
            double v = myrandom();
            temp += v;
            log_theta[i * num_topics + k] = v;
        }
        for (int k = 0; k < num_topics; k++)
            log_theta[i*num_topics + k] = log(log_theta[i*num_topics + k] / temp);
    }

    /* uniform word distribution for every topic */
    const double log_uniform = log(1.0/num_words);
    for (int k = 0; k < num_topics; k++) {
        for (int i = 0; i < num_words; i++)
            log_phi[k*num_words + i] = log_uniform;
    }

    print_mat(log_theta, num_labels, num_topics, log_theta_file);
    print_mat(log_phi, num_topics, num_words, log_phi_file);
    print_mat(pi, num_labels, 1, pi_file);

    FILE* info_fp = fopen(info_file,"w");
    if (info_fp != NULL) {
        fprintf(info_fp, "num_labels: %d\nnum_words: %d\nnum_topics: %d\n", num_labels, num_words, num_topics);
        fclose(info_fp);
    } else {
        perror(info_file);
    }

    free(log_theta);
    free(pi);
    free(log_phi);
}
int main(int argc, char** argv) { realtype **a = newDenseMat(NROWS, NCOLS); realtype **b = newDenseMat(NROWS, NCOLS); sundials_ml_index p[NROWS] = { 0.0 }; realtype s[NROWS] = { 5.0, 18.0, 6.0 }; int i, j; for (i=0; i < NROWS; ++i) { for (j=0; j < NCOLS; ++j) { a[j][i] = a_init[i][j]; } } printf("initially: a=\n"); print_mat(a, NROWS, NCOLS); printf("\n"); #if SUNDIALS_LIB_VERSION >= 260 { realtype x[NCOLS] = { 1.0, 2.0, 3.0 }; realtype y[NROWS] = { 0.0 }; printf("matvec: y=\n"); denseMatvec(a, x, y, NROWS, NCOLS); print_vec(y, NROWS); printf("\n"); } #endif denseCopy(a, b, NROWS, NCOLS); denseScale(2.0, b, NROWS, NCOLS); printf("scale copy x2: b=\n"); print_mat(b, NROWS, NCOLS); printf("\n"); denseAddIdentity(b, NROWS); printf("add identity: b=\n"); print_mat(b, NROWS, NCOLS); printf("\n"); denseGETRF(a, NROWS, NCOLS, p); printf("getrf: a=\n"); print_mat(a, NROWS, NCOLS); printf("\n p=\n"); print_pivots(p, NROWS); printf("\n"); denseGETRS(a, NROWS, p, s); printf("getrs: s=\n"); print_vec(s, NROWS); destroyMat(a); destroyMat(b); return 0; }
/**
 * Allocate a dosc_t with two classifiers and optionally load them.
 *
 * Both classifiers are created with create_nbc(2, 1). When `in` is
 * non-NULL the TCP SYN classifier and then the UDP classifier are read
 * from it with load_nbc (in that order) and their training matrices are
 * printed.
 *
 * @param in  stream to load from, or NULL to get untrained classifiers
 * @return the new object, or NULL if it could not be allocated
 */
dosc_t *load_dosc(FILE *in)
{
    dosc_t *dosc = (dosc_t *)malloc(sizeof(dosc_t));
    if (dosc == NULL)
        return NULL;    /* previously dereferenced unconditionally */

    dosc->tcpsyn_nbc = create_nbc(2, 1);
    dosc->udp_nbc = create_nbc(2, 1);

    if (in) {
        load_nbc(in, dosc->tcpsyn_nbc);
        load_nbc(in, dosc->udp_nbc);

        printf("TCP SYN Trained:\n");
        print_mat(dosc->tcpsyn_nbc->training);
        printf("UDP Trained:\n");
        print_mat(dosc->udp_nbc->training);
    }

    return dosc;
}
/**
 * Read a list of integers and k from stdin and print all combinations
 * produced by get_combs(items, k, num_items).
 *
 * @return 0 on success, 1 on invalid input or allocation failure.
 */
int main()
{
    int num_items;
    int* items;
    int i, k;
    int** combs;

    printf("How many items do you have: ");
    if (scanf("%d", &num_items) != 1 || num_items <= 0) {
        fprintf(stderr, "Invalid number of items.\n");
        return 1;
    }

    items = (int*) malloc(num_items * sizeof(int));
    if (items == NULL) {
        fprintf(stderr, "Out of memory.\n");
        return 1;
    }

    printf("Enter your items: ");
    for (i = 0; i < num_items; i++) {
        if (scanf("%d", &items[i]) != 1) {
            fprintf(stderr, "Invalid item.\n");
            free(items);
            return 1;
        }
    }

    printf("Enter k: ");
    if (scanf("%d", &k) != 1 || k < 0 || k > num_items) {
        fprintf(stderr, "k must be between 0 and the number of items.\n");
        free(items);
        return 1;
    }

    combs = get_combs(items, k, num_items);
    print_mat(combs, num_combs(num_items, k), k);

    free(items);
    free_mat(combs, num_combs(num_items, k), k);
    return 0;
}
void main() { int mat[3]; int i,j,k,l,col1,found=0; for(i=0;i<n;i++) for(j=0;j<n;j++) for(k=0;k<n;k++) { if(list[i]!=list[j] && list[i]!=list[k] && list[j]!=list[k])//al distinct not a!=b && !=c this does not mean b!=c ..it may be b=c { col1=(list[i]/100)*100+(list[j]/100)*10+(list[k]/100);// check for 1st colum if exists then only send check if(search(list,col1)) { mat[0]=list[i]; mat[1]=list[j]; mat[2]=list[k]; if(check_mat(mat)!=0) { printf("matrix exists..\n"); print_mat(mat);found=1; } } } } if(!found) { printf("matrix does NOT exists... .\n"); } }
int main (int argc, char **argv) { //~ char *host = argv[1]; char *host = "127.0.0.1"; enum clnt_stat stat; mat2 res; mats2 donnees; fill_entiers2(&donnees.m1.l1, 1, 2); fill_entiers2(&donnees.m1.l2, 3, 4); fill_entiers2(&donnees.m2.l1, 1, 2); fill_entiers2(&donnees.m2.l2, 3, 4); int procnum; if (argv[1][0]=='a') procnum = APROCNUM; if (argv[1][0]=='m') procnum = MPROCNUM; stat = callrpc(/* host */ host, /* prognum */ PROGNUM, /* versnum */ VERSNUM, /* procnum */ procnum, /* encodage argument */ (xdrproc_t) xdr_mats2, /* argument */ (char *)&donnees, /* decodage retour */ (xdrproc_t)xdr_mat2, /* retour de la fonction distante */(char *)&res); if (stat != RPC_SUCCESS) { fprintf(stderr, "Echec de l'appel distant\n"); clnt_perrno(stat); fprintf(stderr, "\n"); } else { print_mat(&donnees.m1); printf("\n\n"); print_mat(&donnees.m2); printf("\n\n"); print_mat(&res); printf("\n"); } return(0); }
/*
** Builds the three single-angle matrices with rotalpha / rotbeta /
** rotgamma and stores (matalpha . matbeta) . matgamma in ret.
** The alpha matrix, the intermediate product and the final result are
** each printed with print_mat.
*/
void	rot(t_mat ret, const float alpha, const float beta, const float gamma)
{
	t_mat	m_alpha;
	t_mat	m_beta;
	t_mat	m_gamma;
	t_mat	partial;

	/* clear the intermediate and the output before they are written to */
	ft_bzero(partial, MAT_SIZE);
	ft_bzero(ret, MAT_SIZE);

	/* one matrix per angle */
	rotalpha(alpha, m_alpha);
	rotbeta(beta, m_beta);
	rotgamma(gamma, m_gamma);
	print_mat(m_alpha);

	/* partial = alpha . beta */
	dot_mat(m_alpha, m_beta, partial);
	print_mat(partial);

	/* ret = partial . gamma */
	dot_mat(partial, m_gamma, ret);
	print_mat(ret);
}
int main() { int m[MAX_SIZE][MAX_SIZE]; // creat matrix int i=0,j=0; // counter for initializing for(i=0;i<MAX_SIZE;i++) // initiliaze the matrix for(j=0;j<MAX_SIZE;j++) m[i][j]=0; fill(m,9); //test for size 9 print_mat(m,9); fill(m,10); //test for size 10 print_mat(m,10); return 0; }
int main(int argc, char *argv[]) { float **A, **B, **C; // matrices int d1, d2, d3; // dimensions of matrices int i, j, k; // loop variables /* print user instruction */ if (argc != 4) { printf ("Matrix multiplication: C = A x B\n"); printf ("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]); return 0; } /* read user input */ d1 = atoi(argv[1]); // rows of A and C d2 = atoi(argv[2]); // cols of A and rows of B d3 = atoi(argv[3]); // cols of B and C printf("Matrix sizes C[%d][%d] = A[%d][%d] x B[%d][%d]\n", d1, d3, d1, d2, d2, d3); /* prepare matrices */ A = alloc_mat(d1, d2); init_mat(A, d1, d2); B = alloc_mat(d2, d3); init_mat(B, d2, d3); C = alloc_mat(d1, d3); // no initialisation of C, because it gets filled by matmult /* serial version of matmult */ printf("Perform matrix multiplication...\n"); for (i = 0; i < d1; i++) for (j = 0; j < d3; j++) for (k = 0; k < d2; k++) C[i][j] += A[i][k] * B[k][j]; /* test output */ print_mat(A, d1, d2, "A"); print_mat(B, d2, d3, "B"); print_mat(C, d1, d3, "C"); printf ("\nDone.\n"); return 0; }
int main() { std::vector<char> v(20, 'a'); // create std vector of 20 chars and initialize all with the value 'a' print_row(v); std::cout << std::endl; // create a 20 x 15 matrix: // make a std vector of 20 standard vectors, each initialized to be a standard vector of size 15 std::vector< std::vector<int> > M(20, std::vector<int>(15)); // be sure to put spaces between >s print_mat(M); }
/**
 * Read matrix dimensions from stdin, then create, fill and print a
 * matrix of that size.
 *
 * @return 0 on success, 1 on invalid input or if create() returns NULL.
 */
int main(){
    int m, n;
    float **mat;

    printf("\nType the size of the matrix: ");
    /* m and n used to stay uninitialised when the input could not be parsed */
    if (scanf("%d %d", &m, &n) != 2 || m <= 0 || n <= 0) {
        fprintf(stderr, "Invalid matrix size.\n");
        return 1;
    }

    mat = create(m, n);
    if (mat == NULL) {
        fprintf(stderr, "Could not create the matrix.\n");
        return 1;
    }

    mat = fill(m, n, mat);
    print_mat(m, n, mat);

    /* NOTE(review): the matrix is never released; no matching free routine
       is visible from here. */
    return 0;
}
/**
 * USAGE: ./a.out 10
 *
 * Builds an n x n matrix of random values in [0, 99], prints it, calls
 * rotate_90_degree(arr, n, 0) on it and prints it again.
 *
 * Returns 0 on success, 1 on a missing/invalid size or allocation failure.
 **/
int main(int argc, char *argv[]){
	if(argc<2)
		return 1;

	srand(time(0));

	int n=atoi(argv[1]);
	/* atoi yields 0 for non-numeric input; a negative n would turn into a
	   huge allocation request */
	if(n<=0){
		fprintf(stderr,"Matrix size must be a positive integer.\n");
		return 1;
	}

	int **arr=(int **)malloc(n*sizeof(int*));
	if(arr==NULL){
		fprintf(stderr,"Out of memory.\n");
		return 1;
	}
	for(int i=0;i<n;i++){
		arr[i]=(int *)malloc(n*sizeof(int));
		if(arr[i]==NULL){
			fprintf(stderr,"Out of memory.\n");
			/* release the rows allocated so far */
			for(int r=0;r<i;r++)
				free(arr[r]);
			free(arr);
			return 1;
		}
	}

	for(int i=0;i<n;i++)
		for(int j=0;j<n;j++)
			arr[i][j]=rand()%100;

	printf("\nInput=\n");
	print_mat(arr,n);

	rotate_90_degree(arr,n,0);

	printf("\nOutput=\n");
	print_mat(arr,n);

	for(int i=0;i<n;i++)
		free(arr[i]);
	free(arr);
	return 0;
}
int main(int argc, char** argv) { int i, j, k = 1; parse_opt( argc, argv ); for( i = 0; i < NDIM; i++ ) { for( j = 0; j < NDIM; j++ ) { a[i][j] = k; b[i][j] = k; k++; } } timer_start(1); mat_mul( c, a, b ); timer_stop(1); printf("Time elapsed : %lf sec\n", timer_read(1)); if( validation ) check_mat_mul( c, a, b ); if( print_matrix ) { printf("MATRIX A: \n"); print_mat(a); printf("MATRIX B: \n"); print_mat(b); printf("MATRIX C: \n"); print_mat(c); } return 0; }
/*
 * Reads a data set from "dados.txt" (number of observations, number of
 * dimensions, then Ne x tr values), calls mat_cov on it and writes the
 * three resulting tr x tr matrices to Covar.csv, Corr.csv and Star.csv.
 *
 * Returns 0 on success, 1 if the input cannot be read or memory cannot be
 * allocated.
 */
int main (){
  int j, k, Ne, tr;
  double **data, **Covar, **Corr, **Star;
  FILE *Dados;

  Dados = fopen("dados.txt", "r");
  if (Dados == NULL) {
    perror("dados.txt");
    return 1;
  }

  /* header: number of observations, then number of data dimensions */
  if (fscanf(Dados, "%d", &Ne) != 1 || fscanf(Dados, "%d", &tr) != 1 ||
      Ne <= 0 || tr <= 0) {
    fprintf(stderr, "dados.txt: invalid header\n");
    fclose(Dados);
    return 1;
  }

  /* arrays of row pointers are sized with sizeof(double *), not sizeof(double) */
  data  = (double **) malloc (Ne*sizeof(double *));
  Covar = (double **) malloc (tr*sizeof(double *));
  Corr  = (double **) malloc (tr*sizeof(double *));
  Star  = (double **) malloc (tr*sizeof(double *));
  if (data == NULL || Covar == NULL || Corr == NULL || Star == NULL) {
    fprintf(stderr, "out of memory\n");
    fclose(Dados);
    return 1;
  }

  for ( j = 0; j < Ne; j++) {
    data[j] = (double *) malloc (tr*sizeof(double));
    if (data[j] == NULL) {
      fprintf(stderr, "out of memory\n");
      fclose(Dados);
      return 1;
    }
  }
  for ( j = 0; j < tr; j++) {
    Covar[j] = (double *) malloc (tr*sizeof(double));
    Corr[j]  = (double *) malloc (tr*sizeof(double));
    Star[j]  = (double *) malloc (tr*sizeof(double));
    if (Covar[j] == NULL || Corr[j] == NULL || Star[j] == NULL) {
      fprintf(stderr, "out of memory\n");
      fclose(Dados);
      return 1;
    }
  }

  /* one observation per row */
  for ( k = 0; k < Ne; k++)
    for (j = 0; j < tr; j++)
      if (fscanf(Dados, "%lf", &data[k][j]) != 1) {
        fprintf(stderr, "dados.txt: missing or invalid value at row %d, column %d\n", k, j);
        fclose(Dados);
        return 1;
      }
  fclose(Dados);

  mat_cov (data, Ne, tr, Covar, Corr, Star);

  print_mat(Covar, "Covar.csv", tr, tr);
  print_mat(Corr, "Corr.csv", tr, tr);
  print_mat(Star, "Star.csv", tr, tr);

  for ( j = 0; j < Ne; j++)
    free(data[j]);
  for ( j = 0; j < tr; j++) {
    free(Covar[j]);
    free(Corr[j]);
    free(Star[j]);
  }
  free(data);
  free(Covar);
  free(Corr);
  free(Star);

  return 0;
}
int exec_cmd_mat(char *t,int l_t) { node n,n1; trunc_str(&t,&l_t); store_fn(t,l_t,&n); n.method->eval_n(&n,&n1); if(n1.node_type==ar_mat) { printf("\n-----------------------------------------------\n"); print_mat(n1.d.mat_ptr); printf("\n-----------------------------------------------\n"); } return 0; }
int main() { int m[ MAX_SIZE][ MAX_SIZE],m2[ MAX_SIZE][ MAX_SIZE]; // creat matrix int i=0,j=0; // counter for initializing for( i = 0 ; i < MAX_SIZE ; i++) // initiliaze the matrix for( j = 0 ; j < MAX_SIZE ; j++) { m[ i ][ j ] = 0; m2[ i][ j ] = 0; } int val=0,val2=0; //test for size 9 fill(m,9,0,&val); print_mat(m,9); //test for size 10 fill(m2,10,0,&val2); print_mat(m2,10); return 0; }
/*
 * Computes S = Madj * Tadj * M in integer arithmetic, where Madj and Tadj
 * are the adjoints of M and T, and reduces S together with the scale
 * factor Mdet * Tdet to lowest integer terms by dividing out their
 * greatest common divisor. Prints the scale factor and M, T and S.
 *
 * Returns 0 on success, 1 if the common divisor is zero.
 */
int main ()
{
  int i, j, scale, gcd, C[N][N], S[N][N], Madj[N][N], Tadj[N][N], Mdet, Tdet;

  Tdet = adjoint (T, Tadj);          /* inverse without division by */
  Mdet = adjoint (M, Madj);          /* determinant of T and M */

  matmult (Madj, Tadj, C);
  matmult (C, M, S);                 /* Madj*Tadj*M -> S */

  scale = gcd = Mdet * Tdet;         /* scale factors of both determinants */

  for (i = 0; i < N; i++)            /* find the greatest common */
    {                                /* denominator of S and determinants */
      for (j = 0; j < N; j++)
        gcd = Gcd (gcd, S[i][j]);
    }

  /* a zero divisor would make both divisions below undefined */
  if (gcd == 0)
    {
      fprintf (stderr, "cannot normalise: common divisor is zero\n");
      return 1;
    }

  scale /= gcd;                      /* divide everything by gcd to get */

  for (i = 0; i < N; i++)            /* matrix and scale factor in lowest */
    {                                /* integer terms possible */
      for (j = 0; j < N; j++)
        S[i][j] /= gcd;
    }

  printf ("scale factor = 1/%d ", scale);
  print_mat ("M=", M, N);            /* display the results */
  print_mat ("T=", T, N);
  print_mat ("S=", S, N);            /* subdivision matrix */

  return 0;
}
/*
 * Train `nbc` for one class from one input stream: load the stream into a
 * temporary list, convert the list into a matrix, release the list, train
 * on the matrix and release the matrix.
 */
static void train_udp_nbc_class(nbc_t *nbc, int label, FILE *in)
{
    list_t *packets = create_list();
    mat_t *features;

    load_udpp_file(in, packets);
    features = create_udp_mat(packets);

    /* the list is not needed once the matrix exists */
    destroy_list_nodes(packets);
    destroy_list(&packets);

    /* (a debug print_mat(features) can be inserted here) */
    nbc_train(nbc, label, features);
    destroy_mat(&features);
}

/*
 * Train the UDP classifier: class 0 from in_normal, then class 1 from
 * in_attack, and finally print the classifier's training matrix.
 */
void train_udp_nbc(nbc_t *nbc, FILE *in_normal, FILE *in_attack)
{
    train_udp_nbc_class(nbc, 0, in_normal);   /* normal traffic */
    train_udp_nbc_class(nbc, 1, in_attack);   /* attack traffic */

    printf("UDP Trained:\n");
    print_mat(nbc->training);
}
/** **/ int main (int argc, char* argv[]) { int WORK_DIM = 2; // Wie viele Dimensionen hat der Indexraum? std::chrono::time_point<std::chrono::system_clock> s_start, s_end, p_start, p_end; // Lese den Kernel dynamisch ein: (uebernommen von Foliensatz 9, Folie 20) FILE *fp; const char *FileName = "matmult.cl"; char *KernelSource; fp = fopen(FileName, "r"); if (!fp) { printf("Can't open kernel source: %s", FileName); exit(1); } KernelSource = (char *)malloc(MAX_SOURCE_SIZE); size_t kernel_s_size = fread(KernelSource, 1, MAX_SOURCE_SIZE, fp); fclose(fp); cl_int err; cl_platform_id* platforms = NULL; char platform_name[1024]; cl_device_id device_id = NULL; cl_uint num_of_platforms = 0, num_of_devices = 0; cl_context context; cl_kernel kernel; cl_command_queue command_queue; cl_program program; err = clGetPlatformIDs(0, NULL, &num_of_platforms); if (err != CL_SUCCESS) { printf("No platforms found. Error: %d\n", err); return 0; } // Liefert Plattformen platforms = (cl_platform_id *)malloc(num_of_platforms); err = clGetPlatformIDs(num_of_platforms, platforms, NULL); if (err != CL_SUCCESS) { printf("No platforms found. Error: %d\n", err); return 0; } else { int nvidia_platform = 0; // Speichert den Rang der letzten NVIDIA-Plattform for (unsigned int i=0; i<num_of_platforms; i++) // Fuer jede Plattform: { clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL); if (err != CL_SUCCESS) { printf("Could not get information about platform. Error: %d\n", err); return 0; } if (strstr(platform_name, "NVIDIA") != NULL) { // Falls die Plattform eine NVIDIA-Plattform ist: Speichere ihren Rang nvidia_platform = i; break; } } // Gibt die ID des Devices der NVIDIA-Plattform zurueck err = clGetDeviceIDs(platforms[nvidia_platform], CL_DEVICE_TYPE_GPU, 1, &device_id, &num_of_devices); if (err != CL_SUCCESS) { printf("Could not get device in platform. 
Error: %d\n", err); return 0; } } // Erschaffe einen OpenCl-context, in dem OpenCl-Datenobjekte verwaltet werden koennen context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); if (err != CL_SUCCESS) { printf("Unable to create context. Error: %d\n", err); return 0; } // Initialisiere eine Befehlswarteschleife, die Befehle fuer OpenCl-Objekte speichern kann command_queue = clCreateCommandQueue(context, device_id, 0, &err); if (err != CL_SUCCESS) { printf("Unable to create command queue. Error: %d\n", err); return 0; } // Initialisiere ein Programm und spezifiziere, aus welchem Code dieses kompiliert werden soll program = clCreateProgramWithSource(context, 1, (const char **)&KernelSource, (const size_t *)& kernel_s_size, &err); if (err != CL_SUCCESS) { printf("Unable to create program. Error: %d\n", err); return 0; } // Kompiliere das Programm zur Laufzeit err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { // Zeige Compilermeldungen an: (uebernommen von Foliensatz 9, Folie 23) char *log; size_t size; clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size); log = (char *)malloc(size+1); if (log) { clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL); log[size] = '\0'; printf("%s", log); free(log); } printf("Error building program. Error: %d\n", err); return 0; } // Erschaffe einen Kernel und lade oben kompiliertes Programm ein kernel = clCreateKernel(program, "matmult", &err); if (err != CL_SUCCESS) { printf("Error setting kernel. 
Error: %d\n", err); return 0; } float **A, **B, **C; // Matrizen int dim1, dim2, dim3; // Matrixdimensionen dim1 = D1; // Zeilen von A, Zeilen von C dim2 = D2; // Spalten von A, Zeilen von B dim3 = D3; // Spalten von B, Spalten von C A = alloc_mat(dim1, dim2); B = alloc_mat(dim2, dim3); C = alloc_mat(dim1, dim3); init_mat(A, dim1, dim2); init_mat(B, dim2, dim3); cl_mem in_A, in_B, output; // float data[DATA_SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; size_t global[1] = {dim1*dim3}; // Dimensionen von C size_t global_two[2] = {dim1, dim3}; in_A = clCreateBuffer (context, CL_MEM_READ_ONLY, sizeof(float)*dim1*dim2, NULL, &err); in_B = clCreateBuffer (context, CL_MEM_READ_ONLY, sizeof(float)*dim2*dim3, NULL, &err); output = clCreateBuffer (context, CL_MEM_WRITE_ONLY, sizeof(float)*dim1*dim3, NULL, &err); clEnqueueWriteBuffer(command_queue, in_A, CL_TRUE, 0, sizeof(float)*dim1*dim2, *A, 0, NULL, NULL); clEnqueueWriteBuffer(command_queue, in_B, CL_TRUE, 0, sizeof(float)*dim2*dim3, *B, 0, NULL, NULL); clSetKernelArg(kernel, 0, sizeof(cl_mem), &in_A); clSetKernelArg(kernel, 1, sizeof(cl_mem), &in_B); clSetKernelArg(kernel, 2, sizeof(cl_mem), &output); // clSetKernelArg(kernel, 3, sizeof(int), &dim2); // clSetKernelArg(kernel, 4, sizeof(int), &dim3); clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); if (WORK_DIM == 2) { clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, global_two, NULL, 0, NULL, NULL); } // Zeitmessung fuer parallele Version p_start = std::chrono::system_clock::now(); err = clFinish(command_queue); p_end = std::chrono::system_clock::now(); std::chrono::duration<double> p_duration = p_end - p_start; if (err == CL_INVALID_COMMAND_QUEUE ) { printf("CL_INVALID_COMMAND_QUEUE: %d\n", err); return 0; } clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, sizeof(float)*dim1*dim3, *C, 0, NULL, NULL); // Ueberpruefe, ob serielle Version und parallele gleich sind: float **correct_matrix; correct_matrix = alloc_mat(dim1, dim3); 
s_start = std::chrono::system_clock::now(); // Zeitmessung fuer serielle Version correct_matrix = mult_mat(A, B, dim1, dim2, dim3); s_end = std::chrono::system_clock::now(); std::chrono::duration<double> s_duration = s_end - s_start; is_correct(C, correct_matrix, dim1, dim3); // Numerischer Korrektheitsbeweis print_mat(C, dim1, dim3, "C = "); print_mat(correct_matrix, dim1, dim3, "correct_matrix = "); // printf("Kernel execution time: %f\n", t_end-t_start); clReleaseMemObject(in_A); clReleaseMemObject(in_B); clReleaseMemObject(output); clReleaseProgram(program); clReleaseKernel(kernel); err = clReleaseCommandQueue(command_queue); //!! if (err != CL_SUCCESS) { printf("Error releasing command queue: %d\n", err); return 0; } clReleaseContext(context); printf("Dauer der seriellen Version: %.2f Millisekunden\n", s_duration.count() * 1000); printf("Dauer der parallelen Version: %.2f Millisekunden\n", p_duration.count() * 1000); printf("Erhaltenes Speed Up: %.2f \n", p_duration.count() / p_duration.count()); return 0; }
int main() { /** Step 1: Read the Coordinate Data * */ FILE *xyzfile; xyzfile = fopen("h2o_geom.txt", "r"); int natom; fscanf(xyzfile, "%d", &natom); Molecule mol(natom, 0); for (int i = 0; i < natom; i++) fscanf(xyzfile, "%lf %lf %lf %lf", &mol.zvals[i], &mol.geom[i][0], &mol.geom[i][1], &mol.geom[i][2]); fclose(xyzfile); /** Step 2: Read the Cartesian Hessian Data * */ FILE *hessfile; hessfile = fopen("h2o_hessian.txt", "r"); int hessatom; fscanf(hessfile, "%d", &hessatom); if (fabs(natom-hessatom) > 0) { printf("The number of atoms doesn't match.\n"); return -1; } double **H = new double* [3*natom]; for (int i = 0; i < 3*natom; i++) H[i] = new double[3*natom]; for (int i = 0; i < 3*natom; i++) for (int j = 0; j < natom; j++) fscanf(hessfile, "%lf %lf %lf", &H[i][3*j], &H[i][3*j+1], &H[i][3*j+2]); fclose(hessfile); printf("Hessian:\n"); print_mat(H, 3*natom, 3*natom); printf("\n"); /** Step 3: Mass-Weight the Hessian Matrix * Divide each element of the Hessian matrix by the product of square roots of the masses of the atoms associated with the given coordinates: * \vect{F}_{M}^{ij} = \frac{F_{ij}}{\sqrt{m_{i}m_{j}}} */ double **Hmw = new double* [3*natom]; for (int i = 0; i < 3*natom; i++) Hmw[i] = new double[3*natom]; double mi, mj, mimj; for (int i = 0; i < natom; i++) { for (int j = 0; j < natom; j++) { mi = masses[(int)mol.zvals[i]]; mj = masses[(int)mol.zvals[j]]; mimj = sqrt(mi*mj); Hmw[i*natom+0][j*natom+0] = H[i*natom+0][j*natom+0]/mimj; Hmw[i*natom+0][j*natom+1] = H[i*natom+0][j*natom+1]/mimj; Hmw[i*natom+0][j*natom+2] = H[i*natom+0][j*natom+2]/mimj; Hmw[i*natom+1][j*natom+0] = H[i*natom+1][j*natom+0]/mimj; Hmw[i*natom+1][j*natom+1] = H[i*natom+1][j*natom+1]/mimj; Hmw[i*natom+1][j*natom+2] = H[i*natom+1][j*natom+2]/mimj; Hmw[i*natom+2][j*natom+0] = H[i*natom+2][j*natom+0]/mimj; Hmw[i*natom+2][j*natom+1] = H[i*natom+2][j*natom+1]/mimj; Hmw[i*natom+2][j*natom+2] = H[i*natom+2][j*natom+2]/mimj; } } printf("Mass-weighted Hessian:\n"); print_mat(Hmw, 3*natom, 
3*natom); printf("\n"); /** Step 4: Diagonalize the Mass-Weighted Hessian Matrix * Compute the eigenvalues of the mass-weighted Hessian: * \vect{F}^{M}\vect{L} = \vect{L}\vect{\Lambda} */ double *evals = new double[3*natom]; for (int i = 0; i < 3*natom; i++) evals[i] = 0.0; double **evecs = new double* [3*natom]; for (int i = 0; i < 3*natom; i++) evecs[i] = new double[3*natom]; diag(3*natom, 3*natom, Hmw, evals, false, evecs, 1e-19); for (int i = 0; i < 3*natom; i++) delete[] evecs[i]; delete[] evecs; printf("Mass-weighted Hessian eigenvalues:\n"); for (int i = 0; i < 3*natom; i++) printf("%12.10f\n", evals[i]); printf("\n"); /** Step 5: Compute the Harmonic Vibrational Frequencies * The vibrational frequencies are proportional to the square root of the eigenvalues of the mass-weighted Hessian: * \omega_{i} = \textrm{constant} \times \sqrt{\lambda_{i}} */ printf("Harmonic vibrational frequences [cm]^-1:\n"); for (int i = 0; i < 3*natom; i++) printf("%10.4f\n", sqrt(evals[i])*vib_constant); printf("\n"); /// Clean up after ourselves... for (int i = 0; i < 3*natom; i++) { delete[] H[i]; delete[] Hmw[i]; } delete[] H; delete[] Hmw; delete[] evals; return 0; }
int main(int argc, char** argv) { // ============================================= // vector init/add/sub/scale/normalize/dot/cross // ============================================= std::cout << "===========" << std::endl; std::cout << "vector init" << std::endl; std::cout << "===========" << std::endl << std::endl; { std::cout << "default c-tor:\t\t"; print_vec(glm::vec3()); std::cout << std::endl; std::cout << "arg c-tor (explicit):\t"; print_vec(glm::vec3(1, 2, 3)); std::cout << std::endl; std::cout << "arg c-tor (1 float):\t"; print_vec(glm::vec3(2)); std::cout << std::endl; std::cout << "arg c-tor (array):\t"; float arr[] = {1, 2, 3}; print_vec(glm::make_vec3(arr)); std::cout << std::endl; } std::cout << std::endl; std::cout << "==========" << std::endl; std::cout << "vector add" << std::endl; std::cout << "==========" << std::endl << std::endl; { glm::vec3 v1(1, 2, 3); glm::vec3 v2(4, 5, 6); std::cout << "v1:\t\t"; print_vec(v1); std::cout << std::endl; std::cout << "v2:\t\t"; print_vec(v2); std::cout << std::endl; std::cout << "v1+v2:\t\t"; print_vec(v1+v2); std::cout << std::endl; std::cout << "v1 += v2:\t"; v1 += v2; print_vec(v1); std::cout << std::endl; } std::cout << std::endl; std::cout << "==========" << std::endl; std::cout << "vector sub" << std::endl; std::cout << "==========" << std::endl << std::endl; { glm::vec3 v1(1, 2, 3); glm::vec3 v2(4, 5, 6); std::cout << "v1:\t\t"; print_vec(v1); std::cout << std::endl; std::cout << "v2:\t\t"; print_vec(v2); std::cout << std::endl; std::cout << "v1-v2:\t\t"; print_vec(v1-v2); std::cout << std::endl; std::cout << "v1 -= v2:\t"; v1 -= v2; print_vec(v1); std::cout << std::endl; } std::cout << std::endl; std::cout << "============" << std::endl; std::cout << "vector scale" << std::endl; std::cout << "============" << std::endl << std::endl; { glm::vec3 v(1, 2, 3); float k = 2; std::cout << "v:\t\t"; print_vec(v); std::cout << std::endl; std::cout << "k:\t\t" << k << std::endl; std::cout << "v*k:\t\tn/a"; 
//print_vec(v*k); std::cout << std::endl; std::cout << "v *= k:\t\t"; v *= k; print_vec(v); std::cout << std::endl; } std::cout << std::endl; std::cout << "================" << std::endl; std::cout << "vector normalize" << std::endl; std::cout << "================" << std::endl << std::endl; { glm::vec3 v(1, 2, 3); std::cout << "v:\t\t"; print_vec(v); std::cout << std::endl; std::cout << "normalize(v):\t"; print_vec(glm::normalize(v)); std::cout << std::endl; } std::cout << std::endl; std::cout << "==================" << std::endl; std::cout << "vector dot product" << std::endl; std::cout << "==================" << std::endl << std::endl; { glm::vec3 v1(1, 2, 3); glm::vec3 v2(4, 5, 6); std::cout << "v1:\t\t"; print_vec(v1); std::cout << std::endl; std::cout << "v2:\t\t"; print_vec(v2); std::cout << std::endl; std::cout << "v1*v2:\t\t"; print_vec(v1*v2); std::cout << std::endl; std::cout << "v1 *= v2:\t"; v1 *= v2; print_vec(v1); std::cout << std::endl; } std::cout << std::endl; std::cout << "====================" << std::endl; std::cout << "vector cross product" << std::endl; std::cout << "====================" << std::endl << std::endl; { glm::vec3 v1(1, 0, 0); glm::vec3 v2(0, 1, 0); std::cout << "v1:\t\t"; print_vec(v1); std::cout << std::endl; std::cout << "v2:\t\t"; print_vec(v2); std::cout << std::endl; std::cout << "cross(v1, v2):\t"; print_vec(glm::cross(v1, v2)); std::cout << std::endl; } std::cout << std::endl; // ======================================== // matrix init/transpose/invert/determinant // ======================================== std::cout << "===========" << std::endl; std::cout << "matrix init" << std::endl; std::cout << "===========" << std::endl << std::endl; { std::cout << "default c-tor:" << std::endl; print_mat(glm::mat4()); std::cout << std::endl; std::cout << "arg c-tor (explicit):" << std::endl; print_mat(glm::mat4( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); std::cout << std::endl; std::cout << "arg c-tor (1 float):" << 
std::endl; print_mat(glm::mat4(2)); std::cout << std::endl; std::cout << "arg c-tor (array):" << std::endl; float arr[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; print_mat(glm::make_mat4(arr)); } std::cout << std::endl; std::cout << "================" << std::endl; std::cout << "matrix transpose" << std::endl; std::cout << "================" << std::endl << std::endl; { glm::mat4 m; int n = 0; for(int i = 0; i<4; i++) { for(int j = 0; j<4; j++) m[i][j] = n++; } std::cout << "m:" << std::endl; print_mat(m); std::cout << std::endl; std::cout << "transpose(m):" << std::endl; print_mat(glm::transpose(m)); } std::cout << std::endl; std::cout << "==============" << std::endl; std::cout << "matrix inverse" << std::endl; std::cout << "==============" << std::endl << std::endl; { glm::mat4 m = glm::translate( glm::mat4(1), glm::vec3(10, 20, 30)); std::cout << "m (translate by [10, 20, 30]):" << std::endl; print_mat(m); std::cout << std::endl; std::cout << "inverse(m):" << std::endl; print_mat(glm::inverse(m)); } std::cout << std::endl; std::cout << "==================" << std::endl; std::cout << "matrix determinant" << std::endl; std::cout << "==================" << std::endl << std::endl; { glm::mat4 m = glm::translate( glm::mat4(2), glm::vec3(10, 20, 30)); std::cout << "m (scale by 2, translate by [10, 20, 30]):" << std::endl; print_mat(m); std::cout << std::endl; std::cout << "determinant(m):\t" << glm::determinant(m) << std::endl; } std::cout << std::endl; // vector matrix mult std::cout << "==================" << std::endl; std::cout << "vector-matrix mult" << std::endl; std::cout << "==================" << std::endl << std::endl; { glm::vec3 v(1, 2, 3); glm::mat4 m = glm::translate( glm::mat4(1), glm::vec3(10, 20, 30)); std::cout << "v:\t"; print_vec(v); std::cout << std::endl << std::endl; std::cout << "m (translate by [10, 20, 30]):" << std::endl; print_mat(m); std::cout << std::endl; std::cout << "m*v:\t"; print_vec(glm::vec3(m*glm::vec4(v, 1))); 
std::cout << std::endl; } std::cout << std::endl; std::cout << "==================" << std::endl; std::cout << "matrix-matrix mult" << std::endl; std::cout << "==================" << std::endl << std::endl; { glm::vec3 v(1, 2, 3); glm::mat4 m1 = glm::translate( glm::mat4(1), glm::vec3(10, 20, 30)); glm::mat4 m2 = glm::scale( glm::mat4(1), glm::vec3(2, 2, 2)); std::cout << "v:\t"; print_vec(v); std::cout << std::endl << std::endl; std::cout << "m1 (translate by [10, 20, 30]):" << std::endl; print_mat(m1); std::cout << std::endl; std::cout << "m2 (scale by 2):" << std::endl; print_mat(m2); std::cout << std::endl; std::cout << "m1*m2:" << std::endl; print_mat(m1*m2); std::cout << std::endl; std::cout << "m1*m2*v4:\t"; print_vec(glm::vec3(m1*m2*glm::vec4(v, 1))); std::cout << std::endl; } std::cout << std::endl; std::cout << "=============" << std::endl; std::cout << "vector rotate" << std::endl; std::cout << "=============" << std::endl << std::endl; { glm::vec3 v1(1, 0, 0); glm::vec3 v2(0, 1, 0); glm::vec3 v3(0, 0, 1); glm::mat4 m1 = glm::rotate( glm::mat4(1), 90.0f, glm::vec3(0.0f, 1.0f, 0.0f)); glm::mat4 m2 = glm::rotate( glm::mat4(1), 90.0f, glm::vec3(0.0f, 0.0f, 1.0f)); glm::mat4 m3 = glm::rotate( glm::mat4(1), 90.0f, glm::vec3(1.0f, 0.0f, 0.0f)); std::cout << "v1:\t"; print_vec(v1); std::cout << std::endl; std::cout << "v2:\t"; print_vec(v2); std::cout << std::endl; std::cout << "v3:\t"; print_vec(v3); std::cout << std::endl << std::endl; std::cout << "m1 (rotate +90 deg around y axis):" << std::endl; print_mat(m1); std::cout << std::endl; std::cout << "m2 (rotate +90 deg around z axis):" << std::endl; print_mat(m2); std::cout << std::endl; std::cout << "m3 (rotate +90 deg around x axis):" << std::endl; print_mat(m3); std::cout << std::endl; std::cout << "m1*v1:\t"; print_vec(glm::vec3(m1*glm::vec4(v1, 1))); std::cout << std::endl; std::cout << "m2*v2:\t"; print_vec(glm::vec3(m2*glm::vec4(v2, 1))); std::cout << std::endl; std::cout << "m3*v3:\t"; 
print_vec(glm::vec3(m3*glm::vec4(v3, 1))); std::cout << std::endl; } std::cout << std::endl; std::cout << "============" << std::endl; std::cout << "vector angle" << std::endl; std::cout << "============" << std::endl << std::endl; { glm::vec3 v1(1, 0, 0); glm::vec3 v2(0, 1, 0); glm::vec3 v3(-1, 0, 0); std::cout << "v1:\t"; print_vec(v1); std::cout << std::endl; std::cout << "v2:\t"; print_vec(v2); std::cout << std::endl; std::cout << "v3:\t"; print_vec(v3); std::cout << std::endl << std::endl; std::cout << "angle(v1, v2):\t" << glm::angle(v1, v2) << std::endl; std::cout << "angle(v2, v3):\t" << glm::angle(v2, v3) << std::endl; std::cout << "angle(v3, v1):\t" << glm::angle(v3, v1) << std::endl; } }
int main(int argc, char** argv) { int i; int j; pthread_mutex_init(&mp, NULL); /* Fill in matrix A and B with random numbers */ pthread_t tid[N][N]; int pos[N][N][2]; for(i = 0; i < N; i++) { for(j = 0; j < N; j++) { pos[i][j][0] = i; pos[i][j][1] = j; //Generate seed for each thread struct timeval tv; gettimeofday(&tv,NULL); seeds[i][j] = (unsigned int)tid + i * 10 + j * 13 + tv.tv_usec; pthread_create(&tid[i][j], NULL, write_mat, pos[i][j]); } } for(i = 0; i < N; i++) { for(j = 0; j < N; j++) { pthread_join(tid[i][j], NULL); } } printf("==== Matrix A ====\n"); print_mat(A); printf("\n"); printf("==== Matrix B ====\n"); print_mat(B); printf("\n"); /* Calculate C = A x B */ //TODO: Optimize: use DP //Too lazy to optimize, not sure how DP works in multithread pthread_t tids[N][N]; for(i = 0; i < N; i++) { for(j = 0; j < N; j++) { pos[i][j][0] = i; pos[i][j][1] = j; pthread_create(&tids[i][j], NULL, eval_mult, pos[i][j]); } } for(i = 0; i < N; i++) { for(j = 0; j < N; j++) { pthread_join(tids[i][j], NULL); } } printf("==== Matrix C ====\n"); print_mat(C); printf("\n"); /* Calculate the max row sum */ MAX_ROW_SUM = 0; pthread_t stid[N]; int row[N]; for(i = 0; i < N; i++) { row[i] = i; pthread_create(&stid[i], NULL, update_row_sum, &row[i]); } for(i = 0; i < N; i++) { pthread_join(stid[i], NULL); } printf("Max Sum: %d\n", MAX_ROW_SUM); pthread_mutex_destroy(&mp); return (EXIT_SUCCESS); }