예제 #1
0
/**
 * Compute the extrapolation weights for the stored iterates.
 *
 * The useadiis / usediis flags select the scheme:
 *  - ADIIS only: weights from get_w_adiis().
 *  - DIIS only:  weights from get_w_diis(); refused when the error exceeds diisthr.
 *  - both:       linear blend of the two weight sets. The DIIS share falls
 *                from 1 to 0 as the error grows from diisthr to diiseps, and
 *                is forced to 0 during a cool-off period that is started
 *                whenever the last energy rose by more than COOLTHR.
 *
 * @return the weight vector.
 * @throws std::runtime_error if neither scheme is enabled, or if only DIIS is
 *         enabled and the error is above diisthr.
 */
arma::vec DIIS::get_w() {
  // Largest absolute entry in the last column of the DIIS error matrix
  const arma::mat errmat=get_diis_error();
  const double maxerr=arma::max(arma::abs(errmat.col(errmat.n_cols-1)));

  // Nothing to do if both schemes are switched off
  if(!useadiis && !usediis)
    throw std::runtime_error("Nor DIIS or ADIIS has been turned on.\n");

  arma::vec weights;

  if(useadiis && usediis) {
    // Sliding scale between the two schemes, clamped to [0,1]
    const double ramp=1.0 - (maxerr-diisthr)/(diiseps-diisthr);
    double diisfrac=std::max(std::min(ramp, 1.0), 0.0);

    if(cooloff>0) {
      // Still cooling off: pure ADIIS for this step
      diisfrac=0.0;
      cooloff--;
    } else {
      // Start a cool-off period if the energy went up in the last step
      arma::vec energies=get_energies();
      const bool energy_rose=energies.n_elem>1 && energies(energies.n_elem-1)-energies(energies.n_elem-2) > COOLTHR;
      if(energy_rose) {
        cooloff=2;
        diisfrac=0.0;
      }
    }

    arma::vec w_adiis=get_w_adiis();
    arma::vec w_diis=get_w_diis();
    weights=diisfrac*w_diis + (1.0-diisfrac)*w_adiis;

    if(verbose) {
      printf("ADIIS weights\n");
      print_mat(w_adiis.t(),"% .2e ");
      printf("CDIIS weights\n");
      print_mat(w_diis.t(),"% .2e ");
      printf(" DIIS weights\n");
      print_mat(weights.t(),"% .2e ");
    }
  } else if(useadiis) {
    // ADIIS alone
    weights=get_w_adiis();
    if(verbose) {
      printf("ADIIS weights\n");
      print_mat(weights.t(),"% .2e ");
    }
  } else {
    // DIIS alone: only usable once the error is below the threshold
    if(maxerr>diisthr)
      throw std::runtime_error("DIIS error too large.\n");

    weights=get_w_diis();

    if(verbose) {
      printf("DIIS weights\n");
      print_mat(weights.t(),"% .2e ");
    }
  }

  return weights;
}
예제 #2
0
/*
 * Matrix multiplication C = A x B, parallelised with OpenMP.
 *
 * Usage: <prog> <NumRowA> <NumColA> <NumColB>
 * (set OMP_NUM_THREADS to control the number of threads, e.g.
 *  export OMP_NUM_THREADS=1 for a serial run)
 *
 * Returns 0 on success or when only the usage text was printed, 1 when the
 * given dimensions are not positive.
 */
int main(int argc, char *argv[])
{
    float **A, **B, **C;	// matrices
    int d1, d2, d3;         // dimensions of matrices
    int i, j, k;			// loop variables
    double start, end;
    start = omp_get_wtime();

    /* print user instruction */
    if (argc != 4)
    {
        printf ("Matrix multiplication: C = A x B\n");
        printf ("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]); 
        return 0;
    }

    /* read user input */
    d1 = atoi(argv[1]);		// rows of A and C
    d2 = atoi(argv[2]);     // cols of A and rows of B
    d3 = atoi(argv[3]);     // cols of B and C

    /* atoi yields 0 for non-numeric input; refuse empty or negative sizes */
    if (d1 <= 0 || d2 <= 0 || d3 <= 0)
    {
        fprintf(stderr, "All matrix dimensions must be positive integers.\n");
        return 1;
    }

    printf("Matrix sizes C[%d][%d] = A[%d][%d] x B[%d][%d]\n", d1, d3, d1, d2, d2, d3);

    /* prepare matrices */
    A = alloc_mat(d1, d2);
    init_mat(A, d1, d2); 
    B = alloc_mat(d2, d3);
    init_mat(B, d2, d3);
    C = alloc_mat(d1, d3);	// every element of C is assigned below

    printf("Perform matrix multiplication...\n");

    /*
     * The rows of C are distributed over the threads. Each C[i][j] is
     * computed by exactly one thread in a private accumulator and stored
     * once, so no atomic update is needed and the result does not depend
     * on C having been zeroed by alloc_mat. A single parallel region over
     * the outer loop also avoids creating a thread team for every (i, j).
     */
    #pragma omp parallel for private(j, k)
    for (i = 0; i < d1; i++)
    {
        for (j = 0; j < d3; j++)
        {
            float sum = 0.0f;
            for (k = 0; k < d2; k++)
                sum += A[i][k] * B[k][j];
            C[i][j] = sum;
        }
    }

    /* test output */
    print_mat(A, d1, d2, "A"); 
    print_mat(B, d2, d3, "B"); 
    print_mat(C, d1, d3, "C"); 

    printf ("\nDone.\n");

    end = omp_get_wtime();
    printf("This task took %f seconds\n", end-start);
    return 0;
}
예제 #3
0
/*
 * Write an initial model into output_dir.
 *
 * Files produced:
 *   init.theta  num_labels x num_topics, log of a randomly drawn, row-normalised distribution
 *   init.phi    num_topics x num_words, every entry log(1/num_words)
 *   init.pi     num_labels x 1, values myrandom() * 0.5 + 1
 *   info.txt    the three dimensions in text form
 *
 * output_dir  directory that receives the files (must already exist)
 * num_words   number of words
 * num_labels  number of labels
 * num_topics  number of topics
 *
 * On an allocation failure, or if info.txt cannot be opened, a message is
 * printed to stderr; nothing is returned to the caller.
 */
void init_model(char* output_dir, int num_words, int num_labels, int num_topics){
    char log_theta_file[1000];
    char log_phi_file[1000];
    char pi_file[1000];
    char info_file[1000];

    /* snprintf keeps an over-long output_dir from overflowing the buffers */
    snprintf(log_theta_file, sizeof(log_theta_file), "%s/init.theta", output_dir);
    snprintf(log_phi_file, sizeof(log_phi_file), "%s/init.phi", output_dir);
    snprintf(pi_file, sizeof(pi_file), "%s/init.pi", output_dir);
    snprintf(info_file, sizeof(info_file), "%s/info.txt", output_dir);

    /* element counts are computed in size_t so the products cannot overflow int */
    double* log_theta = (double*) calloc((size_t)num_labels * (size_t)num_topics, sizeof(double));
    double* log_phi = (double*) calloc((size_t)num_topics * (size_t)num_words, sizeof(double));
    double* pi = (double*) calloc((size_t)num_labels, sizeof(double));
    if (log_theta == NULL || log_phi == NULL || pi == NULL) {
        fprintf(stderr, "init_model: out of memory\n");
        free(log_theta);
        free(log_phi);
        free(pi);
        return;
    }

    for (int i = 0; i < num_labels; i++) {
        pi[i] = myrandom() * 0.5 + 1;

        /* draw one random value per topic, then store the log of its share of the row sum */
        double temp = 0;
        for (int k = 0; k < num_topics; k++) {
            double v = myrandom();
            temp += v;
            log_theta[i * num_topics + k] = v;
        }
        for (int k = 0; k < num_topics; k++)
            log_theta[i * num_topics + k] = log(log_theta[i * num_topics + k] / temp);
    }

    /* uniform distribution over the words for every topic */
    for (int k = 0; k < num_topics; k++) {
        for (int i = 0; i < num_words; i++)
            log_phi[k * num_words + i] = log(1.0 / num_words);
    }

    print_mat(log_theta, num_labels, num_topics, log_theta_file);
    print_mat(log_phi, num_topics, num_words, log_phi_file);
    print_mat(pi, num_labels, 1, pi_file);

    FILE* info_fp = fopen(info_file, "w");
    if (info_fp != NULL) {
        fprintf(info_fp, "num_labels: %d\nnum_words: %d\nnum_topics: %d\n", num_labels, num_words, num_topics);
        fclose(info_fp);
    } else {
        fprintf(stderr, "init_model: cannot open %s for writing\n", info_file);
    }

    free(log_theta);
    free(pi);
    free(log_phi);
}
예제 #4
0
/*
 * Exercises the dense-matrix helpers: copy, scale, add-identity, LU
 * factorisation (denseGETRF) and the matching solve (denseGETRS), printing
 * the intermediate results.
 *
 * Returns 0 on success, 1 if a matrix could not be allocated.
 */
int main(int argc, char** argv)
{
    realtype **a = newDenseMat(NROWS, NCOLS);
    realtype **b = newDenseMat(NROWS, NCOLS);
    /* pivot indices are integers, so initialise with an integer zero */
    sundials_ml_index p[NROWS] = { 0 };
    realtype s[NROWS] = { 5.0, 18.0, 6.0 };
    int i, j;

    if (a == NULL || b == NULL) {
        fprintf(stderr, "Failed to allocate dense matrices.\n");
        if (a != NULL) destroyMat(a);
        if (b != NULL) destroyMat(b);
        return 1;
    }

    /* a is indexed [column][row] here, i.e. a_init is stored transposed */
    for (i=0; i < NROWS; ++i) {
        for (j=0; j < NCOLS; ++j) {
            a[j][i] = a_init[i][j];
        }
    }

    printf("initially: a=\n");
    print_mat(a, NROWS, NCOLS);
    printf("\n");

#if SUNDIALS_LIB_VERSION >= 260
    {
        /* matrix-vector product is only exercised for library version >= 260 */
        realtype x[NCOLS] = { 1.0,  2.0, 3.0 };
        realtype y[NROWS] = { 0.0 };
        printf("matvec: y=\n");
        denseMatvec(a, x, y, NROWS, NCOLS);
        print_vec(y, NROWS);
        printf("\n");
    }
#endif

    denseCopy(a, b, NROWS, NCOLS);
    denseScale(2.0, b, NROWS, NCOLS);
    printf("scale copy x2: b=\n");
    print_mat(b, NROWS, NCOLS);
    printf("\n");

    denseAddIdentity(b, NROWS);
    printf("add identity: b=\n");
    print_mat(b, NROWS, NCOLS);
    printf("\n");

    /* factorise a in place, recording the row pivots in p */
    denseGETRF(a, NROWS, NCOLS, p);
    printf("getrf: a=\n");
    print_mat(a, NROWS, NCOLS);
    printf("\n       p=\n");
    print_pivots(p, NROWS);
    printf("\n");

    /* solve with the factorised a; s is overwritten with the solution */
    denseGETRS(a, NROWS, p, s);
    printf("getrs: s=\n");
    print_vec(s, NROWS);

    destroyMat(a);
    destroyMat(b);

    return 0;
}
예제 #5
0
파일: dosc.c 프로젝트: rcorcs/netmonitor
/*
 * Allocate a dosc_t with one classifier for TCP SYN traffic and one for UDP
 * traffic.
 *
 * in  optional stream; when non-NULL both classifiers are loaded from it
 *     (TCP SYN first, then UDP) and their training matrices are printed.
 *
 * Returns the new object, or NULL if it could not be allocated.
 */
dosc_t *load_dosc(FILE *in)
{
   dosc_t *dosc = (dosc_t *)malloc(sizeof(dosc_t));
   if(dosc == NULL)
      return NULL;

   dosc->tcpsyn_nbc = create_nbc(2, 1);
   dosc->udp_nbc = create_nbc(2, 1);
   if(in) {
      load_nbc(in, dosc->tcpsyn_nbc);
      load_nbc(in, dosc->udp_nbc);
      printf("TCP SYN Trained:\n");
      print_mat(dosc->tcpsyn_nbc->training);
      printf("UDP Trained:\n");
      print_mat(dosc->udp_nbc->training);
   }
   return dosc;
}
예제 #6
0
파일: combs.c 프로젝트: skjena/Assemblyx86
/*
 * Reads a list of integers and a value k from standard input, then prints
 * the combinations produced by get_combs(items, k, num_items).
 *
 * Returns 0 on success, 1 on invalid input or allocation failure.
 */
int main() {
    int num_items;
    int* items;
    int i,k;
    int** combs;
    int total;

    printf("How many items do you have: ");
    if (scanf("%d", &num_items) != 1 || num_items <= 0) {
        fprintf(stderr, "Invalid number of items.\n");
        return 1;
    }

    items = (int*) malloc(num_items * sizeof(int));
    if (items == NULL) {
        fprintf(stderr, "Out of memory.\n");
        return 1;
    }

    printf("Enter your items: ");
    for(i = 0; i < num_items; i++) {
        if (scanf("%d", &items[i]) != 1) {
            fprintf(stderr, "Invalid item.\n");
            free(items);
            return 1;
        }
    }

    printf("Enter k: ");
    if (scanf("%d", &k) != 1 || k < 0) {
        fprintf(stderr, "Invalid k.\n");
        free(items);
        return 1;
    }

    /* the row count is needed for both printing and freeing; compute it once */
    total = num_combs(num_items, k);

    combs = get_combs(items, k, num_items);
    print_mat(combs, total, k);
    free(items);
    free_mat(combs, total, k);

    return 0;
}
/*
 * Tries every ordered triple of pairwise distinct entries of list as the
 * three rows of a matrix. A triple is handed to check_mat only if the number
 * formed from the leading (hundreds) digits of the three rows is itself
 * found by search(); every matrix accepted by check_mat is printed.
 */
void main()
{
  int mat[3];
  int a,b,c,col1,found=0;

  for(a=0;a<n;a++)
    for(b=0;b<n;b++)
      for(c=0;c<n;c++)
        {
          /* all three rows must differ pairwise: a!=b and a!=c does not imply b!=c */
          if(list[a]==list[b] || list[a]==list[c] || list[b]==list[c])
            continue;

          /* first column, read top to bottom */
          col1=(list[a]/100)*100+(list[b]/100)*10+(list[c]/100);
          if(!search(list,col1))
            continue;

          mat[0]=list[a];
          mat[1]=list[b];
          mat[2]=list[c];

          if(check_mat(mat)==0)
            continue;

          printf("matrix exists..\n");
          print_mat(mat);
          found=1;
        }

  if(!found)
    printf("matrix does NOT exists... .\n");
}
예제 #8
0
/*
 * RPC client: sends two 2x2 matrices to the server on 127.0.0.1 and prints
 * both operands followed by the result.
 *
 * Usage: <prog> a|m
 *   a  call the remote procedure APROCNUM
 *   m  call the remote procedure MPROCNUM
 *
 * Returns 1 when the operation argument is missing or unknown, 0 otherwise
 * (including when the remote call itself fails, which is reported on stderr).
 */
int main (int argc, char **argv) {
	/* the server address is fixed; argv[1] selects the operation instead */
	char *host = "127.0.0.1";
	enum clnt_stat stat;

	mat2 res;
	mats2 donnees;
	int procnum;

	/* the operation letter is mandatory; never read argv[1] without it */
	if (argc < 2) {
		fprintf(stderr, "Usage: %s a|m\n", argc > 0 ? argv[0] : "client");
		return 1;
	}

	switch (argv[1][0]) {
	case 'a':
		procnum = APROCNUM;
		break;
	case 'm':
		procnum = MPROCNUM;
		break;
	default:
		/* without this branch procnum would be used uninitialised */
		fprintf(stderr, "Unknown operation '%s' (expected a or m)\n", argv[1]);
		return 1;
	}

	fill_entiers2(&donnees.m1.l1, 1, 2);
	fill_entiers2(&donnees.m1.l2, 3, 4);

	fill_entiers2(&donnees.m2.l1, 1, 2);
	fill_entiers2(&donnees.m2.l2, 3, 4);

	stat = callrpc(/* host */ host,
		/* prognum */ PROGNUM,
		/* versnum */ VERSNUM,
		/* procnum */ procnum,
		/* argument encoder */ (xdrproc_t) xdr_mats2,
		/* argument */ (char *)&donnees,
		/* result decoder */ (xdrproc_t)xdr_mat2,
		/* result of the remote function */ (char *)&res);

	if (stat != RPC_SUCCESS)
	{
		fprintf(stderr, "Echec de l'appel distant\n");
		clnt_perrno(stat);
		fprintf(stderr, "\n");
	}
	else
	{
		print_mat(&donnees.m1);
		printf("\n\n");
		print_mat(&donnees.m2);
		printf("\n\n");

		print_mat(&res);
		printf("\n");
	}
	return(0);
}
예제 #9
0
파일: rot.c 프로젝트: ggila/raytracer
/*
** Combines three rotations, one per angle, into ret.
**
** ret:   output matrix; cleared first, then written by the final dot_mat call
** alpha: angle passed to rotalpha
** beta:  angle passed to rotbeta
** gamma: angle passed to rotgamma
**
** The matrices are chained as dot_mat(matalpha, matbeta) followed by
** dot_mat(<that result>, matgamma) -- presumably dot_mat(a, b, out) stores
** the product a * b in out; confirm against its definition.
** NOTE(review): the three print_mat calls emit the intermediate matrices on
** every call and look like leftover debugging output.
*/
void		rot(t_mat ret, const float alpha, const float beta, const float gamma)
{
	t_mat	tmp;
	t_mat	matalpha;
	t_mat	matbeta;
	t_mat	matgamma;

	/* clear the intermediate and the output before they are filled */
	ft_bzero(tmp, MAT_SIZE);
	ft_bzero(ret, MAT_SIZE);
	/* build one matrix per angle */
	rotalpha(alpha, matalpha);
	rotbeta(beta, matbeta);
	rotgamma(gamma, matgamma);
	print_mat(matalpha);
	/* tmp receives the combination of the alpha and beta matrices */
	dot_mat(matalpha, matbeta, tmp);
	print_mat(tmp);
	/* ret receives tmp combined with the gamma matrix */
	dot_mat(tmp, matgamma, ret);
	print_mat(ret);
}
예제 #10
0
/*
 * Test driver: zeroes a MAX_SIZE x MAX_SIZE matrix, then fills and prints it
 * for size 9 and afterwards for size 10.
 */
int main()
{
	int m[MAX_SIZE][MAX_SIZE];      /* matrix under test */
	const int sizes[] = { 9, 10 };  /* sizes to exercise, in order */
	int row, col, t;

	/* start from an all-zero matrix */
	for (row = 0; row < MAX_SIZE; row++)
	{
		for (col = 0; col < MAX_SIZE; col++)
		{
			m[row][col] = 0;
		}
	}

	/* fill and print once per test size, reusing the same matrix */
	for (t = 0; t < 2; t++)
	{
		fill(m, sizes[t]);
		print_mat(m, sizes[t]);
	}

	return 0;
}
예제 #11
0
/*
 * Serial matrix multiplication C = A x B.
 *
 * Usage: <prog> <NumRowA> <NumColA> <NumColB>
 *
 * Returns 0 on success or when only the usage text was printed, 1 when the
 * given dimensions are not positive.
 */
int main(int argc, char *argv[])
{
    float **A, **B, **C;	// matrices
    int d1, d2, d3;         // dimensions of matrices
    int i, j, k;			// loop variables

    /* print user instruction */
    if (argc != 4)
    {
        printf ("Matrix multiplication: C = A x B\n");
        printf ("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]); 
        return 0;
    }

    /* read user input */
    d1 = atoi(argv[1]);		// rows of A and C
    d2 = atoi(argv[2]);     // cols of A and rows of B
    d3 = atoi(argv[3]);     // cols of B and C

    /* atoi yields 0 for non-numeric input; refuse empty or negative sizes */
    if (d1 <= 0 || d2 <= 0 || d3 <= 0)
    {
        fprintf(stderr, "All matrix dimensions must be positive integers.\n");
        return 1;
    }

    printf("Matrix sizes C[%d][%d] = A[%d][%d] x B[%d][%d]\n", d1, d3, d1, d2, d2, d3);

    /* prepare matrices */
    A = alloc_mat(d1, d2);
    init_mat(A, d1, d2); 
    B = alloc_mat(d2, d3);
    init_mat(B, d2, d3);
    C = alloc_mat(d1, d3);	// every element of C is assigned below

    /* serial version of matmult */
    printf("Perform matrix multiplication...\n");
    for (i = 0; i < d1; i++)
    {
        for (j = 0; j < d3; j++)
        {
            /* accumulate locally and store once, so the result does not
               depend on alloc_mat having zeroed C */
            float sum = 0.0f;
            for (k = 0; k < d2; k++)
                sum += A[i][k] * B[k][j];
            C[i][j] = sum;
        }
    }

    /* test output */
    print_mat(A, d1, d2, "A"); 
    print_mat(B, d2, d3, "B"); 
    print_mat(C, d1, d3, "C"); 

    printf ("\nDone.\n");

    return 0;
}
예제 #12
0
/// Demonstrates printing a std::vector as a row and a vector of vectors as a matrix.
int main()
{
  // A row of twenty characters, every one of them 'a'
  std::vector<char> letters(20, 'a');
  print_row(letters);
  std::cout << std::endl;

  // A 20 x 15 matrix: twenty copies of a value-initialised row of fifteen ints
  std::vector<int> blank_row(15);
  std::vector< std::vector<int> > M(20, blank_row);
  print_mat(M);
}
예제 #13
0
파일: main.c 프로젝트: flaviomb/matrix
/*
 * Reads the matrix dimensions from standard input, then creates, fills and
 * prints a matrix of that size.
 *
 * Returns 0 on success, 1 if the dimensions could not be read or are not
 * positive.
 */
int main(){
	int m,n;
	float **mat;

	printf("\nType the size of the matrix: ");
	/* both values must be parsed; otherwise m and n would be used uninitialised */
	if (scanf("%d %d", &m,&n) != 2 || m <= 0 || n <= 0) {
		fprintf(stderr, "Invalid matrix size.\n");
		return 1;
	}

	mat = create(m,n);
	mat = fill(m,n,mat);
	print_mat(m,n,mat);

	return 0;
}
/**
* Fills an n x n matrix with random values in [0, 99], prints it, rotates it
* with rotate_90_degree and prints it again.
*
* USAGE: ./a.out 10
*
* Returns 0 on success, 1 on a missing or non-positive size argument or when
* memory cannot be allocated.
**/
int main(int argc, char *argv[]){
	if(argc<2) return 1;
	srand(time(0));
	int n=atoi(argv[1]);
	/* atoi yields 0 for non-numeric input; a non-positive size is useless */
	if(n<=0){
		fprintf(stderr,"Matrix size must be a positive integer.\n");
		return 1;
	}

	int **arr=malloc(n*sizeof(int*));
	if(arr==NULL){
		fprintf(stderr,"Out of memory.\n");
		return 1;
	}
	for(int i=0;i<n;i++){
		arr[i]=malloc(n*sizeof(int));
		if(arr[i]==NULL){
			/* release the rows allocated so far before giving up */
			fprintf(stderr,"Out of memory.\n");
			for(int r=0;r<i;r++)
				free(arr[r]);
			free(arr);
			return 1;
		}
	}

	for(int i=0;i<n;i++)
		for(int j=0;j<n;j++)
			arr[i][j]=rand()%100;

	printf("\nInput=\n");
	print_mat(arr,n);
	rotate_90_degree(arr,n,0);
	printf("\nOutput=\n");
	print_mat(arr,n);

	for(int i=0;i<n;i++)
		free(arr[i]);
	free(arr);
	return 0;
}
예제 #15
0
/*
 * Driver: fills a and b with the running numbers 1, 2, 3, ... in row order,
 * times mat_mul(c, a, b) and, depending on the options set by parse_opt,
 * validates the product and prints the three matrices.
 */
int main(int argc, char** argv)
{
	int row, col;
	int next = 1;	/* value written to the next element of a and b */

	parse_opt( argc, argv );

	for( row = 0; row < NDIM; row++ )
		for( col = 0; col < NDIM; col++, next++ )
		{
			a[row][col] = next;
			b[row][col] = next;
		}

	/* only the multiplication itself is timed */
	timer_start(1);
	mat_mul( c, a, b );
	timer_stop(1);

	printf("Time elapsed : %lf sec\n", timer_read(1));

	if( validation )
	{
		check_mat_mul( c, a, b );
	}

	if( !print_matrix )
	{
		return 0;
	}

	printf("MATRIX A: \n");
	print_mat(a);

	printf("MATRIX B: \n");
	print_mat(b);

	printf("MATRIX C: \n");
	print_mat(c);

	return 0;
}
예제 #16
0
/* Allocate a rows x cols matrix as an array of row pointers; NULL on failure. */
static double **cov_alloc_matrix (int rows, int cols)
{
   double **m;
   int r;

   /* the outer array holds row POINTERS, so size it with sizeof(double *) */
   m = (double **) malloc (rows * sizeof(double *));
   if (m == NULL)
      return NULL;

   for (r = 0; r < rows; r++)
   {
      m[r] = (double *) malloc (cols * sizeof(double));
      if (m[r] == NULL)
      {
         while (r > 0)
            free (m[--r]);
         free (m);
         return NULL;
      }
   }
   return m;
}

/* Release a matrix obtained from cov_alloc_matrix; accepts NULL. */
static void cov_free_matrix (double **m, int rows)
{
   int r;

   if (m == NULL)
      return;
   for (r = 0; r < rows; r++)
      free (m[r]);
   free (m);
}

/*
 * Reads a data set from dados.txt (number of observations, number of
 * dimensions, then the observations row by row), passes it to mat_cov and
 * writes the three resulting tr x tr matrices to Covar.csv, Corr.csv and
 * Star.csv.
 *
 * Returns 0 on success, 1 if the input cannot be read or memory runs out.
 */
int main (void){
   int j, k, Ne, tr;
   int status = 1;
   double **data = NULL, **Covar = NULL, **Corr = NULL, **Star = NULL;

   FILE *Dados;

   Dados = fopen("dados.txt", "r");
   if (Dados == NULL)
   {
      fprintf(stderr, "Cannot open dados.txt\n");
      return 1;
   }

   /* header: number of observations, then number of dimensions */
   if (fscanf(Dados, "%d", &Ne) != 1 || fscanf(Dados, "%d", &tr) != 1 || Ne <= 0 || tr <= 0)
   {
      fprintf(stderr, "Invalid header in dados.txt\n");
      fclose(Dados);
      return 1;
   }

   data = cov_alloc_matrix (Ne, tr);
   Covar = cov_alloc_matrix (tr, tr);
   Corr = cov_alloc_matrix (tr, tr);
   Star = cov_alloc_matrix (tr, tr);

   if (data == NULL || Covar == NULL || Corr == NULL || Star == NULL)
   {
      fprintf(stderr, "Out of memory\n");
   }
   else
   {
      int complete = 1;

      for ( k = 0; k < Ne && complete; k++)
         for (j = 0; j < tr; j++)
            if (fscanf(Dados, "%lf", &data[k][j]) != 1)
            {
               complete = 0;
               break;
            }

      if (!complete)
      {
         fprintf(stderr, "dados.txt ended before all values were read\n");
      }
      else
      {
         mat_cov (data, Ne, tr, Covar, Corr, Star);

         print_mat(Covar, "Covar.csv", tr, tr);
         print_mat(Corr, "Corr.csv", tr, tr);
         print_mat(Star, "Star.csv", tr, tr);
         status = 0;
      }
   }

   fclose(Dados);
   cov_free_matrix (data, Ne);
   cov_free_matrix (Covar, tr);
   cov_free_matrix (Corr, tr);
   cov_free_matrix (Star, tr);

   return status;
}
예제 #17
0
/*
 * Evaluates the expression text t (length l_t) and, if the result is a
 * matrix node, prints that matrix between two separator lines.
 *
 * Always returns 0.
 */
int exec_cmd_mat(char *t,int l_t)
{
	node parsed;	/* node built from the text */
	node result;	/* node produced by evaluating it */

	trunc_str(&t,&l_t);
	store_fn(t,l_t,&parsed);
	parsed.method->eval_n(&parsed,&result);

	/* anything that is not a matrix is silently ignored */
	if(result.node_type!=ar_mat)
		return 0;

	printf("\n-----------------------------------------------\n");
	print_mat(result.d.mat_ptr);
	printf("\n-----------------------------------------------\n");

	return 0;
}
예제 #18
0
/*
 * Test driver: zeroes two MAX_SIZE x MAX_SIZE matrices, fills the first for
 * size 9 and the second for size 10, and prints each after filling.
 */
int main()
{
	int m[MAX_SIZE][MAX_SIZE];   /* matrix for the size-9 test  */
	int m2[MAX_SIZE][MAX_SIZE];  /* matrix for the size-10 test */
	int row, col;
	int val, val2;

	/* clear both matrices element by element */
	for (row = 0; row < MAX_SIZE; row++)
	{
		for (col = 0; col < MAX_SIZE; col++)
		{
			m[row][col] = 0;
			m2[row][col] = 0;
		}
	}

	/* each test gets its own value cell that is handed to fill */
	val = 0;
	val2 = 0;

	/* test for size 9 */
	fill(m, 9, 0, &val);
	print_mat(m, 9);

	/* test for size 10 */
	fill(m2, 10, 0, &val2);
	print_mat(m2, 10);

	return 0;
}
예제 #19
0
/*
 * Computes S = adj(M) * adj(T) * M together with the scale factor
 * det(M) * det(T), reduces both by their greatest common divisor so that
 * everything stays in the smallest possible integers, and prints M, T and S.
 */
main ()
{
    int     row;
    int     col;
    int     scale;
    int     gcd;
    int     C[N][N];
    int     S[N][N];
    int     Madj[N][N];
    int     Tadj[N][N];
    int     Mdet;
    int     Tdet;

    /* adjoints stand in for the inverses; the determinants are kept aside */
    Tdet = adjoint (T, Tadj);
    Mdet = adjoint (M, Madj);

    /* Madj*Tadj*M -> S */
    matmult (Madj, Tadj, C);
    matmult (C, M, S);

    /* combined scale factor of both determinants */
    gcd = Mdet * Tdet;
    scale = gcd;

    /* fold every element of S into the running common divisor */
    for (row = 0; row < N; row++)
        for (col = 0; col < N; col++)
            gcd = Gcd (gcd, S[row][col]);

    /* reduce the scale factor and the matrix by that divisor */
    scale /= gcd;
    for (row = 0; row < N; row++)
        for (col = 0; col < N; col++)
            S[row][col] /= gcd;

    /* display the results */
    printf ("scale factor = 1/%d  ", scale);
    print_mat ("M=", M, N);
    print_mat ("T=", T, N);
    print_mat ("S=", S, N);     /* subdivision matrix */
    exit (0);
}
예제 #20
0
파일: dosc.c 프로젝트: rcorcs/netmonitor
/* Train one class of nbc from the UDP records read from in. */
static void train_udp_nbc_class(nbc_t *nbc, int label, FILE *in)
{
   list_t *records = create_list();
   mat_t *features;

   load_udpp_file(in, records);

   /* the record list is only needed to build the matrix */
   features = create_udp_mat(records);
   destroy_list_nodes(records);
   destroy_list(&records);

   nbc_train(nbc, label, features);
   destroy_mat(&features);
}

/*
 * Train the UDP classifier: class 0 from the records in in_normal, class 1
 * from the records in in_attack, then print the resulting training matrix.
 */
void train_udp_nbc(nbc_t *nbc, FILE *in_normal, FILE *in_attack)
{
   train_udp_nbc_class(nbc, 0, in_normal);   /* normal traffic */
   train_udp_nbc_class(nbc, 1, in_attack);   /* attack traffic */

   printf("UDP Trained:\n");
   print_mat(nbc->training);
}
예제 #21
0
/** **/
int main (int argc, char* argv[])
{
  int WORK_DIM = 2; // Wie viele Dimensionen hat der Indexraum?
  std::chrono::time_point<std::chrono::system_clock> s_start, s_end, p_start, p_end;


  // Lese den Kernel dynamisch ein: (uebernommen von Foliensatz 9, Folie 20)
  FILE *fp;
  const char *FileName = "matmult.cl";
  char *KernelSource;
  fp = fopen(FileName, "r");
  if (!fp) {
    printf("Can't open kernel source: %s", FileName); exit(1);
  }
  KernelSource = (char *)malloc(MAX_SOURCE_SIZE);
  size_t kernel_s_size = fread(KernelSource, 1, MAX_SOURCE_SIZE, fp);
  fclose(fp);

  cl_int            err;
  cl_platform_id*   platforms = NULL;
  char              platform_name[1024];
  cl_device_id      device_id = NULL;
  cl_uint           num_of_platforms = 0,
                    num_of_devices = 0;
  cl_context        context;
  cl_kernel         kernel;
  cl_command_queue  command_queue;
  cl_program        program;


  err = clGetPlatformIDs(0, NULL, &num_of_platforms);
  if (err != CL_SUCCESS) {
    printf("No platforms found. Error: %d\n", err);
    return 0;
  }

  // Liefert Plattformen
  platforms = (cl_platform_id *)malloc(num_of_platforms);
  err = clGetPlatformIDs(num_of_platforms, platforms, NULL);
  if (err != CL_SUCCESS) {
    printf("No platforms found. Error: %d\n", err);
    return 0;
  } else {
    int nvidia_platform = 0;  // Speichert den Rang der letzten NVIDIA-Plattform
    for (unsigned int i=0; i<num_of_platforms; i++) // Fuer jede Plattform:
    {
      clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL);
      if (err != CL_SUCCESS) {
        printf("Could not get information about platform. Error: %d\n", err);
        return 0;
      }

      if (strstr(platform_name, "NVIDIA") != NULL) { // Falls die Plattform eine NVIDIA-Plattform ist: Speichere ihren Rang
        nvidia_platform = i;
        break;
      }
    }
    // Gibt die ID des Devices der NVIDIA-Plattform zurueck
    err = clGetDeviceIDs(platforms[nvidia_platform], CL_DEVICE_TYPE_GPU, 1, &device_id, &num_of_devices);
    if (err != CL_SUCCESS) {
      printf("Could not get device in platform. Error: %d\n", err);
      return 0;
    }
  }

  // Erschaffe einen OpenCl-context, in dem OpenCl-Datenobjekte verwaltet werden koennen
  context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
  if (err != CL_SUCCESS) {
    printf("Unable to create context. Error: %d\n", err);
    return 0;
  }

  // Initialisiere eine Befehlswarteschleife, die Befehle fuer OpenCl-Objekte speichern kann
  command_queue = clCreateCommandQueue(context, device_id, 0, &err);
  if (err != CL_SUCCESS) {
    printf("Unable to create command queue. Error: %d\n", err);
    return 0;
  }

  // Initialisiere ein Programm und spezifiziere, aus welchem Code dieses kompiliert werden soll
  program = clCreateProgramWithSource(context, 1, (const char **)&KernelSource, (const size_t *)& kernel_s_size, &err);
  if (err != CL_SUCCESS) {
    printf("Unable to create program. Error: %d\n", err);
    return 0;
  }

  // Kompiliere das Programm zur Laufzeit
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  if (err != CL_SUCCESS) {
    // Zeige Compilermeldungen an: (uebernommen von Foliensatz 9, Folie 23)
    char *log;
    size_t size;
    clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size);
    log = (char *)malloc(size+1);
    if (log) {
      clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL);
      log[size] = '\0';
      printf("%s", log);
      free(log);
    }

    printf("Error building program. Error: %d\n", err);
    return 0;
  }

  // Erschaffe einen Kernel und lade oben kompiliertes Programm ein
  kernel = clCreateKernel(program, "matmult", &err);
  if (err != CL_SUCCESS) {
    printf("Error setting kernel. Error: %d\n", err);
    return 0;
  }

  float **A, **B, **C; // Matrizen
  int dim1, dim2, dim3; // Matrixdimensionen
  dim1 = D1; // Zeilen von A, Zeilen von C
  dim2 = D2; // Spalten von A, Zeilen von B
  dim3 = D3; // Spalten von B, Spalten von C

  A = alloc_mat(dim1, dim2);
  B = alloc_mat(dim2, dim3);
  C = alloc_mat(dim1, dim3);

  init_mat(A, dim1, dim2);
  init_mat(B, dim2, dim3);

  cl_mem            in_A, in_B, output;
  // float             data[DATA_SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  size_t global[1] = {dim1*dim3}; // Dimensionen von C
  size_t global_two[2] = {dim1, dim3};

  in_A  = clCreateBuffer (context, CL_MEM_READ_ONLY,  sizeof(float)*dim1*dim2, NULL, &err);
  in_B  = clCreateBuffer (context, CL_MEM_READ_ONLY,  sizeof(float)*dim2*dim3, NULL, &err);
  output = clCreateBuffer (context, CL_MEM_WRITE_ONLY, sizeof(float)*dim1*dim3, NULL, &err);

  clEnqueueWriteBuffer(command_queue, in_A, CL_TRUE, 0, sizeof(float)*dim1*dim2, *A, 0, NULL, NULL);
  clEnqueueWriteBuffer(command_queue, in_B, CL_TRUE, 0, sizeof(float)*dim2*dim3, *B, 0, NULL, NULL);

  clSetKernelArg(kernel, 0, sizeof(cl_mem), &in_A);
  clSetKernelArg(kernel, 1, sizeof(cl_mem), &in_B);
  clSetKernelArg(kernel, 2, sizeof(cl_mem), &output);
  // clSetKernelArg(kernel, 3, sizeof(int), &dim2);
  // clSetKernelArg(kernel, 4, sizeof(int), &dim3);

  clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
  if (WORK_DIM == 2) {
    clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, global_two, NULL, 0, NULL, NULL);
  }

  // Zeitmessung fuer parallele Version
  p_start = std::chrono::system_clock::now();
  err = clFinish(command_queue);
  p_end = std::chrono::system_clock::now();
  std::chrono::duration<double> p_duration = p_end - p_start;


  if (err == CL_INVALID_COMMAND_QUEUE ) {
    printf("CL_INVALID_COMMAND_QUEUE: %d\n", err);
    return 0;
  }

  clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, sizeof(float)*dim1*dim3, *C, 0, NULL, NULL);

  // Ueberpruefe, ob serielle Version und parallele gleich sind:
  float **correct_matrix;
  correct_matrix = alloc_mat(dim1, dim3);

  s_start = std::chrono::system_clock::now(); // Zeitmessung fuer serielle Version
  correct_matrix = mult_mat(A, B, dim1, dim2, dim3);
  s_end = std::chrono::system_clock::now();
  std::chrono::duration<double> s_duration = s_end - s_start;

  is_correct(C, correct_matrix, dim1, dim3); // Numerischer Korrektheitsbeweis

  print_mat(C, dim1, dim3, "C = ");
  print_mat(correct_matrix, dim1, dim3, "correct_matrix = ");
  // printf("Kernel execution time: %f\n", t_end-t_start);

  clReleaseMemObject(in_A);
  clReleaseMemObject(in_B);
  clReleaseMemObject(output);
  clReleaseProgram(program);
  clReleaseKernel(kernel);
  err = clReleaseCommandQueue(command_queue); //!!
  if (err != CL_SUCCESS) {
    printf("Error releasing command queue: %d\n", err);
    return 0;
  }
  clReleaseContext(context);

  printf("Dauer der seriellen Version: %.2f Millisekunden\n", s_duration.count() * 1000);
  printf("Dauer der parallelen Version: %.2f Millisekunden\n", p_duration.count() * 1000);
  printf("Erhaltenes Speed Up: %.2f \n", p_duration.count() / p_duration.count());


  return 0;
}
예제 #22
0
/**
 * Harmonic vibrational analysis.
 *
 * Reads the geometry from h2o_geom.txt and the Cartesian Hessian from
 * h2o_hessian.txt, mass-weights the Hessian, diagonalises it and prints the
 * eigenvalues and the resulting harmonic frequencies.
 *
 * @return 0 on success, -1 if an input file cannot be opened or parsed, or
 *         if the two files disagree on the number of atoms.
 */
int main()
{
  /** Step 1: Read the Coordinate Data
   *
   */
  FILE *xyzfile;
  xyzfile = fopen("h2o_geom.txt", "r");
  if (xyzfile == NULL) {
    fprintf(stderr, "Could not open h2o_geom.txt.\n");
    return -1;
  }
  int natom;
  if (fscanf(xyzfile, "%d", &natom) != 1 || natom <= 0) {
    fprintf(stderr, "Could not read the number of atoms from h2o_geom.txt.\n");
    fclose(xyzfile);
    return -1;
  }
  Molecule mol(natom, 0);
  for (int i = 0; i < natom; i++) {
    if (fscanf(xyzfile, "%lf %lf %lf %lf", &mol.zvals[i], &mol.geom[i][0], &mol.geom[i][1], &mol.geom[i][2]) != 4) {
      fprintf(stderr, "Malformed atom record in h2o_geom.txt.\n");
      fclose(xyzfile);
      return -1;
    }
  }
  fclose(xyzfile);

  /** Step 2: Read the Cartesian Hessian Data
   *
   */
  FILE *hessfile;
  hessfile = fopen("h2o_hessian.txt", "r");
  if (hessfile == NULL) {
    fprintf(stderr, "Could not open h2o_hessian.txt.\n");
    return -1;
  }
  int hessatom;
  if (fscanf(hessfile, "%d", &hessatom) != 1) {
    fprintf(stderr, "Could not read the number of atoms from h2o_hessian.txt.\n");
    fclose(hessfile);
    return -1;
  }
  if (natom != hessatom) {
    printf("The number of atoms doesn't match.\n");
    fclose(hessfile);
    return -1;
  }

  // Every matrix below is dim x dim: three Cartesian coordinates per atom
  const int dim = 3*natom;

  double **H = new double* [dim];
  for (int i = 0; i < dim; i++)
    H[i] = new double[dim];
  // Each row of H is stored in the file as natom groups of three values
  bool hessian_ok = true;
  for (int i = 0; i < dim && hessian_ok; i++)
    for (int j = 0; j < natom; j++)
      if (fscanf(hessfile, "%lf %lf %lf", &H[i][3*j], &H[i][3*j+1], &H[i][3*j+2]) != 3) {
        hessian_ok = false;
        break;
      }
  fclose(hessfile);
  if (!hessian_ok) {
    fprintf(stderr, "Malformed Hessian record in h2o_hessian.txt.\n");
    for (int i = 0; i < dim; i++)
      delete[] H[i];
    delete[] H;
    return -1;
  }

  printf("Hessian:\n");
  print_mat(H, dim, dim);
  printf("\n");

  /** Step 3: Mass-Weight the Hessian Matrix
   * Divide each element of the Hessian matrix by the product of square roots of the masses of the atoms associated with the given coordinates:
   *  \vect{F}_{M}^{ij} = \frac{F_{ij}}{\sqrt{m_{i}m_{j}}}
   */
  double **Hmw = new double* [dim];
  for (int i = 0; i < dim; i++)
    Hmw[i] = new double[dim];
  for (int i = 0; i < natom; i++) {
    for (int j = 0; j < natom; j++) {
      const double mi = masses[static_cast<int>(mol.zvals[i])];
      const double mj = masses[static_cast<int>(mol.zvals[j])];
      const double mimj = sqrt(mi*mj);
      // The 3x3 block of atoms (i, j) starts at row 3*i, column 3*j. Using
      // i*natom as the offset is only right when natom happens to be 3.
      for (int a = 0; a < 3; a++)
        for (int b = 0; b < 3; b++)
          Hmw[3*i+a][3*j+b] = H[3*i+a][3*j+b]/mimj;
    }
  }

  printf("Mass-weighted Hessian:\n");
  print_mat(Hmw, dim, dim);
  printf("\n");

  /** Step 4: Diagonalize the Mass-Weighted Hessian Matrix
   * Compute the eigenvalues of the mass-weighted Hessian:
   *  \vect{F}^{M}\vect{L} = \vect{L}\vect{\Lambda}
   */
  double *evals = new double[dim];
  for (int i = 0; i < dim; i++) evals[i] = 0.0;
  // diag needs storage for the eigenvectors, but only the eigenvalues are used
  double **evecs = new double* [dim];
  for (int i = 0; i < dim; i++) evecs[i] = new double[dim];
  diag(dim, dim, Hmw, evals, false, evecs, 1e-19);
  for (int i = 0; i < dim; i++) delete[] evecs[i];
  delete[] evecs;

  printf("Mass-weighted Hessian eigenvalues:\n");
  for (int i = 0; i < dim; i++)
    printf("%12.10f\n", evals[i]);
  printf("\n");

  /** Step 5: Compute the Harmonic Vibrational Frequencies
   * The vibrational frequencies are proportional to the square root of the eigenvalues of the mass-weighted Hessian:
   *  \omega_{i} = \textrm{constant} \times \sqrt{\lambda_{i}}
   */

  printf("Harmonic vibrational frequences [cm]^-1:\n");
  for (int i = 0; i < dim; i++)
    printf("%10.4f\n", sqrt(evals[i])*vib_constant);
  printf("\n");

  /// Clean up after ourselves...
  for (int i = 0; i < dim; i++) {
    delete[] H[i];
    delete[] Hmw[i];
  }
  delete[] H;
  delete[] Hmw;
  delete[] evals;

  return 0;
}
예제 #23
0
파일: basic.cpp 프로젝트: onlyuser/Sandbox
int main(int argc, char** argv)
{
    // =============================================
    // vector init/add/sub/scale/normalize/dot/cross
    // =============================================

    std::cout << "===========" << std::endl;
    std::cout << "vector init" << std::endl;
    std::cout << "===========" << std::endl << std::endl;

    {
        std::cout << "default c-tor:\t\t";
        print_vec(glm::vec3());
        std::cout << std::endl;

        std::cout << "arg c-tor (explicit):\t";
        print_vec(glm::vec3(1, 2, 3));
        std::cout << std::endl;

        std::cout << "arg c-tor (1 float):\t";
        print_vec(glm::vec3(2));
        std::cout << std::endl;

        std::cout << "arg c-tor (array):\t";
        float arr[] = {1, 2, 3};
        print_vec(glm::make_vec3(arr));
        std::cout << std::endl;
    }
    std::cout << std::endl;

    std::cout << "==========" << std::endl;
    std::cout << "vector add" << std::endl;
    std::cout << "==========" << std::endl << std::endl;

    {
        glm::vec3 v1(1, 2, 3);
        glm::vec3 v2(4, 5, 6);

        std::cout << "v1:\t\t";
        print_vec(v1);
        std::cout << std::endl;
        std::cout << "v2:\t\t";
        print_vec(v2);
        std::cout << std::endl;

        std::cout << "v1+v2:\t\t";
        print_vec(v1+v2);
        std::cout << std::endl;

        std::cout << "v1 += v2:\t";
        v1 += v2;
        print_vec(v1);
        std::cout << std::endl;
    }
    std::cout << std::endl;

    std::cout << "==========" << std::endl;
    std::cout << "vector sub" << std::endl;
    std::cout << "==========" << std::endl << std::endl;

    {
        glm::vec3 v1(1, 2, 3);
        glm::vec3 v2(4, 5, 6);

        std::cout << "v1:\t\t";
        print_vec(v1);
        std::cout << std::endl;
        std::cout << "v2:\t\t";
        print_vec(v2);
        std::cout << std::endl;

        std::cout << "v1-v2:\t\t";
        print_vec(v1-v2);
        std::cout << std::endl;

        std::cout << "v1 -= v2:\t";
        v1 -= v2;
        print_vec(v1);
        std::cout << std::endl;
    }
    std::cout << std::endl;

    std::cout << "============" << std::endl;
    std::cout << "vector scale" << std::endl;
    std::cout << "============" << std::endl << std::endl;

    {
        glm::vec3 v(1, 2, 3);
        float k = 2;

        std::cout << "v:\t\t";
        print_vec(v);
        std::cout << std::endl;
        std::cout << "k:\t\t" << k << std::endl;

        std::cout << "v*k:\t\tn/a";
        //print_vec(v*k);
        std::cout << std::endl;

        std::cout << "v *= k:\t\t";
        v *= k;
        print_vec(v);
        std::cout << std::endl;
    }
    std::cout << std::endl;

    std::cout << "================" << std::endl;
    std::cout << "vector normalize" << std::endl;
    std::cout << "================" << std::endl << std::endl;

    {
        glm::vec3 v(1, 2, 3);

        std::cout << "v:\t\t";
        print_vec(v);
        std::cout << std::endl;

        std::cout << "normalize(v):\t";
        print_vec(glm::normalize(v));
        std::cout << std::endl;
    }
    std::cout << std::endl;

    std::cout << "==================" << std::endl;
    std::cout << "vector dot product" << std::endl;
    std::cout << "==================" << std::endl << std::endl;

    {
        glm::vec3 v1(1, 2, 3);
        glm::vec3 v2(4, 5, 6);

        std::cout << "v1:\t\t";
        print_vec(v1);
        std::cout << std::endl;
        std::cout << "v2:\t\t";
        print_vec(v2);
        std::cout << std::endl;

        std::cout << "v1*v2:\t\t";
        print_vec(v1*v2);
        std::cout << std::endl;

        std::cout << "v1 *= v2:\t";
        v1 *= v2;
        print_vec(v1);
        std::cout << std::endl;
    }
    std::cout << std::endl;

    std::cout << "====================" << std::endl;
    std::cout << "vector cross product" << std::endl;
    std::cout << "====================" << std::endl << std::endl;

    {
        glm::vec3 v1(1, 0, 0);
        glm::vec3 v2(0, 1, 0);

        std::cout << "v1:\t\t";
        print_vec(v1);
        std::cout << std::endl;
        std::cout << "v2:\t\t";
        print_vec(v2);
        std::cout << std::endl;

        std::cout << "cross(v1, v2):\t";
        print_vec(glm::cross(v1, v2));
        std::cout << std::endl;
    }
    std::cout << std::endl;

    // ========================================
    // matrix init/transpose/invert/determinant
    // ========================================

    std::cout << "===========" << std::endl;
    std::cout << "matrix init" << std::endl;
    std::cout << "===========" << std::endl << std::endl;

    {
        std::cout << "default c-tor:" << std::endl;
        print_mat(glm::mat4());
        std::cout << std::endl;

        std::cout << "arg c-tor (explicit):" << std::endl;
        print_mat(glm::mat4(
                1,   2,  3,  4,
                5,   6,  7,  8,
                9,  10, 11, 12,
                13, 14, 15, 16));
        std::cout << std::endl;

        std::cout << "arg c-tor (1 float):" << std::endl;
        print_mat(glm::mat4(2));
        std::cout << std::endl;

        std::cout << "arg c-tor (array):" << std::endl;
        float arr[] = {
                1,   2,  3,  4,
                5,   6,  7,  8,
                9,  10, 11, 12,
                13, 14, 15, 16};
        print_mat(glm::make_mat4(arr));
    }
    std::cout << std::endl;

    std::cout << "================" << std::endl;
    std::cout << "matrix transpose" << std::endl;
    std::cout << "================" << std::endl << std::endl;

    {
        glm::mat4 m;
        int n = 0;
        for(int i = 0; i<4; i++)
        {
            for(int j = 0; j<4; j++)
                m[i][j] = n++;
        }

        std::cout << "m:" << std::endl;
        print_mat(m);
        std::cout << std::endl;

        std::cout << "transpose(m):" << std::endl;
        print_mat(glm::transpose(m));
    }
    std::cout << std::endl;

    std::cout << "==============" << std::endl;
    std::cout << "matrix inverse" << std::endl;
    std::cout << "==============" << std::endl << std::endl;

    {
        glm::mat4 m = glm::translate(
                glm::mat4(1),
                glm::vec3(10, 20, 30));

        std::cout << "m (translate by [10, 20, 30]):" << std::endl;
        print_mat(m);
        std::cout << std::endl;

        std::cout << "inverse(m):" << std::endl;
        print_mat(glm::inverse(m));
    }
    std::cout << std::endl;

    std::cout << "==================" << std::endl;
    std::cout << "matrix determinant" << std::endl;
    std::cout << "==================" << std::endl << std::endl;

    {
        glm::mat4 m = glm::translate(
                glm::mat4(2),
                glm::vec3(10, 20, 30));

        std::cout << "m (scale by 2, translate by [10, 20, 30]):" << std::endl;
        print_mat(m);
        std::cout << std::endl;

        std::cout << "determinant(m):\t" << glm::determinant(m) << std::endl;
    }
    std::cout << std::endl;

    // vector matrix mult

    std::cout << "==================" << std::endl;
    std::cout << "vector-matrix mult" << std::endl;
    std::cout << "==================" << std::endl << std::endl;

    {
        glm::vec3 v(1, 2, 3);
        glm::mat4 m = glm::translate(
                glm::mat4(1),
                glm::vec3(10, 20, 30));

        std::cout << "v:\t";
        print_vec(v);
        std::cout << std::endl << std::endl;
        std::cout << "m (translate by [10, 20, 30]):" << std::endl;
        print_mat(m);
        std::cout << std::endl;

        std::cout << "m*v:\t";
        print_vec(glm::vec3(m*glm::vec4(v, 1)));
        std::cout << std::endl;
    }
    std::cout << std::endl;

    std::cout << "==================" << std::endl;
    std::cout << "matrix-matrix mult" << std::endl;
    std::cout << "==================" << std::endl << std::endl;

    {
        glm::vec3 v(1, 2, 3);
        glm::mat4 m1 = glm::translate(
                glm::mat4(1),
                glm::vec3(10, 20, 30));
        glm::mat4 m2 = glm::scale(
                glm::mat4(1),
                glm::vec3(2, 2, 2));

        std::cout << "v:\t";
        print_vec(v);
        std::cout << std::endl << std::endl;
        std::cout << "m1 (translate by [10, 20, 30]):" << std::endl;
        print_mat(m1);
        std::cout << std::endl;
        std::cout << "m2 (scale by 2):" << std::endl;
        print_mat(m2);
        std::cout << std::endl;

        std::cout << "m1*m2:" << std::endl;
        print_mat(m1*m2);
        std::cout << std::endl;

        std::cout << "m1*m2*v4:\t";
        print_vec(glm::vec3(m1*m2*glm::vec4(v, 1)));
        std::cout << std::endl;
    }
    std::cout << std::endl;

    std::cout << "=============" << std::endl;
    std::cout << "vector rotate" << std::endl;
    std::cout << "=============" << std::endl << std::endl;

    {
        glm::vec3 v1(1, 0, 0);
        glm::vec3 v2(0, 1, 0);
        glm::vec3 v3(0, 0, 1);
        glm::mat4 m1 = glm::rotate(
                glm::mat4(1),
                90.0f,
                glm::vec3(0.0f, 1.0f, 0.0f));
        glm::mat4 m2 = glm::rotate(
                glm::mat4(1),
                90.0f,
                glm::vec3(0.0f, 0.0f, 1.0f));
        glm::mat4 m3 = glm::rotate(
                glm::mat4(1),
                90.0f,
                glm::vec3(1.0f, 0.0f, 0.0f));

        std::cout << "v1:\t";
        print_vec(v1);
        std::cout << std::endl;
        std::cout << "v2:\t";
        print_vec(v2);
        std::cout << std::endl;
        std::cout << "v3:\t";
        print_vec(v3);
        std::cout << std::endl << std::endl;
        std::cout << "m1 (rotate +90 deg around y axis):" << std::endl;
        print_mat(m1);
        std::cout << std::endl;
        std::cout << "m2 (rotate +90 deg around z axis):" << std::endl;
        print_mat(m2);
        std::cout << std::endl;
        std::cout << "m3 (rotate +90 deg around x axis):" << std::endl;
        print_mat(m3);
        std::cout << std::endl;

        std::cout << "m1*v1:\t";
        print_vec(glm::vec3(m1*glm::vec4(v1, 1)));
        std::cout << std::endl;

        std::cout << "m2*v2:\t";
        print_vec(glm::vec3(m2*glm::vec4(v2, 1)));
        std::cout << std::endl;

        std::cout << "m3*v3:\t";
        print_vec(glm::vec3(m3*glm::vec4(v3, 1)));
        std::cout << std::endl;
    }
    std::cout << std::endl;

    std::cout << "============" << std::endl;
    std::cout << "vector angle" << std::endl;
    std::cout << "============" << std::endl << std::endl;

    {
        glm::vec3 v1(1, 0, 0);
        glm::vec3 v2(0, 1, 0);
        glm::vec3 v3(-1, 0, 0);

        std::cout << "v1:\t";
        print_vec(v1);
        std::cout << std::endl;
        std::cout << "v2:\t";
        print_vec(v2);
        std::cout << std::endl;
        std::cout << "v3:\t";
        print_vec(v3);
        std::cout << std::endl << std::endl;

        std::cout << "angle(v1, v2):\t" << glm::angle(v1, v2) << std::endl;
        std::cout << "angle(v2, v3):\t" << glm::angle(v2, v3) << std::endl;
        std::cout << "angle(v3, v1):\t" << glm::angle(v3, v1) << std::endl;
    }
}
예제 #24
0
파일: main.c 프로젝트: stpddream/OSHomework
/*
 * Start one thread with default attributes, or terminate the program.
 *
 * pthread_create returns a non-zero error number on failure and leaves the
 * thread handle unusable. The joins in main() assume every handle is valid,
 * so a failure is reported on stderr and the process exits with
 * EXIT_FAILURE instead of continuing.
 *
 * thread         receives the handle of the new thread
 * start_routine  function the new thread runs
 * arg            argument passed through to start_routine
 */
static void spawn_thread_or_exit(pthread_t *thread,
                                 void *(*start_routine)(void *),
                                 void *arg) {
    int rc = pthread_create(thread, NULL, start_routine, arg);
    if(rc != 0) {
        fprintf(stderr, "pthread_create failed with error %d\n", rc);
        exit(EXIT_FAILURE);
    }
}

/*
 * Multi-threaded matrix demo driver.
 *
 * Runs three phases, each of which starts its worker threads and joins all
 * of them before the next phase begins:
 *   1. one write_mat thread per (i, j) cell; A and B are printed afterwards
 *      (per the original comment, this fills A and B with random numbers);
 *   2. one eval_mult thread per (i, j) cell; C is printed afterwards;
 *   3. one update_row_sum thread per row; MAX_ROW_SUM is printed afterwards.
 *
 * The mutex mp is initialised before the first phase and destroyed after the
 * last one. argc and argv are unused.
 *
 * Returns EXIT_SUCCESS; exits with EXIT_FAILURE if a thread cannot be created.
 */
int main(int argc, char** argv) {
    int i;
    int j;
    
    pthread_mutex_init(&mp, NULL);

    /* Phase 1: fill in matrix A and B with random numbers */
    pthread_t tid[N][N];
    int pos[N][N][2]; 
        
    for(i = 0; i < N; i++) {
        for(j = 0; j < N; j++) {                               
            /* Each thread gets a pointer to its own (row, column) pair. */
            pos[i][j][0] = i;
            pos[i][j][1] = j;
            
            /* Generate a seed for each thread by mixing the address of the
             * tid array, the cell indices and the current microseconds.
             * The address goes through size_t first so the narrowing to
             * unsigned int is explicit instead of a direct pointer-to-int
             * cast of a different size. */
            struct timeval tv;            
            gettimeofday(&tv,NULL);
            seeds[i][j] = (unsigned int)(size_t)tid + i * 10 + j * 13 + tv.tv_usec; 
            
            spawn_thread_or_exit(&tid[i][j], write_mat, pos[i][j]);            
        }
    }
        
    for(i = 0; i < N; i++) {
        for(j = 0; j < N; j++) {
            pthread_join(tid[i][j], NULL);
        }
    }
    
    printf("==== Matrix A ====\n");
    print_mat(A);
    printf("\n");
    printf("==== Matrix B ====\n");
    print_mat(B);
    printf("\n");

    
    /* Phase 2: calculate C = A x B */    
    //TODO: Optimize: use DP 
    //Too lazy to optimize, not sure how DP works in multithread
    pthread_t tids[N][N];
    
    /* pos can be reused here: every phase-1 thread has already been joined. */
    for(i = 0; i < N; i++) {
        for(j = 0; j < N; j++) {            
            pos[i][j][0] = i;
            pos[i][j][1] = j;       
            spawn_thread_or_exit(&tids[i][j], eval_mult, pos[i][j]);
        }
    }
    
    for(i = 0; i < N; i++) {
        for(j = 0; j < N; j++) {
            pthread_join(tids[i][j], NULL);
        }
    }

    printf("==== Matrix C ====\n");
    print_mat(C);
    printf("\n");

    
    /* Phase 3: calculate the max row sum */
    MAX_ROW_SUM = 0;    
    pthread_t stid[N];
        
    /* row[] keeps each index alive at a stable address for its thread. */
    int row[N];
    for(i = 0; i < N; i++) {
        row[i] = i;
        spawn_thread_or_exit(&stid[i], update_row_sum, &row[i]);
    }
    
    for(i = 0; i < N; i++) {
        pthread_join(stid[i], NULL);
    }
    
    printf("Max Sum: %d\n", MAX_ROW_SUM);
    pthread_mutex_destroy(&mp);
    
    return (EXIT_SUCCESS);
}