/*
 * Times cblas_dgemm for C = alpha*A*B + beta*C on row-major matrices
 * A (M x P), B (P x N) and C (M x N), once for every thread count from 1 up
 * to mkl_get_max_threads(), and prints C after each timed run.
 *
 * M, N, P and LOOP_COUNT are not defined in this function and must be
 * provided elsewhere in the file.
 *
 * Returns 0 on success, 1 if any of the matrices could not be allocated.
 */
int main()
{
    double *A, *B, *C;
    int i,j,r,max_threads,size;
    double alpha, beta;
    double s_initial, s_elapsed;

    printf("Intializing data for matrix multiplication C=A*B for matrix\n\n"
            " A(%i*%i) and matrix B(%i*%i)\n",M,P,P,N);
    alpha = 1.0;
    beta = 0.0;

    printf("Allocating memory for matrices aligned on 64-byte boundary for better performance \n\n");
    A = ( double *)mkl_malloc(M*P*sizeof( double ),64);
    B = ( double *)mkl_malloc(N*P*sizeof( double ),64);
    C = ( double *)mkl_malloc(M*N*sizeof( double ),64);
    if (A == NULL || B == NULL || C == NULL)
    {
        printf("Error: can`t allocate memory for matrices.\n\n");
        mkl_free(A);
        mkl_free(B);
        mkl_free(C);
        return 1;
    }

    printf("Intializing matrix data\n\n");
    size = M*P;
    for (i = 0; i < size; ++i)
    {
        A[i] = ( double )(i+1);
    }
    size = N*P;
    for (i = 0; i < size; ++i)
    {
        B[i] = ( double )(i-1);
    }

    printf("Finding max number of threads can use for parallel runs \n\n");
    max_threads = mkl_get_max_threads();

    printf("Running from 1 to %i threads \n\n",max_threads);
    for (i = 1; i <= max_threads; ++i)
    {
        /* Reset the output matrix before each timed run. */
        size = M*N;
        for (j = 0; j < size; ++j)
        {
            C[j] = 0.0;
        }

        printf("Requesting to use %i threads \n\n",i);
        mkl_set_num_threads(i);

        printf("Measuring performance of matrix product using dgemm function\n"
            " via CBLAS interface on %i threads \n\n",i);
        s_initial = dsecnd();
        for (r = 0; r < LOOP_COUNT; ++r)
        {
            cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, P, alpha, A, P, B, N, beta, C, N);
        }
        /* Average wall-clock time of one dgemm call, in seconds. */
        s_elapsed = (dsecnd() - s_initial) / LOOP_COUNT;

        printf("Matrix multiplication using dgemm completed \n"
            " at %.5f milliseconds using %d threads \n\n",
            (s_elapsed * 1000),i);
        printf("Output the result: \n");
        /*
         * Print C row by row. This loop must use its own index (j): reusing
         * the outer counter i here left i == M*N afterwards and ended the
         * thread sweep after the first iteration.
         */
        size = M*N;
        for (j = 0; j < size; ++j)
        {
            printf("%i\t",(int)C[j]);
            if (j % N == N - 1)
                printf("\n");
        }
    }

    printf("Dellocating memory\n");
    mkl_free(A);
    mkl_free(B);
    mkl_free(C);

    return 0;
}
Example #2
0
/*
 * Loads the mesh named by fin_base, builds a v1 solver on it, generates the
 * right-hand side b1 (together with x0) via sv1_gen_b1x0, solves for x1 and
 * saves that solution under output_dir.
 *
 * fin_base, ipar, dpar and output_dir are not defined in this function and
 * are taken from the enclosing file.
 */
void solve1()
{
	struct st_mesh *mesh = mshio_create_mesh(fin_base);
	struct st_solver_v1 *solver = sv1_create_solver(mesh, ipar, dpar);
	double _Complex *x0;
	double _Complex *b1;
	double _Complex *x1;
	int nitr;
	double eps;

	sv1_print_solverinfo(solver);

	/* Three work vectors of solver->Ng complex entries each, 64-byte aligned. */
	x0 = (double _Complex*)mkl_malloc(sizeof(double _Complex)*solver->Ng, 64);
	b1 = (double _Complex*)mkl_malloc(sizeof(double _Complex)*solver->Ng, 64);
	x1 = (double _Complex*)mkl_malloc(sizeof(double _Complex)*solver->Ng, 64);
	assert(x0);
	assert(b1);
	assert(x1);

	sv1_gen_b1x0(solver, 0.0, b1, x0);
	sv1_solve(solver, b1, x1, 200, 30, 1.0E-13, &nitr, &eps);
	sv1_save_solution(solver, x1, output_dir);

	/* Tear down: vectors first, then the solver, then the mesh. */
	mkl_free(x0);
	mkl_free(b1);
	mkl_free(x1);
	sv1_destroy_solver(solver);
	mshio_destroy_mesh(mesh);
}
Example #3
0
/*
 * Snapshots the current model as the best one seen so far: records numit,
 * replaces best_alpha_avg with a copy of alpha_avg (M entries) and
 * best_kernel_matrix with a copy of kernel_matrix (N*M entries), then stores
 * M in best_m. Any previously held snapshot buffers are released first.
 */
void mark_best_KernelPerceptronModel(KernelPerceptron kmodel, int numit) {
    float *alpha_copy;
    float *matrix_copy;

    /* Release the previous snapshot, if one exists. */
    if (kmodel->best_alpha_avg != NULL)
        mkl_free(kmodel->best_alpha_avg);
    if (kmodel->best_kernel_matrix != NULL)
        mkl_free(kmodel->best_kernel_matrix);

    kmodel->best_numit = numit;

    alpha_copy = (float*) mkl_64bytes_malloc((kmodel->M) * sizeof (float));
    kmodel->best_alpha_avg = alpha_copy;
    matrix_copy = (float*) mkl_64bytes_malloc((kmodel->N) * (kmodel->M) * sizeof (float));
    kmodel->best_kernel_matrix = matrix_copy;

    /* Copy the averaged alpha vector. */
    for (size_t idx = 0; idx < (kmodel->M); ++idx)
        alpha_copy[idx] = (kmodel->alpha_avg)[idx];

    /* Copy the kernel matrix as one flat array of N*M entries. */
    size_t total = (kmodel->N) * (kmodel->M);
#pragma ivdep
#pragma loop_count min(102400)    
    for (size_t idx = 0; idx < total; ++idx)
        matrix_copy[idx] = (kmodel->kernel_matrix)[idx];

    kmodel->best_m = kmodel->M;
}
/*
 * MEX entry point: computes the determinant of a square numeric matrix and,
 * when a second output is requested, the matrix produced by LAPACKE_dgetri
 * (the inverse when the input is non-singular).
 *
 *   prhs[0]  non-empty square numeric matrix (read through mxGetPr)
 *   plhs[0]  determinant, from the LU factorisation returned by LAPACKE_dgetrf
 *   plhs[1]  result of LAPACKE_dgetri, only when nlhs == 2
 *
 * All argument, allocation and factorisation errors are reported through
 * mexErrMsgTxt; scratch buffers are released before it is called.
 */
void mexFunction(int nlhs, mxArray *plhs[],
	int nrhs, const mxArray *prhs[]){
	/* Local variables */
	mexPrintf("hios\n"); /* Task 1 ends here */
	double *A, *B, determinante;
	int *pivot, info, Nfilas, Ncolumnas;
	size_t nelem;

	/* Argument validation */
	if (nrhs != 1){ /* number of input arguments different from 1 */
		mexErrMsgTxt("Error. myla, Debe tener un arg de entrada");
	}
	if (!mxIsNumeric(prhs[0])){
		mexErrMsgTxt("Error. El argumento de entrada debe ser una matriz");
	}
	Nfilas = mxGetM(prhs[0]);
	Ncolumnas = mxGetN(prhs[0]);
	if (Nfilas != Ncolumnas){
		mexErrMsgTxt("Error. La matriz debe ser cuadrada");
	}
	if (Nfilas == 0){
		mexErrMsgTxt("Error. La matriz debe no ser vacía");
	}
	if (nlhs > 2){
		mexErrMsgTxt("Error. Debe haber uno o dos args de salida");
	}

	/* Work on a private copy: dgetrf/dgetri overwrite their input. */
	A = mxGetPr(prhs[0]);
	/* size_t arithmetic so the element count cannot overflow int. */
	nelem = (size_t)Nfilas * (size_t)Ncolumnas;
	B = (double *)mkl_malloc(nelem*sizeof(double), 64);
	pivot = (int *)mkl_malloc((size_t)Nfilas*sizeof(int), 32);
	if (B == NULL || pivot == NULL){
		/* Release whichever buffer did get allocated before reporting. */
		mkl_free(pivot);
		mkl_free(B);
		mexErrMsgTxt("Error. No se pudo reservar memoria");
	}
	memcpy(B, A, nelem*sizeof(double));

	/* LU factorisation with partial pivoting */
	info = LAPACKE_dgetrf(LAPACK_COL_MAJOR, Nfilas, Ncolumnas, B, Ncolumnas, pivot);
	if (info < 0){
		/* A negative status means an argument was rejected; B is not a valid LU. */
		mkl_free(pivot);
		mkl_free(B);
		mexErrMsgTxt("Error. Argumento no valido en LAPACKE_dgetrf");
	}

	/*
	 * det(A) is the product of the diagonal of U, with one sign flip for
	 * every row interchange (pivot entries are 1-based).
	 */
	determinante = 1.0;
	for (int i = 0; i < Nfilas; i++){
		if (pivot[i] != (i+1)){
			determinante *= -B[i*Ncolumnas + i];
		}
		else{
			determinante *= B[i*Ncolumnas + i];
		}
	}

	/* Build the outputs */
	plhs[0] = mxCreateDoubleScalar(determinante);
	if (nlhs == 2){
		if (fabs(determinante) < 1.0e-8){
			mexWarnMsgTxt("Matriz singular o casi singular");
		}
		LAPACKE_dgetri(LAPACK_COL_MAJOR, Nfilas, B, Ncolumnas, pivot);
		plhs[1] = mxCreateDoubleMatrix(Nfilas, Ncolumnas, mxREAL);
		double *C = mxGetPr(plhs[1]);
		memcpy(C, B, nelem*sizeof(double));
	}
	mkl_free(pivot);
	mkl_free(B);
}
Example #5
0
// Releases a host buffer with the deallocator selected by the build
// configuration and the use_cuda flag:
//   - unless CPU_ONLY is defined and when use_cuda is true: cudaFreeHost
//     (wrapped in CUDA_CHECK), then return;
//   - with USE_MLSL and when mn::is_multinode() is true: mn::free;
//   - otherwise: mkl_free when USE_MKL is defined, plain free if not.
// NOTE(review): presumably this mirrors the matching host allocation routine
// -- confirm that both pick the same allocator for a given configuration.
inline void CaffeFreeHost(void* ptr, bool use_cuda) {
#ifndef CPU_ONLY
  if (use_cuda) {
    CUDA_CHECK(cudaFreeHost(ptr));
    return;
  }
#endif

  // With USE_MLSL the MKL/free branch below becomes the `else` arm of the
  // multinode test; without it that branch runs unconditionally.
#ifdef USE_MLSL
  if (mn::is_multinode()) {
    mn::free(ptr);
  } else {
#endif /* USE_MLSL */

#ifdef USE_MKL
    mkl_free(ptr);
#else
    free(ptr);
#endif

#ifdef USE_MLSL
  }
#endif /* USE_MLSL */

}
Example #6
0
// Integrates a 3-D potential along its last (z) index and writes the resulting
// 2-D array to a new FITS file as a double-precision image.
//
//   sim_data        supplies the grid sizes via get_num_x/y/z()
//   potential       flat array indexed as [i*num_y*num_z + j*num_z + k]
//   fits_file_name  path handed to fits_create_file
//
// Any CFITSIO error is reported on stderr through fits_report_error. If the
// scratch buffer cannot be allocated, a message is printed to stderr and no
// file is written.
void save_2d_image_potential(SimulationData &sim_data, double *potential, const char * fits_file_name) {

	// Read the grid sizes once; long arithmetic keeps the flat indices from
	// overflowing int on large grids.
	const long num_x = sim_data.get_num_x();
	const long num_y = sim_data.get_num_y();
	const long num_z = sim_data.get_num_z();

	double *save_data = (double*)mkl_malloc(num_x * num_y * sizeof(double), 64);
	if (save_data == NULL) {
		fprintf(stderr, "save_2d_image_potential: cannot allocate image buffer\n");
		return;
	}

	fitsfile *fptr = NULL;
	int status = 0;
	long fpixel = 1, naxis = 2, nelements;
	long naxes[2] = {num_y, num_x};

	// Sum every z-column into one pixel.
	for (long i = 0; i < num_x; ++i) {
		for (long j = 0; j < num_y; ++j) {
			const long pixel = i * num_y + j;
			save_data[pixel] = 0;
			for (long k = 0; k < num_z; ++k) {
				save_data[pixel] += potential[pixel * num_z + k];
			}
		}
	}

	fits_create_file(&fptr, fits_file_name, &status);
	fits_create_img(fptr, DOUBLE_IMG, naxis, naxes, &status);
	nelements = naxes[0] * naxes[1];
	fits_write_img(fptr, TDOUBLE, fpixel, nelements, save_data, &status);
	// Only close a handle that was actually opened, so a failed create does
	// not pass an invalid pointer to CFITSIO or mask the original status.
	if (fptr != NULL) {
		fits_close_file(fptr, &status);
	}
	fits_report_error(stderr, status);

	mkl_free(save_data);
}
Example #7
0
/*
 * Verifies a matrix product against a reference computed with cblas_dgemm.
 *
 *   A           row-major m x c matrix
 *   BT          row-major n x c matrix (B stored transposed)
 *   C           result to check: row-major m x n, or, when `transposed` is
 *               non-zero, stored so that element (i,j) lives at C[j*m + i]
 *   m, n, c     dimensions as above
 *   transposed  selects the layout of C
 *
 * Returns the number of entries that differ from the reference A * BT^T by
 * 0.0001 or more, or -1 if the reference buffer could not be allocated.
 */
int check_result(double* A, double* BT, double* C, int m, int n, int c, int transposed) {
    int err_c = 0; /* number of mismatching entries */
    int i, j;
    /* Not zero-initialised; dgemm below is called with beta = 0. */
    double* C_ref = (double*)mkl_malloc((size_t)m * (size_t)n * sizeof(double), 16);

    if (C_ref == NULL) {
        return -1;
    }

    /* C_ref = A * BT^T */
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
            m, n, c,
            1, A, c, BT, c, 0, C_ref, n);

    if(transposed){
        for(i = 0; i < m; i++) {
            for(j = 0; j < n; j++) {
                err_c += (fabs(C[j*m + i] - C_ref[i*n+j]) < 0.0001 ? 0 : 1);
            }
        }
    } else {
        /* Same layout on both sides: compare element by element. */
        for(i = 0; i < m * n ; i++) {
            err_c += (fabs(C[i] - C_ref[i]) < 0.0001 ? 0 : 1);
        }
    }
    mkl_free(C_ref);
    return err_c;
}
// Frees ptr with mkl_free when ZNN_XEON_PHI is defined and with the standard
// free otherwise.
// NOTE(review): presumably paired with an allocation helper that makes the
// same compile-time choice -- confirm against the allocator in this project.
inline void znn_free(void* ptr)
{
#ifdef ZNN_XEON_PHI
    mkl_free(ptr);
#else
    free(ptr);
#endif
}
Example #9
0
CMatrix3D::~CMatrix3D()
{
	// Run the destructor of each of the `depth` slices explicitly, in
	// ascending order, so that every matrix releases its own storage ...
	int slice = 0;
	while (slice < depth)
	{
		(pMats + slice)->~CMatrix();
		++slice;
	}

	// ... and only then give back the array that held the slices.
	mkl_free(pMats);
}
WavefunctionData::~WavefunctionData() {
    // Hand every wavefunction buffer back to MKL, in declaration order.
    void *buffers[] = { psi, psi_new, psi_old, psi_tf, psi_abs2, conj_psi };
    for (unsigned idx = 0; idx < sizeof(buffers) / sizeof(buffers[0]); ++idx) {
        mkl_free(buffers[idx]);
    }
}
Example #11
0
// Benchmarks LAPACKE_dgesv on an N x N diagonally-boosted random system with a
// single right-hand side and prints the mean time per call in milliseconds.
//
// N and NTEST are not defined in this function and must be provided elsewhere
// in the file. Returns 0 on success, 1 if a buffer could not be allocated.
int main(int argc, char *argv[]){
	double *A = (double *)mkl_malloc(N*N*sizeof(double), 64);
	double *B = (double *)mkl_malloc(N*sizeof(double), 64);
	int *pivot = (int *)mkl_malloc(N*sizeof(int), 32);
	if (A == NULL || B == NULL || pivot == NULL) {
		printf("Error: no se pudo reservar memoria\n");
		mkl_free(A);
		mkl_free(B);
		mkl_free(pivot);
		return 1;
	}

	// Entries drawn from a normal distribution with mean 0 and variance 1.
	std::default_random_engine generador;
	std::normal_distribution<double> aleatorio(0.0, 1.0);
	for (int i = 0; i < N*N; i++) A[i] = aleatorio(generador);
	for (int i = 0; i < N; i++) B[i] = aleatorio(generador);
	// Boost the diagonal of A to reduce the risk of a singular matrix.
	for (int i = 0; i < N; i++) A[i*N + i] += 10.0;

	// Initialised so the status is well defined even when NTEST is 0.
	int result = 0;
	double inicio = dsecnd();
	for (int i = 0; i < NTEST; i++)
		result = LAPACKE_dgesv(LAPACK_ROW_MAJOR, N, 1, A, N, pivot, B, 1);
	double fin = dsecnd();
	if (result != 0)
		printf("Aviso: LAPACKE_dgesv devolvio %d\n", result);

	double tiempo = (fin - inicio) / (double)NTEST;
	printf("Tiempo: %lf msec\n", tiempo*1.0e3);

	mkl_free(A);
	mkl_free(B);
	mkl_free(pivot); // previously leaked
	std::getchar(); return 0;
}
Example #12
0
/*
 * Rebuilds kp->alpha_avg as a fresh array of M floats holding
 * alpha[i] - beta[i] / c for every i. A previously allocated alpha_avg is
 * released first.
 */
void update_average_alpha(KernelPerceptron kp) {
    float *averaged;

    if (kp->alpha_avg != NULL)
        mkl_free(kp->alpha_avg);

    averaged = (float*) mkl_64bytes_malloc((kp->M) * sizeof (float));
    kp->alpha_avg = averaged;

    for (size_t idx = 0; idx < (kp->M); ++idx)
        averaged[idx] = (kp->alpha)[idx] - (kp->beta)[idx] / (kp->c);
}
Example #13
0
File: dgemm_3.c Project: yoyz/mpi
/*
 * STREAM-triad style bandwidth benchmark: repeats A[j] = B[j] + scalar*C[j]
 * over SIZE*SIZE doubles NTIME times and prints the achieved GB/s, counting
 * three 8-byte accesses per element.
 *
 * SIZE, NTIME, stoptime() and DPRINTF are not defined in this function and
 * must be provided elsewhere in the file.
 *
 * Returns 0 on success, 1 if any buffer could not be allocated.
 */
int bench_stream_triad()
{
    double *A, *B, *C;
    double t;
    int64_t m, n, k, i, j;
    m = SIZE, k = SIZE, n = SIZE;
    double scalar=3.14;
    A = (double *)mkl_malloc( m*k*sizeof( double ), 64 );
    B = (double *)mkl_malloc( k*n*sizeof( double ), 64 );
    C = (double *)mkl_malloc( m*n*sizeof( double ), 64 );

    /* Validate the allocations BEFORE the buffers are written to. */
    if (A == NULL || B == NULL || C == NULL) {
      printf( "\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");
      mkl_free(A);
      mkl_free(B);
      mkl_free(C);
      return 1;
    }

#pragma omp parallel for 
    for (i = 0; i < (m*k); i++) {
        A[i] = (double)(i+1);
    }
#pragma omp parallel for 
    for (i = 0; i < (k*n); i++) {
        B[i] = (double)(-i-1);
    }

#pragma omp parallel for 
    for (i = 0; i < (m*n); i++) {
        C[i] = 0.0;
    }

    t=stoptime();
    for (i=0;i<NTIME;i++)
#pragma omp parallel for    
      for (j=0; j<(m*k); j++)
	A[j] = B[j]+scalar*C[j];
    t=stoptime()-t;
    printf("GB/s         : %f\n",(((((m*k)*3)*8)*NTIME)/t)*1E-9);
    DPRINTF("\n Deallocating memory \n\n");
    mkl_free(A);
    mkl_free(B);
    mkl_free(C);
    return 0;
}
Example #14
0
		// Destructor: releases the `data` buffer through mkl_free.
		// NOTE(review): presumably `data` was obtained from mkl_malloc -- confirm in the constructor.
		~Vector() { mkl_free(data); }
Example #15
0
// X: a MxD matrix, Y: a M vector, W: a M vector
// W0: a M vector
/*
 * Driver for the parSymSGD benchmark.
 *
 * Command line: parSymSGD M D T C lamda r  (all optional, defaults
 * 32 4 10 4 0.01 1); a first argument starting with 'h' prints the usage.
 * For thread counts 1, 2, 4, ... up to min(max MKL threads, panelSz) it
 * repeats REPEATS times: preprocessPar, initW, T sweeps of gd over all
 * panels, then calErr, and prints the timings in milliseconds.
 *
 * T, panelSz, panels, nt, prev_err, PAGESIZE, REPEATS and the helper
 * routines are not defined in this function and come from the enclosing file.
 *
 * Returns 0 on success, 1 for the usage message or an invalid panel size,
 * 2 on allocation failure.
 */
int main(int argc, char ** argv){
    if (argc>1 && argv[1][0]=='h') {
        printf ("Usage: parSymSGD M D T C lamda r\n");
        printf ("  M: number of data points, D: dimensions, T: time iterations, C: cores;\n");
        printf ("  lamda: learning rate, r: panel size in unit of C.\n");
        return 1;
    }
    // read in the arguments: M, D, T (time iterations), C (cores), r (each panel contains r*C points)
    int M = argc>1?atoi(argv[1]):32;
    int D = argc>2?atoi(argv[2]):4;
    T = argc>3?atoi(argv[3]):10;
    int C = argc>4?atoi(argv[4]):4;
    float lamda = argc>5?atof(argv[5]):0.01;
    int r = argc>6?atoi(argv[6]):1;

    int max_threads = mkl_get_max_threads(); // get the max number of threads

    int rep;
    mkl_set_num_threads(1); // set the number of threads to use by mkl
    panelSz = C*r;
    if (panelSz <= 0) {
        // Guards the division below against C*r == 0 (or a negative size).
        printf("Invalid arguments: C*r must be positive.\n");
        return 1;
    }
    panels = M/panelSz;

    int p,t;
    float *Y, *Wreal, *W, *X;
    Y = (float *) mkl_malloc(M*sizeof(float),PAGESIZE);
    Wreal = (float *) mkl_malloc(D*sizeof(float),PAGESIZE);
    W = (float *) mkl_malloc(D*sizeof(float),PAGESIZE);
    X = (float *) mkl_malloc(M*D*sizeof(float),PAGESIZE);
    float *Ypred = (float*)mkl_malloc(M*sizeof(float),PAGESIZE);
    float *Ytmp = (float*)mkl_malloc(M*sizeof(float),PAGESIZE);
    float *I = (float*)mkl_malloc(D*D*sizeof(float),PAGESIZE);
    float *Z = (float*)mkl_malloc(M*D*sizeof(float),PAGESIZE);
    float *B = (float*)mkl_malloc(panels*D*sizeof(float),PAGESIZE);

    if (Y==NULL || Wreal==NULL || W==NULL || X==NULL || Ypred==NULL || Ytmp==NULL || Z==NULL || B==NULL || I==NULL){
        printf("Memory allocation error.\n");
        // Release whatever was allocated before bailing out.
        mkl_free(B);
        mkl_free(Z);
        mkl_free(Ytmp);
        mkl_free(Ypred);
        mkl_free(Y);
        mkl_free(Wreal);
        mkl_free(W);
        mkl_free(X);
        mkl_free(I);
        return 2;
    }

    initData(Wreal,W,X,Y, M, D,I);

    for (nt=1; nt<=max_threads && nt<=panelSz; nt*=2){
        omp_set_num_threads(nt);// set the number of openMP threads

        for (rep=0; rep<REPEATS; rep++){//repeat measurements
            double prepTime, gdTime, sInit;
            // preprocessing
            sInit=dsecnd();
            preprocessPar(X, Y, Z, B, panelSz, panels, M, D, lamda);
            prepTime = (dsecnd() - sInit);

            // GD
            initW(W,D);
            sInit=dsecnd();
            float err;
            float fixpoint = 0.0; // computed below but not used further in this function
            for (t=0;t<T;t++){
                for (p=0;p<panels;p++){
                    gd(&(X[p*panelSz*D]),&(Z[p*panelSz*D]), &(B[p*D]), panelSz, D, lamda, W, I);
                  printf("finish  one  panels     ............................  \n");
                }
            }
            gdTime = (dsecnd() - sInit);

            err=calErr(X, Ypred, Ytmp, Y, W, M, D);
            fixpoint = err - prev_err;

            // print final err. time is in milliseconds
            printf("nt=%d\t ttlTime=%.5f\t prepTime=%.5f\t gdTime=%.5f\t error=%.5f\n", nt, (gdTime+prepTime)*1000, prepTime*1000, gdTime*1000, err);
        }
    }
    mkl_free(B);
    mkl_free(Z);
    mkl_free(Ytmp);
    mkl_free(Ypred);
    mkl_free(Y);
    mkl_free(Wreal);
    mkl_free(W);
    mkl_free(X);
    mkl_free(I);
    return 0;
}
/*
 * Measures the run time of a plain triple-nested-loop matrix product
 * C = A*B for row-major A (m x p), B (p x n) and C (m x n), with
 * m = 2000, p = 200, n = 1000.
 *
 * One untimed warm-up product is followed by LOOP_COUNT timed products; the
 * mean time per product is printed in milliseconds. LOOP_COUNT and dsecnd()
 * are not defined in this function and come from the enclosing file / MKL.
 *
 * Returns 0 on success, 1 if any matrix could not be allocated.
 */
int main()
{
    double *A, *B, *C;
    int m, n, p, i, j, k, r;
    double alpha, beta;
    double sum;
    double s_initial, s_elapsed;

    printf ("\n This example measures performance of rcomputing the real matrix product \n"
            " C=alpha*A*B+beta*C using a triple nested loop, where A, B, and C are \n"
            " matrices and alpha and beta are double precision scalars \n\n");

    m = 2000, p = 200, n = 1000;
    printf (" Initializing data for matrix multiplication C=A*B for matrix \n"
            " A(%ix%i) and matrix B(%ix%i)\n\n", m, p, p, n);
    /* alpha and beta are assigned but not read by the loops below. */
    alpha = 1.0; beta = 0.0;
    
    printf (" Allocating memory for matrices aligned on 64-byte boundary for better \n"
            " performance \n\n");
    A = (double *)mkl_malloc( m*p*sizeof( double ), 64 );
    B = (double *)mkl_malloc( p*n*sizeof( double ), 64 );
    C = (double *)mkl_malloc( m*n*sizeof( double ), 64 );
    if (A == NULL || B == NULL || C == NULL) {
        printf( "\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");
        mkl_free(A);
        mkl_free(B);
        mkl_free(C);
        return 1;
    }

    printf (" Intializing matrix data \n\n");
    for (i = 0; i < (m*p); i++) {
        A[i] = (double)(i+1);
    }

    for (i = 0; i < (p*n); i++) {
        B[i] = (double)(-i-1);
    }

    for (i = 0; i < (m*n); i++) {
        C[i] = 0.0;
    }

    /* Untimed warm-up pass over the same data. */
    printf (" Making the first run of matrix product using triple nested loop\n"
            " to get stable run time measurements \n\n");
    for (i = 0; i < m; i++) {
        for (j = 0; j < n; j++) {
            sum = 0.0;
            for (k = 0; k < p; k++)
                sum += A[p*i+k] * B[n*k+j];
            C[n*i+j] = sum;
        }
    }

    /* Timed section: LOOP_COUNT identical products. */
    printf (" Measuring performance of matrix product using triple nested loop \n\n");
    s_initial = dsecnd();
    for (r = 0; r < LOOP_COUNT; r++) {
        for (i = 0; i < m; i++) {
            for (j = 0; j < n; j++) {
                sum = 0.0;
                for (k = 0; k < p; k++)
                    sum += A[p*i+k] * B[n*k+j];
                C[n*i+j] = sum;
            }
        }
    }
    /* Mean wall-clock time of one product, in seconds. */
    s_elapsed = (dsecnd() - s_initial) / LOOP_COUNT;
    
    printf (" == Matrix multiplication using triple nested loop completed == \n"
            " == at %.5f milliseconds == \n\n", (s_elapsed * 1000));
    
    printf (" Deallocating memory \n\n");
    mkl_free(A);
    mkl_free(B);
    mkl_free(C);
    
    /*
     * If the whole timed section took less than 0.9 s, suggest a LOOP_COUNT
     * that would bring the total to roughly one second.
     */
    if (s_elapsed < 0.9/LOOP_COUNT) {
        s_elapsed=1.0/LOOP_COUNT/s_elapsed;
        i=(int)(s_elapsed*LOOP_COUNT)+1;
        printf(" It is highly recommended to define LOOP_COUNT for this example on your \n"
               " computer as %i to have total execution time about 1 second for reliability \n"
               " of measurements\n\n", i);
    }
    
    printf (" Example completed. \n\n");
    return 0;
}
Example #17
0
// Exported with C linkage (__declspec(dllexport)): releases x through mkl_free.
// NOTE(review): presumably lets callers of the DLL free arrays that the
// library allocated with mkl_malloc -- confirm against the exported allocators.
extern "C" __declspec(dllexport) void free_array(void* x)
{
	mkl_free(x);
}
Example #18
0
File: dgemm_3.c Project: yoyz/mpi
/*
 * Times a single cblas_dgemm call computing C = alpha*A*B + beta*C for
 * square row-major SIZE x SIZE matrices and prints the elapsed time and the
 * rate computed as 2*m*n*k*1e-9 / t.
 *
 * SIZE, stoptime(), DPRINTF and min are not defined in this function and
 * come from the enclosing file.
 * NOTE(review): DPRINTF is presumably a printf-like debug macro that may
 * compile to nothing -- confirm its definition.
 *
 * Returns 0 on success, 1 if any matrix could not be allocated.
 */
int bench_dgemm()
{
    double *A, *B, *C;
    int m, n, k, i, j;
    double alpha, beta;
    double t;

    m = SIZE, k = SIZE, n = SIZE;
    DPRINTF(" Initializing data for matrix multiplication C=A*B for matrix \n"
            " A(%ix%i) and matrix B(%ix%i)\n\n", m, k, k, n);
    alpha = 1.0; beta = 0.0;

    DPRINTF(" Allocating memory for matrices aligned on 64-byte boundary for better \n"
            " performance \n\n");
    A = (double *)mkl_malloc( m*k*sizeof( double ), 64 );
    B = (double *)mkl_malloc( k*n*sizeof( double ), 64 );
    C = (double *)mkl_malloc( m*n*sizeof( double ), 64 );
    if (A == NULL || B == NULL || C == NULL) {
      printf( "\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");
      mkl_free(A);
      mkl_free(B);
      mkl_free(C);
      return 1;
    }

    /* Fill A with 1,2,3,..., B with -1,-2,-3,... and clear C. */
    DPRINTF(" Intializing matrix data \n\n");
#pragma omp parallel for 
    for (i = 0; i < (m*k); i++) {
        A[i] = (double)(i+1);
    }
#pragma omp parallel for 
    for (i = 0; i < (k*n); i++) {
        B[i] = (double)(-i-1);
    }

#pragma omp parallel for 
    for (i = 0; i < (m*n); i++) {
        C[i] = 0.0;
    }

    /* Timed section: exactly one dgemm call. */
    DPRINTF(" Computing matrix product using Intel(R) MKL dgemm function via CBLAS interface \n\n");
    t=stoptime();
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 
                m, n, k, alpha, A, k, B, n, beta, C, n);
    t=stoptime()-t;
    printf("calculation time : %f\n",t);
    printf("gflops/s         : %f\n",((2.0*m*n*k)*1E-9)/t);

    DPRINTF("\n Computations completed.\n\n");

    /* Dump at most the 6x6 top-left corner of each row-major matrix. */
    DPRINTF(" Top left corner of matrix A: \n");
    for (i=0; i<min(m,6); i++) {
      for (j=0; j<min(k,6); j++) {
        DPRINTF("%12.0f", A[j+i*k]);
      }
      DPRINTF("\n");
    }

    DPRINTF("\n Top left corner of matrix B: \n");
    for (i=0; i<min(k,6); i++) {
      for (j=0; j<min(n,6); j++) {
        DPRINTF("%12.0f", B[j+i*n]);
      }
      DPRINTF("\n");
    }
    
    DPRINTF("\n Top left corner of matrix C: \n");
    for (i=0; i<min(m,6); i++) {
      for (j=0; j<min(n,6); j++) {
        DPRINTF("%12.5G", C[j+i*n]);
      }
      DPRINTF("\n");
    }

    DPRINTF("\n Deallocating memory \n\n");
    mkl_free(A);
    mkl_free(B);
    mkl_free(C);

    DPRINTF(" Example completed. \n\n");
    return 0;
}
Example #19
0
		// Destructor: releases the `data` buffer through mkl_free.
		// NOTE(review): presumably `data` was obtained from mkl_malloc -- confirm in the constructor.
		~Matrix() { mkl_free(data); }
Example #20
0
/*
 * Computes C = alpha*A*B + beta*C with cblas_dgemm for row-major
 * A (m x p), B (p x n) and C (m x n), with m = 2000, p = 200, n = 1000,
 * alpha = 1 and beta = 0, then prints the top-left corner (at most 6x6)
 * of each matrix.
 *
 * `min` is not defined in this function and comes from the enclosing file.
 *
 * Returns 0 on success, 1 if any matrix could not be allocated.
 */
int main()
{
    double *A, *B, *C;
    int m, n, p, i, j;
    double alpha, beta;

    printf ("\n This example computes real matrix C=alpha*A*B+beta*C using \n"
            " Intel® MKL function dgemm, where A, B, and  C are matrices and \n"
            " alpha and beta are double precision scalars\n\n");

    m = 2000, p = 200, n = 1000;
    printf (" Initializing data for matrix multiplication C=A*B for matrix \n"
            " A(%ix%i) and matrix B(%ix%i)\n\n", m, p, p, n);
    alpha = 1.0; beta = 0.0;

    printf (" Allocating memory for matrices aligned on 64-byte boundary for better \n"
            " performance \n\n");
    A = (double *)mkl_malloc( m*p*sizeof( double ), 64 );
    B = (double *)mkl_malloc( p*n*sizeof( double ), 64 );
    C = (double *)mkl_malloc( m*n*sizeof( double ), 64 );
    if (A == NULL || B == NULL || C == NULL) {
      printf( "\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");
      mkl_free(A);
      mkl_free(B);
      mkl_free(C);
      return 1;
    }

    printf (" Intializing matrix data \n\n");
    for (i = 0; i < (m*p); i++) {
        A[i] = (double)(i+1);
    }

    for (i = 0; i < (p*n); i++) {
        B[i] = (double)(-i-1);
    }

    for (i = 0; i < (m*n); i++) {
        C[i] = 0.0;
    }

    printf (" Computing matrix product using Intel® MKL dgemm function via CBLAS interface \n\n");
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 
                m, n, p, alpha, A, p, B, n, beta, C, n);
    printf ("\n Computations completed.\n\n");

    printf (" Top left corner of matrix A: \n");
    for (i=0; i<min(m,6); i++) {
      for (j=0; j<min(p,6); j++) {
        /* A is row-major with leading dimension p: element (i,j) is A[j+i*p].
           The previous A[i+j*p] printed the transposed corner. */
        printf ("%12.0f", A[j+i*p]);
      }
      printf ("\n");
    }

    printf ("\n Top left corner of matrix B: \n");
    for (i=0; i<min(p,6); i++) {
      for (j=0; j<min(n,6); j++) {
        printf ("%12.0f", B[j+i*n]);
      }
      printf ("\n");
    }

    printf ("\n Top left corner of matrix C: \n");
    for (i=0; i<min(m,6); i++) {
      for (j=0; j<min(n,6); j++) {
        printf ("%12.5G", C[j+i*n]);
      }
      printf ("\n");
    }

    printf ("\n Deallocating memory \n\n");
    mkl_free(A);
    mkl_free(B);
    mkl_free(C);

    printf (" Example completed. \n\n");
    return 0;
}
Example #21
0
/*
 * Releases the three matrix buffers A, B and C through mkl_free.
 * NOTE(review): presumably all three were obtained from mkl_malloc --
 * confirm at the allocation site.
 */
void free_matrix(double* A, double* B, double *C) {
    mkl_free(A);
    mkl_free(B);
    mkl_free(C);
}
Example #22
0
//============================================================generateProbabilityMap with MKL=====//
// Accumulates the point-set correspondence statistics P1, Pt1 and Px from
// two point sets using MKL BLAS/VML routines.
//
//   x, x_rows   row-major x_rows x D point set (N = x_rows)
//   D           number of coordinates per point
//   xPr         per-point weight for x, N entries
//   y, y_rows   row-major y_rows x D point set (M = y_rows)
//   sigma2      variance used in the Gaussian exponent exp(-d^2 / (2*sigma2))
//   outlier     outlier weight, enters through the constant outlier_tmp
//   P1          output, M entries: sum over n of P(:, n)
//   Pt1         output, N entries: (1 - outlier_tmp/sp) * xPr[n]
//   Px          output, row-major M x D: sum over n of P(:, n) * x(n, :)
//
// The scratch buffers are sized from D. The earlier hard-coded 3 in the sizes
// of Q and tempM overflowed both buffers whenever D > 3.
// NOTE(review): allocation results are not checked here, matching the
// original behaviour.
void Registration::generateProbabilityMap4(double *x, int x_rows, int D,
										   double *xPr,
										   double *y, int y_rows,
		                                   double sigma2,
										   double outlier, 
										   double *P1,
										   double *Pt1,
										   double *Px)
{
	const int N = x_rows;
	const int M = y_rows;
	double ksig, outlier_tmp, sp;

	double* P = (double *)mkl_malloc( M*1*sizeof( double ), 64 );

	// Clear the accumulators.
	for(int i = 0; i < M*1; i++) P1[i] = 0.0;
	for(int i = 0; i < M*D; i++) Px[i] = 0.0;

	ksig = -2.0 * sigma2;
	outlier_tmp = (outlier * M * pow(-ksig*3.14159265358979,0.5*D) )/((1-outlier)*N);   

	// Column of ones (M x 1): replicates a row across M rows and sums a column.
	double* Mx1 = (double *)mkl_malloc( M*1*sizeof( double ), 64 );
	for(int i = 0; i < M; i++) Mx1[i] = 1.0;

	// Q holds the M x D difference matrix (x_n - y), then its squares.
	double* Q = (double *)mkl_malloc( M*D*sizeof( double ), 64 );
	for(int i = 0; i < M*D; i++) Q[i] = 0.0;

	// F holds the M exponents, then the M Gaussian weights.
	double* F = (double *)mkl_malloc( M*1*sizeof( double ), 64 );
	for(int i = 0; i < M*1; i++) F[i] = 0.0;

	// Column of ones (D x 1): sums the squared differences over coordinates.
	double* tempM = (double *)mkl_malloc( D*1*sizeof( double ), 64 );
	for(int i = 0; i < D; i++) tempM[i] = 1.0;

	const double one = 1.0;
	const double zero = 0.0;
	const double inv_ksig = 1.0 / ksig;

	double* x_nth_row = (double *)mkl_malloc( D*sizeof( double ), 64 );
	double* temp_array = (double *)mkl_malloc( 1*1*sizeof( double ), 64 );

	int temp_matrix_size;

	// Main loop over the points of x.
	for(int n = 0; n < N; n++) {

		// Q = Mx1 * x(n, :)  -- the n-th point replicated on every row
		for(int i = 0; i<D; i++)
			x_nth_row[i] = x[n*D+i];

		cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 
                    M, D, 1, one, Mx1, 1, x_nth_row, D, zero, Q, D);

		// Q = (Q - y) squared element-wise
		temp_matrix_size = M * D;
		vdsub(&temp_matrix_size, Q, y, Q); 
		vdmul(&temp_matrix_size, Q, Q, Q);		

		// F = (Q * tempM) / ksig  -- row sums scaled by 1/ksig
		cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 
                    M, 1, D, inv_ksig, Q, D, tempM, 1, zero, F, 1);

		// F = exp(F)
		for(int i = 0; i < M; i++)
			F[i] = exp(F[i]);

		// sp = sum(F) + outlier_tmp
		cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 
                    1, 1, M, one, Mx1, 1, F, 1, zero, temp_array, 1);
		sp = temp_array[0];
		sp += outlier_tmp;

		// P = F * (xPr[n] / sp)
		double multiplier = 1/ sp * xPr[n];
		for(int i = 0; i < M; i++) {
			P[i] = F[i] * multiplier;
		}

		Pt1[n] = (1 - outlier_tmp/sp) * xPr[n];

		// P1 = P1 + P
		temp_matrix_size = M * 1;
		vdadd(&temp_matrix_size, P1, P, P1);

		// Px = Px + P * x(n, :)
		cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 
                    M, D, 1, one, P, 1, x_nth_row, D, one, Px, D);

	}

	mkl_free_buffers();
	mkl_free(P);
	mkl_free(Mx1);
	mkl_free(Q);
	mkl_free(F);
	mkl_free(tempM);
	mkl_free(x_nth_row);
	mkl_free(temp_array);

	return;	
}
Example #23
0
// Destructor: releases the harmonic_trap buffer through mkl_free.
// NOTE(review): presumably harmonic_trap was obtained from mkl_malloc --
// confirm in the constructor.
PotentialData::~PotentialData() {
	mkl_free(harmonic_trap);
}