/*
 * Benchmark cblas_dgemm (C = alpha*A*B + beta*C) once for every thread count
 * from 1 up to the maximum reported by mkl_get_max_threads(), printing the
 * average time per call and the resulting matrix after each run.
 *
 * M, N, P and LOOP_COUNT are not defined in this function; they come from
 * elsewhere in the file.
 *
 * Returns 0 on success, 1 if any of the three matrices cannot be allocated.
 */
int main()
{
    double *A, *B, *C;
    int j, r, threads, max_threads, size;
    double alpha, beta;
    double s_initial, s_elapsed;

    printf("Intializing data for matrix multiplication C=A*B for matrix\n\n"
            " A(%i*%i) and matrix B(%i*%i)\n",M,P,P,N);
    alpha = 1.0;
    beta = 0.0;

    printf("Allocating memory for matrices aligned on 64-byte boundary for better performance \n\n");
    A = ( double *)mkl_malloc(M*P*sizeof( double ),64);
    B = ( double *)mkl_malloc(N*P*sizeof( double ),64);
    C = ( double *)mkl_malloc(M*N*sizeof( double ),64);
    if (A == NULL || B == NULL || C == NULL)
    {
        printf("Error: can`t allocate memory for matrices.\n\n");
        mkl_free(A);
        mkl_free(B);
        mkl_free(C);
        return 1;
    }

    printf("Intializing matrix data\n\n");
    size = M*P;
    for (j = 0; j < size; ++j)
    {
        A[j] = ( double )(j+1);
    }
    size = N*P;
    for (j = 0; j < size; ++j)
    {
        B[j] = ( double )(j-1);
    }

    printf("Finding max number of threads can use for parallel runs \n\n");
    max_threads = mkl_get_max_threads();

    printf("Running from 1 to %i threads \n\n",max_threads);
    for (threads = 1; threads <= max_threads; ++threads)
    {
        /* Start every run from a zeroed result matrix. */
        size = M*N;
        for (j = 0; j < size; ++j)
        {
            C[j] = 0.0;
        }

        printf("Requesting to use %i threads \n\n",threads);
        mkl_set_num_threads(threads);

        printf("Measuring performance of matrix product using dgemm function\n"
                " via CBLAS interface on %i threads \n\n",threads);
        s_initial = dsecnd();
        for (r = 0; r < LOOP_COUNT; ++r)
        {
            /* Row-major A is M x P (lda = P), B is P x N (ldb = N), C is M x N. */
            cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, P, alpha, A, P, B, N, beta, C, N);
        }
        s_elapsed = (dsecnd() - s_initial) / LOOP_COUNT;

        printf("Matrix multiplication using dgemm completed \n"
                " at %.5f milliseconds using %d threads \n\n",
                (s_elapsed * 1000),threads);

        /*
         * Print C one matrix row per output line. This loop must use its own
         * index: sharing the thread-count variable with it would leave that
         * variable at M*N and end the sweep after the first pass.
         */
        printf("Output the result: \n");
        size = M*N;
        for (j = 0; j < size; ++j)
        {
            printf("%i\t",(int)C[j]);
            if (j % N == N - 1)
                printf("\n");
        }
    }

    printf("Dellocating memory\n");
    mkl_free(A);
    mkl_free(B);
    mkl_free(C);

    return 0;
}
double doCheck(int Rows_Size, int Columns_Size, \
                  double (*  Matrix_A), \
                  double (* Matrix_B), \
                  double (*  Matrix_C), \
                  int   nIter,  double *error)
{
 
        //  double(*restrict Cgemm)[Rows_Size] = malloc(sizeof(double)*Rows_Size*Rows_Size);
		double(* Cgemm)=(double*)malloc(Rows_Size*Rows_Size*sizeof(double));
		
          	double mklStartTime = dsecnd();

          	for(int i=0; i < nIter; i++)
              	cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, \
                    Rows_Size, Rows_Size, Columns_Size, alpha, \
                Matrix_A,Columns_Size, \
                Matrix_B, Rows_Size, beta, \
                Cgemm, Rows_Size);
         	double mklEndTime = dsecnd();

	        *error = nrmsdError(Rows_Size, Columns_Size,Matrix_C,Cgemm);
	
         	free(Cgemm);

        return (2e-9*Rows_Size*Columns_Size*Rows_Size/((mklEndTime-mklStartTime)/nIter) );
}
/* ................................................................... */
/*
 * Overload of doCheck taking the matrices as tables of row pointers.
 *
 * It allocates a Rows_Size x Rows_Size table of rows (Cgemm), calls
 * cblas_dgemm once untimed and then nIter times between two dsecnd() reads,
 * stores the value returned by nrmsdError(Rows_Size, Columns_Size, Matrix_C,
 * Cgemm) in *error, and returns 2e-9 * Rows_Size^2 * Columns_Size divided by
 * the average time of one timed call.
 *
 * NOTE(review): every matrix argument handed to cblas_dgemm is a double**
 * cast to double*, i.e. the address of the row-pointer table rather than of
 * the matrix elements. CBLAS documents these arguments as contiguous element
 * arrays, so this looks wrong -- confirm and pack the rows into contiguous
 * buffers if so.
 * NOTE(review): Matrix_A, Matrix_B and Matrix_C are freed here although they
 * are supplied by the caller, and only the pointer tables are freed; the
 * individual rows (including those of Cgemm) are never released. Confirm who
 * owns these buffers.
 */
double doCheck(int Rows_Size, int Columns_Size, \
                   double (**  Matrix_A), \
                   double (** Matrix_B), \
                   double (**  Matrix_C), \
                   int   nIter,  double *error)
{
 
        //  double(*restrict Cgemm)[Rows_Size] = malloc(sizeof(double)*Rows_Size*Rows_Size);
        /* Reference result: a table of Rows_Size rows, each Rows_Size doubles. */
		 double(** Cgemm)=(double**)malloc(Rows_Size*sizeof(double*));
        for(int i=0;i<Rows_Size;i++)
        Cgemm[i]    =(double*) malloc(sizeof(double)*Rows_Size);


          double alpha = 1.0f, beta = 0.0f;  /* Scaling factors */
          // warm up   
//        double alpha = 1.0f, beta = 0.0f;  /* Scaling factors */
          // warm up   
          /* Untimed call ahead of the measured loop. */
          cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, \
                    Rows_Size, Rows_Size, Columns_Size, alpha, \
              (double*) Matrix_A, Rows_Size, \
               (double *) Matrix_B, Columns_Size, beta, \
               (double *) Cgemm, Rows_Size);
          double mklStartTime = dsecnd();

          /* Timed section: nIter identical products. */
          for(int i=0; i < nIter; i++)
              cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, \
                    Rows_Size, Rows_Size, Columns_Size, alpha, \
                (double *)Matrix_A, Rows_Size, \
                (double *)Matrix_B, Columns_Size, beta, \
                (double *)Cgemm, Rows_Size);

          double mklEndTime = dsecnd();


           *error = nrmsdError(Rows_Size, Columns_Size, Matrix_C, Cgemm);
		printf("\n ram\n");
         /* Only the row-pointer tables are released below, not the rows. */
         free(Matrix_A);
         free(Matrix_B);
         free(Matrix_C);
         free(Cgemm);

         /* nIter == 0 makes the average time below a division by zero. */
         return (2e-9*Rows_Size*Columns_Size*Rows_Size/((mklEndTime-mklStartTime)/nIter) );
}
示例#4
0
/*
 * Close the interval opened on *r: stamp the end time, add the interval to
 * the accumulated total and count one more sample. Whenever the sample count
 * is a positive multiple of PARSER_RATE_VERBOSITY, log the average rate.
 */
void stop(Rate *r) {
    Rate rate = *r;

    rate->t_end = dsecnd();
    rate->total_elapsed += (rate->t_end - rate->t_begin);
    (rate->count)++;

    /* Nothing to report unless this sample lands on a reporting boundary. */
    if ((rate->count) % PARSER_RATE_VERBOSITY != 0 || (rate->count) <= 0) {
        return;
    }

    log_info("Parser Rate is %lf sentences/sec", (rate->count) / (rate->total_elapsed));
}
示例#5
0
// Times LAPACKE_dgesv on an N x N system with one right-hand side and prints
// the average time per call over NTEST calls, in milliseconds.
//
// N and NTEST are not defined in this function; they come from elsewhere in
// the file. Returns 0 normally, 1 if a buffer cannot be allocated.
//
// NOTE(review): LAPACK documents dgesv as overwriting A with its LU factors
// and B with the solution, so every repetition after the first works on the
// output of the previous call rather than on the original system. Confirm
// whether the benchmark should restore A and B between calls.
int main(int argc, char *argv[]){
	(void)argc;
	(void)argv;

	double *A = (double *)mkl_malloc((size_t)N*N*sizeof(double), 64);
	double *B = (double *)mkl_malloc(N*sizeof(double), 64);
	int *pivot = (int *)mkl_malloc(N*sizeof(int), 32);
	if (A == NULL || B == NULL || pivot == NULL) {
		printf("Error: cannot allocate memory for the linear system\n");
		mkl_free(A);
		mkl_free(B);
		mkl_free(pivot);
		return 1;
	}

	// Entries drawn from a normal distribution with mean 0 and variance 1.
	std::default_random_engine generador;
	std::normal_distribution<double> aleatorio(0.0, 1.0);
	for (int i = 0; i < N*N; i++) A[i] = aleatorio(generador);
	for (int i = 0; i < N; i++) B[i] = aleatorio(generador);
	// Boost the diagonal of A to reduce the risk of a singular matrix.
	for (int i = 0; i < N; i++) A[i*N + i] += 10.0;

	int result = 0;
	double inicio = dsecnd();
	for (int i = 0; i < NTEST; i++)
		result = LAPACKE_dgesv(LAPACK_ROW_MAJOR, N, 1, A, N, pivot, B, 1);
	double fin = dsecnd();

	// Report a failed solve instead of silently printing its timing.
	if (result != 0)
		printf("Warning: last LAPACKE_dgesv call returned %d\n", result);

	double tiempo = (fin - inicio) / (double)NTEST;
	printf("Tiempo: %lf msec\n", tiempo*1.0e3);

	mkl_free(A);
	mkl_free(B);
	mkl_free(pivot);   // the pivot array was previously never released
	std::getchar(); return 0;
}
示例#6
0
/*
 * Begin a timing interval on *r, allocating and zero-initialising the Rate
 * record first if *r is NULL.
 *
 * Sets t_begin to the current dsecnd() reading and t_end to -1, then returns
 * *r. If the record cannot be allocated, the check() failure path jumps to
 * the error label and the process exits with status 1.
 */
Rate start(Rate *r) {

    if (*r == NULL) {
        *r = (Rate) malloc(sizeof (struct Rate));
        /* Verify the allocation itself, and do so before touching its fields. */
        check(*r != NULL, "Memory allocation error");
        (*r)->count = 0;
        (*r)->total_elapsed = 0.0;
    }

    (*r)->t_begin = dsecnd();
    (*r)->t_end = -1;

    return *r;

error:
    exit(1);

}
示例#7
0
/*
 * Program entry point. Only the beginning of the function is visible here:
 * local declarations, MPI start-up, and the argument-count check performed
 * on rank 0. The remainder of the body is not part of this excerpt.
 */
int main( int argc, char* argv[] )
{

	double alpha;
	int interpFuncExtent;
	int numFreq;
	int gridSize;

	/* Fixed 256-byte buffers for file names. */
	char inputFile[256];
	char inputTargetFile[256];
	char inputCoef[256];
	char outputFileName[256];
	char rbfFile[256];
	int van;
	int pvan;
	int minlevel;  
	double tol;
	// int max_iter = 60;
	int max_iter = 10000;
	int myid;
	double t0, t1;
	int d = 0; 
	int bitmap;

	/* MPI is initialised with null argc/argv; myid receives this process's rank. */
	MPI_Init(0,0);	
	MPI_Comm_rank(MPI_COMM_WORLD, &myid);
/* Everything visible below runs on rank 0 only. */
if(myid == 0){
	t0 = dsecnd();
	/* Fewer than 15 argv entries: print the usage text and return -1. */
	/* NOTE(review): no MPI_Finalize call is visible on this return path -- confirm. */
	if( argc < 15 )
    {
		printf("\n\nUsage : %s alpha interpFuncExtent numFreq gridSize inputFile1(coords) inputFile2(coefs) \
                        outputFileName kifmmoptions (ftilde+1) (f+1) GMRES_tolerance [bitmap] Target \
                        distance-criterion-cte minlevel \n",argv[0]);
		printf("   bitmap 1 = use fast diagonal computation (diagonal preconditioner)\n");
		printf("   bitmap 2 = use PMult (SSOR preconditioner)\n");
		printf("   bitmap 4 = write diagonal blocks\n");
		printf("   bitmap 8 = read in the recomputed diagonal blocks\n");
		printf("   bitmap 16 = compute actual residual for each iteration\n");
		printf("   bitmap 32 = compute sparse Kw matrix and write it to file \n");
		return -1;
    }
/*
 * Multiply the global buffers with an OpenMP-parallel triple loop and print
 * timing statistics.
 *
 * For every i and j in [0, Rows_Size) the loop stores
 *     bufferC[i*Rows_Size + j] = sum over k in [0, Columns_Size) of
 *                                bufferA[i*Rows_Size + k] * bufferB[j*Rows_Size + k]
 * where Rows_Size = Block_Of_Row and Columns_Size = Total_Row. bufferA,
 * bufferB, bufferC, Block_Of_Row, Total_Row and Total_Thread are not declared
 * in this function; they come from elsewhere in the file.
 *
 * Block_RowIndex and Block_ColIndex are only used for the progress message.
 *
 * NOTE(review): rows of bufferA/bufferB are addressed with stride Rows_Size
 * although k runs up to Columns_Size, so rows overlap when Columns_Size is
 * larger than Rows_Size. Confirm the intended buffer layout.
 *
 * The MKL cross-check (doCheck) is disabled; the scratch matrices that used
 * to be allocated for it were never filled or used, and their rows were never
 * freed, so they are no longer allocated.
 */
void domult(int Block_RowIndex,int Block_ColIndex)
{
        int i, j, k;
        /* omp_get_wtime() returns double; storing it in float loses the
           precision needed to measure short intervals. */
        double omp_tv_start, omp_tv_end;

        int nThreads;
        int nIter = 1;
        int Rows_Size = Block_Of_Row;
        int Columns_Size = Total_Row;

        omp_tv_start = omp_get_wtime();
        printf("\nbufferA having index %d\t bufferB having index %d\n ",
               Block_RowIndex/Block_Of_Row, Block_ColIndex/Block_Of_Row);

        /* The sum of run times must start from zero before it is accumulated. */
        double aveTime = 0.0, minTime = 1e6, maxTime = 0.0;
        for (int iter = 0; iter < nIter; iter++) {
            double startTime = dsecnd();

            nThreads = Total_Thread;
            omp_set_num_threads(nThreads);

            /* Rows of the result are distributed statically across threads. */
            #pragma omp parallel default (none) \
                        shared (bufferA, bufferB, bufferC, Rows_Size, Columns_Size) \
                        private(i, j, k)
            {
                #pragma omp for schedule(static)
                for (i = 0; i < Rows_Size; i++)
                {
                    for (j = 0; j < Rows_Size; j++)
                    {
                        bufferC[i*Rows_Size + j] = 0.0;
                        for (k = 0; k < Columns_Size; k++)
                            bufferC[i*Rows_Size + j] +=
                                (bufferA[i*Rows_Size + k]) * (bufferB[j*Rows_Size + k]);
                    }
                }
            } /* --- End of omp parallel for --- */

            double endTime = dsecnd();
            double runtime = endTime - startTime;

            maxTime = (maxTime > runtime) ? maxTime : runtime;
            minTime = (minTime < runtime) ? minTime : runtime;

            aveTime += runtime;
        }
        aveTime /= nIter;

        omp_tv_end = omp_get_wtime();
        printf("\n\t\t Time taken ( do Mult)    :  %lf sec", (omp_tv_end - omp_tv_start));

        /* Report the size of the thread team from inside a parallel region. */
        #pragma omp parallel
        #pragma omp master
        printf("nThreads : %d    ITR : %d", omp_get_num_threads(), nIter);

        #pragma omp barrier

        printf("\n");

        /* The two literals below reproduce the original message text,
           including the run of blanks in the middle. */
        printf(" nThrds %d matrix Rows-A %d matrix Columns-A %d matrix Rows-B %d matrix Columns-B %d "
               "             maxRT %g minRT %g aveRT %g  ave_GFlop/s % g\n",
               omp_get_num_threads(), Rows_Size, Columns_Size, Columns_Size, Rows_Size,
               maxTime, minTime, aveTime, 2e-9*Rows_Size*Columns_Size*Rows_Size/aveTime);
}
示例#9
0
/*
 * Forward and backward in-place 1D complex FFT of length N, with every DFTI
 * call executed inside an "offload target(mic)" region.
 *
 * Sequence: allocate x on the host, preallocate it on the target, create and
 * commit the descriptor, run one untimed forward transform, then run a timed
 * forward and a timed backward transform, each followed by verify(). The
 * descriptor and the target copy of x are released at the end.
 *
 * x and handle are not declared in this function; they come from elsewhere
 * in the file, as do init(), verify() and COMPLEX. A verify() result of 0 is
 * treated as success (see the PASSED/FAILED messages at the end).
 *
 * Returns 0; calls exit(1) on allocation or DFTI failure.
 */
int main(void)
{
    /* Size of 1D transform */
    int N = 1000000;

    /* Arbitrary harmonic */
    int H = -N/2;

    /* Execution status */
    MKL_LONG status = 0;

    /* Initialised to non-zero, i.e. "not passed", until verify() says otherwise. */
    int forward_ok = 1, backward_ok = 1;

    double time_start = 0, time_end = 0;
    double flops = 0;

    printf("Forward and backward 1D complex inplace transforms\n");

    printf("Allocate space for data on the host\n");
    x = (COMPLEX*)malloc( N * sizeof(COMPLEX) );
    if (0 == x) {
        printf("Error: memory allocation on host failed\n");
        exit(1);
    }

    printf("Preallocate memory on the target\n");
    /*
     * SOLUTION: Use offload pragma to preallocate memory for x on the target.
     *      (1) The length of x is N
     *      (2) Make sure the memory of x is aligned on 64-byte boundary
     *      (3) Make sure the allocated memory is not freed
     */
#pragma offload target(mic) in(x:length(N) align(64) alloc_if(1) free_if(0))
    {
    }

    printf("Create handle for 1D single-precision forward and backward transforms\n");

    /* 
     * SOLUTION: Offload the call to DftiCreateDescriptor to the target.
     *      (1) What would be the 'in' variables?
     *      (2) What would be the 'out' variables?
     */
#pragma offload target(mic) in(N) nocopy(handle) out(status)
    {
        status = DftiCreateDescriptor(&handle, DFTI_SINGLE,
            DFTI_COMPLEX, 1, (MKL_LONG)N );
        /* Commit only if the descriptor was created successfully. */
        if (0 == status)
		{
			status = DftiCommitDescriptor(handle);
		}
    }

    if (status) {
        printf("Error: cannot create handle\n");
        exit(1);
    }

    /* 
     * SOLUTION: Offload the call to DftiComputeForward to the target.
     *      (1) Make sure x is an 'inout' variable, because this is in-place
     *      transform.
     *      (2) Do not allocate memory for x because it was preallocated.
     *      (3) Do not free memory of x because we will use it again for more
     *      transforms.
     *      (4) What would be the 'out' variables?
     */
    // We do not time the first offload.
    /* The status of this untimed call is overwritten by the timed call below. */
#pragma offload target(mic) inout(x:length(N) alloc_if(0) free_if(0)) \
    nocopy(handle) out(status)
    {
        status = DftiComputeForward(handle, x);
    }

    printf("Initialize input for forward transform\n");
    init(x, N, H);

    printf("Offload forward FFT computation to the target\n");
	time_start = dsecnd();
    /*
     * SOLUTION: Offload the call to DftiComputeForward to the target.
     * This should be the same as the previous offload.
     */
#pragma offload target(mic) inout(x:length(N) alloc_if(0) free_if(0)) \
    nocopy(handle) out(status)
    {
        status = DftiComputeForward(handle, x);
    }
	time_end = dsecnd();

    if (status) {
        printf("Error: DftiComputeForward failed\n");
        exit(1);
    }

    printf("Verify result of forward FFT\n");
    forward_ok = verify(x, N, H);
    /* The rate is computed as 5*N*log2(N) divided by the measured time, and
       is reported only when verification succeeds. */
    if (0 == forward_ok) {
	    flops    = 5 * N * log2((double)N) / (time_end - time_start);
        printf("\t Forward: size = %d, GFlops  = %.3f  \n", N, flops/1000000000);
    }

    printf("Initialize input for backward transform\n");
    init(x, N, -H);

    printf("Offload backward FFT computation to the target\n");

	time_start = dsecnd();
    /* 
     * SOLUTION: Offload the call to DftiComputeBackward to the target.
     *      (1) Make sure x is an 'inout' variable, because this is in-place
     *      transform.
     *      (2) Do not allocate memory for x because it was preallocated.
     *      (3) Do not free memory of x at this time.
     *      (4) What would be the 'out' variables?
     */
#pragma offload target(mic) inout(x:length(N) alloc_if(0) free_if(0)) \
    nocopy(handle) out(status)
    {
        status = DftiComputeBackward(handle, x);
    }
	time_end = dsecnd();

    if (status) {
        printf("Error: DftiComputeBackward failed\n");
        exit(1);
    }

    printf("Verify result of backward FFT\n");
    /* Input was initialised with -H; the result is verified against H. */
    backward_ok = verify(x, N, H);
    if (0 == backward_ok) {
	    flops    = 5 * N * log2((double)N) / (time_end - time_start);
        printf("\t Backward: size = %d, GFlops  = %.3f  \n", N, flops/1000000000 );
    }

    printf("Destroy DFTI handle and free space on the target\n");
    /*
     * SOLUTION: Use offload pragma to deallocate memory of x on the target.
     *      (1) What would be 'in' variables?
     *      (2) Do the 'in' variables need to be copied in?
     */
#pragma offload target(mic) nocopy(x:length(N) alloc_if(0) free_if(1)) \
    nocopy(handle)
    {
        DftiFreeDescriptor(&handle);
    }

    printf("Free space on host\n");
    free(x);

    printf("TEST %s\n",0==forward_ok ? 
            "FORWARD FFT PASSED" : "FORWARD FFT FAILED");
    printf("TEST %s\n",0==backward_ok ? 
            "BACKWARD FFT PASSED" : "BACKWARD FFT FAILED");

    return 0;
}
示例#10
0
// X: a MxD matrix, Y: a M vector, W: a M vector
// W0: a M vector
/*
 * Driver for the panel-based gradient-descent benchmark.
 *
 * Command line (all optional, positional): M D T C lamda r, with defaults
 * 32, 4, 10, 4, 0.01 and 1. A first argument starting with 'h' prints the
 * usage text instead.
 *
 * For OpenMP thread counts nt = 1, 2, 4, ... (bounded by the MKL maximum and
 * by the panel size C*r) it repeats REPEATS times: preprocessPar(), initW(),
 * then T sweeps of gd() over all panels, and prints the preprocessing time,
 * the descent time and the value returned by calErr().
 *
 * T, panelSz, panels, nt, REPEATS and PAGESIZE are not declared in this
 * function; they come from elsewhere in the file.
 *
 * Returns 0 on success, 1 for the usage text or an invalid panel size, and
 * 2 if a buffer cannot be allocated.
 */
int main(int argc, char ** argv){
    float *Y = NULL, *Wreal = NULL, *W = NULL, *X = NULL;
    float *Ypred = NULL, *Ytmp = NULL, *I = NULL, *Z = NULL, *B = NULL;
    int status = 0;

    if (argc>1 && argv[1][0]=='h') {
        printf ("Usage: parSymSGD M D T C lamda r\n");
        printf ("  M: number of data points, D: dimensions, T: time iterations, C: cores;\n");
        printf ("  lamda: learning rate, r: panel size in unit of C.\n");
        return 1;
    }

    /* Arguments: M, D, T (time iterations), C (cores), lamda, r (each panel holds r*C points). */
    int M = argc>1?atoi(argv[1]):32;
    int D = argc>2?atoi(argv[2]):4;
    T = argc>3?atoi(argv[3]):10;
    int C = argc>4?atoi(argv[4]):4;
    float lamda = argc>5?atof(argv[5]):0.01;
    int r = argc>6?atoi(argv[6]):1;

    int max_threads = mkl_get_max_threads(); /* upper bound for the thread sweep */

    int rep;
    mkl_set_num_threads(1); /* MKL itself runs on a single thread */
    panelSz = C*r;
    /* A non-positive panel size would make the division below invalid. */
    if (panelSz <= 0) {
        printf("Error: C*r must be positive.\n");
        return 1;
    }
    panels = M/panelSz;

    int p,t;
    Y = (float *) mkl_malloc(M*sizeof(float),PAGESIZE);
    Wreal = (float *) mkl_malloc(D*sizeof(float),PAGESIZE);
    W = (float *) mkl_malloc(D*sizeof(float),PAGESIZE);
    X = (float *) mkl_malloc(M*D*sizeof(float),PAGESIZE);
    Ypred = (float*)mkl_malloc(M*sizeof(float),PAGESIZE);
    Ytmp = (float*)mkl_malloc(M*sizeof(float),PAGESIZE);
    I = (float*)mkl_malloc(D*D*sizeof(float),PAGESIZE);
    Z = (float*)mkl_malloc(M*D*sizeof(float),PAGESIZE);
    B = (float*)mkl_malloc(panels*D*sizeof(float),PAGESIZE);

    if (Y==NULL || Wreal==NULL || W==NULL || X==NULL || Ypred==NULL || Ytmp==NULL || Z==NULL || B==NULL || I==NULL){
        printf("Memory allocation error.\n");
        /* Release whichever buffers were obtained before giving up. */
        status = 2;
        goto cleanup;
    }

    initData(Wreal,W,X,Y, M, D,I);

    for (nt=1; nt<=max_threads && nt<=panelSz; nt*=2){
        omp_set_num_threads(nt); /* number of OpenMP threads for this sweep */

        for (rep=0; rep<REPEATS; rep++){ /* repeat measurements */
            double prepTime, gdTime, sInit;

            /* preprocessing */
            sInit=dsecnd();
            preprocessPar(X, Y, Z, B, panelSz, panels, M, D, lamda);
            prepTime = (dsecnd() - sInit);

            /* gradient descent */
            initW(W,D);
            sInit=dsecnd();
            float err;
            for (t=0;t<T;t++){
                for (p=0;p<panels;p++){
                    gd(&(X[p*panelSz*D]),&(Z[p*panelSz*D]), &(B[p*D]), panelSz, D, lamda, W, I);
                  printf("finish  one  panels     ............................  \n");
                }
            }
            gdTime = (dsecnd() - sInit);

            err=calErr(X, Ypred, Ytmp, Y, W, M, D);

            /* print final err. time is in milliseconds */
            printf("nt=%d\t ttlTime=%.5f\t prepTime=%.5f\t gdTime=%.5f\t error=%.5f\n", nt, (gdTime+prepTime)*1000, prepTime*1000, gdTime*1000, err);
        }
    }

cleanup:
    if (B) mkl_free(B);
    if (Z) mkl_free(Z);
    if (Ytmp) mkl_free(Ytmp);
    if (Ypred) mkl_free(Ypred);
    if (Y) mkl_free(Y);
    if (Wreal) mkl_free(Wreal);
    if (W) mkl_free(W);
    if (X) mkl_free(X);
    if (I) mkl_free(I);
    return status;
}
示例#11
0
/*
 * Times a plain triple-nested-loop matrix product C = A*B, where A is
 * 2000 x 200 and B is 200 x 1000, both stored row-major.
 *
 * One untimed product is computed first, then LOOP_COUNT timed products;
 * the average time per product is printed in milliseconds. If the average is
 * below 0.9/LOOP_COUNT seconds, a larger LOOP_COUNT is suggested.
 *
 * LOOP_COUNT is not defined in this function; it comes from elsewhere in
 * the file. Returns 0 on success, 1 if a matrix cannot be allocated.
 */
int main()
{
    /* Set but not used by the loops below, which compute C = A*B directly. */
    double alpha = 1.0, beta = 0.0;
    double *A, *B, *C;
    double acc;
    double t_start, t_avg;
    int m, n, p;
    int row, col, inner, rep, idx;

    (void)alpha;
    (void)beta;

    printf ("\n This example measures performance of rcomputing the real matrix product \n"
            " C=alpha*A*B+beta*C using a triple nested loop, where A, B, and C are \n"
            " matrices and alpha and beta are double precision scalars \n\n");

    m = 2000;
    p = 200;
    n = 1000;
    printf (" Initializing data for matrix multiplication C=A*B for matrix \n"
            " A(%ix%i) and matrix B(%ix%i)\n\n", m, p, p, n);

    printf (" Allocating memory for matrices aligned on 64-byte boundary for better \n"
            " performance \n\n");
    A = (double *)mkl_malloc( m*p*sizeof( double ), 64 );
    B = (double *)mkl_malloc( p*n*sizeof( double ), 64 );
    C = (double *)mkl_malloc( m*n*sizeof( double ), 64 );
    if (A == NULL || B == NULL || C == NULL) {
        printf( "\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");
        mkl_free(A);
        mkl_free(B);
        mkl_free(C);
        return 1;
    }

    printf (" Intializing matrix data \n\n");
    for (idx = 0; idx < (m*p); idx++)
        A[idx] = (double)(idx+1);
    for (idx = 0; idx < (p*n); idx++)
        B[idx] = (double)(-idx-1);
    for (idx = 0; idx < (m*n); idx++)
        C[idx] = 0.0;

    /* Untimed pass ahead of the measurement. */
    printf (" Making the first run of matrix product using triple nested loop\n"
            " to get stable run time measurements \n\n");
    for (row = 0; row < m; row++) {
        const double *a_row = A + p*row;
        double *c_row = C + n*row;
        for (col = 0; col < n; col++) {
            acc = 0.0;
            for (inner = 0; inner < p; inner++)
                acc += a_row[inner] * B[n*inner+col];
            c_row[col] = acc;
        }
    }

    /* Timed passes: the same product, LOOP_COUNT times. */
    printf (" Measuring performance of matrix product using triple nested loop \n\n");
    t_start = dsecnd();
    for (rep = 0; rep < LOOP_COUNT; rep++) {
        for (row = 0; row < m; row++) {
            const double *a_row = A + p*row;
            double *c_row = C + n*row;
            for (col = 0; col < n; col++) {
                acc = 0.0;
                for (inner = 0; inner < p; inner++)
                    acc += a_row[inner] * B[n*inner+col];
                c_row[col] = acc;
            }
        }
    }
    t_avg = (dsecnd() - t_start) / LOOP_COUNT;

    printf (" == Matrix multiplication using triple nested loop completed == \n"
            " == at %.5f milliseconds == \n\n", (t_avg * 1000));

    printf (" Deallocating memory \n\n");
    mkl_free(A);
    mkl_free(B);
    mkl_free(C);

    /* Very short runs are unreliable: propose a LOOP_COUNT giving about one second in total. */
    if (t_avg < 0.9/LOOP_COUNT) {
        double scale = 1.0/LOOP_COUNT/t_avg;
        int suggested = (int)(scale*LOOP_COUNT)+1;
        printf(" It is highly recommended to define LOOP_COUNT for this example on your \n"
               " computer as %i to have total execution time about 1 second for reliability \n"
               " of measurements\n\n", suggested);
    }

    printf (" Example completed. \n\n");
    return 0;
}
示例#12
0
/*
 * Run one training pass of the kernel perceptron over the corpus.
 *
 * mdl     - model being trained; its alpha values are adjusted through
 *           update_alpha(), and its counter c is incremented once per sentence.
 * corpus  - corpus whose sentences are parsed.
 * max_rec - number of sentences to process, or -1 for every sentence in
 *           corpus->sentences.
 *
 * For each sentence the feature and adjacency matrices are built, the
 * sentence is parsed, and the predicted parents are compared with the ones
 * returned by get_parents(). For every position whose predicted parent
 * differs, update_alpha() is called with -1 for the predicted arc and +1 for
 * the expected one. When budget_method is RANDOMIZED and the model holds
 * more than budget_target hypotheses, the excess is deleted at random.
 * Running accuracy is logged every 1000 sentences and at the end, after
 * which update_average_alpha() is called.
 *
 * budget_method, budget_target and verbosity are not declared in this
 * function; they come from elsewhere in the file.
 */
void train_once_KernelPerceptronModel(KernelPerceptron mdl, const CoNLLCorpus corpus, int max_rec) {
    long match = 0, total = 0;
    int max_sv = 0;

    log_info("Total number of training instances %d", (max_rec == -1) ? DArray_count(corpus->sentences) : max_rec);

    for (int si = 0; si < ((max_rec == -1) ? DArray_count(corpus->sentences) : max_rec); si++) {

        FeaturedSentence sent = (FeaturedSentence) DArray_get(corpus->sentences, si);

        debug("Building feature matrix for sentence %d", si);
        set_FeatureMatrix(NULL, corpus, si);

        set_adjacency_matrix_fast(corpus, si, mdl, false);

        /* (length + 1) * length - length, i.e. length squared, per sentence. */
        max_sv += (sent->length + 1) * sent->length - sent->length;

        int *model = parse(sent);

        debug("Parsing sentence %d of length %d is done", si, sent->length);
        int *empirical = get_parents(sent);

        int nm = nmatch(model, empirical, sent->length);

        debug("Model matches %d arcs out of %d arcs", nm, sent->length);
        if (nm != sent->length) { // root has no valid parent.
            log_info("Sentence %d (section %d) of length %d (%d arcs out of %d arcs are correct)", si, sent->section, sent->length, nm, sent->length);

            int sentence_length = sent->length;
            for (int to = 1; to <= sentence_length; to++) {
                if (model[to] != empirical[to]) {
                    /* Penalise the predicted arc, reward the expected one. */
                    update_alpha(mdl, si, model[to], to, sent, -1);
                    update_alpha(mdl, si, empirical[to], to, sent, +1);
                }
            }
        } else {
            log_info("Sentence %d (section %d) of length %d (Perfect parse)", si, sent->section, sent->length);
        }

        if (budget_method == RANDOMIZED) {
            if (mdl->M > budget_target) {
                size_t nbefore = mdl->M;
                size_t nasked = nbefore - budget_target;
                size_t nsuccess = delete_n_random_hypothesis(mdl, nasked);

                /* size_t arguments are printed with %zu; %lu only matches
                   where size_t happens to be unsigned long. */
                log_info("%zu vectors deleted (%zu asked). Current hypothesis set size reduced from %zu to %zu", nsuccess, nasked, nbefore, (size_t) mdl->M);
            }
        }

        mdl->c++;

        free_feature_matrix(corpus, si);

        match += nm;
        total += (sent->length);

        /* Progress report every 1000 sentences. */
        if ((si + 1) % 1000 == 0 && si != 0) {
            log_info("Running training accuracy %lf after %d sentence.", (match * 1.) / total, si + 1);

            unsigned nsv = mdl->M;
            log_info("%u (%f of total %d) support vectors", nsv, (nsv * 1.) / max_sv, max_sv);
        }

        free(model);
        free(empirical);
    }

    unsigned nsv = mdl->M;
    log_info("Running training accuracy %lf", (match * 1.) / total);
    log_info("%u (%f of total %d) support vectors", nsv, (nsv * 1.) / max_sv, max_sv);

    if (verbosity > 0) {
        dump_support_vectors(mdl);
    }

    update_average_alpha(mdl);

    return;
}
示例#13
0
	t0 = dsecnd();
	if( argc < 15 )
    {
		printf("\n\nUsage : %s alpha interpFuncExtent numFreq gridSize inputFile1(coords) inputFile2(coefs) \
                        outputFileName kifmmoptions (ftilde+1) (f+1) GMRES_tolerance [bitmap] Target \
                        distance-criterion-cte minlevel \n",argv[0]);
		printf("   bitmap 1 = use fast diagonal computation (diagonal preconditioner)\n");
		printf("   bitmap 2 = use PMult (SSOR preconditioner)\n");
		printf("   bitmap 4 = write diagonal blocks\n");
		printf("   bitmap 8 = read in the recomputed diagonal blocks\n");
		printf("   bitmap 16 = compute actual residual for each iteration\n");
		printf("   bitmap 32 = compute sparse Kw matrix and write it to file \n");
		return -1;
    }
	
	printf("Wall-clock time used: %f seconds \n",dsecnd() - t0);
	alpha = atof( argv[1] );
	interpFuncExtent = atoi( argv[2] );
	numFreq = atoi( argv[3] );
	gridSize = atoi( argv[4] );
	strcpy(inputFile, argv[5] );
	strcpy(inputCoef, argv[6]);
	strcpy( outputFileName, argv[7] );
	strcpy( rbfFile, argv[8] );
	van = atoi( argv[9] );
	pvan = atoi( argv[10] );
	sscanf(argv[11],"%lf",&tol);
	bitmap = 3;
	if(argc > 12) bitmap = atoi(argv[12]);
        strcpy(inputTargetFile, argv[13] );