コード例 #1
0
ファイル: mdlbrot.c プロジェクト: 315234/OpenFOAM-2.2.x-OSX
/*
 * Compute the colour values for the linear pixel indices [start, end).
 *
 * A linear index k is mapped to the point
 *   (-2.0 + (k / MAX_Y) * 3.0/MAX_X,  -1.0 + (k % MAX_Y) * 2.0/MAX_Y)
 * and handed to mandelbrot_point().  The returned iteration count is
 * scaled by 2^24 / max_iter and stored in lines[k - start].
 *
 * lines must provide room for (end - start) values.
 */
static void calc_lines(Uint32 start, Uint32 end, Uint32* lines,
                       double max_values_sq, Uint32 max_iter)
{
  /* distance between two neighbouring sample points on each axis */
  const double step_x = 3.0 / (double)MAX_X;
  const double step_y = 2.0 / (double)MAX_Y;

  Uint32 idx = start;
  Uint32* out = lines;

#ifdef MANUAL
  VT_USER_START("calc_lines");
#endif

  while(idx < end)
  {
    const double re = -2.0 + (idx / MAX_Y) * step_x;
    const double im = -1.0 + (idx % MAX_Y) * step_y;
    const Uint32 steps = mandelbrot_point(re, im, max_values_sq, max_iter);
    const Uint32 color = (double)steps / (double)max_iter * (1u << 24);

    *out = color;

    out++;
    idx++;
  }

#ifdef MANUAL
  VT_USER_END("calc_lines");
#endif
}
コード例 #2
0
ファイル: mdlbrot.c プロジェクト: 315234/OpenFOAM-2.2.x-OSX
/*
 * Iterate z := z^2 + c, starting at z = 0, for c = cx + i*cy.
 *
 * The iteration stops as soon as |z|^2 exceeds max_value_sq or after
 * max_iter steps.  Returns the number of steps that were performed.
 */
static Uint32 mandelbrot_point(double cx, double cy, double max_value_sq,
                               Uint32 max_iter)
{
  double re = 0, im = 0;   /* real and imaginary part of z */
  double norm_sq = 0;      /* |z|^2 after the latest step  */
  Uint32 steps;

#ifdef MANUAL
  VT_USER_START("mandelbrot_point");
#endif

  for(steps = 0; (steps < max_iter) && (norm_sq <= max_value_sq); steps++)
  {
    const double next_re = (re * re) - (im * im) + cx;
    const double next_im = 2 * re * im + cy;

    re = next_re;
    im = next_im;
    norm_sq = re * re + im * im;
  }

#ifdef MANUAL
  VT_USER_END("mandelbrot_point");
#endif

  return steps;
}
コード例 #3
0
ファイル: profiler.hpp プロジェクト: mliertzer/ngsolve
    /// Stop timer number nr: add the current wall-clock time (in seconds,
    /// taken from gettimeofday) to tottimes[nr] and end the trace region
    /// named names[nr].
    /// NOTE(review): only the stop timestamp is added here; presumably the
    /// matching start routine subtracts the start timestamp - confirm.
    static void StopTimer (int nr)
    {
      timeval now;
      gettimeofday (&now, 0);

      // seconds plus microseconds as one floating-point value
      const double stamp = now.tv_sec + 1e-6 * now.tv_usec;

#pragma omp atomic
      tottimes[nr] += stamp;

      VT_USER_END (const_cast<char*> (names[nr].c_str()));
    }
コード例 #4
0
ファイル: Metrics.cpp プロジェクト: kawuum/grappa
 /// Stop profiling at this point.
 /// Does nothing unless GOOGLE_PROFILER is defined.  With it, the profiler
 /// is stopped and impl::profile_handler is invoked with NULL.  If
 /// VTRACE_SAMPLED is defined as well, the "sampling" trace region is
 /// ended and sample() is called.
 void stop_tracing_here()
 {
 #if defined(GOOGLE_PROFILER)
   ProfilerStop();
   impl::profile_handler(NULL);
 #  if defined(VTRACE_SAMPLED)
   VT_USER_END("sampling");

   sample();
 #  endif
 #endif
 }
コード例 #5
0
ファイル: profiler.hpp プロジェクト: mliertzer/ngsolve
    /// Stop this timer.  Only timers with priority 1 take part in tracing:
    /// the trace region of this timer is ended, the region of the timer
    /// that was active before (prev), if any, is started again, and prev
    /// becomes the top of the timer stack.
    void Stop ()
    {
      if (priority != 1)
        return;

      // leave our own region ...
      VT_USER_END(name.c_str());

      // ... and resume the region of the timer we had interrupted
      if (prev != NULL)
        {
          VT_USER_START(prev -> name.c_str());
        }

      stack_top = prev;
    }
コード例 #6
0
ファイル: profiler.hpp プロジェクト: mliertzer/ngsolve
    /// Start this timer.  Only timers with priority 1 take part in tracing:
    /// this timer is pushed on the timer stack (remembering the previous
    /// top in prev), the trace region of the previous top, if any, is
    /// ended, and the region of this timer is started.
    void Start ()
    {
      if (priority != 1)
        return;

      // push ourselves on the stack of active timers
      prev = stack_top;
      stack_top = this;

      // suspend the region of the timer that was running so far
      if (prev)
        {
          VT_USER_END (prev -> name.c_str());
        }

      VT_USER_START (name.c_str());
    }
コード例 #7
0
ファイル: mdlbrot.c プロジェクト: 315234/OpenFOAM-2.2.x-OSX
/*
 * Store color at position (x, y) of the surface pic.
 *
 * The pixel buffer is addressed as an array of Uint32 with MAX_X values
 * per row; x and y are not range-checked.
 */
static void draw_pixel(SDL_Surface* pic, Uint32 x, Uint32 y, Uint32 color)
{
  Uint32* framebuffer;

#ifdef MANUAL
  VT_USER_START("draw_pixel");
#endif

  framebuffer = (Uint32*)pic->pixels;
  *(framebuffer + y * MAX_X + x) = color;

#ifdef MANUAL
  VT_USER_END("draw_pixel");
#endif
}
コード例 #8
0
ファイル: mdlbrot.c プロジェクト: 315234/OpenFOAM-2.2.x-OSX
/*
 * Copy the computed colour values from field to the surface pic.
 *
 * field is read with the first coordinate as the slow index
 * (field[x * MAX_Y + y]); each value is written with draw_pixel().
 */
static void draw(SDL_Surface* pic, Uint32* field)
{
  Uint32 col = 0;

#ifdef MANUAL
  VT_USER_START("draw");
#endif

  while(col < MAX_X)
  {
    Uint32 row = 0;

    while(row < MAX_Y)
    {
      draw_pixel(pic, col, row, field[col * MAX_Y + row]);
      row++;
    }

    col++;
  }

#ifdef MANUAL
  VT_USER_END("draw");
#endif
}
コード例 #9
0
 // Stop timer number nr: accumulate the time elapsed since starttimes[nr]
 // and end the trace region named names[nr].
 // NOTE(review): starttimes[nr] is presumably set by a matching start
 // routine that is not visible here - confirm.
 static void StopTimer (int nr) 
 { 
   // clock() difference, i.e. the elapsed value is in clock ticks
   tottimes[nr] += clock()-starttimes[nr]; 
   // the const_cast suggests VT_USER_END takes a non-const char* here
   VT_USER_END (const_cast<char*> (names[nr].c_str())); 
 }
コード例 #10
0
ファイル: ooc_BLAS.c プロジェクト: caomw/cuOmicABEL
/*
 * Out-of-core gemms:
 *   - Z' XR
 *   - Z' Y
 * Z is m x m
 * The other matrix is m x n
 *
 * The m x n matrix is read from file "in" in pieces of at most ooc_b
 * columns, each piece is multiplied from the left by Z' (dgemm with
 * transposed Z), and the result is written to file "out" at the same
 * column offset.  Reading the next piece and writing the previous result
 * are issued through the double_buffering interface while the current
 * piece is being processed.
 *
 * Parameters:
 *   m, n          dimensions as described above
 *   ooc_b         number of columns per piece
 *   Z             m x m matrix, held in memory
 *   in, out       paths of the input and output files
 *   threshold, obj_type, obj_name, namelength, nthreads_avg
 *                 forwarded to average(), which is run on every piece
 *                 before the multiplication; obj_name is indexed in
 *                 steps of namelength characters per column
 *
 * All element counts and byte sizes are computed in size_t so that
 * ooc_b * m cannot overflow int for large problems.
 */
void ooc_gemm( int m, int n, int ooc_b, double *Z, char *in, char *out, 
		int threshold, const char *obj_type, char *obj_name, int namelength, int nthreads_avg )
{
    /* Files */
    FILE *fp_in  = fgls_fopen( in, "rb" );
    FILE *fp_out = fgls_fopen( out, "wb" );

    /* Number of elements in one full piece (ooc_b columns of length m).
     * Widened before multiplying: the int product may overflow. */
    const size_t block_elems = (size_t)ooc_b * m;

    /* Asynchronous IO data structures */
    double *in_comp, *out_comp;
    double_buffering db_in, db_out; // B, C
    double_buffering_init( &db_in, block_elems * sizeof(double),
                            fp_in, NULL ); // _fp, cf not needed in this case
    double_buffering_init( &db_out, block_elems * sizeof(double),
                            fp_out, NULL ); // _fp, cf not needed in this case

    /* BLAS constants */
    double ONE  = 1.0;
    double ZERO = 0.0;

    /* Read first piece of "in" */
    double_buffering_read( &db_in, IO_BUFF,
                           MIN( block_elems, (size_t)m * n ) * sizeof(double), 0);
    double_buffering_swap( &db_in );

    int cur_n;
    int i;
    for ( i = 0; i < n; i += ooc_b ) 
    {
        /* Read next piece of "in".
         * When the current piece is the last one, a 1-byte read at offset 0
         * is issued instead, so that a request is pending in every iteration. */
        size_t nbytes = i + ooc_b > n ? 1 : MIN( block_elems, ( n - (size_t)( i + ooc_b ) ) * m ) * sizeof(double);
        off_t  offset = i + ooc_b > n ? 0 : (off_t)(i + ooc_b) * m * sizeof(double);
        double_buffering_read( &db_in, IO_BUFF, nbytes, offset );

        /* Wait for current piece of "in" */
#if VAMPIR
    VT_USER_START("OOC_GEMM_WAIT");
#endif
        double_buffering_wait( &db_in, COMP_BUFF );
#if VAMPIR
    VT_USER_END("OOC_GEMM_WAIT");
#endif

        /* Compute */
        in_comp  = double_buffering_get_comp_buffer( &db_in );
        out_comp = double_buffering_get_comp_buffer( &db_out );
        cur_n = MIN( ooc_b, (n - i) );

        // Sanity check
        average( in_comp, m, cur_n, threshold, obj_type, &obj_name[(size_t)i * namelength], namelength, 1, nthreads_avg );
#if VAMPIR
    VT_USER_START("OOC_GEMM");
#endif
        /* out_comp := Z' * in_comp   (m x cur_n) */
        dgemm_("T", "N", &m, &cur_n, &m, &ONE, Z, &m, in_comp, &m, &ZERO, out_comp, &m);
#if VAMPIR
    VT_USER_END("OOC_GEMM");
#endif

        /* Wait until previous piece of "out" is written */
        if ( i > 0)
            double_buffering_wait( &db_out, IO_BUFF );

        /* Write current piece of "out" */
        double_buffering_write( &db_out, COMP_BUFF,
                                MIN( block_elems, (size_t)(n - i) * m ) * sizeof(double),
                                (off_t)i * m * sizeof(double) );

        /* Swap buffers */
        double_buffering_swap( &db_in );
        double_buffering_swap( &db_out );
    }

    /* Wait for the remaining io calls issued */
    double_buffering_wait( &db_in,  COMP_BUFF );
    double_buffering_wait( &db_out, IO_BUFF );

    /* Clean-up */
    double_buffering_destroy( &db_in );
    double_buffering_destroy( &db_out );

    fclose( fp_in );
    fclose( fp_out );
}
コード例 #11
0
ファイル: ring.c プロジェクト: 315234/OpenFOAM-2.2.x-OSX
/*
 * Pass an integer message around a ring of MPI processes.
 *
 * Rank 0 injects the value NRING and decrements it every time the message
 * comes back to it.  Every process forwards what it receives and leaves
 * the loop after forwarding the value 0.  When MANUAL is defined, "main"
 * and every loop iteration ("ring_loop") are recorded as user regions;
 * each started region is ended on every path, including the final
 * iteration that leaves the loop.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    int rank, size, next, prev, message;

#ifdef MANUAL
    VT_USER_START("main");
#endif

    /* Start up MPI */

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
 
    /* Calculate the rank of the next process in the ring.  Use the
       modulus operator so that the last process "wraps around" to
       rank zero. */

    next = (rank + 1) % size;
    prev = (rank + size - 1) % size;

    /* If we are the "master" process (i.e., MPI_COMM_WORLD rank 0),
       put the number of times to go around the ring in the
       message. */

    if (0 == rank) {
        message = NRING;

        printf("Process 0 sending %d to %d, tag %d (%d processes in ring)\n", 
               message, next, TAG, size);
        MPI_Send(&message, 1, MPI_INT, next, TAG, MPI_COMM_WORLD); 
        printf("Process 0 sent to %d\n", next);
    }

    /* Pass the message around the ring.  The exit mechanism works as
       follows: the message (a positive integer) is passed around the
       ring.  Each time it passes rank 0, it is decremented.  When
       each process receives a message containing a 0 value, it
       passes the message on to the next process and then quits.  By
       passing the 0 message first, every process gets the 0 message
       and can quit normally. */

    while (1) {
#ifdef MANUAL
        VT_USER_START("ring_loop");
#endif
        MPI_Recv(&message, 1, MPI_INT, prev, TAG, MPI_COMM_WORLD, 
                 MPI_STATUS_IGNORE);

        if (0 == rank) {
            --message;
            printf("Process 0 decremented value: %d\n", message);
        }

        MPI_Send(&message, 1, MPI_INT, next, TAG, MPI_COMM_WORLD);
        if (0 == message) {
            printf("Process %d exiting\n", rank);
            /* Close the region of this last iteration before leaving the
               loop; otherwise "ring_loop" would stay open. */
#ifdef MANUAL
            VT_USER_END("ring_loop");
#endif
            break;
        }
#ifdef MANUAL
        VT_USER_END("ring_loop");
#endif
    }

    /* The last process does one extra send to process 0, which needs
       to be received before the program can exit */

    if (0 == rank) {
        MPI_Recv(&message, 1, MPI_INT, prev, TAG, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);
    }
    
    /* All done */

    MPI_Finalize();

#ifdef MANUAL
    VT_USER_END("main");
#endif

    return 0;
}
コード例 #12
0
ファイル: mdlbrot.c プロジェクト: 315234/OpenFOAM-2.2.x-OSX
/*
 * Compute a MAX_X x MAX_Y Mandelbrot image with MPI and show it with SDL.
 *
 * Ranks 1..numprocs-1 each compute a contiguous range of pixel indices
 * with calc_lines() and send the result to rank 0; the last worker also
 * takes the remainder of the division.  Rank 0 collects all ranges,
 * displays the image and waits for a key press or a quit event.
 *
 * Returns 0 on success and 1 if fewer than two processes are available
 * or the display could not be set up.  The MPI job is aborted if a pixel
 * buffer cannot be allocated.
 */
int main(int argc, char* argv[])
{
  int numprocs, rank, edge, pixel_count, start, end;
  int exit_code = 0;
  double max_values_sq;
  Uint32 max_iter;

#ifdef MANUAL
  VT_USER_START("main");
#endif
  
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  if(numprocs <= 1)
  {
    fprintf(stderr, "%s: error: requires at least two MPI processes",
            argv[0]);
    /* MPI was initialised above, so it has to be shut down on this
       path as well */
    MPI_Finalize();
#ifdef MANUAL
    VT_USER_END("main");
#endif
    return 1;
  }
  
  max_values_sq = 4.0;
  max_iter = 5000;

  /* number of pixels per worker; rank 0 does not compute */
  edge = (MAX_X * MAX_Y) / (numprocs - 1);

  if(rank > 0)
  {
    int i = rank - 1;

    Uint32* pixels;

    start = i * edge;
    end = (i == numprocs - 2) ? MAX_X * MAX_Y : (i + 1) * edge;
    pixel_count = end - start;

    pixels = malloc(pixel_count * sizeof(Uint32));
    /* malloc(0) may legitimately return NULL, so only a failed non-empty
       request is an error */
    if(pixels == NULL && pixel_count > 0)
    {
      fprintf(stderr, "%s: error: out of memory\n", argv[0]);
      /* rank 0 is waiting for our data; take the whole job down */
      MPI_Abort(MPI_COMM_WORLD, 1);
    }
    calc_lines(start, end, pixels, max_values_sq, max_iter);

    MPI_Send((void*)pixels, pixel_count, MPI_INT, 0, 0, MPI_COMM_WORLD);
    free(pixels);
  }
  else /* rank == 0 */
  {
    int i;

    Uint32* field = malloc(MAX_X * MAX_Y * sizeof(Uint32));
    Uint32* fieldpos;

    SDL_Surface* pic;
    SDL_Event event;
        
    MPI_Status status;

    if(field == NULL)
    {
      fprintf(stderr, "%s: error: out of memory\n", argv[0]);
      MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* collect the range of every worker, using the same partitioning
       as on the sending side */
    for(i = 1; i < numprocs; i++)
    {
      start = (i - 1) * edge;
      end = (i == numprocs - 1) ? MAX_X * MAX_Y : i * edge;

      pixel_count = end - start;

      fieldpos = field+start;

      MPI_Recv(fieldpos, pixel_count, MPI_INT, i, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
    }
    
    if(SDL_Init(SDL_INIT_EVERYTHING) != 0)
    {
      fprintf(stderr, "%s: error: SDL initialisation failed\n", argv[0]);
      exit_code = 1;
    }
    else
    {
      pic = SDL_SetVideoMode(MAX_X, MAX_Y, 32, SDL_HWSURFACE | SDL_DOUBLEBUF);

      if(pic == NULL)
      {
        fprintf(stderr, "%s: error: could not set video mode\n", argv[0]);
        exit_code = 1;
      }
      else
      {
        int done = 0;

        SDL_WM_SetCaption("Mandelbrot", "Mandelbrot");

        draw(pic, field);

        SDL_Flip(pic);

        /* Wait for a key press or a quit request.  event is only
           inspected when SDL_PollEvent() actually delivered an event. */
        while(!done)
        {
          SDL_Delay(50);
          while(SDL_PollEvent(&event))
          {
            if(event.type == SDL_QUIT || event.type == SDL_KEYDOWN)
            {
              done = 1;
              break;
            }
          }
        }
      }

      /* The video surface belongs to SDL and is released by SDL_Quit();
         it must not be passed to SDL_FreeSurface(). */
      SDL_Quit();
    }

    free(field);
  }

  MPI_Finalize();

#ifdef MANUAL
  VT_USER_END("main");
#endif

  return exit_code;
}
コード例 #13
0
ファイル: fgls_chol.c プロジェクト: lucasb-eyer/OmicABEL
/*
 * Cholesky-based solution of the 
 *  sequence of Feasible Generalized Least-Squares problem
 *  in the context of GWAS:
 *
 * For every j in [0, t) the matrix
 *     M := res_sigma[j] * ( h2[j] * Phi + (1 - h2[j]) * I )
 * is built and Cholesky-factorised.  XL and the j-th Y are multiplied by
 * the inverse of the factor.  Then, for blocks of x_b XR's at a time, each
 * XR is transformed the same way, the p x p system V = X' X (with
 * X = [XL | XR]) is assembled, factorised and solved for B = inv(V) X' y,
 * and V is replaced by its inverse.
 *
 * XR and Y are read, and the results written, through the
 * double_buffering interface, so that the IO requests for the next /
 * previous block are pending while the current block is computed.
 *
 * Layout of one output record (size_one_b_record doubles):
 *     [0, p)      the solution B
 *     [p, 2p)     square roots of the diagonal of inv(V)
 *     [2p, ...)   strictly lower triangle of inv(V), column by column
 * All values are rounded to float precision before they are stored.
 * If the factorisation of V fails, the whole record is filled with NaN.
 *
 * cf is passed by value; cf.y_b is forced to 1 in the local copy.
 * A failing dpotrf on M or a failing dpotri is reported via error_msg().
 *
 * Returns 0.
 */
int fgls_chol( FGLS_config_t cf )
{
    int n = cf.n,
        m = cf.m,
        p = cf.p,
        t = cf.t,
        x_b = cf.x_b,
        /*y_b = cf.y_b,*/
        wXL = cf.wXL,
        wXR = cf.wXR;
    /* In-core operands */
    double *Phi;
    double *M;
    double *ests;
    double *h2;
    double *res_sigma;
    double alpha;
    double beta;

    /* Out-of-core operands */
    double *Bij; // Auxiliary variables

    /* Reusable data thanks to constant XL */
    double *XL;
    double *XL_orig; // XL and a copy (XL is overwritten at every iteration of j)
    double *B_t;  // Top part of b ( in inv(S) b )
    double *V_tl; // Top-Left part of V

    /* BLAS / LAPACK constants */
    double ZERO = 0.0;
    double ONE = 1.0;
    int iONE = 1;
    /* LAPACK error value */
    int info;

    /* iterators and auxiliar vars */
    int ib, i, j, k, l; // size_t
    int nn = cf.n * cf.n; // size_t
    size_t size_one_b_record = p + (p*(p+1))/2;

    // Threading
    int id;
    double *tmpBs, *tmpVs; // Buffer with one B and one V per thread
    double *oneB, *oneV;   // Each thread pointer to its B and V

    if ( cf.y_b != 1 )
    {
        fprintf(stderr, "\n[Warning] y_b not used (set to 1)\n");
        cf.y_b = 1;
    }

    /* Memory allocation */
    // In-core
    build_SPD_Phi( cf.n, cf.Z, cf.W, cf.Phi );
    Phi   = cf.Phi;
    M     = ( double * ) fgls_malloc ( (size_t)cf.n * cf.n * sizeof(double) );
    ests  = cf.ests;

    // h2 values start at ests[0], res_sigma values at ests[2*t]
    h2 = ests;
    res_sigma = &ests[2*cf.t];

    // Sizes are widened to size_t before multiplying (as done for M above)
    XL_orig = cf.XL;
    XL      = ( double * ) fgls_malloc ( (size_t)cf.wXL * cf.n * sizeof(double) );
    B_t  = ( double * ) fgls_malloc ( (size_t)cf.wXL * sizeof(double) );
    V_tl = ( double * ) fgls_malloc ( (size_t)cf.wXL * cf.wXL * sizeof(double) );

    // Temporary storage prior to copying in db_B
    tmpBs = ( double * ) fgls_malloc ( (size_t)cf.p * cf.num_threads * sizeof(double) );
    tmpVs = ( double * ) fgls_malloc ( (size_t)cf.p * cf.p * cf.num_threads * sizeof(double) );

    /* Files and pointers for out-of-core */
    double *XR_comp, *Y_comp, *B_comp;

    /* Asynchronous IO data structures */
    double_buffering db_XR, db_Y, db_B;
    double_buffering_init( &db_XR, (size_t)cf.n * cf.wXR * cf.x_b * sizeof(double),
                            cf.XR, &cf ); // _fp
    double_buffering_init( &db_Y, (size_t)cf.n * cf.y_b * sizeof(double),
                            cf.Y,  &cf );
    double_buffering_init( &db_B, (size_t)size_one_b_record * cf.x_b * cf.y_b * sizeof(double),
                            cf.B,  &cf );

#if VAMPIR
    VT_USER_START("READ_X");
#endif
    /* Read first block of XR's */
    double_buffering_read_XR( &db_XR, IO_BUFF, 0, (size_t)MIN( cf.x_b, cf.m ) - 1 );
    double_buffering_swap( &db_XR );
#if VAMPIR
    VT_USER_END("READ_X");
#endif
#if VAMPIR
    VT_USER_START("READ_Y");
#endif
    /* Read first Y */
    double_buffering_read_Y( &db_Y, IO_BUFF, 0, 0 );
    double_buffering_swap( &db_Y );
#if VAMPIR
    VT_USER_END("READ_Y");
#endif

    int iter = 0;
    for ( j = 0; j < t; j++ )
    {
        /* Set the number of threads for the multi-threaded BLAS */
        set_multi_threaded_BLAS( cf.num_threads );

#if VAMPIR
        VT_USER_START("READ_Y");
#endif
        /* Read next Y (wraps around to the first one after the last j) */
        size_t next_j = (j+1) >= t ? 0 : j+1;
        double_buffering_read_Y( &db_Y, IO_BUFF, next_j, next_j );
#if VAMPIR
        VT_USER_END("READ_Y");
#endif

#if VAMPIR
        VT_USER_START("COMP_J");
#endif
        /* M := sigma * ( h^2 Phi + (1 - h^2) I ) */
        memcpy( M, Phi, (size_t)n * n * sizeof(double) );
        alpha = res_sigma[j] * h2[j];
        beta  = res_sigma[j] * (1 - h2[j]);
        dscal_(&nn, &alpha, M, &iONE);
        for ( i = 0; i < n; i++ )
            M[i*n + i] = M[i*n + i] + beta;

        /* L * L' = M */
        dpotrf_(LOWER, &n, M, &n, &info);
        if (info != 0)
        {
            char err[STR_BUFFER_SIZE];
            snprintf(err, STR_BUFFER_SIZE, "dpotrf(M) failed (info: %d)", info);
            error_msg(err, 1);
        }

        /* XL := inv(L) * XL */
        memcpy( XL, XL_orig, (size_t)wXL * n * sizeof(double) );
        dtrsm_(LEFT, LOWER, NO_TRANS, NON_UNIT, &n, &wXL, &ONE, M, &n, XL, &n);

#if VAMPIR
        VT_USER_START("WAIT_Y");
#endif
        // Wait until current Y is available for computation
        double_buffering_wait( &db_Y, COMP_BUFF );
#if VAMPIR
        VT_USER_END("WAIT_Y");
#endif

        /* y := inv(L) * y */
        Y_comp = double_buffering_get_comp_buffer( &db_Y );
        // Sanity check
        // NOTE(review): the name index does not depend on j here, while the
        // SNP check below uses (n+ib) - confirm this is intended.
        average( Y_comp, n, 1, cf.threshold, "TRAIT", 
                &cf.Y_fvi->fvi_data[n*NAMELENGTH], NAMELENGTH, 0 );
        dtrsv_(LOWER, NO_TRANS, NON_UNIT, &n, M, &n, Y_comp, &iONE);

        /* B_t := XL' * y */
        dgemv_(TRANS, &n, &wXL, &ONE, XL, &n, Y_comp, &iONE, &ZERO, B_t, &iONE);

        /* V_tl := XL' * XL */
        dsyrk_(LOWER, TRANS, &wXL, &n, &ONE, XL, &n, &ZERO, V_tl, &wXL);
#if VAMPIR
        VT_USER_END("COMP_J");
#endif
        /* Solve for x_b X's at once */
        for (ib = 0; ib < m; ib += x_b) 
        {
#if VAMPIR
            VT_USER_START("READ_X");
#endif
            /* Read next block of XR's (wraps around to the first block
             * after the last one) */
            size_t next_x_from = ((size_t)ib + x_b) >= m ?  0 : (size_t)ib + x_b;
            size_t next_x_to   = ((size_t)ib + x_b) >= m ? MIN( (size_t)x_b, (size_t)m ) - 1 : 
                                                           next_x_from + MIN( (size_t)x_b, (size_t)m - next_x_from ) - 1;
            double_buffering_read_XR( &db_XR, IO_BUFF, next_x_from, next_x_to );
#if VAMPIR
            VT_USER_END("READ_X");
#endif

#if VAMPIR
            VT_USER_START("WAIT_X");
#endif
            /* Wait until current block of XR's is available for computation */
            double_buffering_wait( &db_XR, COMP_BUFF );
#if VAMPIR
            VT_USER_END("WAIT_X");
#endif

            /* Set the number of threads for the multi-threaded BLAS */
            set_multi_threaded_BLAS( cf.num_threads );

#if VAMPIR
            VT_USER_START("COMP_IB");
#endif
            /* XR := inv(L) XR */
            XR_comp = double_buffering_get_comp_buffer( &db_XR );
            // Auxiliar variables
            int x_inc = MIN(x_b, m - ib);
            int rhss  = wXR * x_inc;
            // Sanity check
            average( XR_comp, n, x_inc, cf.threshold, "SNP", 
                    &cf.XR_fvi->fvi_data[(n+ib)*NAMELENGTH], NAMELENGTH, 1 );
            // Computation
            dtrsm_(LEFT, LOWER, NO_TRANS, NON_UNIT, &n, &rhss, &ONE, M, &n, XR_comp, &n);

#if VAMPIR
            VT_USER_END("COMP_IB");
#endif

#if CHOL_MIX_PARALLELISM
            /* Set the number of threads for the multi-threaded BLAS to 1.
             * The innermost loop is parallelized using OPENMP */
            set_single_threaded_BLAS();
#endif
#if VAMPIR
            VT_USER_START("COMP_I");
#endif
            B_comp = double_buffering_get_comp_buffer( &db_B );
            /* Every variable that is declared at function scope and written
             * inside the loop body must be private; this includes the inner
             * loop counters k and l. */
#if CHOL_MIX_PARALLELISM
            #pragma omp parallel for private(Bij, oneB, oneV, i, k, l, info, id) schedule(static) num_threads(cf.num_threads)
#endif
            for (i = 0; i < x_inc; i++)
            {
                // Per-thread scratch space and the output record of this XR
                id = omp_get_thread_num();
                oneB = &tmpBs[ id * p ];
                oneV = &tmpVs[ id * p * p ];
                Bij = &B_comp[i * size_one_b_record];

                // Building B
                // Copy B_T
                memcpy(oneB, B_t, wXL * sizeof(double));
                // B_B := XR' * y
                dgemv_("T", 
                        &n, &wXR, 
                        &ONE, &XR_comp[i * wXR * n], &n, Y_comp, &iONE, 
                        &ZERO, &oneB[wXL], &iONE);

                // Building V
                // Copy V_TL
                for( k = 0; k < wXL; k++ )
                    dcopy_(&wXL, &V_tl[k*wXL], &iONE, &oneV[k*p], &iONE); // V_TL
                // V_BL := XR' * XL
                dgemm_("T", "N",
                        &wXR, &wXL, &n,
                        &ONE, &XR_comp[i * wXR * n], &n, XL, &n,
                        &ZERO, &oneV[wXL], &p); // V_BL
                // V_BR := XR' * XR
                dsyrk_("L", "T", 
                        &wXR, &n, 
                        &ONE, &XR_comp[i * wXR * n], &n, 
                        &ZERO, &oneV[wXL * p + wXL], &p); // V_BR

                // B := inv(V) * B
                dpotrf_(LOWER, &p, oneV, &p, &info);
                if (info != 0)
                {
                    // V could not be factorised: mark the whole record as NaN
                    for ( k = 0; k < size_one_b_record; k++ )
                        Bij[k] = 0.0/0.0; //nan("char-sequence");
                    continue;
                }
                dtrsv_(LOWER, NO_TRANS, NON_UNIT, &p, oneV, &p, oneB, &iONE);
                dtrsv_(LOWER,    TRANS, NON_UNIT, &p, oneV, &p, oneB, &iONE);

                /* V := res_sigma * inv( X' inv(M) X) */
                dpotri_(LOWER, &p, oneV, &p, &info);
                if (info != 0)
                {
                    char err[STR_BUFFER_SIZE];
                    snprintf(err, STR_BUFFER_SIZE, "dpotri failed (info: %d)", info);
                    error_msg(err, 1);
                }

                // Copy output (rounded to float precision)
                for ( k = 0; k < p; k++ )
                    Bij[k] = (float) oneB[k];
                for ( k = 0; k < p; k++ )
                    Bij[p+k] = (float)sqrt(oneV[k*p+k]);
                int idx = 0;
                for ( k = 0; k < p-1; k++ ) // Cols of V
                    for ( l = k+1; l < p; l++ ) // Rows of V
                    {
                        Bij[p+p+idx] = (float)oneV[k*p+l];
                        idx++;
                    }
#if 0
              printf("Chi square: %.6f\n", ( (oneB[p-1] / Bij[p+p-1]) * (oneB[p-1] / Bij[p+p-1]) ) );
#endif
            }
#if VAMPIR
            VT_USER_END("COMP_I");
#endif

#if VAMPIR
            VT_USER_START("WAIT_BV");
#endif
            /* Wait until the previous blocks of B's and V's are written */
            if ( iter > 0)
                double_buffering_wait( &db_B, IO_BUFF );
#if VAMPIR
            VT_USER_END("WAIT_BV");
#endif
            /* Write current blocks of B's and V's */
#if VAMPIR
            VT_USER_START("WRITE_BV");
#endif
            double_buffering_write_B( &db_B, COMP_BUFF, ib, ib+x_inc - 1, j, j );
#if VAMPIR
            VT_USER_END("WRITE_BV");
#endif

            /* Swap buffers */
            double_buffering_swap( &db_XR );
            double_buffering_swap( &db_B  );
            iter++;
        }
        /* Swap buffers */
        double_buffering_swap( &db_Y );
    }

#if VAMPIR
    VT_USER_START("WAIT_ALL");
#endif
    /* Wait for the remaining IO operations issued */
    double_buffering_wait( &db_XR, COMP_BUFF );
    double_buffering_wait( &db_Y,  COMP_BUFF );
    double_buffering_wait( &db_B,  IO_BUFF );
#if VAMPIR
    VT_USER_END("WAIT_ALL");
#endif

    /* Clean-up */
    free( M );

    free( XL );
    free( B_t  );
    free( V_tl );
    free( tmpBs );
    free( tmpVs );

    double_buffering_destroy( &db_XR );
    double_buffering_destroy( &db_Y  );
    double_buffering_destroy( &db_B  );

    return 0;
}
コード例 #14
0
ファイル: vpt.hpp プロジェクト: FaceMixer/FaceMixer
    /// Overload selected by the boost::mpl::true_ tag: ends the trace
    /// region that carries this object's name.
    void end(boost::mpl::true_)
    {
        VT_USER_END(name.c_str());
        // debugging aid, kept disabled:
        // std::cout << "vpt_end(" << N << ")\n";
    }