Esempio n. 1
0
/**
 * Factory for wrap implementations.
 *
 * Records the requested type in m_wrapImplType, prints the type's label to
 * stdout, allocates the matching implementation object, then calls tic() and
 * stores getTime() in totalWrapTime.
 *
 * @param t  requested implementation type
 * @return   the newly allocated object (raw pointer handed to the caller), or
 *           NULL when t matches none of the types handled here
 */
WrapImpl *WrapImpl::getWrapImpl(WrapImplType t)
{
	m_wrapImplType = t;
	FILE *out = stdout;
	WrapImpl *impl = NULL;

	// These types are all served by the base WrapImpl class; only the label
	// that gets printed differs.
	const char *baseLabel = NULL;
	if (t == MemCheck)
		baseLabel = "MemCheck\n";
	else if (t == NoAtomicity)
		baseLabel = "NoAtomicity\n";
	else if (t == NoSCM)
		baseLabel = "NoSCM\n";
	else if (t == NoGuarantee)
		baseLabel = "NoGuarantee\n";
	else if (t == Wrap_Hardware)
		baseLabel = "Wrap_Hardware\n";

	if (baseLabel != NULL)
	{
		fprintf(out, "%s", baseLabel);
		impl = new WrapImpl();
	}

	// Specialised implementations.
	if (t == UndoLog)
	{
		fprintf(out, "UndoLog\n");
		impl = new WrapImplUndoLog();
	}
	if (t == Wrap_Software)
	{
		fprintf(out, "Software\n");
		impl = new WrapImplSoftware();
	}

	// Start the timer and remember the start time.
	tic();
	totalWrapTime = getTime();
	return impl;
}
Esempio n. 2
0
/*
 * Solves M x = b in place: b carries the right-hand side on entry and the
 * solution on exit. s is the warm start handed to the CG solver.
 *
 * The CG tolerance is the norm of the first A->n entries of the incoming b
 * times CG_BEST_TOL when iter < 0, or times CG_MIN_TOL / (iter+1)^cg_rate
 * otherwise; it is never allowed below CG_BEST_TOL.
 *
 * Side effects: adds the elapsed time to totalSolveTime and, for iter >= 0,
 * the CG iteration count to totCgIts. Always returns 0.
 */
scs_int solveLinSys(const AMatrix * A, const Settings * stgs, Priv * p, scs_float * b, const scs_float * s, scs_int iter) {
	/* second segment of b: the A->m entries that follow the first A->n */
	scs_float * const tail = b + A->n;
	/* must be computed before b is overwritten below */
	const scs_float tol = calcNorm(b, A->n)
			* (iter < 0 ? CG_BEST_TOL : CG_MIN_TOL / POWF((scs_float) iter + 1, stgs->cg_rate));
	scs_int its;

	tic(&linsysTimer);

	/* fold the tail into the head through A' */
	accumByAtrans(A, p, tail, b);
	/* (I + A'A) x = b, warm-started from s, solution left in the head of b */
	its = pcg(A, stgs, p, s, b, A->n, MAX(tol, CG_BEST_TOL));
	/* negate the tail, then accumulate A times the head into it */
	scaleArray(tail, -1, A->m);
	accumByA(A, p, b, tail);

	/* calls with a negative iter are not counted in the statistics */
	if (!(iter < 0)) {
		totCgIts += its;
	}

	totalSolveTime += tocq(&linsysTimer);
#if EXTRAVERBOSE > 0
	scs_printf("linsys solve time: %1.2es\n", tocq(&linsysTimer) / 1e3);
#endif
	return 0;
}
Esempio n. 3
0
/*
 * Debug variant of the preprocessing tool.
 *
 * Command line (see the usage string below):
 *   argv[1]  ref_file     reference passed to encode_reference()
 *   argv[2]  output_dir   directory passed to every save_* call
 *   argv[3]  s_ratio      ratio passed to compress_SR()
 *   argv[4]  nucleotides  passed to init_replace_table()
 *
 * Builds the structures for the reference (B, S, C, C1, O, R, Scomp, Rcomp)
 * and then the same set for the second calculateBWTdebug() pass (Bi, Si, Oi,
 * Ri, Scompi, Rcompi). Every intermediate result is printed and then saved,
 * and each buffer is freed as soon as it has been written out.
 *
 * Always returns 0.
 */
int main(int argc, char **argv)
{

  ref_vector X, B, Bi;
  vector C, C1;
  comp_vector S, Si, Scomp, Scompi;
  comp_vector R, Ri, Rcomp, Rcompi;
  comp_matrix O, Oi;

  int s_ratio;

  exome ex;

	check_syntax(argc, 5, "preprocess_debug ref_file output_dir s_ratio nucleotides");

  timevars();
	init_replace_table(argv[4]);

  s_ratio = atoi(argv[3]);

  /* Load the reference into X; the exome description is saved right away. */
  encode_reference(&X, &ex, true, argv[1]);
  save_exome_file(&ex, argv[2]);
  /* NOTE(review): ex is not released anywhere in this function -- confirm
   * whether it owns memory that should be freed. */

  /* ---- forward pass (last argument 0) ---- */
  tic("Calculating BWT");
  calculateBWTdebug(&B, &S, &X, 0);
  toc();

  save_ref_vector(&X, argv[2], "X");

  print_vector(S.vector, S.n);
  print_vector(B.vector, B.n);

  tic("Calculating prefix-trie matrices C and O");
  calculate_C(&C, &C1, &B);
  calculate_O(&O, &B);
  toc();

  print_vector(C.vector, C.n);
  print_vector(C1.vector, C1.n);
  print_comp_matrix(O);

  /* Save, then release, everything derived from B. */
  save_ref_vector(&B, argv[2], "B");
  free(B.vector);
  save_vector(&C, argv[2], "C");
  free(C.vector);
  save_vector(&C1, argv[2], "C1");
  free(C1.vector);
  save_comp_matrix(&O, argv[2], "O");
  free_comp_matrix(NULL, &O);

  tic("Calculating R");
  calculate_R(&R, &S);
  toc();
  print_vector(R.vector, R.n);

  /* Compressed versions of S and R, both using s_ratio. */
  tic("Calculating Scomp Rcomp");
  compress_SR(&S, &Scomp, s_ratio);
  print_vector(Scomp.vector, Scomp.n);
  compress_SR(&R, &Rcomp, s_ratio);
  print_vector(Rcomp.vector, Rcomp.n);
  toc();

  save_comp_vector(&S, argv[2], "S");
  free(S.vector);
  save_comp_vector(&R, argv[2], "R");
  free(R.vector);
  save_comp_vector(&Scomp, argv[2], "Scomp");
  free(Scomp.vector);
  save_comp_vector(&Rcomp, argv[2], "Rcomp");
  free(Rcomp.vector);

  /* ---- reverse pass (last argument 1) ---- */
  tic("Calculating BWT of reverse reference");
  calculateBWTdebug(&Bi, &Si, &X, 1);
  toc();

  /* X is saved a second time under the name "Xi"; presumably the call above
   * reversed X in place -- TODO confirm against calculateBWTdebug. */
  save_ref_vector(&X, argv[2], "Xi");

  print_vector(Bi.vector, Bi.n);
  print_vector(Si.vector, Si.n);

  tic("Calculating inverted prefix-trie matrix Oi");
  calculate_O(&Oi, &Bi);
  toc();

  /* X is not used past this point. */
  free(X.vector);

	print_comp_matrix(Oi);

	save_ref_vector(&Bi, argv[2], "Bi");
  free(Bi.vector);

	save_comp_matrix(&Oi, argv[2], "Oi");
  free_comp_matrix(NULL, &Oi);

  tic("Calculating Ri");
  calculate_R(&Ri, &Si);
  toc();

  print_vector(Ri.vector, Ri.n);

  tic("Calculating Scompi Rcompi");
  compress_SR(&Si, &Scompi, s_ratio);
  print_vector(Scompi.vector, Scompi.n);
  compress_SR(&Ri, &Rcompi, s_ratio);
  print_vector(Rcompi.vector, Rcompi.n);
  toc();

  save_comp_vector(&Si, argv[2], "Si");
  free(Si.vector);
  save_comp_vector(&Ri, argv[2], "Ri");
  free(Ri.vector);
  save_comp_vector(&Scompi, argv[2], "Scompi");
  free(Scompi.vector);
  save_comp_vector(&Rcompi, argv[2], "Rcompi");
  free(Rcompi.vector);

  return 0;

}
/*
 * MEX gateway.  MATLAB-side call, per the usage text below:
 *
 *     MAXs = max_min_multiples_erodes( V , F , volumes [, 'verbose'] )
 *
 *   V          array with at most 3 dimensions.  NaN entries are never used
 *              as centres and abort any neighbourhood that touches them.
 *   F          function handle, evaluated through feval with a 1x3 double
 *              holding the 1-based (i,j,k) of the current voxel.  Its first
 *              output (DIST) must be double.  If F can also return a second
 *              output, that output is used as the ordering of DIST;
 *              otherwise MATLAB's sort is called on DIST for every voxel.
 *   volumes    neighbourhood sizes, in number of elements.  The incremental
 *              scan below presumably expects them in ascending order --
 *              TODO confirm with callers.
 *   'verbose'  optional trailing flag that enables progress output.
 *
 * For each non-NaN voxel and each volumes[n], the neighbourhood is formed by
 * the volumes[n] first entries of the ordering (extended through ties in
 * DIST), positioned so that the first entry lies on the voxel.  The minimum
 * of V and the minimum of -V over that neighbourhood are computed, and
 * plhs[0][n] keeps the largest minimum seen over all voxels (initial value
 * -10000).  Neighbourhoods leaving the array or touching a NaN are dropped.
 */
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){ ALLOCATES();

  CreateTicTacToc( CallMatlab );
  CreateTicTacToc( callSort   );
  int               I, J, K, ii, jj, kk;
  int               IJ, IJK, IJK_1;
  int               DI, DJ, DK, DIJK, DIJK_1;
  int               CDI, CDJ, CDK;
  int               result, fevals = 0;
  int               NVOLS, NVOLS_1, n, s, s_start, s_end, v_init;
  real              *volumes, *V, x, y, *DIST, *order, last_distance;
  int               *VV=NULL, nV, v, vv;
  char              skip;
  triplet           *TS=NULL, *DTS=NULL, T;
  mxArray           *INPUT[2]={NULL,NULL}, *OUTPUT[3]={NULL,NULL,NULL};
  double            *MAXs, LAST_MAX;
  double            thisMINx, thisMINy;
  double            *idxs;
  double            *vols;
  double            *ijk;
  char              callSort;
  mwSize            toVec[2]={1,1};
  char              VERBOSE = 0;
  char              STR[1024];
  
  
  if( nlhs > 1 ){
    mxErrMsgTxt("too much outputs");
  }


  /* Optional trailing 'verbose' flag.  The nrhs > 0 guard keeps a call with
     no arguments from reading prhs[-1]; it then fails the count check below. */
  if( nrhs > 0 && mxIsChar( prhs[nrhs-1] ) ){
    mxGetString( prhs[nrhs-1], STR, 100 );
    if( ! myStrcmpi(STR,"verbose") ){ 
      VERBOSE = 1;
    } else {
      mxErrMsgTxt("only 'verbose' option allowed.");
    }
    nrhs = nrhs-1;
  }
  
  
  if( nrhs != 3 ){
    mxErrMsgTxt("sintax error. max_min_multiples_erodes( V , F , volumes )");
  }

  if( mxGetClassID( prhs[1] ) != mxFUNCTION_CLASS ){
    mxErrMsgTxt("F have to be a function_handle.");
  }

  if( myNDims( prhs[0] ) > 3  ){
    mxErrMsgTxt("bigger than 3d arrays is not allowed.");
  }
  
  NVOLS   = myNumel( prhs[2] );
  NVOLS_1 = NVOLS - 1;
  volumes = myGetPr( prhs[2] );
  
  
  I     = mySize( prhs[0] , 0 );
  J     = mySize( prhs[0] , 1 );
  K     = mySize( prhs[0] , 2 );
  IJ    = I*J;
  IJK   = IJ*K;
  

  /* VV: linear indices of the non-NaN voxels.  TS: per-voxel (i,j,k) + NaN flag. */
  VV = (int     *) mxMalloc( IJK*sizeof( int     ) );
  TS     = (triplet *) mxMalloc( IJK*sizeof( triplet ) );

  V = myGetPr( prhs[0] );

  v  = 0; 
  nV = 0;
  for( kk = 0 ; kk < K ; kk++ ){ for( jj = 0 ; jj < J ; jj++ ){ for( ii = 0 ; ii < I ; ii++ ){
    x = V[ v ];
    if( x == x ){               /* false only for NaN */
      VV[ nV ] = v;
      TS[ v ].isnan = 0;
      TS[ v ].i     = ii;
      TS[ v ].j     = jj;
      TS[ v ].k     = kk;
      nV++;
    } else {
      TS[ v ].isnan = 1;
    }
    v++;
  }}}


  /* feval( F , ijk ): INPUT[1] is reused for every call, only its content changes. */
  INPUT[0] = prhs[1];
  INPUT[1] = mxCreateNumericMatrix( 1 , 3 , mxDOUBLE_CLASS , mxREAL );
  ijk = (double *) mxGetData( INPUT[1] );
  
  
  /* Probe call on the middle non-NaN voxel to find out how many outputs F has.
     NOTE(review): when V holds no non-NaN element (nV == 0) VV[0] is read
     uninitialised here -- confirm whether callers can pass such an input. */
  ijk[0] = TS[ VV[ nV/2] ].i + 1;
  ijk[1] = TS[ VV[ nV/2] ].j + 1;
  ijk[2] = TS[ VV[ nV/2] ].k + 1;
  


  /* The trapping call returns NULL on success and an exception object on failure. */
  OUTPUT[2] = mexCallMATLABWithTrap( 2 , OUTPUT , 2 , INPUT , "feval" );
  
  if( OUTPUT[2] == NULL ){
    
    /* F supplies both DIST and the ordering: no sort needed. */
    callSort = 0;
    if( mxGetClassID( OUTPUT[0] ) != mxDOUBLE_CLASS ){
      if( INPUT[1]  != NULL ){ mxDestroyArray( INPUT[1] );  INPUT[1]=NULL;  }
      if( OUTPUT[0] != NULL ){ mxDestroyArray( OUTPUT[0] ); OUTPUT[0]=NULL; }
      if( OUTPUT[1] != NULL ){ mxDestroyArray( OUTPUT[1] ); OUTPUT[1]=NULL; }
      if( OUTPUT[2] != NULL ){ mxDestroyArray( OUTPUT[2] ); OUTPUT[2]=NULL; }
      mxErrMsgTxt("F debe retornar un double en el primer output.");
    }
    if( mxGetClassID( OUTPUT[1] ) != mxDOUBLE_CLASS ){
      if( INPUT[1]  != NULL ){ mxDestroyArray( INPUT[1] );  INPUT[1]=NULL;  }
      if( OUTPUT[0] != NULL ){ mxDestroyArray( OUTPUT[0] ); OUTPUT[0]=NULL; }
      if( OUTPUT[1] != NULL ){ mxDestroyArray( OUTPUT[1] ); OUTPUT[1]=NULL; }
      if( OUTPUT[2] != NULL ){ mxDestroyArray( OUTPUT[2] ); OUTPUT[2]=NULL; }
      mxErrMsgTxt("F debe retornar un double en el segundo output.");
    }
    
  } else {

    /* The 2-output call failed: retry with one output and sort DIST ourselves. */
    callSort = 1;
    if( VERBOSE ){
      mexPrintf("sort has to be called\n");
    }
    
    mxDestroyArray( OUTPUT[2] ); OUTPUT[2] = NULL;

    result = mexCallMATLAB( 1 , OUTPUT , 2 , INPUT , "feval" );
    if( result ){ mxErrMsgTxt("error computing la funcion."); }

    if( mxGetClassID( OUTPUT[0] ) != mxDOUBLE_CLASS ){
      if( INPUT[1]  != NULL ){ mxDestroyArray( INPUT[1] );  INPUT[1]=NULL;  }
      if( OUTPUT[0] != NULL ){ mxDestroyArray( OUTPUT[0] ); OUTPUT[0]=NULL; }
      if( OUTPUT[1] != NULL ){ mxDestroyArray( OUTPUT[1] ); OUTPUT[1]=NULL; }
      if( OUTPUT[2] != NULL ){ mxDestroyArray( OUTPUT[2] ); OUTPUT[2]=NULL; }
      mxErrMsgTxt("F debe retornar un double en el primer output.");
    }

  }

  DI   = mySize( OUTPUT[0] , 0 );
  DJ   = mySize( OUTPUT[0] , 1 );
  DK   = mySize( OUTPUT[0] , 2 );
  
  /* Initial guess for the offset table; it is resized for every voxel below. */
  DTS  = (triplet *) mxMalloc( 2*DI*DJ*DK*sizeof( triplet ) );
  

  /* NOTE(review): MAXs is declared double* but the data is created with
     mxREAL_CLASS and cast through real* -- consistent only if real is double. */
  plhs[0] = mxCreateNumericMatrix( NVOLS , 1 , mxREAL_CLASS , mxREAL );
  MAXs    = (real *) mxGetData( plhs[0] );
  for( n = 0 ; n < NVOLS ; n++ ){
    MAXs[n] = -10000;
  }

  
  /* LAST_MAX: best value found so far for the last volume; used for pruning. */
  LAST_MAX = MAXs[ NVOLS_1 ];
  /* Voxels are visited in EVERY interleaved passes (v_init, v_init+EVERY, ...). */
  for( v_init = 0 ; v_init < EVERY ; v_init++ ){
    if( utIsInterruptPending() ){
      if( INPUT[1]  != NULL ){ mxDestroyArray( INPUT[1] );  INPUT[1]=NULL;  }
      if( OUTPUT[0] != NULL ){ mxDestroyArray( OUTPUT[0] ); OUTPUT[0]=NULL; }
      if( OUTPUT[1] != NULL ){ mxDestroyArray( OUTPUT[1] ); OUTPUT[1]=NULL; }
      if( OUTPUT[2] != NULL ){ mxDestroyArray( OUTPUT[2] ); OUTPUT[2]=NULL; }
      mexPrintf("USER INTERRUP!!!\n");
      mxErrMsgTxt("USER INTERRUP!!!");
    }
    if( VERBOSE ){
      mexPrintf("v_init:  %d  (%g)  of  %d\n", v_init , LAST_MAX , EVERY );
    }

    for( v = v_init ; v < nV ; v += EVERY ){
      vv = VV[ v ];
      
      /* Running minima of V and of -V, seeded with the centre voxel. */
      thisMINx =   V[ vv ];
      thisMINy =  -thisMINx;
      /* Minima only decrease: if both are already below LAST_MAX this voxel
         is skipped without evaluating F. */
      if( ( thisMINx < LAST_MAX )  && ( thisMINy < LAST_MAX ) ){
        continue;
      }

      T = TS[ vv ];
      
      ijk[0] = T.i + 1;
      ijk[1] = T.j + 1;
      ijk[2] = T.k + 1;
      

      /* Drop the results of the previous evaluation before asking for new ones. */
      if( OUTPUT[0] != NULL ){ mxDestroyArray( OUTPUT[0] ); OUTPUT[0]=NULL; }
      if( OUTPUT[1] != NULL ){ mxDestroyArray( OUTPUT[1] ); OUTPUT[1]=NULL; }
      if( OUTPUT[2] != NULL ){ mxDestroyArray( OUTPUT[2] ); OUTPUT[2]=NULL; }

      if( !callSort ){
        tic( CallMatlab );
        result = mexCallMATLAB( 2 , OUTPUT , 2 , INPUT , "feval" ); fevals++;
        tac( CallMatlab );
      } else {
        tic( CallMatlab );
        result = mexCallMATLAB( 1 , OUTPUT , 2 , INPUT , "feval" ); fevals++;
        tac( CallMatlab );
      }
      
      DI   = mySize( OUTPUT[0] , 0 );
      DJ   = mySize( OUTPUT[0] , 1 );
      DK   = mySize( OUTPUT[0] , 2 );

      DIJK    = DI*DJ*DK;

      if( volumes[ NVOLS_1 ] > DIJK ){
        if( INPUT[1]  != NULL ){ mxDestroyArray( INPUT[1] );  INPUT[1]=NULL;  }
        if( OUTPUT[0] != NULL ){ mxDestroyArray( OUTPUT[0] ); OUTPUT[0]=NULL; }
        if( OUTPUT[1] != NULL ){ mxDestroyArray( OUTPUT[1] ); OUTPUT[1]=NULL; }
        if( OUTPUT[2] != NULL ){ mxDestroyArray( OUTPUT[2] ); OUTPUT[2]=NULL; }
        mxErrMsgTxt("el maximo volumen debe ser menor que numel(DIST)");
      }

      DIJK_1  = DIJK - 1;

      DIST  = (double  *) mxGetData( OUTPUT[0] );
      
      /* DTS[s]: (i,j,k) offset of the s-th element of DIST, column-major. */
      DTS  = (triplet *) mxRealloc( DTS , DIJK*sizeof( triplet ) );
      s = 0;
      for( kk = 0 ; kk < DK ; kk++ ){ for( jj = 0 ; jj < DJ ; jj++ ){ for( ii = 0 ; ii < DI ; ii++ ){
        DTS[ s ].i = ii;
        DTS[ s ].j = jj;
        DTS[ s ].k = kk;
        s++;
      }}}


      /* order: 1-based indices into DIST, either from F or from sort. */
      if( !callSort ){
        order = (double  *) mxGetData( OUTPUT[1] );
      } else {
        /* sort works on a vector: reshape DIST to numel-by-1 first. */
        toVec[0] = mxGetNumberOfElements( OUTPUT[0] );
        mxSetDimensions( OUTPUT[0] ,  toVec  , 2 );
        
        tic( callSort );
        result = mexCallMATLAB( 2 , OUTPUT+1 , 1 , OUTPUT , "sort" );
        tac( callSort );
      
        order = (double  *) mxGetData( OUTPUT[2] );
      }
      
      /* The first entry of the ordering is the element placed on the voxel. */
      CDI = DTS[ (int) ( order[0] - 1 ) ].i;
      CDJ = DTS[ (int) ( order[0] - 1 ) ].j;
      CDK = DTS[ (int) ( order[0] - 1 ) ].k;
      
      
      skip   = 0;

      /* s is not reset between volumes: each volume continues the scan where
         the previous one stopped, reusing the running minima. */
      s = 0;
      for( n = 0 ; n < NVOLS ; n++ ){
        s_end = (int) ( volumes[n] - 1 );
        last_distance = DIST[ (int) order[ s_end ] - 1 ];
        
        /* Extend through ties so equal distances are never split. */
        while( s_end < DIJK_1 && DIST[ (int) ( order[ s_end + 1 ] - 1 ) ] == last_distance ){
          s_end++;
        }
        s_end++;
        
        for( ; s < s_end ; s++ ){
          vv = (int) ( order[ s ] - 1 );
          
          /* Valid indices are 0..dim-1, so an index equal to the dimension
             is already outside the array. */
          ii = T.i + DTS[ vv ].i - CDI;  if( ii < 0 || ii >= I ){ skip = 1; break; }
          jj = T.j + DTS[ vv ].j - CDJ;  if( jj < 0 || jj >= J ){ skip = 1; break; }
          kk = T.k + DTS[ vv ].k - CDK;  if( kk < 0 || kk >= K ){ skip = 1; break; }
            
          vv = ii + jj*I + kk*IJ;
          if( TS[ vv ].isnan ){  skip = 1; break; }
          x =  V[ vv ];  if( x < thisMINx ){ thisMINx = x; }
          y = -x;        if( y < thisMINy ){ thisMINy = y; }
          if( ( thisMINx < LAST_MAX )  && ( thisMINy < LAST_MAX ) ){
            skip = 1; break; 
          }
        }
        if( skip ){  break; }
        if( thisMINx > MAXs[n] ){ MAXs[n] = thisMINx; }
        if( thisMINy > MAXs[n] ){ MAXs[n] = thisMINy; }
      }
      LAST_MAX = MAXs[ NVOLS_1 ];

    }
    
  }
  if( INPUT[1]  != NULL ){ mxDestroyArray( INPUT[1]  ); INPUT[1] =NULL; }
  if( OUTPUT[0] != NULL ){ mxDestroyArray( OUTPUT[0] ); OUTPUT[0]=NULL; }
  if( OUTPUT[1] != NULL ){ mxDestroyArray( OUTPUT[1] ); OUTPUT[1]=NULL; }
  if( OUTPUT[2] != NULL ){ mxDestroyArray( OUTPUT[2] ); OUTPUT[2]=NULL; }
  
  if( VERBOSE ){
    mexPrintf( "\nfevals: %d  en  tiempo:   CallMatlab: %20.30g    sorting: %20.30g\n" , fevals , toc( CallMatlab ) , toc( callSort ) );
  }
  
  
  if(  VV != NULL ){  mxFree(  VV ); }
  if(  TS != NULL ){  mxFree(  TS ); }
  if( DTS != NULL ){  mxFree( DTS ); }

  myFreeALLOCATES();

}
Esempio n. 5
0
/**
 * Applies the preconditioner to X and stores the result in Y.
 *
 * Behaviour depends on this->type():
 *  - AS:     builds w = A + B + C as described in the comment below;
 *  - SIMPLE: delegates to SimpleOp->applyInverse(X, Y);
 *  - other:  copies the input to the output unchanged.
 * In every case the result goes through M_uout before being copied into Y.
 *
 * @param X  input vector (the residual r)
 * @param Y  output vector (w)
 * @return   always 0
 *
 * NOTE(review): the method is const but assigns to U, M_qh3_elt, M_s1, ...;
 * presumably those members are declared mutable -- confirm in the class.
 */
int
PreconditionerAS<space_type,coef_space_type>::applyInverse ( const vector_type& X /*R*/, vector_type& Y /*W*/) const
{
    /*
     * We solve Here P_v w = r
     * With P_v^-1 = diag(P_m)^-1 (=A)
     *              + P (\bar L + g \bar Q) P^t (=B)
     *              + C (L^-1) C^T (=C)
     */

    // Work on a private copy of the input.
    U = X;
    U.close();

    // solve equ (12)
    if ( this->type() == AS )
    {
        tic();

        *M_r = U;
        M_r->close();

        // step A : diag(Pm)^-1*r
        A->pointwiseDivide(*M_r,*M_diagPm);
        A->close();
        // s = P^t r
        M_Pt->multVector(M_r,M_s);

        // Impose boundary conditions on M_s
        // (round trip through the element M_qh3_elt so that .on() can zero
        //  the values on the boundary faces)
#if 1
        M_qh3_elt = *M_s;
        M_qh3_elt.close();
#if FEELPP_DIM == 3
        M_qh3_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=vec(cst(0.), cst(0.), cst(0.)) );
#else
        M_qh3_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=vec(cst(0.), cst(0.)) );
#endif
        *M_s = M_qh3_elt;
        M_s->close();
#endif
#if 1
        // Subvectors for M_s (per component) need to be updated
        M_s1 = M_s->createSubVector(M_Qh3_indices[0], true);
        M_s2 = M_s->createSubVector(M_Qh3_indices[1], true);
#if FEELPP_DIM == 3
        M_s3 = M_s->createSubVector(M_Qh3_indices[2], true);
#endif
#else 
        // Disabled alternative to the branch above.
        // s = [ s1, s2, s3 ]
        M_s->updateSubVector(M_s1, M_Qh3_indices[0]);
        M_s->updateSubVector(M_s2, M_Qh3_indices[1]);
#if FEELPP_DIM == 3
        M_s->updateSubVector(M_s3, M_Qh3_indices[2]);
#endif
#endif
        M_s->close();
        /*
         * hat(L) + g Q is a (Qh,Qh) matrix
         * [[ hat(L) + g Q, 0  ,     0   ],    [ y1 ]    [ s1 ]
         * [   0,   hat(L) + g Q,    0   ], *  [ y2 ] =  [ s2 ]
         * [   0,     0   , hat(L) + g Q ]]    [ y3 ]    [ s3 ]
         */
        // The same operator is applied to each component separately.
        M_lgqOp->applyInverse(M_s1,M_y1);
        M_lgqOp->applyInverse(M_s2,M_y2);
#if FEELPP_DIM == 3
        M_lgqOp->applyInverse(M_s3,M_y3);
#endif

        // y = [ y1, y2, y3 ]
        M_y->updateSubVector(M_y1, M_Qh3_indices[0]);
        M_y->updateSubVector(M_y2, M_Qh3_indices[1]);
#if FEELPP_DIM == 3
        M_y->updateSubVector(M_y3, M_Qh3_indices[2]);
#endif
        M_y->close();
        // step B : P*y
        M_P->multVector(M_y,B);

        // Impose boundary conditions on B = Py
#if 1
        M_vh_elt = *B;
        M_vh_elt.close();
#if FEELPP_DIM == 3
        M_vh_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=vec(cst(0.), cst(0.), cst(0.)) );
#else
        M_vh_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=vec(cst(0.), cst(0.)) );
#endif
        *B = M_vh_elt;
        B->close();
#endif
        // t = C^t r
        M_Ct->multVector(M_r,M_t);

        // Impose boundary conditions on M_t
#if 1
        M_qh_elt = *M_t;
        M_qh_elt.close();
        M_qh_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=cst(0.) );
        *M_t = M_qh_elt;
        M_t->close();
#endif

        // 14.b : hat(L) z = t
        M_lOp->applyInverse(M_t,M_z);
        M_z->close();

        // step C : M_C z
        M_C->multVector(M_z,C);
        C->scale(1./M_g);

        // Impose boundary conditions on C = Cz
#if 1
        M_vh_elt = *C;
        M_vh_elt.close();
#if FEELPP_DIM == 3
        M_vh_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=vec(cst(0.), cst(0.), cst(0.)) );
#else
        M_vh_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=vec(cst(0.), cst(0.)) );
#endif
        *C = M_vh_elt;
        C->close();
#endif
        // Accumulate the three contributions into A.
        //if(M_g != 1.0)
        A->add(*C);
        A->add(*B);

        C->close();
        B->close();
        A->close();

        toc("assemble preconditioner AS",FLAGS_v>0);
        *M_uout = *A; // 15 : w = A + B + C
    }
    else if( this->type() == SIMPLE )
    {
        SimpleOp->applyInverse(X, Y);
        *M_uout = Y;
    }
    else
    {
        // Any other type: the input is passed through unchanged.
        Y=U;
        *M_uout = Y;
    }
    M_uout->close();

    // Copy the result to the caller's vector (timed separately).
    tic();
    Y=*M_uout;
    Y.close();
    toc("PreconditionerAS::applyInverse", FLAGS_v>0 );

    return 0;
}
Esempio n. 6
0
/**
 * Builds quads over the healpixes covered by a star kd-tree and writes them,
 * with their codes, to the given quad and code files.
 *
 * @param starkd            star kd-tree to draw stars from
 * @param codes             output code file (headers are written and fixed here)
 * @param quads             output quad file (headers are written and fixed here)
 * @param Nside             healpix Nside of the grid; at most HP_MAX_INT_NSIDE
 *                          and a multiple of the kd-tree's "HPNSIDE" header
 * @param scale_min_arcmin  lower quad scale, in arcminutes
 * @param scale_max_arcmin  upper quad scale, in arcminutes
 * @param dimquads          number of stars per quad
 * @param passes            number of passes over the healpix list
 * @param Nreuses           reuse limit handed to build_quads(); at most 255
 * @param Nloosen           if non-zero, extra passes over the retry list are
 *                          run with the reuse limit raised from Nreuses+1 up
 *                          to Nloosen
 * @param id                index id stored in both files when non-zero
 * @param scanoccupied      if true, only healpixes that contain a star are tried
 * @param sort_data         stored in the working state for build_quads()
 * @param sort_func         stored in the working state for build_quads()
 * @param sort_size         stored in the working state for build_quads()
 * @param args, argc        command line, forwarded to add_headers()
 * @return 0 on success, -1 on error (already reported via ERROR/logerr)
 */
int hpquads(startree_t* starkd,
			codefile_t* codes,
			quadfile_t* quads,
			int Nside,
			double scale_min_arcmin,
			double scale_max_arcmin,
			int dimquads,
			int passes,
			int Nreuses,
			int Nloosen,
			int id,
			anbool scanoccupied,

			void* sort_data,
			int (*sort_func)(const void*, const void*),
			int sort_size,
			
			char** args, int argc) {
	hpquads_t myhpquads;
	hpquads_t* me = &myhpquads;

	int i;
	int pass;
	anbool circle = TRUE;
	double radius2;
	il* hptotry;
	int Nhptotry = 0;
	int nquads;
	double hprad;
	double quadscale;

	int skhp, sknside;

	qfits_header* qhdr;
	qfits_header* chdr;

	int N;
	int dimcodes;
	int quadsize;
	int NHP;

	memset(me, 0, sizeof(hpquads_t));

	if (Nside > HP_MAX_INT_NSIDE) {
		ERROR("Error: maximum healpix Nside = %i", HP_MAX_INT_NSIDE);
		return -1;
	}
	// per-star use counts are kept in unsigned chars (see nuses below)
	if (Nreuses > 255) {
		ERROR("Error, reuse (-r) must be less than 256");
		return -1;
	}

	me->Nside = Nside;
	me->dimquads = dimquads;
	NHP = 12 * Nside * Nside;
	dimcodes = dimquad2dimcode(dimquads);
	quadsize = sizeof(unsigned int) * dimquads;

	logmsg("Nside=%i.  Nside^2=%i.  Number of healpixes=%i.  Healpix side length ~ %g arcmin.\n",
		   me->Nside, me->Nside*me->Nside, NHP, healpix_side_length_arcmin(me->Nside));

	me->sort_data = sort_data;
	me->sort_func = sort_func;
	me->sort_size = sort_size;

	tic();
	me->starkd = starkd;
	N = startree_N(me->starkd);
	logmsg("Star tree contains %i objects.\n", N);

	// get the "HEALPIX" header from the skdt...
	skhp = qfits_header_getint(startree_header(me->starkd), "HEALPIX", -1);
	if (skhp == -1) {
		if (!qfits_header_getboolean(startree_header(me->starkd), "ALLSKY", FALSE)) {
			logmsg("Warning: skdt does not contain \"HEALPIX\" header.  Code and quad files will not contain this header either.\n");
		}
	}
	// likewise "HPNSIDE"
	sknside = qfits_header_getint(startree_header(me->starkd), "HPNSIDE", 1);

	if (sknside && Nside % sknside) {
		logerr("Error: Nside (-n) must be a multiple of the star kdtree healpixelisation: %i\n", sknside);
		return -1;
	}

	if (!scanoccupied && (N*(skhp == -1 ? 1 : sknside*sknside*12) < NHP)) {
		logmsg("\n\n");
		logmsg("NOTE, your star kdtree is sparse (has only a fraction of the stars expected)\n");
		logmsg("  so you probably will get much faster results by setting the \"-E\" command-line\n");
		logmsg("  flag.\n");
		logmsg("\n\n");
	}

	quads->dimquads = me->dimquads;
	codes->dimcodes = dimcodes;
	quads->healpix = skhp;
	codes->healpix = skhp;
	quads->hpnside = sknside;
	codes->hpnside = sknside;
	if (id) {
		quads->indexid = id;
		codes->indexid = id;
	}

	qhdr = quadfile_get_header(quads);
	chdr = codefile_get_header(codes);

	add_headers(qhdr, args, argc, startree_header(me->starkd), circle, passes);
	add_headers(chdr, args, argc, startree_header(me->starkd), circle, passes);

	// nothing has been allocated by this function yet, so these can return directly
	if (quadfile_write_header(quads)) {
		ERROR("Couldn't write headers to quad file");
		return -1;
	}
	if (codefile_write_header(codes)) {
		ERROR("Couldn't write headers to code file");
		return -1;
	}

	quads->numstars = codes->numstars = N;
	me->quad_dist2_upper = arcmin2distsq(scale_max_arcmin);
	me->quad_dist2_lower = arcmin2distsq(scale_min_arcmin);
	codes->index_scale_upper = quads->index_scale_upper = distsq2rad(me->quad_dist2_upper);
	codes->index_scale_lower = quads->index_scale_lower = distsq2rad(me->quad_dist2_lower);
	
	// one use counter per star
	me->nuses = calloc(N, sizeof(unsigned char));

	// hprad = sqrt(2) * (healpix side length / 2.)
	hprad = arcmin2dist(healpix_side_length_arcmin(Nside)) * M_SQRT1_2;
	quadscale = 0.5 * sqrt(me->quad_dist2_upper);
	// 1.01 for a bit of safety.  we'll look at a few extra stars.
	radius2 = square(1.01 * (hprad + quadscale));
	me->radius2 = radius2;

	logmsg("Healpix radius %g arcsec, quad scale %g arcsec, total %g arcsec\n",
		   distsq2arcsec(hprad*hprad),
		   distsq2arcsec(quadscale*quadscale),
		   distsq2arcsec(radius2));

	hptotry = il_new(1024);

	if (scanoccupied) {
		// Collect the sorted, unique list of healpixes that contain a star.
		logmsg("Scanning %i input stars...\n", N);
		for (i=0; i<N; i++) {
			double xyz[3];
			int j;
			if (startree_get(me->starkd, i, xyz)) {
				ERROR("Failed to get star %i", i);
				// release what has been allocated so far
				il_free(hptotry);
				free(me->nuses);
				return -1;
			}
			j = xyzarrtohealpix(xyz, Nside);
			il_insert_unique_ascending(hptotry, j);
			if (log_get_level() > LOG_VERB) {
				double ra,dec;
				if (startree_get_radec(me->starkd, i, &ra, &dec)) {
					ERROR("Failed to get RA,Dec for star %i\n", i);
					// release what has been allocated so far
					il_free(hptotry);
					free(me->nuses);
					return -1;
				}
				logdebug("star %i: RA,Dec %g,%g; xyz %g,%g,%g; hp %i\n",
						 i, ra, dec, xyz[0], xyz[1], xyz[2], j);
			}
		}
		logmsg("Will check %zu healpixes.\n", il_size(hptotry));
		if (log_get_level() > LOG_VERB) {
			logdebug("Checking healpixes: [ ");
			for (i=0; i<il_size(hptotry); i++)
				logdebug("%i ", il_get(hptotry, i));
			logdebug("]\n");
		}

	} else {
		if (skhp == -1) {
			// Try all healpixes.
			// (a NULL list together with Nhptotry = NHP is what gets passed
			//  to build_quads() in this case)
			il_free(hptotry);
			hptotry = NULL;
			Nhptotry = NHP;
		} else {
			// The star kdtree may itself be healpixed
			int starhp, starx, stary;
			// In that case, the healpixes we are interested in form a rectangle
			// within a big healpix.  These are the coords (in [0, Nside)) of
			// that rectangle.
			int x0, x1, y0, y1;
			int x, y;

			healpix_decompose_xy(skhp, &starhp, &starx, &stary, sknside);
			x0 =  starx    * (Nside / sknside);
			x1 = (starx+1) * (Nside / sknside);
			y0 =  stary    * (Nside / sknside);
			y1 = (stary+1) * (Nside / sknside);

			for (y=y0; y<y1; y++) {
				for (x=x0; x<x1; x++) {
					int j = healpix_compose_xy(starhp, x, y, Nside);
					il_append(hptotry, j);
				}
			}
			assert(il_size(hptotry) == (Nside/sknside) * (Nside/sknside));
		}
	}
	if (hptotry)
		Nhptotry = il_size(hptotry);

	// quads made in the current pass; merged into bigquadlist after each pass
	me->quadlist = bl_new(65536, quadsize);

	if (Nloosen)
		me->retryhps = il_new(1024);

	for (pass=0; pass<passes; pass++) {
		char key[64];
		int nthispass;

		logmsg("Pass %i of %i.\n", pass+1, passes);
		logmsg("Trying %i healpixes.\n", Nhptotry);

		nthispass = build_quads(me, Nhptotry, hptotry, Nreuses);

		logmsg("Made %i quads (out of %i healpixes) this pass.\n", nthispass, Nhptotry);
		logmsg("Made %i quads so far.\n", (me->bigquadlist ? bt_size(me->bigquadlist) : 0) + (int)bl_size(me->quadlist));

		// record the per-pass count in both output headers
		sprintf(key, "PASS%i", pass+1);
		fits_header_mod_int(chdr, key, nthispass, "quads created in this pass");
		fits_header_mod_int(qhdr, key, nthispass, "quads created in this pass");

		logmsg("Merging quads...\n");
		if (!me->bigquadlist)
			me->bigquadlist = bt_new(quadsize, 256);
		for (i=0; i<bl_size(me->quadlist); i++) {
			void* q = bl_access(me->quadlist, i);
			bt_insert2(me->bigquadlist, q, FALSE, compare_quads, &me->dimquads);
		}
		bl_remove_all(me->quadlist);
	}

	il_free(hptotry);
	hptotry = NULL;

	if (Nloosen) {
		int R;
		for (R=Nreuses+1; R<=Nloosen; R++) {
			il* trylist;
			int nthispass;

			logmsg("Loosening reuse maximum to %i...\n", R);
			logmsg("Trying %zu healpixes.\n", il_size(me->retryhps));
			if (!il_size(me->retryhps))
				break;

			// Retry the current retry list; a fresh list is installed first
			// so this pass can fill it for the next one.
			trylist = me->retryhps;
			me->retryhps = il_new(1024);
			nthispass = build_quads(me, il_size(trylist), trylist, R);
			logmsg("Made %i quads (out of %zu healpixes) this pass.\n", nthispass, il_size(trylist));
			il_free(trylist);
			for (i=0; i<bl_size(me->quadlist); i++) {
				void* q = bl_access(me->quadlist, i);
				bt_insert2(me->bigquadlist, q, FALSE, compare_quads, &me->dimquads);
			}
			bl_remove_all(me->quadlist);
		}
	}
	if (me->retryhps)
		il_free(me->retryhps);

	kdtree_free_query(me->res);
	me->res = NULL;
	me->inds = NULL;
	me->stars = NULL;
	free(me->nuses);
	me->nuses = NULL;

	logmsg("Writing quads...\n");

	// add the quads from the big-quadlist
	nquads = bt_size(me->bigquadlist);
	for (i=0; i<nquads; i++) {
		unsigned int* q = bt_access(me->bigquadlist, i);
		quad_write(codes, quads, q, me->starkd, me->dimquads, dimcodes);
	}
	// add the quads that were made during the final round.
	for (i=0; i<bl_size(me->quadlist); i++) {
		unsigned int* q = bl_access(me->quadlist, i);
		quad_write(codes, quads, q, me->starkd, me->dimquads, dimcodes);
	}

	// fix output file headers.
	if (quadfile_fix_header(quads)) {
		ERROR("Failed to fix quadfile headers");
		// the quad containers are still allocated at this point
		bl_free(me->quadlist);
		bt_free(me->bigquadlist);
		return -1;
	}
	if (codefile_fix_header(codes)) {
		ERROR("Failed to fix codefile headers");
		bl_free(me->quadlist);
		bt_free(me->bigquadlist);
		return -1;
	}

	bl_free(me->quadlist);
	bt_free(me->bigquadlist);

	toc();
	logmsg("Done.\n");
	return 0;
}
Esempio n. 7
0
File: main.c Progetto: larsivsi/cbot
/*
 * Bot entry point.
 *
 * Options:
 *   -c <file>   configuration file (default "cbot.conf")
 *   -fd <n>     already-connected socket to take over (used by "reload")
 *
 * After initialisation the function loops on select() over stdin and the IRC
 * socket. Console commands: "quit", "reload" (re-executes the binary while
 * keeping the socket), "say ..." and "kick ...". Socket input is split into
 * lines and handed to the IRC parser/handler.
 *
 * Returns 0 after "quit", or 1 when an option is missing its value.
 */
int main(int argc, char **argv)
{
	tic();

	char *conf_file = NULL;
	socket_fd = -1;
	for (int i=1; i<argc; i++) {
		if (!strcmp(argv[i], "-c")) {
			// The option needs a value after it.
			if (i + 1 >= argc) {
				print_usage();
				return 1; // only reached if print_usage() returns
			}
			conf_file = argv[++i];
		} else if (!strcmp(argv[i], "-fd")) {
			if (i + 1 >= argc) {
				print_usage();
				return 1; // only reached if print_usage() returns
			}
			socket_fd = atoi(argv[++i]);
		} else {
			printf(" >> unknown option: %s\n", argv[i]);
		}
	}

	if (!conf_file)
		conf_file = "cbot.conf";
	load_config(conf_file);

	// Set rand seed
	srand(time(NULL));

	// Set up cURL
	curl_global_init(CURL_GLOBAL_ALL);

	// Set up db connection for logging
	if (config->enabled_modules & MODULE_LOG) {
		log_init();
	}

	// Parse markov corpus
	if (config->enabled_modules & MODULE_MARKOV) {
		markov_init(config->markovcorpus);
	}

	irc_init();

	if (socket_fd == -1) {
		printf(" - Connecting to %s:%s with nick %s, joining channels...\n",
			config->host, config->port, config->nick);
		net_connect();
	} else { // In-place upgrade yo
		printf(" >> Already connected, upgraded in-place!\n");
		join_channels();
	}

	struct recv_data *irc = malloc(sizeof(struct recv_data));
	patterns = malloc(sizeof(*patterns));
	compile_patterns(patterns);

	// Select param
	fd_set socket_set;
	FD_ZERO(&socket_set);
	FD_SET(STDIN_FILENO, &socket_set);
	FD_SET(socket_fd, &socket_set);


	int recv_size;
	char buffer[BUFFER_SIZE];
	char input[BUFFER_SIZE];
	memset(buffer, 0, BUFFER_SIZE);
	size_t buffer_length = 0;
	while (1) {
		// select() leaves only the ready descriptors in socket_set; the
		// branch that did not run re-adds the other descriptor at its end.
		int ret = select(socket_fd+1, &socket_set, 0, 0, 0);
		if (ret == -1) {
			printf(" >> Disconnected, reconnecting...\n");
			close(socket_fd);
			net_connect();
		}
		if (FD_ISSET(STDIN_FILENO, &socket_set)) {
			if (fgets(input, BUFFER_SIZE, stdin) == NULL) {
				printf(" >> Error while reading from stdin!\n");
				continue;
			}

			if (strcmp(input, "quit\n") == 0) {
				printf(" >> Bye!\n");
				break;
			} else if (strcmp(input, "reload\n") == 0) {
				terminate();
				free(irc);
				free_patterns(patterns);
				free(patterns);

				// Set up arguments
				char * arguments[6];
				arguments[0] = argv[0];
				arguments[1] = "-c";
				arguments[2] = conf_file;
				arguments[3] = "-fd";
				// snprintf() reports the length without the terminating
				// NUL, so one extra byte is required.
				char fdstring[snprintf(NULL, 0, "%d", socket_fd) + 1];
				snprintf(fdstring, sizeof(fdstring), "%d", socket_fd);
				arguments[4] = fdstring;
				arguments[5] = NULL;

				printf(" >> Upgrading...\n");
				execvp(argv[0], arguments);

				// execvp() only returns on failure
				printf(" !!! Execvp failing, giving up...\n");
				exit(-1);
			} else if (strncmp(input, "say ", 4) == 0) {
				int offsets[30];
				int offsetcount = pcre_exec(patterns->command_say, 0, input, strlen(input), 0, 0, offsets, 30);
				if (offsetcount > 0) {
					char channel[BUFFER_SIZE];
					char message[BUFFER_SIZE];
					pcre_copy_substring(input, offsets, offsetcount, 1, channel, BUFFER_SIZE);
					pcre_copy_substring(input, offsets, offsetcount, 2, message, BUFFER_SIZE);
					// fixed text of the format + both substrings + NUL
					size_t sendlen = strlen("PRIVMSG  :\n") + strlen(channel) + strlen(message) + 1;
					char sendbuf[sendlen];
					snprintf(sendbuf, sendlen, "PRIVMSG %s :%s\n", channel, message);
					irc_send_str(sendbuf);
				}
			} else if (strncmp(input, "kick ", 5) == 0) {
				int offsets[30];
				int offsetcount = pcre_exec(patterns->command_kick, 0, input, strlen(input), 0, 0, offsets, 30);
				if (offsetcount > 0) {
					char channel[BUFFER_SIZE];
					char user[BUFFER_SIZE];
					pcre_copy_substring(input, offsets, offsetcount, 1, channel, BUFFER_SIZE);
					pcre_copy_substring(input, offsets, offsetcount, 2, user, BUFFER_SIZE);
					// fixed text of the format + both substrings + NUL
					size_t sendlen = strlen("KICK   :Gene police! You! Out of the pool, now!\n") + strlen(channel) + strlen(user) + 1;
					char sendbuf[sendlen];
					snprintf(sendbuf, sendlen, "KICK %s %s :Gene police! You! Out of the pool, now!\n", channel, user);
					irc_send_str(sendbuf);
				}
			} else {
				printf(" >> Unrecognized command. Try 'quit'\n");
			}
			FD_SET(socket_fd, &socket_set);
		} else {
			if (buffer_length >= BUFFER_SIZE - 1) {
				printf(" >> what the f**k, IRCd, a line longer than 4k? dropping some buffer\n");
				memset(buffer, 0, BUFFER_SIZE);
				buffer_length = 0;
				continue;
			}

			recv_size = recv(socket_fd, buffer + buffer_length, BUFFER_SIZE - buffer_length - 1, 0);
			// Only a positive count is data; adding -1 to the unsigned
			// length would wrap it around.
			if (recv_size > 0) {
				buffer_length += recv_size;
			}
			buffer[buffer_length] = '\0';
			if (recv_size <= 0) {
				if (recv_size == 0) {
					printf(" >> recv_size is 0, assuming closed remote socket, reconnecting\n");
				} else {
					printf(" >> recv failed, reconnecting\n");
				}
				close(socket_fd);
				printf("closed\n");
				net_connect();
				printf("reconnected\n");
			}
			// Handle every complete line; a trailing partial line stays in
			// the buffer for the next recv().
			char *newlinepos = 0;
			char *bufbegin = buffer;
			while ((newlinepos = strchr(bufbegin, '\n'))) {
				*newlinepos = 0;
				printf(" ~ %s\n", bufbegin);
				// Only handle privmsg
				if (irc_parse_input(bufbegin, irc, patterns)) {
					irc_handle_input(irc, patterns);
				}
				bufbegin = newlinepos + 1;
			}
			size_t bytes_removed = bufbegin - buffer;
			memmove(buffer, bufbegin, buffer_length - bytes_removed);
			buffer_length -= bytes_removed;
			memset(buffer + buffer_length, 0, BUFFER_SIZE - buffer_length);

			FD_SET(STDIN_FILENO, &socket_set);
		}
	}
	printf(" >> Socket closed, quitting...\n");

	close(socket_fd);

	free(irc);
	free_patterns(patterns);
	free(patterns);

	terminate();

	return 0;
}
Esempio n. 8
0
// Runs one batched pass of the alignment kernel on an OpenCL device and
// reports the time of each phase through tic()/toc().
//
// vargs    - pointer to a bench_args_t. Its seqA/seqB/alignedA/alignedB
//            members are sized with sizeof and indexed directly, so they are
//            presumably fixed-size char arrays (TODO confirm in bench_args_t).
// context  - OpenCL context in which the device buffers are created.
// commands - command queue used for the transfers and the kernel launch.
// program  - not used by this function.
// kernel   - kernel whose five arguments are set here; it is launched with
//            clEnqueueTask when C_KERNEL is defined.
//
// Every host allocation or OpenCL failure prints a message and exits with
// status 1.
void run_benchmark( void *vargs, cl_context& context, cl_command_queue& commands, cl_program& program, cl_kernel& kernel ) {
  struct bench_args_t *args = (struct bench_args_t *)vargs;
  int num_jobs = 1 << 16;

  // Host-side staging buffers: num_jobs back-to-back copies of each array.
  char* seqA_batch = (char *)malloc(sizeof(args->seqA) * num_jobs);
  char* seqB_batch = (char *)malloc(sizeof(args->seqB) * num_jobs);
  char* alignedA_batch = (char *)malloc(sizeof(args->alignedA) * num_jobs);
  char* alignedB_batch = (char *)malloc(sizeof(args->alignedB) * num_jobs);
  if (!seqA_batch || !seqB_batch || !alignedA_batch || !alignedB_batch)
  {
    printf("Error: Failed to allocate host memory!\n");
    printf("Test failed\n");
    exit(1);
  }

  int i;
  for (i=0; i<num_jobs; i++) {
    memcpy(seqA_batch + i*sizeof(args->seqA), args->seqA, sizeof(args->seqA));
    memcpy(seqB_batch + i*sizeof(args->seqB), args->seqB, sizeof(args->seqB));
    memcpy(alignedA_batch + i*sizeof(args->alignedA), args->alignedA, sizeof(args->alignedA));
    memcpy(alignedB_batch + i*sizeof(args->alignedB), args->alignedB, sizeof(args->alignedB));
  }

  // 0th: initialize the timer at the beginning of the program
  timespec timer = tic();

  // Create device buffers
  //
  cl_mem seqA_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(args->seqA)*num_jobs, NULL, NULL);
  cl_mem seqB_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(args->seqB)*num_jobs, NULL, NULL);
  cl_mem alignedA_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(args->alignedA)*num_jobs, NULL, NULL);
  cl_mem alignedB_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(args->alignedB)*num_jobs, NULL, NULL);
  if (!seqA_buffer || !seqB_buffer || !alignedA_buffer || !alignedB_buffer)
  {
    printf("Error: Failed to allocate device memory!\n");
    printf("Test failed\n");
    exit(1);
  }    

  // 1st: time of buffer allocation
  toc(&timer, "buffer allocation");

  // Write our data set into device buffers (blocking writes)
  //
  int err;
  err = clEnqueueWriteBuffer(commands, seqA_buffer, CL_TRUE, 0, sizeof(args->seqA)*num_jobs, seqA_batch, 0, NULL, NULL);
  err |= clEnqueueWriteBuffer(commands, seqB_buffer, CL_TRUE, 0, sizeof(args->seqB)*num_jobs, seqB_batch, 0, NULL, NULL);
  if (err != CL_SUCCESS)
  {
      printf("Error: Failed to write to device memory!\n");
      printf("Test failed\n");
      exit(1);
  }

  // 2nd: time of pageable-pinned memory copy
  toc(&timer, "memory copy");
    
  // Set the arguments to our compute kernel
  //
  err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &seqA_buffer);
  err  |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &seqB_buffer);
  err  |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &alignedA_buffer);
  err  |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &alignedB_buffer);
  err  |= clSetKernelArg(kernel, 4, sizeof(int), &num_jobs);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to set kernel arguments! %d\n", err);
    printf("Test failed\n");
    exit(1);
  }

  // 3rd: time of setting arguments
  toc(&timer, "set arguments");

  // Launch the kernel as a single task. Only the C_KERNEL build is
  // supported; any other build reports the error and exits.
  //

#ifdef C_KERNEL
  err = clEnqueueTask(commands, kernel, 0, NULL, NULL);
#else
  printf("Error: OpenCL kernel is not currently supported!\n");
  exit(1);
#endif
  if (err)
  {
    printf("Error: Failed to execute kernel! %d\n", err);
    printf("Test failed\n");
    exit(1);
  }

  // 4th: time of kernel execution
  clFinish(commands);
  toc(&timer, "kernel execution");

  // Read back the results from the device to verify the output
  //
  err = clEnqueueReadBuffer( commands, alignedA_buffer, CL_TRUE, 0, sizeof(args->alignedA)*num_jobs, alignedA_batch, 0, NULL, NULL );  
  err |= clEnqueueReadBuffer( commands, alignedB_buffer, CL_TRUE, 0, sizeof(args->alignedB)*num_jobs, alignedB_batch, 0, NULL, NULL );  
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to read output array! %d\n", err);
    printf("Test failed\n");
    exit(1);
  }

  // 5th: time of data retrieving (PCIe + memcpy)
  toc(&timer, "data retrieving");

  // The device results are not copied back into args (the memcpy calls are
  // disabled); the output arrays are filled with placeholder characters.
  // memcpy(args->alignedA, alignedA_batch, sizeof(args->alignedA));
  // memcpy(args->alignedB, alignedB_batch, sizeof(args->alignedB));
  for (size_t k=0; k<sizeof(args->alignedA); k++) {
    args->alignedA[k] = 'a';
  }
  for (size_t k=0; k<sizeof(args->alignedB); k++) {
    args->alignedB[k] = 'b';
  }

  // Release the device buffers created above so repeated calls do not leak
  // device memory.
  clReleaseMemObject(seqA_buffer);
  clReleaseMemObject(seqB_buffer);
  clReleaseMemObject(alignedA_buffer);
  clReleaseMemObject(alignedB_buffer);

  free(seqA_batch);
  free(seqB_batch);
  free(alignedA_batch);
  free(alignedB_batch);
}
Esempio n. 9
0
/*
 * Benchmark driver for the divergence stencil.
 *
 * Usage: <prog> <x_max> <y_max> <z_max> [iterations]   (iterations defaults to 100)
 *
 * Allocates two identical sets of input grids, times divergence() on the
 * first set and divergence_cpu() on the second, then compares the two output
 * grids point by point with a tolerance of 0.001. Exits with -1 on bad
 * arguments or allocation failure, and with 1 once more than 30 mismatches
 * have been found.
 */
int main (int argc, char** argv)
{
	int i;
	int iterations = 100;
	// prepare grids
	// declare_grids -->
	// The output grids are handed to divergence()/divergence_cpu() by address;
	// start them at NULL so they never hold an indeterminate value.
	float *  u_0_0_out = NULL;
	float *  u_0_0;
	float *  ux_1_0;
	float *  uy_2_0;
	float *  uz_3_0;
	float *  u_0_0_out_cpu = NULL;
	float *  u_0_0_cpu;
	float *  ux_1_0_cpu;
	float *  uy_2_0_cpu;
	float *  uz_3_0_cpu;
	if ((argc<4))
	{
		printf("Wrong number of parameters. Syntax:\n%s <x_max> <y_max> <z_max> <# of iterations>\n", argv[0]);
		exit(-1);
	}
	int x_max = atoi(argv[1]);
	int y_max = atoi(argv[2]);
	int z_max = atoi(argv[3]);
	if(argc==5)
	  iterations = atoi(argv[4]);
	// Non-positive extents would turn into huge or zero-sized allocations.
	if (x_max <= 0 || y_max <= 0 || z_max <= 0)
	{
		printf("Grid dimensions must be positive.\n");
		exit(-1);
	}
	// <--
	
	// allocate_grids -->
	// Sizes are computed in size_t so large grids do not overflow int.
	const size_t nx = (size_t) x_max;
	const size_t ny = (size_t) y_max;
	const size_t nz = (size_t) z_max;
	u_0_0=((float * )malloc(((nx*ny)*nz)*sizeof (float)));
	ux_1_0=((float * )malloc((((nx+2)*ny)*nz)*sizeof (float)));
	uy_2_0=((float * )malloc(((nx*(ny+2))*nz)*sizeof (float)));
	uz_3_0=((float * )malloc(((nx*ny)*(nz+2))*sizeof (float)));
	u_0_0_cpu=((float * )malloc(((nx*ny)*nz)*sizeof (float)));
	ux_1_0_cpu=((float * )malloc((((nx+2)*ny)*nz)*sizeof (float)));
	uy_2_0_cpu=((float * )malloc(((nx*(ny+2))*nz)*sizeof (float)));
	uz_3_0_cpu=((float * )malloc(((nx*ny)*(nz+2))*sizeof (float)));
	if (!u_0_0 || !ux_1_0 || !uy_2_0 || !uz_3_0 ||
	    !u_0_0_cpu || !ux_1_0_cpu || !uy_2_0_cpu || !uz_3_0_cpu)
	{
		printf("Failed to allocate the grids.\n");
		exit(-1);
	}
	// <--
	
	
	// initialize
	// initialize_grids -->
	initialize(u_0_0, ux_1_0, uy_2_0, uz_3_0, 0.1, 0.2, 0.30000000000000004, x_max, y_max, z_max);
	initialize(u_0_0_cpu, ux_1_0_cpu, uy_2_0_cpu, uz_3_0_cpu, 0.1, 0.2, 0.30000000000000004, x_max, y_max, z_max);
	// <--
	
	// Work counters passed to toc().
	long nFlopsPerStencil = 8;
	long nGridPointsCount = (long) iterations * x_max * y_max * z_max;
	long nBytesTransferred = (long) ((size_t) iterations * sizeof (float) *
		((((nx+2)*ny)*nz) + ((nx*(ny+2))*nz) + ((nx*ny)*(nz+2)) + ((nx*ny)*nz)));
	

	/* *************************** PGI GPU-acc benchmark ********************* */

	
	// warm up
	
	{
		// compute_stencil -->
	  divergence(( & u_0_0_out), u_0_0, ux_1_0, uy_2_0, uz_3_0, 0.4, 0.5, 0.6, x_max, y_max, z_max,iterations);
		// <--
	}
	
	// run the benchmark
	tic ();
	{
		// compute_stencil -->
	  divergence(( & u_0_0_out), u_0_0, ux_1_0, uy_2_0, uz_3_0, 0.7, 0.7999999999999999, 0.8999999999999999, x_max, y_max, z_max,iterations);
		// <--	
	}
	toc (nFlopsPerStencil, nGridPointsCount, nBytesTransferred);

	
	/* *************************** ******************** ********************* */	

	/* *************************** Naive CPU Comparison ********************* */
	
	// warm up cpu comparison
	// NOTE(review): the warm-up calls divergence(), not divergence_cpu();
	// kept as-is because the callee contracts are not visible here.

	{
		// compute_stencil -->
	  divergence(( & u_0_0_out_cpu), u_0_0_cpu, ux_1_0_cpu, uy_2_0_cpu, uz_3_0_cpu, 0.4, 0.5, 0.6, x_max, y_max, z_max,iterations);
		// <--
	}
	
	// run the benchmark
	tic ();
	{
		// compute_stencil -->
	  divergence_cpu(( & u_0_0_out_cpu), u_0_0_cpu, ux_1_0_cpu, uy_2_0_cpu, uz_3_0_cpu, 0.7, 0.7999999999999999, 0.8999999999999999, x_max, y_max, z_max,iterations);
		// <--	
	}
	toc (nFlopsPerStencil, nGridPointsCount, nBytesTransferred);

	// checking "correctness" (assuming cpu version is correct)
	int error_count=0;
	int halo = 0;
	int x,y,z;
	// Each loop runs over its own extent so every point of the
	// x_max * y_max * z_max grid is visited exactly once and the linear
	// index below never leaves the grid.
	for(y=0;y<y_max;y++) {
	  for(x=0;x<x_max;x++) {
	    for(z=0;z<z_max;z++) {
	      i = x + (x_max+halo)*y + (x_max+halo)*(y_max+halo)*z;
	      if(fabs(u_0_0_out[i] - u_0_0_out_cpu[i])>0.001) {
		error_count++;
		printf("%dth error encountered at u[%d]: |%f-%f|=%5.16f\n",error_count,i,u_0_0_out[i],u_0_0_out_cpu[i],fabs(u_0_0_out[i] - u_0_0_out_cpu[i]));
		if(error_count>30) {
		  printf("too many errors\n"); printf("print some solutions\n");
		  // Dump at most the first 100 values, but never more than the
		  // grid actually holds.
		  long total = (long) x_max * y_max * z_max;
		  int limit = (total < 100) ? (int) total : 100;
		  int k;
		  for(k=0;k<limit;k++) {
		    printf("u_pgi[%d]=%2.2f ?? u_cpu[%d]=%2.2f\n",k,u_0_0_out[k],k,u_0_0_out_cpu[k]);
		  }
		  exit(1);
		}
	      }
	    }
	  }
	}
	if(error_count==0) {
	  printf("Error Check Successful. No errors encountered.\n");	  
	}
			  
	
	
	// free memory
	// deallocate_grids -->
	free(u_0_0);
	free(ux_1_0);
	free(uy_2_0);
	free(uz_3_0);
	free(u_0_0_cpu);
	free(ux_1_0_cpu);
	free(uy_2_0_cpu);
	free(uz_3_0_cpu);
	// NOTE(review): u_0_0_out / u_0_0_out_cpu are set by the stencil
	// routines; who owns them is not visible here, so they are not freed.
	// <--
	
	
	return EXIT_SUCCESS;
}
Esempio n. 10
0
/*
 * HSA sample driver: finalizes the kernel in "vector_copy.brig", dispatches
 * it once on a GPU agent, times the dispatch with tic()/toc(), and compares
 * out[GLOBAL_SIZE] against the maximum of the input values.
 *
 * Every HSA call is followed by the check() macro with a description of the
 * step. Returns 0 on normal completion and 1 if a host buffer cannot be
 * allocated.
 */
int main(int argc, char **argv)
{
    struct timespec timer_1, timer_2;

    hsa_status_t err;

    err = hsa_init();
    check(Initializing the hsa runtime, err);

    /* 
     * Iterate over the agents and pick the gpu agent using 
     * the get_gpu_agent callback.
     */
    hsa_agent_t agent;
    err = hsa_iterate_agents(get_gpu_agent, &agent);
    if(err == HSA_STATUS_INFO_BREAK) { err = HSA_STATUS_SUCCESS; }
    check(Getting a gpu agent, err);

    /*
     * Query the name of the agent.
     */
    char name[64] = { 0 };
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name);
    check(Querying the agent name, err);
    printf("The agent name is %s.\n", name);

    /*
     * Query the maximum size of the queue.
     */
    uint32_t queue_size = 0;
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
    check(Querying the agent maximum queue size, err);
    printf("The maximum queue size is %u.\n", (unsigned int) queue_size);

    /*
     * Create a queue using the maximum size.
     */
    hsa_queue_t* queue; 
    err = hsa_queue_create(agent, queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
    check(Creating the queue, err);

    /*
     * Load the BRIG binary.
     */
    hsa_ext_module_t module;
    load_module_from_file("vector_copy.brig",&module);

    /*
     * Create hsa program.
     */
    hsa_ext_program_t program;
    memset(&program,0,sizeof(hsa_ext_program_t));
    err = hsa_ext_program_create(HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, &program);
    check(Create the program, err);

    /*
     * Add the BRIG module to hsa program.
     */
    err = hsa_ext_program_add_module(program, module);
    check(Adding the brig module to the program, err);

    /*
     * Determine the agents ISA.
     */
    hsa_isa_t isa;
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa);
    check(Query the agents isa, err);

    /*
     * Finalize the program and extract the code object.
     */
    hsa_ext_control_directives_t control_directives;
    memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
    hsa_code_object_t code_object;
    err = hsa_ext_program_finalize(program, isa, 0, control_directives, "-O0", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object);
    check(Finalizing the program, err);

    /*
     * Destroy the program, it is no longer needed.
     */
    err=hsa_ext_program_destroy(program);
    check(Destroying the program, err);

    /*
     * Create the empty executable.
     */
    hsa_executable_t executable;
    err = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, "", &executable);
    check(Create the executable, err);

    /*
     * Load the code object.
     */
    err = hsa_executable_load_code_object(executable, agent, code_object, "");
    check(Loading the code object, err);

    /*
     * Freeze the executable; it can now be queried for symbols.
     */
    err = hsa_executable_freeze(executable, "");
    check(Freeze the executable, err);

    /*
     * Extract the symbol from the executable.
     */
    hsa_executable_symbol_t symbol;
    err = hsa_executable_get_symbol(executable, "", "&__OpenCL_vector_copy_kernel", agent, 0, &symbol);
    check(Extract the symbol from the executable, err);

    /*
     * Extract dispatch information from the symbol
     */
    uint64_t kernel_object;
    uint32_t kernarg_segment_size;
    uint32_t group_segment_size;
    uint32_t private_segment_size;

    err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel_object);
    check(Extracting the symbol from the executable, err);
    err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kernarg_segment_size);
    check(Extracting the kernarg segment size from the executable, err);
    err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &group_segment_size);
    check(Extracting the group segment size from the executable, err);
    err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &private_segment_size);
    check(Extracting the private segment from the executable, err);

    /*
     * Create a signal to wait for the dispatch to finish.
     */ 
    hsa_signal_t signal;
    err=hsa_signal_create(1, 0, NULL, &signal);
    check(Creating a HSA signal, err);

    /*
     * Allocate and initialize the kernel arguments and data.
     */
    int* in=(int*)malloc(SIZE);
    if (in == NULL) {
        printf("Failed to allocate the input buffer.\n");
        return 1;
    }
    int i;
    for(i=0;i<ELEMENT;i++)
        in[i]=(rand()%50000+1);
    err=hsa_memory_register(in, SIZE);
    check(Registering argument memory for input parameter, err);

    int* out=(int*)malloc(SIZE);
    if (out == NULL) {
        printf("Failed to allocate the output buffer.\n");
        free(in);
        return 1;
    }
    memset(out, 0, SIZE);
    err=hsa_memory_register(out, SIZE);
    check(Registering argument memory for output parameter, err);
    
    int element = ELEMENT;
    int iter = ITER;

    /*
     * Host-side image of the kernel argument block that is copied into the
     * kernarg buffer below. All fields other than in/out/iter/element stay
     * zero.
     */
    struct __attribute__ ((aligned(16))) args_t {
        uint64_t global_offset_0;
        uint64_t global_offset_1;
        uint64_t global_offset_2;
        uint64_t printf_buffer;
        uint64_t vqueue_pointer;
        uint64_t aqlwrap_pointer;
        void* in;
        void* out;
        int iter;
        int element;
    } args;
    memset(&args, 0, sizeof(args));
    args.in=in;
    args.out=out;
    args.element=element;
    args.iter=iter;

    /*
     * Find a memory region that supports kernel arguments.
     */
    hsa_region_t kernarg_region;
    kernarg_region.handle=(uint64_t)-1;
    hsa_agent_iterate_regions(agent, get_kernarg_memory_region, &kernarg_region);
    err = (kernarg_region.handle == (uint64_t)-1) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
    check(Finding a kernarg memory region, err);
    void* kernarg_address = NULL;

    /*
     * Allocate the kernel argument buffer from the correct region.
     * NOTE(review): the buffer is kernarg_segment_size bytes but sizeof(args)
     * bytes are copied into it; confirm the two agree for this kernel.
     */   
    err = hsa_memory_allocate(kernarg_region, kernarg_segment_size, &kernarg_address);
    check(Allocating kernel argument memory buffer, err);
    memcpy(kernarg_address, &args, sizeof(args));
 
    /*
     * Obtain the current queue write index.
     */
    uint64_t index = hsa_queue_load_write_index_relaxed(queue);

    /*
     * Write the aql packet at the calculated queue index address.
     */
    const uint32_t queueMask = queue->size - 1;
    hsa_kernel_dispatch_packet_t* dispatch_packet = &(((hsa_kernel_dispatch_packet_t*)(queue->base_address))[index&queueMask]);

    dispatch_packet->header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
    dispatch_packet->header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
    dispatch_packet->setup  |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
    dispatch_packet->workgroup_size_x = (uint16_t)LOCAL_SIZE;
    dispatch_packet->workgroup_size_y = (uint16_t)1;
    dispatch_packet->workgroup_size_z = (uint16_t)1;
    dispatch_packet->grid_size_x = (uint32_t) (GLOBAL_SIZE);
    dispatch_packet->grid_size_y = 1;
    dispatch_packet->grid_size_z = 1;
    dispatch_packet->completion_signal = signal;
    dispatch_packet->kernel_object = kernel_object;
    dispatch_packet->kernarg_address = (void*) kernarg_address;
    dispatch_packet->private_segment_size = private_segment_size;
    dispatch_packet->group_segment_size = group_segment_size;
    /* The packet type byte is stored last, with release ordering, after all other fields are written. */
    __atomic_store_n((uint8_t*)(&dispatch_packet->header), (uint8_t)HSA_PACKET_TYPE_KERNEL_DISPATCH, __ATOMIC_RELEASE);

    /*
     * Increment the write index and ring the doorbell to dispatch the kernel.
     */
    tic(&timer_1);
    hsa_queue_store_write_index_relaxed(queue, index+1);
    hsa_signal_store_relaxed(queue->doorbell_signal, index);
    /* err has not been reassigned since the kernarg allocation above, so
     * this check only re-reports that earlier status. */
    check(Dispatching the kernel, err);

    /*
     * Wait on the dispatch completion signal until the kernel is finished.
     * The returned signal value is not needed.
     */
    hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
    toc("Execution Period", &timer_1, &timer_2);

    /*
     * Validate the data in the output buffer: compute the maximum of the
     * input on the host and compare it with out[GLOBAL_SIZE].
     * NOTE(review): out holds SIZE bytes; confirm GLOBAL_SIZE is a valid
     * index into it.
     */
    int temp = 0;
    for(i=0;i<element;i++)
    {
        if(temp<in[i])
            temp = in[i];
    }

    if(temp==out[GLOBAL_SIZE])
        printf("PASS \n");
    else 
        printf("FAIL out=%d in=%d \n",out[GLOBAL_SIZE],temp);

    /*
     * Cleanup all allocated resources.
     */
    err=hsa_memory_free(kernarg_address);
    check(Freeing kernel argument memory buffer, err);

    err=hsa_signal_destroy(signal);
    check(Destroying the signal, err);

    err=hsa_executable_destroy(executable);
    check(Destroying the executable, err);

    err=hsa_code_object_destroy(code_object);
    check(Destroying the code object, err);

    err=hsa_queue_destroy(queue);
    check(Destroying the queue, err);
    
    err=hsa_shut_down();
    check(Shutting down the runtime, err);

    free(in);
    free(out);
//printf("kernarg_segment_size:%d group_segment_size:%d private_segment_size:%d",kernarg_segment_size,group_segment_size,private_segment_size);
 
    return 0;
}
Esempio n. 11
0
/*
 * Driver for WAMI kernel 1 (debayer).
 *
 * Usage: <prog> <directory-containing-input-files>
 *
 * Reads the Bayer input image, runs wami_debayer() inside a tic()/toc()
 * timed region, and
 *   - with ENABLE_CORRECTNESS_CHECKING: reads the golden output and writes
 *     the mean normalized per-pixel error to "err.txt";
 *   - with WRITE_OUTPUT_TO_DISK: writes the debayered image to disk.
 */
int main(int argc, char **argv)
{
    u16 (*bayer)[WAMI_DEBAYER_IMG_NUM_COLS] = NULL;
    rgb_pixel (*debayer)[WAMI_DEBAYER_IMG_NUM_COLS-2*PAD] = NULL;
    char *input_directory = NULL;
#ifdef ENABLE_CORRECTNESS_CHECKING
    rgb_pixel (*gold_debayer)[WAMI_DEBAYER_IMG_NUM_COLS-2*PAD] = NULL;
#endif

    const size_t num_bayer_pixels = WAMI_DEBAYER_IMG_NUM_ROWS *
        WAMI_DEBAYER_IMG_NUM_COLS;
    const size_t num_debayer_pixels = (WAMI_DEBAYER_IMG_NUM_ROWS-2*PAD) *
        (WAMI_DEBAYER_IMG_NUM_COLS-2*PAD);

    if (argc != 2)
    {
        fprintf(stderr, "%s <directory-containing-input-files>\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    input_directory = argv[1];

    bayer = XMALLOC(sizeof(u16) * num_bayer_pixels);
    debayer = XMALLOC(sizeof(rgb_pixel) * num_debayer_pixels);
#ifdef ENABLE_CORRECTNESS_CHECKING
    gold_debayer = XMALLOC(sizeof(rgb_pixel) * num_debayer_pixels);
#endif

    read_image_file(
        (char *) bayer,
        input_filename,
        input_directory,
        sizeof(u16) * num_bayer_pixels);

    /* Clear the whole output image: its elements are rgb_pixel, not u16. */
    memset(debayer, 0, sizeof(rgb_pixel) * num_debayer_pixels);

    printf("WAMI kernel 1 parameters:\n\n");
    printf("Input image width = %u pixels\n", WAMI_DEBAYER_IMG_NUM_COLS);
    printf("Input image height = %u pixels\n", WAMI_DEBAYER_IMG_NUM_ROWS);
    printf("Output image width = %u pixels\n", WAMI_DEBAYER_IMG_NUM_COLS-2*PAD);
    printf("Output image height = %u pixels\n", WAMI_DEBAYER_IMG_NUM_ROWS-2*PAD);

    printf("\nStarting WAMI kernel 1 (debayer).\n");
    tic();
    accept_roi_begin();
    wami_debayer(
        debayer,
        bayer);
    accept_roi_end();
    PRINT_STAT_DOUBLE("CPU time using func toc - ", toc());

#ifdef ENABLE_CORRECTNESS_CHECKING
    read_image_file(
        (char *) gold_debayer,
        golden_output_filename,
        input_directory,    
        sizeof(rgb_pixel) * num_debayer_pixels);

    /*
     * The exact-match validation is kept below for reference (disabled);
     * the active check reports a mean normalized error instead.
     */
    {
        /*
        // original error metric
        int r, c, success = 1;
        for (r = 0; success && r < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++r)
        {
            for (c = 0; c < WAMI_DEBAYER_IMG_NUM_COLS - 2*PAD; ++c)
            {
	        if (ENDORSE(debayer[r][c].r != gold_debayer[r][c].r))
                {
                    printf("Validation error: red pixel mismatch at row=%d, col=%d : "
                        "test value = %u, golden value = %u\n\n", r, c,
                        debayer[r][c].r, gold_debayer[r][c].r);
                    success = 0;
                    break;
                }

                if (ENDORSE(debayer[r][c].g != gold_debayer[r][c].g))
                {
                    printf("Validation error: green pixel mismatch at row=%d, col=%d : "
                        "test value = %u, golden value = %u\n\n", r, c,
                        debayer[r][c].g, gold_debayer[r][c].g);
                    success = 0;
                    break;
                }

                if (ENDORSE(debayer[r][c].b != gold_debayer[r][c].b))
                {
                    printf("Validation error: blue pixel mismatch at row=%d, col=%d : "
                        "test value = %u, golden value = %u\n\n", r, c,
                        debayer[r][c].b, gold_debayer[r][c].b);
                    success = 0;
                    break;
                }
            }
        }
        if (success)
        {
            printf("\nValidation checks passed -- the test output matches the golden output.\n\n");
        }
        */

        // new error metric: per-channel absolute difference scaled by 65535,
        // averaged over the three channels and over all output pixels.
        int r, c;
        double err = 0.0;  /* accumulator must start at zero */
        for (r = 0; r < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++r)
        {
            /* Columns run over the output width, not the output height. */
            for (c = 0; c < WAMI_DEBAYER_IMG_NUM_COLS - 2*PAD; ++c)
            {
                double pixel_error = 0.0;
                pixel_error += ENDORSE(((double) abs(debayer[r][c].r - gold_debayer[r][c].r)) / ((double) 65535));
                pixel_error += ENDORSE(((double) abs(debayer[r][c].g - gold_debayer[r][c].g)) / ((double) 65535));
                pixel_error += ENDORSE(((double) abs(debayer[r][c].b - gold_debayer[r][c].b)) / ((double) 65535));

                err += (pixel_error / ((double) 3))
                    / ((double) num_debayer_pixels);
            }
        }

        FILE *fp = fopen("err.txt", "wb");
        assert(fp != NULL);
        fprintf(fp, "%.2f\n", err);
        fclose(fp);
    }
#endif

#ifdef WRITE_OUTPUT_TO_DISK
    printf("Writing output to %s/%s.\n", output_directory, output_filename);
    {
        const u16 output_channels = 3;
        write_image_file(
            (char *) debayer,
            output_filename,
            output_directory,
            WAMI_DEBAYER_IMG_NUM_COLS - 2*PAD,
            WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD,
            output_channels);
    }
#endif

    FREE_AND_NULL(bayer);
    FREE_AND_NULL(debayer);
#ifdef ENABLE_CORRECTNESS_CHECKING
    FREE_AND_NULL(gold_debayer);
#endif

    return 0;
}
/**
 * Builds the block preconditioner: stores the spaces, allocates the work
 * vectors and matrices, extracts the (1,1) block of AA, and assembles the
 * mass matrix and the er-weighted stiffness matrix with their Dirichlet
 * conditions.
 *
 * @param Xh     product space; component 0 and component 1 are extracted from it
 * @param model  model properties providing the boundary conditions
 * @param p      prefix; also used as the preconditioner name
 * @param AA     full system matrix
 * @param relax  value stored in M_relax
 */
PreconditionerBlockMS<space_type>::PreconditionerBlockMS(space_ptrtype Xh,             // (u)x(p)
                                                         ModelProperties model,        // model
                                                         std::string const& p,         // prefix
                                                         sparse_matrix_ptrtype AA, value_type relax )    // The matrix
    :
        M_backend(backend()),           // the backend associated to the PC
        M_Xh( Xh ),
        M_Vh( Xh->template functionSpace<0>() ), // Potential
        M_Qh( Xh->template functionSpace<1>() ), // Lagrange
        M_Vh_indices( M_Vh->nLocalDofWithGhost() ),
        M_Qh_indices( M_Qh->nLocalDofWithGhost() ),
        M_uin( M_backend->newVector( M_Vh )  ),
        M_uout( M_backend->newVector( M_Vh )  ),
        M_pin( M_backend->newVector( M_Qh )  ),
        M_pout( M_backend->newVector( M_Qh )  ),
        U( M_Xh, "U" ),
        M_mass(M_backend->newMatrix(M_Vh,M_Vh)),
        M_L(M_backend->newMatrix(M_Qh,M_Qh)),
        M_er( 1. ),
        M_model( model ),
        M_prefix( p ),
        M_prefix_11( p+".11" ),
        M_prefix_22( p+".22" ),
        u(M_Vh, "u"),
        ozz(M_Vh, "ozz"),
        zoz(M_Vh, "zoz"),
        zzo(M_Vh, "zzo"),
        M_ozz(M_backend->newVector( M_Vh )),
        M_zoz(M_backend->newVector( M_Vh )),
        M_zzo(M_backend->newVector( M_Vh )),
        X(M_Qh, "X"),
        Y(M_Qh, "Y"),
        Z(M_Qh, "Z"),
        M_X(M_backend->newVector( M_Qh )),
        M_Y(M_backend->newVector( M_Qh )),
        M_Z(M_backend->newVector( M_Qh )),
        phi(M_Qh, "phi"),
        M_relax(relax)
{
    tic();
    LOG(INFO) << "[PreconditionerBlockMS] setup starts";
    this->setMatrix( AA );
    this->setName(M_prefix);

    // Index sets for sub-matrix extraction: the first-space dofs are numbered
    // from 0, the second-space dofs follow right after them.
    std::iota( M_Vh_indices.begin(), M_Vh_indices.end(), 0 );
    std::iota( M_Qh_indices.begin(), M_Qh_indices.end(), M_Vh->nLocalDofWithGhost() );

    M_11 = AA->createSubMatrix( M_Vh_indices, M_Vh_indices, true, true);

    // Dirichlet data from the model: vector field "u", scalar field "phi".
    BoundaryConditions bcs = M_model.boundaryConditions();
    map_vector_field<FEELPP_DIM,1,2> dirichletU { bcs.getVectorFields<FEELPP_DIM> ( "u", "Dirichlet" ) };
    map_scalar_field<2> dirichletPhi { bcs.getScalarFields<2> ( "phi", "Dirichlet" ) };

    // Mass matrix on M_Vh (used by the first block).
    auto massForm = form2(_test=M_Vh, _trial=M_Vh, _matrix=M_mass);
    auto massRhs = form1(_test=M_Vh);
    massForm = integrate(_range=elements(M_Vh->mesh()), _expr=inner(idt(u),id(u))); // M
    for(auto const & entry : dirichletU )
    {
        auto const & marker = entry.first;
        auto const & value = entry.second;
        LOG(INFO) << "Applying " << value << " on " << marker << " for "<<M_prefix_11<<"\n";
        massForm += on(_range=markedfaces(M_Vh->mesh(),marker), _expr=value,_rhs=massRhs, _element=u, _type="elimination_symmetric");
    }

    // L = er * (grad, grad) on M_Qh (the second block).
    auto stiffForm = form2(_test=M_Qh,_trial=M_Qh, _matrix=M_L);
#if 0
    // Per-material relative permittivity would be handled here.
    for(auto it : M_model.materials() )
    { 
        stiffForm += integrate(_range=markedelements(M_Qh->mesh(),marker(it)), _expr=M_er*inner(gradt(phi), grad(phi)));
    }
#else
    stiffForm += integrate(_range=elements(M_Qh->mesh()), _expr=M_er*inner(gradt(phi), grad(phi)));
#endif
    auto stiffRhs = form1(_test=M_Qh);

    for(auto const & entry : dirichletPhi)
    {
        auto const & marker = entry.first;
        auto const & value = entry.second;
        LOG(INFO) << "Applying " << value << " on " << marker << " for "<<M_prefix_22<<"\n";
        stiffForm += on(_range=markedfaces(M_Qh->mesh(),marker),_element=phi, _expr=value, _rhs=stiffRhs, _type="elimination_symmetric");
    }

    toc( "[PreconditionerBlockMS] setup done ", FLAGS_v > 0 );
}
Esempio n. 13
0
/*
 * Multi-threaded Porter stemmer benchmark.
 *
 * Usage: <prog> [NUMBER OF THREADS] [WORDS] [INPUT FILE]
 *
 * Loads up to WORDS entries from the input file into stem_list, runs
 * stem_thread on NTHREADS joinable threads inside a tic()/toc() timed
 * region, and reports the statistics through the STATS and PRINT_STAT
 * macros.
 * Exits with status 1 on bad arguments or an unreadable input file.
 */
int main(int argc, char *argv[]) {
  if (argc < 4) {
    fprintf(stderr, "[ERROR] Invalid arguments provided.\n\n");
    fprintf(stderr, "Usage: %s [NUMBER OF THREADS] [WORDS] [INPUT FILE]\n\n", argv[0]);
    exit(1);  /* a usage error is a failure, like the other error paths */
  }

  /* Timing */
  STATS_INIT("kernel", "pthread_porter_stemming");
  PRINT_STAT_STRING("abrv", "pthread_stemmer");

  NTHREADS = atoi(argv[1]);
  int WORDS = atoi(argv[2]);
  /* NTHREADS is used as a divisor and as an array length below; WORDS
   * sizes the allocation. Reject values that cannot work. */
  if (NTHREADS <= 0 || WORDS <= 0) {
    fprintf(stderr, "[ERROR] NUMBER OF THREADS and WORDS must be positive.\n");
    exit(1);
  }
  PRINT_STAT_INT("threads", NTHREADS);
  FILE *f = fopen(argv[3], "r");
  if (f == 0) {
    /* Report the path that was actually opened (argv[3]). */
    fprintf(stderr, "File %s not found\n", argv[3]);
    exit(1);
  }

  stem_list =
      (struct stemmer **)sirius_malloc(WORDS * sizeof(struct stemmer *));
  int words = load_data(WORDS, stem_list, f);
  fclose(f);

  /* A negative count from load_data skips the benchmark and goes straight
   * to cleanup. A block is used instead of a goto so that no jump crosses
   * the variable-length array declarations. */
  if (words >= 0) {
    PRINT_STAT_INT("words", words);

    tic();
    int tids[NTHREADS];
    pthread_t threads[NTHREADS];
    pthread_attr_t attr;
    iterations = words / NTHREADS;

    sirius_pthread_attr_init(&attr);
    sirius_pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    for (int i = 0; i < NTHREADS; i++) {
      tids[i] = i;
      sirius_pthread_create(&threads[i], &attr, stem_thread, (void *)&tids[i]);
    }

    for (int i = 0; i < NTHREADS; i++) {
      sirius_pthread_join(threads[i], NULL);
    }
    PRINT_STAT_DOUBLE("pthread_stemmer", toc());

    STATS_END();

#ifdef TESTING
    f = fopen("../input/stem_porter.pthread", "w");
    if (f != NULL) {
      for (int i = 0; i < words; ++i) fprintf(f, "%s\n", stem_list[i]->b);
      fclose(f);
    } else {
      fprintf(stderr, "Could not open ../input/stem_porter.pthread for writing\n");
    }
#endif
  }

  sirius_free(s);

  // free up allocated data (the loop body never runs when words < 0)
  for (int i = 0; i < words; i++) {
    sirius_free(stem_list[i]->b);
    sirius_free(stem_list[i]);
  }
  sirius_free(stem_list);

  return 0;
}
Esempio n. 14
0
int main(void) {

	std::string filePath = "CNN-DocTermCountMatrix.txt";
	Matrix& X_Ori = loadMatrix(filePath);
	int NSample = min(20, X_Ori.getRowDimension());
	Matrix& X = X_Ori.getSubMatrix(0, NSample - 1, 0, X_Ori.getColumnDimension() - 1);
	// disp(X.getSubMatrix(0, 10, 0, 100));
	println(sprintf("%d samples loaded", X.getRowDimension()));
	GraphOptions& options = *new GraphOptions();
	options.graphType = "nn";
	std::string type = options.graphType;
	double NN = options.graphParam;
	fprintf("Graph type: %s with NN: %d\n", type.c_str(), (int)NN);

	// Parameter setting for text data
	options.kernelType = "cosine";
	options.graphDistanceFunction = "cosine";

	// Parameter setting for image data
	/*options.kernelType = "rbf";
			options.graphDistanceFunction = "euclidean";*/

	options.graphNormalize = true;
	options.graphWeightType = "heat";

	bool show = true && !false;

	// Test adjacency function - pass
	tic();
	std::string DISTANCEFUNCTION = options.graphDistanceFunction;
	Matrix& A = adjacency(X, type, NN, DISTANCEFUNCTION);
	fprintf("Elapsed time: %.2f seconds.\n", toc());
	std::string adjacencyFilePath = "adjacency.txt";
	saveMatrix(adjacencyFilePath, A);
	if (show)
		disp(A.getSubMatrix(0, 4, 0, 4));

	// Test laplacian function - pass
	tic();
	Matrix& L = laplacian(X, type, options);
	fprintf("Elapsed time: %.2f seconds.\n", toc());
	std::string LaplacianFilePath = "Laplacian.txt";
	saveMatrix(LaplacianFilePath, L);
	if (show)
		disp(L.getSubMatrix(0, 4, 0, 4));

	// Test local learning regularization - pass
	NN = options.graphParam;
	std::string DISTFUNC = options.graphDistanceFunction;
	std::string KernelType = options.kernelType;
	double KernelParam = options.kernelParam;
	double lambda = 0.001;
	tic();
	Matrix& LLR_text = calcLLR(X, NN, DISTFUNC, KernelType, KernelParam, lambda);
	fprintf("Elapsed time: %.2f seconds.\n", toc());
	std::string LLRFilePath = "localLearningRegularization.txt";
	saveMatrix(LLRFilePath, LLR_text);
	if (show)
		display(LLR_text.getSubMatrix(0, 4, 0, 4));

	return EXIT_SUCCESS;

}
Esempio n. 15
0
int main(int argc, char** argv)
{
    int err;                            // error code returned from api calls
    int* a = NULL; // input pointer
    int* results = NULL; // output pointer
    unsigned int correct;               // number of correct results returned

    size_t global[2];                   // global domain size for our calculation
    size_t local[2];                    // local domain size for our calculation

    cl_platform_id platform_id;         // platform id
    cl_device_id device_id;             // compute device id
    cl_context context;                 // compute context
    cl_command_queue commands;          // compute command queue
    cl_program program;                 // compute program
    cl_kernel kernel;                   // compute kernel

    char cl_platform_vendor[1001];
    char cl_platform_name[1001];

    cl_mem input_a;                     // device memory used for the input array
    //cl_mem input_b;                     // device memory used for the input array
    cl_mem output;                      // device memory used for the output array
    int inc;
    double t_start, t_end;

    if (argc != 2) {
        printf("%s <inputfile>\n", argv[0]);
        return EXIT_FAILURE;
    }

    // Connect to first platform
    //
    err = clGetPlatformIDs(1,&platform_id,NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to find an OpenCL platform!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    err = clGetPlatformInfo(platform_id,CL_PLATFORM_VENDOR,1000,(void *)cl_platform_vendor,NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: clGetPlatformInfo(CL_PLATFORM_VENDOR) failed!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    printf("CL_PLATFORM_VENDOR %s\n",cl_platform_vendor);
    err = clGetPlatformInfo(platform_id,CL_PLATFORM_NAME,1000,(void *)cl_platform_name,NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: clGetPlatformInfo(CL_PLATFORM_NAME) failed!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    printf("CL_PLATFORM_NAME %s\n",cl_platform_name);

    // Connect to a compute device
    //
    int fpga = 0;
#if defined (FPGA_DEVICE)
    fpga = 1;
#endif
    err = clGetDeviceIDs(platform_id, fpga ? CL_DEVICE_TYPE_ACCELERATOR : CL_DEVICE_TYPE_CPU,
                         1, &device_id, NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to create a device group!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }

    // Create a compute context
    //
    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
    if (!context)
    {
        printf("Error: Failed to create a compute context!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }

    // Create a command commands
    //
    commands = clCreateCommandQueue(context, device_id, 0, &err);
    if (!commands)
    {
        printf("Error: Failed to create a command commands!\n");
        printf("Error: code %i\n",err);
        printf("Test failed\n");
        return EXIT_FAILURE;
    }

    int status;

    // Create Program Objects
    //

    // Load binary from disk
    unsigned char *kernelbinary;
    char *xclbin=argv[1];
    printf("loading %s\n", xclbin);
    int n_i = load_file_to_memory(xclbin, (char **) &kernelbinary);
    if (n_i < 0) {
        printf("failed to load kernel from xclbin: %s\n", xclbin);
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    else {
        printf("Succeed to load kernel from xclbin: %s\n", xclbin);
    }
    size_t n = n_i;
    // Create the compute program from offline
    program = clCreateProgramWithBinary(context, 1, &device_id, &n,
                                        (const unsigned char **) &kernelbinary, &status, &err);
    if ((!program) || (err!=CL_SUCCESS)) {
        printf("Error: Failed to create compute program from binary %d!\n", err);
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    else {
        printf("Succeed to create compute program from binary %d!\n", err);
    }

    // Build the program executable
    //
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];

        printf("Error: Failed to build program executable!\n");
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    else {
        printf("Succeed to build program executable!\n");
    }

    // Create the compute kernel in the program we wish to run
    //
    kernel = clCreateKernel(program, "mmult", &err);
    if (!kernel || err != CL_SUCCESS)
    {
        printf("Error: Failed to create compute kernel!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    else {
        printf("Succeed to create compute kernel!\n");
    }

    // Create the input and output arrays in device memory for our calculation
    //
    input_a = clCreateBuffer(context,  CL_MEM_READ_ONLY,  sizeof(int) * DATA_SIZE, NULL, NULL);
    output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * RESULT_SIZE, NULL, NULL);
    if (!input_a || !output)
    {
        printf("Error: Failed to allocate device memory!\n");
        printf("Test failed\n");
        return EXIT_FAILURE;
    }
    else {
        printf("Succeed to allocate device memory!\n");
    }

    // set up socket
    printf("\n************* Welcome to UCLA FPGA agent! **********\n");
    struct sockaddr_in stSockAddr;
    int SocketFD = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);

    if(-1 == SocketFD) {
        perror("can not create socket");
        exit(EXIT_FAILURE);
    }

    memset(&stSockAddr, 0, sizeof(stSockAddr));

    stSockAddr.sin_family = AF_INET;
    stSockAddr.sin_port = htons(7000);
    stSockAddr.sin_addr.s_addr = htonl(INADDR_ANY);

    if(-1 == bind(SocketFD,(struct sockaddr *)&stSockAddr, sizeof(stSockAddr))) {
        perror("error bind failed");
        close(SocketFD);
        exit(EXIT_FAILURE);
    }

    if(-1 == listen(SocketFD, 10)) {
        perror("error listen failed");
        close(SocketFD);
        exit(EXIT_FAILURE);
    }


    int taskNum = -1;

    // polling setting
    timespec deadline;
    deadline.tv_sec = 0;
    deadline.tv_nsec = 100;

    // Get the start time
    timespec timer = tic( );
    timespec socListenTime = diff(timer, timer);
    timespec socSendTime = diff(timer, timer);
    timespec socRecvTime = diff(timer, timer);
    timespec exeTime = diff(timer, timer);

    bool broadcastFlag = false;

    int packet_buf[PACKET_SIZE];
    int time_buf[TIME_BUF_SIZE];

    while (true) {
        //printf("\n************* Got a new task! *************\n");
        timer = tic();

        int ConnectFD = accept(SocketFD, NULL, NULL);
        if (!broadcastFlag) {
            broadcastFlag = true;
            timer = tic();
        }

        // For profiling only
        //struct timeval  tv;
        //gettimeofday(&tv, NULL);
        //double time_in_mill = (tv.tv_sec) * 1000 + (tv.tv_usec) / 1000 ; // convert tv_sec & tv_usec to millisecond
        //printf("Receive time (ms): %lf\n", time_in_mill);

        accTime (&socListenTime, &timer);

        if(0 > ConnectFD) {
            perror("error accept failed");
            close(SocketFD);
            exit(EXIT_FAILURE);
        }

        read(ConnectFD, &packet_buf, PACKET_SIZE * sizeof(int));

        // send FPGA stats back to java application
        if(packet_buf[0] == -1) {
            // for profiling use
            collect_timer_stats(ConnectFD, &socListenTime, &socSendTime, &socRecvTime, &exeTime, &timer);
            broadcastFlag = false;
            continue;
        }

        char* shm_addr;
        int shmid = -1;
        int data_size = -1;  // data sent to FPGA (unit: int)
        shmid = packet_buf[0];
        data_size = packet_buf[1];
        printf("Shmid: %d, Data size (# of int): %d\n", shmid, data_size);

        // shared memory
        if((shm_addr = (char *) shmat(shmid, NULL, 0)) == (char *) -1) {
            perror("Server: shmat failed.");
            exit(1);
        }
        //else
        //printf("Server: attach shared memory: %p\n", shm_addr);

        int done = 0;
        while(done == 0) {
            done = (int) *((int*)shm_addr);
            clock_nanosleep(CLOCK_REALTIME, 0, &deadline, NULL);
        }

        //printf("Copy data to the array in the host\n");
        a = (int *)(shm_addr + FLAG_NUM * sizeof(int));
        results = (int *)(shm_addr + FLAG_NUM * sizeof(int));

        accTime (&socSendTime, &timer);

        taskNum = a[2];
        for (int i=0; i<taskNum; i++) {
            int tmp = *(a+8+i*8+7);
            assert(tmp >=0 && tmp < TOTAL_TASK_NUMS);
        }
        printf("Task Num: %d\n", taskNum);

        //printf("\nparameter recieved --- \n");
        //Write our data set into the input array in device memory

        //printf("Write data from host to FPGA\n");
        err = clEnqueueWriteBuffer(commands, input_a, CL_TRUE, 0, sizeof(int) * data_size, a, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to write to source array a!\n");
            printf("Test failed\n");
            return EXIT_FAILURE;
        }

        // Set the arguments to our compute kernel
        //
        err = 0;
        err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_a);
        err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
        err |= clSetKernelArg(kernel, 2, sizeof(int), &taskNum);
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to set kernel arguments! %d\n", err);
            printf("Test failed\n");
            return EXIT_FAILURE;
        }

        // Execute the kernel over the entire range of our 1d input data set
        // using the maximum number of work group items for this device
        //

        //printf("Enqueue Task\n");
        err = clEnqueueTask(commands, kernel, 0, NULL, NULL);
        if (err)
        {
            printf("Error: Failed to execute kernel! %d\n", err);
            printf("Test failed\n");
            return EXIT_FAILURE;
        }

        // Read back the results from the device to verify the output
        //
        cl_event readevent;
        //printf("Enqueue read buffer\n");
        err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(int) * FPGA_RET_PARAM_NUM * taskNum, results, 0, NULL, &readevent );
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to read output array! %d\n", err);
            printf("Test failed\n");
            return EXIT_FAILURE;
        }

        //printf("Wait for FPGA results\n");
        clWaitForEvents(1, &readevent);
        accTime(&exeTime, &timer);

        // Get the execution time
        //toc(&timer);

        // put data back to shared memory
        //printf("Put data back to the shared memory\n");
        *((int*)(shm_addr + sizeof(int))) = DONE;

        //printf("\n************* Task finished! *************\n");

        if (-1 == shutdown(ConnectFD, SHUT_RDWR)) {
            perror("can not shutdown socket");
            close(ConnectFD);
            close(SocketFD);
            exit(EXIT_FAILURE);
        }
        close(ConnectFD);

        //printf("done\n");

        // free the shared memory
        shmdt(shm_addr);
        //shmctl(shmid, IPC_RMID, 0);

        accTime(&socRecvTime, &timer);

        printf("**********timing begin**********\n");
        printTimeSpec(socListenTime);
        printTimeSpec(socSendTime);
        printTimeSpec(socRecvTime);
        printTimeSpec(exeTime);
        printf("**********timing end**********\n\n");
    }

    close(SocketFD);

    // Shutdown and cleanup
    //
    clReleaseMemObject(input_a);
    clReleaseMemObject(output);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);

    return EXIT_SUCCESS;

}
Esempio n. 16
0
/**
 * Benchmark driver for the "VectorUpdate" OpenCL kernel.
 *
 * Usage: <prog> vector_file1 vector_file2 alpha
 *
 * Each vector file starts with the element count followed by that many
 * values.  Both vectors are zero-padded up to a multiple of four elements
 * (N4) before being uploaded; N4/4 is passed as the kernel's last argument.
 *
 * One timing per line is printed, in this order: OpenCL initialisation,
 * buffer/argument setup, (with PROFILE: read-back time, kernel time),
 * total device path, sequential host loop (ref_x[i] += alpha*y[i]).
 * The element-wise comparison of device and host results is commented out.
 *
 * @return EXIT_SUCCESS on completion; EXIT_FAILURE on bad arguments,
 *         unreadable/invalid input or a failed OpenCL setup step.
 */
int main(int argc, char** argv)
{
  cl_context       context      = 0;
  cl_command_queue commandQueue = 0;
  cl_program       program      = 0;
  cl_device_id     device       = 0;
  cl_kernel        kernel       = 0;
  cl_int           status;

  char filename[]  = "../../kernels/VectorUpdate_vec_kernel.cl";
  char filename2[] = "../../common/types_kernel.h";

  int profiling_info = 0;
  // Initialised so they can be released safely at the end.
  cl_event myEvent = 0, myEvent2 = 0;

  if( argc != 4 )
    {
      printf("Usage: %s vector_file1 vector_file2 alpha\n", argv[0]);
      return EXIT_FAILURE;
    }

  // Use the argv strings directly: copying them into fixed 50-byte buffers
  // overflowed the stack for longer paths.
  const char *xfilename = argv[1];
  const char *yfilename = argv[2];
  real alpha = strtod(argv[3], NULL);

#ifdef PROFILE
  cl_ulong startTime, endTime, startTime2, endTime2;
  cl_ulong kernelExecTimeNs, readFromGpuTime;
  profiling_info = 1;
#endif

  /*  READING DATA FROM FILE  */

  real *x;
  real *y;
  real *ref_x;
  int N = 0, M = 0, N4;

  std::ifstream xfile;
  xfile.open (xfilename, std::ios::in);
  if (!xfile.is_open())
    {
      printf("Error: cannot open file\n");
      return EXIT_FAILURE;
    }

  // Reject a missing, non-numeric or non-positive length before it is used
  // to size the allocations below.
  if (!(xfile >> N) || N <= 0)
    {
      printf("Error: invalid vector length\n");
      return EXIT_FAILURE;
    }

  // it must be N%4 == 0
  N4 = ((N & (4-1)) == 0) ? N : N+(4-(N&3));

  HANDLE_ALLOC_ERROR(x = (real*)malloc(N4*sizeof(real)));

  for( int i = 0; i < N; i++)
    xfile >> x[i];

  // zero the padding elements
  for(int i = N; i < N4; ++i)
    x[i] = 0;

  xfile.close();

  // needed for checking result
  HANDLE_ALLOC_ERROR(ref_x = (real*)malloc(N*sizeof(real)));
  memcpy(ref_x, x, N*sizeof(real));


  std::ifstream yfile;
  yfile.open (yfilename, std::ios::in);
  if (!yfile.is_open())
    {
      printf("Error: cannot open file\n");
      return EXIT_FAILURE;
    }

  yfile >> M;
  assert(N==M);

  HANDLE_ALLOC_ERROR(y = (real*)malloc(N4*sizeof(real)));

  for( int i = 0; i < N; i++)
    yfile >> y[i];
  for(int i = N; i < N4; ++i)
    y[i] = 0;


  yfile.close();

  int Ndev4 = N4/4;


  TIME start = tic();

  TIME init = tic();

  // Create an OpenCL context
  context = CreateContext();
  if(context == NULL)
    {
      std::cerr << "Failed to create OpenCL context." << std::endl;
      Cleanup(context, commandQueue, program, kernel);
      return EXIT_FAILURE;
    }

  // Create a command queue
  commandQueue = CreateCommandQueue(context, &device, profiling_info);
  if(commandQueue == NULL)
    {
      std::cerr << "Failed to create OpenCL command queue." << std::endl;
      Cleanup(context, commandQueue, program, kernel);
      return EXIT_FAILURE;
    }

  // Create OpenCL program
  program = CreateProgram(context, device, filename, filename2);
  if (program == NULL)
    {
      Cleanup(context, commandQueue, program, kernel);
      return EXIT_FAILURE;
    }

  // Create OpenCL kernel
  kernel = clCreateKernel(program, "VectorUpdate", NULL);
  if(kernel == NULL)
    {
      std::cerr << "Failed to create kernel." << std::endl;
      Cleanup(context, commandQueue, program, kernel);
      return EXIT_FAILURE;
    }


  printf("%lf\n",toc(init));

  /*     QUERYING DEVICE INFO     */

  size_t kernelWorkGroupSize; // maximum work-group size that can be used to execute a kernel
  size_t sizeOfWarp;          // the preferred multiple of workgroup size for launch
  cl_ulong localMemSize;      // the amount of local memory in bytes being used by a kernel
  cl_ulong privateMemSize;    // the minimum amount of private memory, in bytes, used by each workitem in the kernel.

  HANDLE_OPENCL_ERROR(clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, NULL));
  HANDLE_OPENCL_ERROR(clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &sizeOfWarp, NULL));

#ifdef PRINT_INFO
  HANDLE_OPENCL_ERROR(clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(cl_ulong), &localMemSize, NULL));
  HANDLE_OPENCL_ERROR(clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_PRIVATE_MEM_SIZE, sizeof(cl_ulong), &privateMemSize, NULL));
#endif

#ifdef PRINT_INFO
  // Explicit casts keep the format specifiers correct regardless of how
  // size_t and cl_ulong are defined on the target platform.
  printf("------------  Some info: --------------\n");
  printf("kernelWorkGroupSize = %lu \n", (unsigned long)kernelWorkGroupSize);
  printf("sizeOfWarp          = %lu \n", (unsigned long)sizeOfWarp);
  printf("localMemSize        = %llu \n", (unsigned long long)localMemSize);
  printf("privateMemSize      = %llu \n", (unsigned long long)privateMemSize);
  printf("------------------------ --------------\n");
#endif

  if( WORK_GROUP_SIZE > kernelWorkGroupSize )
    {
      printf("Error: wrong work group size\n");
      return EXIT_FAILURE;
    }

  // Round the global size up to a whole number of work groups.
  size_t localWorkSize[1] = {WORK_GROUP_SIZE};
  int numWorkGroups = (Ndev4-1)/WORK_GROUP_SIZE+1;
  size_t globalWorkSize[1] = {numWorkGroups*WORK_GROUP_SIZE};


  TIME t = tic();

  cl_mem DEV_x = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                sizeof(real)*N4, x, &status);
  HANDLE_OPENCL_ERROR(status);

  cl_mem DEV_y = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                sizeof(real)*N4, y, &status);
  HANDLE_OPENCL_ERROR(status);

  int n = 0;
  status  = clSetKernelArg(kernel, n++, sizeof(cl_mem), (void*)&DEV_x);
  status |= clSetKernelArg(kernel, n++, sizeof(cl_mem), (void*)&DEV_y);
  status |= clSetKernelArg(kernel, n++, sizeof(real), (void*)&alpha);
  status |= clSetKernelArg(kernel, n++, sizeof(int), (void*)&Ndev4);
  HANDLE_OPENCL_ERROR(status);


  printf("%lf\n",toc(t));

  // Queue the kernel
  HANDLE_OPENCL_ERROR(clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL,
                                             globalWorkSize, localWorkSize,
                                             0, NULL, &myEvent));

  // Read the output buffer back to the Host
  HANDLE_OPENCL_ERROR(clEnqueueReadBuffer(commandQueue, DEV_x, CL_TRUE,
                                          0, N4*sizeof(real), x,
                                          0, NULL, &myEvent2));

  clFinish(commandQueue); // wait for all events to finish


  double elapsed_time = toc(start);

  /*  CHECK RESULT */

  TIME start_seq = tic();
  for (int i = 0; i < N; i++)
    ref_x[i] += alpha*y[i];
  double elapsed_time_seq = toc(start_seq);

  // Touch the reference result after the timed loop; only valid when
  // element 10 exists.
  assert(N <= 10 || ref_x[10] < 1000000);

  //std::cout << ref_x[0] << " " << x[0] << std::endl;

  //  for (int i = 0; i < N; i++)
  //  assert( abs(x[i] - ref_x[i]) < TOL );

  //std::cout << "Verified..." << std::endl;


#ifdef PROFILE
  clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_START,
                          sizeof(cl_ulong), &startTime, NULL);
  clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
                          sizeof(cl_ulong), &endTime, NULL);
  clGetEventProfilingInfo(myEvent2, CL_PROFILING_COMMAND_START,
                          sizeof(cl_ulong), &startTime2, NULL);
  clGetEventProfilingInfo(myEvent2, CL_PROFILING_COMMAND_END,
                          sizeof(cl_ulong), &endTime2, NULL);

  kernelExecTimeNs = endTime-startTime;
  readFromGpuTime = endTime2-startTime2;
  printf(/*"Kernel execution time: %lf\n"*/"%lf\n", (double)readFromGpuTime/1000000000.0);
  printf(/*"Kernel execution time: %lf\n"*/"%lf\n", (double)kernelExecTimeNs/1000000000.0);
#endif
  printf(/*"Total execution time: %lf\n"*/"%lf\n", elapsed_time);
  printf(/*"Total execution time (seq.):*/"%lf\n", elapsed_time_seq);

  // Release device objects created in this function before the shared
  // context/queue/program/kernel handles are cleaned up.
  if (myEvent)  clReleaseEvent(myEvent);
  if (myEvent2) clReleaseEvent(myEvent2);
  clReleaseMemObject(DEV_x);
  clReleaseMemObject(DEV_y);
  Cleanup(context, commandQueue, program, kernel);

  free(x);
  free(y);
  free(ref_x);   // was leaked previously

  return EXIT_SUCCESS;
}
Esempio n. 17
0
//Main tracking Algorithm
//
// Processes one 8-bit single-channel frame and stores the results in the
// ring-buffer slot that follows `index`:
//   threshold -> external contours -> longest contour -> bounding box ->
//   Fourier fit -> curvature -> head/tail -> skeleton -> body angles ->
//   stage position -> velocities.
// Stage timings are written to profile[0..8].
//
// img       pixel buffer of imWidth*imHeight bytes; it is wrapped, not copied
// imWidth   frame width in pixels
// imHeight  frame height in pixels
// frameInd  frame counter; also used to derive the sample time
//
// If no contour is found the function returns early: the sample index/time
// for the slot have already been written, but `index` is not advanced.
void AnalysisModule::larvaFind(uchar * img, int imWidth, int imHeight, int frameInd){

	// Wrap the caller's buffer through the user-data constructor so the Mat's
	// internal start/end pointers describe the buffer (building the header
	// with NULL and patching .data afterwards leaves them unset).
	input = cv::Mat(imHeight,imWidth,CV_8UC1,img);
	if(output.rows != imHeight || output.cols != imWidth) output.create(imHeight,imWidth,CV_8UC1);
	int nextInd = (index+1)%sampleInd.size();

	//for Profiling
	tic();

	sampleInd[nextInd] = frameInd;
	sampleTime[nextInd] = frameInd * frameIntervalMS;

	//On first image, automatically determine threshold level using the Otsu method
	// Minimizes within group variance of thresholded classes.  Should land on the best boundary between backlight and larva
	if(index == -1) threshold = otsuThreshold(img,imWidth*imHeight);


	//Can speed this up by applying to a roi bounding box a bit larger than the previous one

	//Simple inverted binary threshold of the image
	cv::threshold(input,output,threshold,255,CV_THRESH_BINARY_INV);  profile[0] = toctic();
	//Detect Contours in the binary image
	cv::findContours(output,contours,CV_RETR_EXTERNAL,CV_CHAIN_APPROX_NONE);  profile[1] = toctic();

	//No contours detected
	if (contours.empty()) {
		return;
	}

	//find contour with largest perimeter length
	// (">=" guarantees maxInd is set once at least one contour exists)
	double maxLen = 0; int maxInd = -1;
	for(size_t i=0; i<contours.size(); i++){
		const double cLen = cv::arcLength(cv::Mat(contours[i]), false);
		if(cLen >= maxLen){ maxLen = cLen; maxInd = static_cast<int>(i); }
	}

	//Check to make sure that the perimeter is a larva by simple size analysis
	//(larva should have a certain perimeter length at 8.1um/pixel)
	// NOTE(review): no size check is actually performed here - confirm whether one is intended.
	cLarva[nextInd] = contours[maxInd];

	//calculate bounding box
	bBox[nextInd] = cv::boundingRect(cv::Mat(cLarva[nextInd])); profile[2] = toctic();

	//Calculate fourier coefficients
	fourierDecompose(cLarva[nextInd],nFourier,fourier[nextInd]);
	centroid[nextInd] = cv::Point2f(fourier[nextInd][0][AX],fourier[nextInd][0][AY]); profile[3] = toctic();

	//Reconstruct the estimated boundary
	fourierReconstruct(fourier[nextInd],cFit,fitRes); profile[4] = toctic();

	//Calculate Curvature
	perimeterCurvature(cFit,curve,fitRes/8); profile[5] = toctic();

	//Find head and tail based on curvature minimums (small angle = sharp region)
	findHeadTail(cFit,curve,headTail);
	head[nextInd] = headTail[0];
	tail[nextInd] = headTail[1]; profile[6] = toctic();

	//Calculate Skeleton
	skeletonCalc(cFit,skeleton,headTail,length[nextInd],neck[nextInd]); profile[7] = toctic();


	//Calculate bearing and head angle to bearing
	bodyAngles(tailBearingAngle[nextInd], headToBodyAngle[nextInd], head[nextInd], neck[nextInd], tail[nextInd]); profile[8] = toctic();


	//Capture stage position
	stagePos[nextInd] = cv::Point(gui->stageThread->xpos,gui->stageThread->ypos);

	//Keep track of entire history with a sample every 30 frames
	if((nextInd % 30) == 0){
		// Slot in the history ring buffer (computed once instead of three times)
		const size_t trackSlot = (fullTrackInd+1)%fullTrack.size();
		fullTrack[trackSlot].x = stagePos[nextInd].x/gui->stageThread->tickPerMM_X+centroid[nextInd].x*gui->camThread->umPerPixel/1000.0;
		fullTrack[trackSlot].y = stagePos[nextInd].y/gui->stageThread->tickPerMM_Y+centroid[nextInd].y*gui->camThread->umPerPixel/1000.0;
		fullTrackStim[trackSlot] = binStimMax;
		binStimMax = 0; //updated from stimThread
		fullTrackInd++;
	}


	//Calculate Velocities of head and tail
	calcVelocities(nextInd);

	//Spew out profiling info
	//for(int i=0; i<9; i++) qDebug("%d: %.4fms",i,profile[i]*1000);
	//qDebug("\n");

	index++;

}
/*
 * Forward-algorithm benchmark driver.
 *
 * Usage: ./ocl_fo -job number(0-5)
 *
 * The job number selects one of six configurations; each configuration is
 * described by three files (B, A, prior).  The model is loaded into an HMM,
 * run through run_opencl_fo(), and then the scaled forward recursion is
 * repeated on the CPU, whose log-likelihood is printed.
 *
 * Returns 0 on success; exits with status 1 on a missing/invalid option, an
 * out-of-range job number or an allocation failure.
 */
int main(int argc, char*argv[])
{
	// 6 config, each has three files to read
	char *files[] = {
		"../resources/config_32N32M_B.txt",
		"../resources/config_32N32M_A.txt",
		"../resources/config_32N32M_prior.txt",
		"../resources/config_64N64M_B.txt",
		"../resources/config_64N64M_A.txt",
		"../resources/config_64N64M_prior.txt",
		"../resources/config_128N128M_B.txt",
		"../resources/config_128N128M_A.txt",
		"../resources/config_128N128M_prior.txt",
		"../resources/config_256N256M_B.txt",
		"../resources/config_256N256M_A.txt",
		"../resources/config_256N256M_prior.txt",
		"../resources/config_512N512M_B.txt",
		"../resources/config_512N512M_A.txt",
		"../resources/config_512N512M_prior.txt",
		"../resources/config_1024N1024M_B.txt",
		"../resources/config_1024N1024M_A.txt",
		"../resources/config_1024N1024M_prior.txt",
	};

	// variables
	int i,j,k;
	int Len;
	int debug=0;

	int job=0;

	// select job from command line
	int argi;
	if (argc == 1)
	{
		puts("Please specify an option.\nUsage: \"./ocl_fo -job number(0-5) \"\n");
		exit(1);
	}

	for (argi = 1; argi < argc; ++argi)
	{
		if (!strcmp(argv[argi], "-job"))
		{
			need_argument(argc, argv,argi);
			job = atoi(argv[++argi]) ;
			continue;
		}

		if (argv[argi][0] == '-')
		{
			fatal("'%s' is not a valid command-line option.\n",argv[argi]);
		}
	}

	//printf("job = %d\n", job);
	// job indexes files[] as job*3 .. job*3+2, so both bounds must be checked;
	// a negative value would read before the start of the table.
	if( job < 0) {
		printf("Job number must not be negative! Exit Programm!\n");
		exit(1);
	}
	if( job  > 5) {
		printf("Job number exceeds the limit 5! Exit Programm!\n");
		exit(1);	
	}



	HMM *word;
	word = (HMM*)malloc(sizeof(HMM));
	if (word == NULL) {
		printf("Failed to allocate the HMM! Exit Programm!\n");
		exit(1);
	}

	// The line count of the prior file is used as both model dimensions.
	Len = getLineNum(files[job*3+2]);	
	printf("config_%dN_%dM\n",Len, Len);

	//read B,A,prior
	printf("Read the following files...");

	//read_config(files,job,B,A,prior,Len);
	read_config(word,files,job,Len,Len);

	printf("Done!\n");
	if( debug && job == 0 ) {
		puts("a");
		check_a(word);	
		puts("b");
		check_b(word);	
		puts("pri");
		check_pri(word);	
	}


	//----------------------
	// run forward algorithm
	//----------------------

	//---------------------------
	// GPU Version
	//---------------------------

	run_opencl_fo(word);


	//---------------------------
	// CPU Version
	//---------------------------

	puts("\n=>CPU");


	struct timeval cpu_timer;

	int N = word->nstates;
	int T = word->len;
	float *B = word->b;
	float *A = word->a;
	float *prior = word->pri;

	double tmp, alpha_sum;
	double log_likelihood;

	float *alpha; // NxT
	alpha = (float*)malloc(sizeof(float)*N*T);

	float *A_t; // NxN
	A_t = (float*)malloc(sizeof(float)*N*N);

	if (alpha == NULL || A_t == NULL) {
		printf("Failed to allocate work buffers! Exit Programm!\n");
		free(A_t);
		free(alpha);
		free_hmm(word);
		exit(1);
	}

	log_likelihood = 0.0;

	// start timing
	tic(&cpu_timer);

	// NOTE(review): A_t is allocated as NxN but T is passed as the last
	// dimension; this is only consistent when N == T - confirm.
	transpose(A, A_t, N, T);	



	for(j=0;j<T;++j)
	{
		alpha_sum = 0.0;

		if(j==0){ // initialize
			for(i=0;i<N;++i){
				alpha[i*T + 0] = B[i*T + 0] * prior[i];	
				alpha_sum += alpha[i*T + 0];
			}
		}else{ // move forward
			for(i=0;i<N;++i)
			{ // go through each state
				tmp = 0.0;	
				for(k=0;k<N;++k){
					tmp += A_t[i*N + k] * alpha[k*T + j-1];
				}

				alpha[i*T + j] = (float)tmp * B[i*T + j];
				alpha_sum += alpha[i*T + j];
			}
		}

		// scaling: normalise column j so the recursion does not underflow
		for(i=0;i<N;++i){			
			alpha[i*T + j] /= alpha_sum;
		}

		// the log of each scaling factor accumulates into the log-likelihood
		log_likelihood += log(alpha_sum);
	}
	// end timing
	toc(&cpu_timer);

	printf("log_likelihood = %lf\n", log_likelihood);


	// free memory

	free_hmm(word);
	free(A_t);
	free(alpha);


	return 0;
}
Esempio n. 19
0
/*
 * Test driver for the stack distance-transform routines.
 *
 * Only the last "#if 1" section is compiled: it reads a binary stack, runs
 * Stack_Bwdist_L_U16P on it (timed with tic()/ptoc()), and compares the
 * result with a stored reference stack, printing "Good." or
 * "Result unmatched.".  The two "#if 0" sections are earlier experiments
 * kept for reference.
 *
 * Returns 0 after the comparison has been performed (matching or not) and
 * 1 if an input could not be loaded or the transform produced no output.
 */
int main(int argc, char* argv[])
{
  int status = 0;

#if 0
  Stack *stack = Read_Stack("../data/binimg.tif");
 
  Set_Matlab_Path("/Applications/MATLAB74/bin/matlab");
  Stack *dist = Stack_Bwdist(stack);

  Stack* seeds = Stack_Local_Max(dist, NULL, STACK_LOCMAX_ALTER1);

  Stack *out = Scale_Double_Stack((double *) dist->array, stack->width, 
				  stack->height, stack->depth, GREY);

  Translate_Stack(out, COLOR, 1);

  Rgb_Color color;
  Set_Color(&color, 255, 0, 0);

  Stack_Label_Bwc(out, seeds, color);

  Print_Stack_Info(dist);

  Write_Stack("../data/test.tif", out);
#endif 

#if 0
  Stack *stack = Read_Stack("../data/benchmark/sphere_bw.tif");
  //Stack *stack = Read_Stack("../data/sphere_data.tif");
  //Stack_Not(stack, stack);

  int i;
  /*
  uint8 *array = stack->array + 512 * 600;
  for (i = 1; i < 512; i++) {
    array[i] = 1;
  }
  */
  //stack->depth = 50;
  
  /*
  long int *label = (long int *) malloc(sizeof(long int) * 
					Stack_Voxel_Number(stack));
  */
  tic();
  Stack *out = Stack_Bwdist_L_U16(stack, NULL, 0);
  uint16 *out_array = (uint16 *) out->array;

  printf("%llu\n", toc());

  //int *hist = Stack_Hist(out);
  //Print_Int_Histogram(hist);

  
  Stack *out2 = Stack_Bwdist_L(stack, NULL, NULL);
  float *out2_array = (float *) out2->array;

  int n = Stack_Voxel_Number(out);

  int t = 0;
  int x, y, z;
  for (i = 0; i < n; i++) {
    uint16 d2 = (uint16) out2_array[i];
    if (out_array[i] != d2){
      int area = stack->width * stack->height;
      STACK_UTIL_COORD(i, stack->width, area, x, y, z);
      printf("(%d %d %d)", x, y, z);
      printf("%d %d %d\n", out_array[i], d2, stack->array[i]);
      t++;
    }
  }

  printf("%d error\n", t);

#  if 0
  //Translate_Stack(out, GREY, 1);
  float *out_array = (float *) out->array;
  int i;
  int n = Stack_Voxel_Number(out);
  /*
  for (i = 0; i < n; i++) {
    out_array[i] = sqrt(out_array[i]);
  }
  Stack *out2 = Scale_Float_Stack((float *)out->array, out->width, out->height,
    out->depth, GREY);
  */
  
  Stack *out2 = Make_Stack(GREY, out->width, out->height, out->depth);
  for (i = 0; i < n; i++) {
    out2->array[i] = (uint8) round(sqrt(out_array[i]));
  }
  
  Write_Stack("../data/test.tif", out2);
#  endif
  
  Write_Stack("../data/test.tif", out);
  Kill_Stack(out);
  Kill_Stack(out2);
#endif

#if 1
  Stack *stack = Read_Stack("../data/system/29.tif");
  /* NOTE(review): assumes Read_Stack reports failure by returning NULL - confirm. */
  if (stack == NULL) {
    printf("Failed to read ../data/system/29.tif\n");
    return 1;
  }
  Print_Stack_Info(stack);

  tic();
  Stack *out = Stack_Bwdist_L_U16P(stack, NULL, 0);
  ptoc();

  Stack *golden = Read_Stack("../data/system/29_dist2.tif");

  if (out == NULL || golden == NULL) {
    printf("Failed to compute the distance map or to read the reference.\n");
    status = 1;
  } else {
    printf("Checking result ...\n");
    if (Stack_Identical(out, golden) == FALSE) {
      printf("Result unmatched.\n");
    } else {
      printf("Good.\n");
    }
  }

  /* Release the stacks with Kill_Stack, the same call the disabled code
     above uses for its results; previously all three were leaked. */
  if (golden != NULL) {
    Kill_Stack(golden);
  }
  if (out != NULL) {
    Kill_Stack(out);
  }
  Kill_Stack(stack);

#endif


  return status;
}
Esempio n. 20
0
/*
 * Solves a mixed-boolean problem (15 variables, 29 inequality rows, the
 * first five variables boolean) twice with ECOS_BB and compares entries
 * 5..n-1 of each solution against stored answers using float_eqls with the
 * solver's integer_tol.  Between the two solves the first eight entries of
 * h are overwritten through updateDataEntry_h.
 *
 * Prints both solutions and their runtimes; returns the accumulated
 * comparison flag (starts at 1 and is AND-ed with every comparison).
 */
int test_7(){
	idxint n = 15;
	idxint m = 29;
	pfloat Gpr[120] = {9999,-9999,9999,-9999,9999,-9999,9999,-9999,9999,-9999,-3.5008,3.5008,-0.4504,0.4504,-0.8764999999999999,0.8764999999999999,-0.1088,0.1088,1,1,-1,-8.4095,8.4095,-1.0107,1.0107,-1.686,1.686,-0.3525,0.3525,1,1,-1,-15.1987,15.1987,-2.0203,2.0203,-2.3932,2.3932,-0.6233,0.6233,1,1,-1,-22.5405,22.5405,-3.1862,3.1862,-2.8749,2.8749,-0.7923,0.7923,1,1,-1,-29.2639,29.2639,-4.3096,4.3096,-3.0189,3.0189,-0.8116,0.8116,1,1,-1,3.5008,-3.5008,0.4504,-0.4504,0.8764999999999999,-0.8764999999999999,0.1088,-0.1088,1,1,-1,8.4095,-8.4095,1.0107,-1.0107,1.686,-1.686,0.3525,-0.3525,1,1,-1,15.1987,-15.1987,2.0203,-2.0203,2.3932,-2.3932,0.6233,-0.6233,1,1,-1,22.5405,-22.5405,3.1862,-3.1862,2.8749,-2.8749,0.7923,-0.7923,1,1,-1,29.2639,-29.2639,4.3096,-4.3096,3.0189,-3.0189,0.8116,-0.8116,1,1,-1};
	idxint Gjc[16] = {0,2,4,6,8,10,21,32,43,54,65,76,87,98,109,120};
	idxint Gir[120] = {8,9,10,11,12,13,14,15,16,17,0,1,2,3,4,5,6,7,8,18,19,0,1,2,3,4,5,6,7,10,18,20,0,1,2,3,4,5,6,7,12,18,21,0,1,2,3,4,5,6,7,14,18,22,0,1,2,3,4,5,6,7,16,18,23,0,1,2,3,4,5,6,7,9,18,24,0,1,2,3,4,5,6,7,11,18,25,0,1,2,3,4,5,6,7,13,18,26,0,1,2,3,4,5,6,7,15,18,27,0,1,2,3,4,5,6,7,17,18,28};

	pfloat cost[15] = {0,0,0,0,0,0.127,0.9134,0.6324,0.0975,0.2785,0.873,0.0866,0.3676,0.9025,0.7215};
	pfloat rhs_h[29] = {-729.9349999999999,789.9349999999999,-71.015,131.015,-89.66,149.66,-1.165,61.165,9999,0,9999,0,9999,0,9999,0,9999,0,150,0,0,0,0,0,0,0,0,0,0};

	idxint bool_vars[5] = {0,1,2,3,4};

	/* Stored answers for the first and second solve */
	pfloat expected_first[15] = {0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,32.383266,0.00,0.00,0.00,
		0.00,0.00,0.00};
	pfloat expected_second[15] = {0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,86.798858,
		0.000000,0.000000,0.000000};

	/* Replacement values written to h[0..7] before the second solve */
	pfloat updated_h[8] = {789.935, -729.935, 131.015, -71.015, 149.66, -89.66, 61.165, -1.165};

	idxint k, status, ok;
	pfloat elapsed;

	timer stopwatch;

	ecos_bb_pwork* prob = ECOS_BB_setup(
		n, m, 0, 
		m, 0, NULL, 0,
		Gpr, Gjc, Gir,
		NULL, NULL, NULL,
		cost, rhs_h, NULL, 5, bool_vars, 0, NULL, NULL);
	prob->stgs->verbose = 0;

	/* ---- first solve ---- */
	tic(&stopwatch);
	status = ECOS_BB_solve(prob);
	elapsed = toc(&stopwatch);

	ok = 1;

	printf("Soln: ");
	for (k = 5; k < n; ++k){
		ok &= float_eqls(expected_first[k], prob->x[k], prob->stgs->integer_tol);
		printf("%f ", prob->x[k]);
	}
	printf("\nRuntime: %f\n", elapsed);

	/* ---- modify h, then solve again ---- */
	for (k = 0; k < 8; ++k){
		updateDataEntry_h(prob, k, updated_h[k]);
	}

	tic(&stopwatch);
	status = ECOS_BB_solve(prob);
	elapsed = toc(&stopwatch);

	printf("Soln2: ");
	for (k = 5; k < n; ++k){
		ok &= float_eqls(expected_second[k], prob->x[k], prob->stgs->integer_tol);
		printf("%f ", prob->x[k]);
	}
	printf("\nRuntime: %f\n", elapsed);

	ECOS_BB_cleanup(prob, 0);

	return ok;
}
Esempio n. 21
0
 Timer() {
     tic();
     TicksPerSeconds =
         clock::duration::period::den / clock::duration::period::num;
 }
/* Cholesky update/downdate.
 *
 * Factorizes Prob->C, solves C*x = b, then
 *   1. builds a sparse column W from column n/2 of L (randomly scaled),
 *   2. updates the factor to L*L'+W*W' and solves again,
 *   3. refactorizes E = C + (P'W)*(P'W)' from scratch for comparison,
 *   4. downdates the factor back to L*L'-W*W' and solves once more.
 * Timings and residuals are printed after each stage.
 *
 * Returns 0 immediately when Prob is NULL, has no matrix, is not flagged
 * symmetric, or is empty.  Otherwise returns the value of done3 (), which is
 * called with status 0 on any allocation/factorization failure and with
 * status 1 on success; done3 () receives every workspace pointer. */
int demo3 (problem *Prob)
{
    cs *A, *C, *W = NULL, *WW, *WT, *E = NULL, *W2 ;
    int n, k, *Li, *Lp, *Wi, *Wp, p1, p2, *p = NULL, ok ;
    double *b, *x, *resid, *y = NULL, *Lx, *Wx, s,  t, t1 ;
    css *S = NULL ;
    csn *N = NULL ;
    /* Single up-front guard.  The original repeated the sym/empty test a
     * second time after this one, which could never fire. */
    if (!Prob || !Prob->A || !Prob->sym || Prob->A->n == 0) return (0) ;
    A = Prob->A ; C = Prob->C ; b = Prob->b ; x = Prob->x ; resid = Prob->resid;
    n = A->n ;
    rhs (x, b, n) ;                             /* compute right-hand side */
    printf ("\nchol then update/downdate ") ;
    print_order (1) ;
    y = cs_malloc (n, sizeof (double)) ;
    t = tic () ;
    S = cs_schol (1, C) ;                       /* symbolic Chol, amd(A+A') */
    printf ("\nsymbolic chol time %8.2f\n", toc (t)) ;
    t = tic () ;
    N = cs_chol (C, S) ;                        /* numeric Cholesky */
    printf ("numeric  chol time %8.2f\n", toc (t)) ;
    /* all three are checked together, after both timings were printed */
    if (!S || !N || !y) return (done3 (0, S, N, y, W, E, p)) ;
    t = tic () ;
    cs_ipvec (S->pinv, b, y, n) ;               /* y = P*b */
    cs_lsolve (N->L, y) ;                       /* y = L\y */
    cs_ltsolve (N->L, y) ;                      /* y = L'\y */
    cs_pvec (S->pinv, y, x, n) ;                /* x = P'*y */
    printf ("solve    chol time %8.2f\n", toc (t)) ;
    printf ("original: ") ;
    print_resid (1, C, x, b, resid) ;           /* print residual */
    k = n/2 ;                                   /* construct W  */
    W = cs_spalloc (n, 1, n, 1, 0) ;
    if (!W) return (done3 (0, S, N, y, W, E, p)) ;
    Lp = N->L->p ; Li = N->L->i ; Lx = N->L->x ;
    Wp = W->p ; Wi = W->i ; Wx = W->x ;
    Wp [0] = 0 ;
    p1 = Lp [k] ;
    Wp [1] = Lp [k+1] - p1 ;                    /* W has the pattern of L(:,k) */
    s = Lx [p1] ;                               /* first stored entry of L(:,k) */
    srand (1) ;                                 /* fixed seed: repeatable W */
    for ( ; p1 < Lp [k+1] ; p1++)
    {
        p2 = p1 - Lp [k] ;
        Wi [p2] = Li [p1] ;
        Wx [p2] = s * rand () / ((double) RAND_MAX) ;
    }
    t = tic () ;
    ok = cs_updown (N->L, +1, W, S->parent) ;   /* update: L*L'+W*W' */
    t1 = toc (t) ;
    printf ("update:   time: %8.2f\n", t1) ;
    if (!ok) return (done3 (0, S, N, y, W, E, p)) ;
    t = tic () ;
    cs_ipvec (S->pinv, b, y, n) ;               /* y = P*b */
    cs_lsolve (N->L, y) ;                       /* y = L\y */
    cs_ltsolve (N->L, y) ;                      /* y = L'\y */
    cs_pvec (S->pinv, y, x, n) ;                /* x = P'*y */
    t = toc (t) ;                               /* t now holds the solve time */
    p = cs_pinv (S->pinv, n) ;
    W2 = cs_permute (W, p, NULL, 1) ;           /* E = C + (P'W)*(P'W)' */
    WT = cs_transpose (W2,1) ;
    WW = cs_multiply (W2, WT) ;
    cs_spfree (WT) ;
    cs_spfree (W2) ;
    E = cs_add (C, WW, 1, 1) ;
    cs_spfree (WW) ;
    if (!E || !p) return (done3 (0, S, N, y, W, E, p)) ;
    printf ("update:   time: %8.2f (incl solve) ", t1+t) ;
    print_resid (1, E, x, b, resid) ;           /* print residual */
    cs_nfree (N) ;                              /* clear N */
    t = tic () ;
    N = cs_chol (E, S) ;                        /* numeric Cholesky */
    if (!N) return (done3 (0, S, N, y, W, E, p)) ;
    cs_ipvec (S->pinv, b, y, n) ;               /* y = P*b */
    cs_lsolve (N->L, y) ;                       /* y = L\y */
    cs_ltsolve (N->L, y) ;                      /* y = L'\y */
    cs_pvec (S->pinv, y, x, n) ;                /* x = P'*y */
    t = toc (t) ;
    printf ("rechol:   time: %8.2f (incl solve) ", t) ;
    print_resid (1, E, x, b, resid) ;           /* print residual */
    t = tic () ;
    ok = cs_updown (N->L, -1, W, S->parent) ;   /* downdate: L*L'-W*W' */
    t1 = toc (t) ;
    if (!ok) return (done3 (0, S, N, y, W, E, p)) ;
    printf ("downdate: time: %8.2f\n", t1) ;
    t = tic () ;
    cs_ipvec (S->pinv, b, y, n) ;               /* y = P*b */
    cs_lsolve (N->L, y) ;                       /* y = L\y */
    cs_ltsolve (N->L, y) ;                      /* y = L'\y */
    cs_pvec (S->pinv, y, x, n) ;                /* x = P'*y */
    t = toc (t) ;
    printf ("downdate: time: %8.2f (incl solve) ", t1+t) ;
    print_resid (1, C, x, b, resid) ;           /* print residual */
    return (done3 (1, S, N, y, W, E, p)) ;
} 
Esempio n. 23
0
/**
 * Builds the AS preconditioner and all of its work vectors and operators.
 *
 * The whole setup is timed between tic() and toc().
 *
 * @param t       preconditioner type name, forwarded to setType()
 * @param Xh      product space; sub-space 0 becomes M_Vh, sub-space 1 M_Qh
 * @param Mh      space on which the coefficients M_mu and M_er are defined
 * @param bcFlags boundary conditions, stored in M_bcFlags
 * @param p       prefix, stored in M_prefix
 * @param Pm      matrix forwarded to setMatrix()
 * @param k       stored in M_k; M_g is initialised to 1 - k*k
 */
PreconditionerAS<space_type,coef_space_type>::PreconditionerAS( std::string t,
                                                                space_ptrtype Xh,
                                                                coef_space_ptrtype Mh,
                                                                BoundaryConditions bcFlags,
                                                                std::string const& p,
                                                                sparse_matrix_ptrtype Pm,
                                                                double k )
    :
        M_type( AS ),
        M_Xh( Xh ),
        M_Vh(Xh->template functionSpace<0>() ),
        M_Qh(Xh->template functionSpace<1>() ),
        M_Mh( Mh ),
        M_Vh_indices( M_Vh->nLocalDofWithGhost() ),
        M_Qh_indices( M_Qh->nLocalDofWithGhost() ),
        M_Qh3_indices( Dim ),
        A(backend()->newVector(M_Vh)),
        B(backend()->newVector(M_Vh)),
        C(backend()->newVector(M_Vh)),
        M_r(backend()->newVector(M_Vh)),
        M_r_t(backend()->newVector(M_Vh)),
        M_uout(backend()->newVector(M_Vh)),
        M_diagPm(backend()->newVector(M_Vh)),
        //M_t(backend()->newVector(M_Vh)),
        U( M_Vh, "U" ),
        M_mu(M_Mh, "mu"),
        M_er(M_Mh, "er"),
        M_bcFlags( bcFlags ),
        M_prefix( p ),
        M_k(k),
        M_g(1.-k*k)
{
    tic();
    LOG(INFO) << "[PreconditionerAS] setup starts";
    this->setMatrix( Pm ); // Needed only if worldComm > 1

    // QH3 : Lagrange vectorial space type
    M_Qh3 = lag_v_space_type::New(Xh->mesh());

    M_qh3_elt = M_Qh3->element();
    M_qh_elt = M_Qh->element();
    M_vh_elt = M_Vh->element();

    // Block 11.1
    M_s = backend()->newVector(M_Qh3);
    M_y = backend()->newVector(M_Qh3);

    // Block 11.2
    M_z = backend()->newVector(M_Qh);
    M_t = backend()->newVector(M_Qh);

    // Create the interpolation and keep only the matrix
    auto pi_curl = I(_domainSpace=M_Qh3, _imageSpace=M_Vh);
    auto Igrad   = Grad( _domainSpace=M_Qh, _imageSpace=M_Vh);

    M_P = pi_curl.matPtr();
    M_C = Igrad.matPtr();

    // NOTE(review): M_Ct is allocated on (M_Qh3, M_Vh) although M_C comes from
    // an operator whose domain space is M_Qh -- confirm whether transpose()
    // re-sizes its target or whether (M_Qh, M_Vh) was intended here.
    M_Pt = backend()->newMatrix(M_Qh3,M_Vh);
    M_Ct = backend()->newMatrix(M_Qh3,M_Vh);

    M_P->transpose(M_Pt,MATRIX_TRANSPOSE_UNASSEMBLED);
    M_C->transpose(M_Ct,MATRIX_TRANSPOSE_UNASSEMBLED);

    LOG(INFO) << "size of M_C = " << M_C->size1() << ", " << M_C->size2() << std::endl;
    LOG(INFO) << "size of M_P = " << M_P->size1() << ", " << M_P->size2() << std::endl;

    // Create vector of indices to create subvectors/matrices
    std::iota( M_Vh_indices.begin(), M_Vh_indices.end(), 0 ); // Vh indices in Xh
    std::iota( M_Qh_indices.begin(), M_Qh_indices.end(), M_Vh->nLocalDofWithGhost() ); // Qh indices in Xh

    // "Components" of Qh3: bucket every dof index by the component it belongs to
    auto Qh3_dof_begin = M_Qh3->dof()->dofPointBegin();
    auto Qh3_dof_end = M_Qh3->dof()->dofPointEnd();

    for( auto it = Qh3_dof_begin; it != Qh3_dof_end; ++it )
    {
        const int dof_comp = it->template get<2>(); //Component
        const int dof_idx = it->template get<1>(); //Global index
        M_Qh3_indices[dof_comp].push_back( dof_idx );
    }

    // Subvectors for M_y (per component)
    M_y1 = M_y->createSubVector(M_Qh3_indices[0], true);
    M_y2 = M_y->createSubVector(M_Qh3_indices[1], true);
#if FEELPP_DIM == 3
    M_y3 = M_y->createSubVector(M_Qh3_indices[2], true);
#endif
    
    // Subvectors for M_s (per component). These were previously extracted
    // from M_y (copy-paste slip); they now come from M_s as the section
    // title and the member names state.
    M_s1 = M_s->createSubVector(M_Qh3_indices[0], true);
    M_s2 = M_s->createSubVector(M_Qh3_indices[1], true);
#if FEELPP_DIM == 3
    M_s3 = M_s->createSubVector(M_Qh3_indices[2], true);
#endif

    this->setType ( t );
    toc( "[PreconditionerAS] setup done ", FLAGS_v > 0 );
}
/* Elapsed time between an earlier tic () reading `t` and now, in whatever
 * unit tic () reports; the result is clamped from below at 0 via CS_MAX. */
static double toc (double t)
{
    double now = tic () ;
    double elapsed = now - t ;
    return (CS_MAX (0, elapsed)) ;
}
Esempio n. 25
0
/**
 * Iterates the ACWE for several iterations using 1 or more bands
 * @param numIterations
 * @param useAllBands
 */
void ActiveContours::iterate(int numIterations, bool useAllBands) {

	//Default origin and region to copy the entire region of the 3D texture
	dout << "Initializing origin and region with " << width << "," << height << "," << depth << endl;
	origin.push_back(0); origin.push_back(0); origin.push_back(0);
	region.push_back(width); region.push_back(height); region.push_back(depth);
	
    cl::CommandQueue* queue = clMan.getQueue();

    // Only used if we are printing the buffers. It defines the slides that we are going to print
    int* slidesToPrint = new int[3];
    slidesToPrint[0] = 9;
    slidesToPrint[1] = 10; 
    slidesToPrint[2] = 11;
    int sizeOfArray = 3;

    try {
		err = queue->enqueueAcquireGLObjects(&cl_textures, NULL, &evAcOGL);
		queue->finish();
		
		if (currIter == 0) {
			//Copying img_in_gl to buf_img_in
			cl::Event evCopyInGlToIn;

            tic(tm_copyGlToBuffer);
			dout << "Copying input texture (img_in_gl) to cl_buffer buf_img_in" << endl;
			vecEvPrevTextToBuffer.push_back(evAcOGL);
			queue->enqueueCopyImageToBuffer(img_in_gl, buf_img_in, origin, 
								region, (size_t)0, &vecEvPrevTextToBuffer,&evCopyInGlToIn);

            toc(tm_copyGlToBuffer);

			if (WRITE) {//Writes the init image on the temporal folder
                // Sets the precision of cout to 2
                cout << std::setprecision(3) << endl;
				vecEvPrevPrinting.push_back(evCopyInGlToIn);
				res = queue->enqueueReadBuffer(buf_img_in, CL_TRUE, 0,
						sizeof (float) *width*height*depth, (void*) arr_img_out, &vecEvPrevPrinting, 0);
				
				bool normalized_values = 1;
				dout << "Done copying texture to buffer.... writing result to images/temp_results/InputImage/" << endl;
				ImageManager::write3DImage((char*) "images/temp_results/InputImage/",
						arr_img_out, width, height,depth, normalized_values);

				/* Just to test that the TEXTURE is being copied to img_in_gl correctly*/
				/*
				int rowSize = sizeof(float)*width;
				res = queue->enqueueReadImage(img_in_gl, CL_FALSE, origin, region, (size_t) rowSize ,
						(size_t)  (rowSize*height), (void*) arr_img_out, &vecEvPrevPrinting, 0);
				
				queue->finish(); //Finish everything before the iterations
				
				ImageManager::write3DImage((char*) "images/temp_results/3dTexture/",
						 arr_img_out, width, height,depth,  normalized_values);

				queue->finish(); //Finish everything before the iterations
				dout << "Writing done!!!!" << endl;
				*/
			}
			vecEvPrevAvgInOut.push_back(evCopyInGlToIn); //For the first iteration we need to wait to copy the texture
		}//If iter == 0

        //Compute the last iteration of this 'round'
        int lastIter = min(currIter + numIterations, totalIterations);
		
        // -------------------- MAIN Active Countours iteration
        for (; currIter < lastIter; currIter++) {
			
            if (currIter % ITER == 0) {
                dout << endl << endl << "******************** Iter " << currIter << " ******************** " << endl;
            }

            tic(tm_avgInOut);
            evAvgInOut_SmoothPhi = compAvgInAndOut(buf_phi, buf_img_in, vecEvPrevAvgInOut);
            toc(tm_avgInOut);

            if (WRITE) {// Prints the previous values of phi
                cout << endl << "----------- Previous Phi ------------" << endl;
				vecEvPrevPrinting.clear();
				vecEvPrevPrinting.push_back(evAvgInOut_SmoothPhi);
                printBuffer(buf_phi, width*height, width*height*9, width, height, vecEvPrevPrinting);
                printBuffer(buf_phi, width*height, width*height*16, width, height, vecEvPrevPrinting);
                printBuffer(buf_phi, width*height, width*height*28, width, height, vecEvPrevPrinting);
			}		

            if (WRITE) {// Gets the final average values obtained
                cout << endl << "----------- Final Average  (avg out, avg in, count out, count in,  sum out, sum in)------------" << endl;
				vecEvPrevPrinting.clear();
				vecEvPrevPrinting.push_back(evAvgInOut_SmoothPhi);
				printBuffer(buf_avg_in_out, 6, vecEvPrevPrinting);
			}
			
            //It computes the curvatue and F values, the curvature is stored on the first layer
            //and the F values are stored on the second layer
            tic(tm_curvature);
            evCurvature_copySmoothToPhi = compCurvature(vecEvPrevCurvature);
            toc(tm_curvature);
			
            if (WRITE) {
                cout << "--------------------Displaying the value of curvature..." << endl;
				vecEvPrevPrinting.clear();
				vecEvPrevPrinting.push_back(evCurvature_copySmoothToPhi);
				//printBuffer(buf_curvature, 10, vecEvPrevPrinting);
                printBufferArray(buf_curvature, width*height, width, height, vecEvPrevPrinting, slidesToPrint, sizeOfArray);

            }
			
            // Computing the maximum F value (max value of:
            // pow( curr_img - avgIn, 2) - pow( curr_img - avgOut, 2))
            
            vecEvPrevF.push_back(evAvgInOut_SmoothPhi);//Wait to compute the average in and out
            tic(tm_F);
			evF = compF(vecEvPrevF);
            toc(tm_F);

            if (WRITE) {
                cout << "--------------------Displaying the value of F ..." << endl;
				vecEvPrevPrinting.clear();
				vecEvPrevPrinting.push_back(evF);
                //printBuffer(buf_F, width*height, 0, width, height, vecEvPrevPrinting);
                printBufferArray(buf_F, width*height, width, height, vecEvPrevPrinting, slidesToPrint, sizeOfArray);
            }

            //Computing maximum value of F
            vecEvPrevMaxF.push_back(evF);
            tic(tm_maxF);
            evMaxF = compReduce(buf_F, buf_max_F, true, vecEvPrevMaxF); // Use abs value 
            toc(tm_maxF);

            if (WRITE) {
                cout << "--------------------Displaying max value of F ..." << endl;
                vecEvPrevPrinting.clear();
                vecEvPrevPrinting.push_back(evF);
                printBuffer(buf_max_F, 1, vecEvPrevPrinting);
            }

            vecEvPrevDphiDt.push_back(evCurvature_copySmoothToPhi);// Wait for curvature
            vecEvPrevDphiDt.push_back(evMaxF);// Wait for max F -> and F
            tic(tm_DphiDt);
            evDphiDt_MaxDphiDt = compDphiDt(vecEvPrevDphiDt);
            toc(tm_DphiDt);

            if (WRITE) {
                cout << "--------------------Displaying values of Dphi/dt ..." << endl;
                vecEvPrevPrinting.clear();
                vecEvPrevPrinting.push_back(evDphiDt_MaxDphiDt);
                //printBuffer(buf_dphidt, width*height, 0, width, height, vecEvPrevPrinting);
                printBufferArray(buf_dphidt, width*height, width, height, vecEvPrevPrinting, slidesToPrint, sizeOfArray);
            }

            vecEvPrevMaxDphiDt.push_back(evDphiDt_MaxDphiDt);
            tic(tm_maxDphiDt);
            evDphiDt_MaxDphiDt = compReduce(buf_dphidt, buf_max_dphidt, false, vecEvPrevMaxDphiDt ); 
            toc(tm_maxDphiDt);

            if (WRITE) {
                cout << "--------------------Displaying Max Dphi/dt ..." << endl;
                vecEvPrevPrinting.clear();
                vecEvPrevPrinting.push_back(evDphiDt_MaxDphiDt);
                printBuffer(buf_max_dphidt, 1, vecEvPrevPrinting);
            }

            vecEvPrevNewPhi.push_back(evDphiDt_MaxDphiDt);
            tic(tm_phi);
            evSDF_newPhi = compNewPhi(vecEvPrevNewPhi); //This phi without smooth term
            toc(tm_phi);

            if (WRITE) {
                cout << "--------------------Displaying values of new phi ..." << endl;
                vecEvPrevPrinting.clear();
                vecEvPrevPrinting.push_back(evSDF_newPhi);
                //printBuffer(buf_phi, width*height, width*height*7, width, height, vecEvPrevPrinting);
                printBufferArray(buf_phi, width*height, width, height, vecEvPrevPrinting, slidesToPrint, sizeOfArray);
            }

            vecEvPrevSmPhi.push_back(evSDF_newPhi);
            tic(tm_smoothPhi);
            evAvgInOut_SmoothPhi = smoothPhi(vecEvPrevSmPhi, dt_smooth); //This phi without smooth term
            toc(tm_smoothPhi);

            if (WRITE) {
                cout << "--------------------Displaying values of smoothed phi ..." << endl;
                vecEvPrevPrinting.clear();
                vecEvPrevPrinting.push_back(evAvgInOut_SmoothPhi);
                printBufferArray(buf_smooth_phi, width*height, width, height, vecEvPrevPrinting, slidesToPrint, sizeOfArray);
            }

            vecEvPrevCopySmoothToPhi.push_back(evAvgInOut_SmoothPhi);
            tic(tm_copySmoothPhi);
            res = queue->enqueueCopyBuffer(buf_smooth_phi, buf_phi,
                    (size_t)0, (size_t) 0,  (size_t) sizeof (float) *buf_size,
                    &vecEvPrevCopySmoothToPhi, &evCurvature_copySmoothToPhi);
            toc(tm_copySmoothPhi);

            vecEvPrevAvgInOut.push_back(evCurvature_copySmoothToPhi);

            vecEvPrevAvgInOut.clear();
            vecEvPrevAvg.clear();
            vecEvPrevCurvature.clear();
            vecEvPrevF.clear();
            vecEvPrevMaxF.clear();
            vecEvPrevDphiDt.clear();
            vecEvPrevMaxDphiDt.clear();
            vecEvPrevNewPhi.clear();
            vecEvPrevSmPhi.clear();
            vecEvPrevSDF.clear();
            vecEvPrevPrinting.clear();
            vecEvPrevTextToBuffer.clear();

        }//Main loop

        queue->finish(); //Be sure we finish everything
        dout << "Done ..................." << endl;

        if (WRITE) {
            cout << "--------------------Writing new PHI as images in images/temp_results/newPhi/" << endl;

            vecEvPrevPrinting.push_back(evAvgInOut_SmoothPhi);
            //Reads from buf_phi (GPU) and writes to arr_img_out (Host)
            res = queue->enqueueReadBuffer(buf_smooth_phi, CL_TRUE, 0, sizeof (float) *buf_size, 
                    (void*) arr_img_out, &vecEvPrevPrinting, 0);
            // Prints image into png file
            ImageManager::write3DImage((char*) "images/temp_results/newPhi/", arr_img_out, width, height, depth, 0);
        }

        dout << " Copying back everything to OpenGL ... " << endl;
        vecEvPrevCopyPhiBackToGL.push_back(evAvgInOut_SmoothPhi);
        tic(tm_bufToGL);
        queue->enqueueCopyBufferToImage(buf_smooth_phi, img_phi_gl, (size_t)0, origin, 
                region, &vecEvPrevCopyPhiBackToGL, &evAcOGL);
        toc(tm_bufToGL);
        queue->finish(); //Be sure we finish everything
        err = queue->enqueueReleaseGLObjects(&cl_textures, NULL, 0);

    } catch (cl::Error ex) {
        cout << "EXCEPTION" << endl;
        clMan.printError(ex);
        return;
    }
}
Esempio n. 26
0
File: micro.c Progetto: awf/Coconut
/*
 * Micro-benchmark driver.
 *
 * Usage: <program> <Number of Iterations>
 *
 * Fills three DIM-sized vectors with values from dist(), then runs the kernel
 * selected at compile time (ADD3, DOT or CROSS; optionally the DPS variant,
 * optionally with the DPS storage hoisted out of the loop via HOIST) N times
 * and prints the accumulated total together with the elapsed time divided by
 * N.
 *
 * Exits with status 1 when the argument count is wrong or the iteration
 * count is not a positive integer.
 */
int main(int argc, char** argv)
{
	if(argc != 2) {
		printf("You should use the following format for running this program: %s <Number of Iterations>\n", argv[0]);
		exit(1);
	}
	int N = atoi(argv[1]);
	/* atoi() yields 0 for non-numeric input; N <= 0 would make the
	 * "time per call" report below divide by zero (or be meaningless). */
	if(N <= 0) {
		printf("The number of iterations must be a positive integer, got: %s\n", argv[1]);
		exit(1);
	}
	int rng = 42;
    srand(rng);

	array_number_t vec1 = vector_fill(DIM, 0.0);
	array_number_t vec2 = vector_fill(DIM, 0.0);
	array_number_t vec3 = vector_fill(DIM, 0.0);
	for(int i=0; i<DIM; i++) {
		vec1->arr[i] = dist(rng);
		vec2->arr[i] = dist(rng);
		vec3->arr[i] = dist(rng);
	}

#ifdef HOIST
	/* One storage block shared by every iteration. */
	storage_t s = storage_alloc(VECTOR_ALL_BYTES(DIM));
#endif
	
    timer_t t = tic();

    double total = 0;
    for (int count = 0; count < N; ++count) {
        /* Perturb the inputs on every iteration so each call sees new data.
         * NOTE(review): index 10 assumes DIM > 10 -- confirm. */
        vec1->arr[0] += 1.0 / (2.0 + vec1->arr[0]);
        vec2->arr[10] += 1.0 / (2.0 + vec2->arr[10]);
#ifdef DPS
#ifndef HOIST
	/* Non-hoisted DPS: allocate (and below, free) storage per iteration. */
	storage_t s = storage_alloc(VECTOR_ALL_BYTES(DIM));
#endif
#endif
#ifdef ADD3
    #ifdef DPS
        total += vectorSum(TOP_LEVEL_linalg_vectorAdd3_dps(s, vec1, vec2, vec3, DIM, DIM, DIM));
	#else
        total += vectorSum(TOP_LEVEL_linalg_vectorAdd3(vec1, vec2, vec3));
	#endif
#elif DOT
	#ifdef DPS
        total += TOP_LEVEL_linalg_dot_prod_dps(s, vec1, vec2, DIM, DIM);
	#else
        total += TOP_LEVEL_linalg_dot_prod(vec1, vec2);
	#endif
#elif CROSS
    #ifdef DPS
        total += vectorSum(TOP_LEVEL_linalg_cross_dps(s, vec1, vec2, DIM, DIM));
	#else
        total += vectorSum(TOP_LEVEL_linalg_cross(vec1, vec2));
	#endif
#endif
#ifdef DPS
#ifndef HOIST
	storage_free(s, VECTOR_ALL_BYTES(DIM));
#endif
#endif
    }
    float elapsed = toc2(t);
#ifdef HOIST
	/* Release the hoisted storage; done after toc2() so it is not timed. */
	storage_free(s, VECTOR_ALL_BYTES(DIM));
#endif
    printf("total =%f, time per call = %f ms\n", total, elapsed / (double)(N));
	return 0;
}
/*
 * Decodes `file` through the GStreamer pipeline held by `ma`.
 *
 * Resets the per-run counters in `ma`, sets up GStreamer and the
 * libsamplerate state, starts playback and then polls the pipeline bus
 * (100 ms per poll) until an ERROR or EOS message arrives.
 *
 * Outputs:
 *   *ret    0 on normal completion, -1 when initialisation reports a
 *           negative file rate or the bus delivers an error, -2 when
 *           ma->invalidate is set once decoding has stopped
 *   *frames ma->curhop (0 on the init-failure and invalidate paths)
 *   *size   ma->winsize/2 + 1 (0 on the init-failure and invalidate paths)
 *
 * Returns NULL when initialisation fails, otherwise ma->out.
 */
float*
mirageaudio_decode(MirageAudio *ma, const gchar *file, int *frames, int* size, int* ret)
{
    GstBus *msg_bus;
    gboolean keep_polling;

    tic();

    /* Fresh state for this decoding run. */
    ma->fftwsamples = 0;
    ma->curhop = 0;
    ma->cursample = 0;
    ma->quit = FALSE;

    g_mutex_lock(ma->decoding_mutex);
    ma->invalidate = FALSE;
    g_mutex_unlock(ma->decoding_mutex);

    /* Gstreamer setup */
    mirageaudio_initgstreamer(ma, file);
    if (ma->filerate < 0) {
        /* A negative rate is treated as a failed setup: report and bail out. */
        *size = 0;
        *frames = 0;
        *ret = -1;

        /* Gstreamer cleanup */
        gst_element_set_state(ma->pipeline, GST_STATE_NULL);
        gst_object_unref(GST_OBJECT(ma->pipeline));

        return NULL;
    }

    /* libsamplerate initialization */
    ma->src_data.src_ratio = (double)ma->rate/(double)ma->filerate;
    ma->src_data.input_frames = 0;
    ma->src_data.end_of_input = 0;
    src_reset(ma->src_state);
    g_print("libmirageaudio: rate=%d, resampling=%f\n", ma->filerate, ma->src_data.src_ratio);

    /* decode... */
    gst_element_set_state(ma->pipeline, GST_STATE_PLAYING);
    g_print("libmirageaudio: decoding %s\n", file);

    msg_bus = gst_pipeline_get_bus(GST_PIPELINE(ma->pipeline));
    keep_polling = TRUE;
    *ret = 0;
    while (keep_polling) {
        GstMessage *msg = gst_bus_timed_pop_filtered(msg_bus, GST_MSECOND*100,
                GST_MESSAGE_ERROR | GST_MESSAGE_EOS);

        /* A NULL result means nothing matched within the timeout: poll again. */
        if (msg != NULL) {
            const GstMessageType msg_type = GST_MESSAGE_TYPE(msg);

            if (msg_type == GST_MESSAGE_ERROR) {
                GError *gerr;
                gchar *dbg_info;

                gst_message_parse_error(msg, &gerr, &dbg_info);
                g_print("libmirageaudio: error: %s\n", gerr->message);
                g_error_free(gerr);
                g_free(dbg_info);
                ma->curhop = 0;
                keep_polling = FALSE;
                *ret = -1;
            } else if (msg_type == GST_MESSAGE_EOS) {
                g_print("libmirageaudio: EOS Message received\n");
                keep_polling = FALSE;
            }

            gst_message_unref(msg);
        }
    }
    gst_object_unref(msg_bus);

    g_mutex_lock(ma->decoding_mutex);

    /* Gstreamer cleanup */
    gst_element_set_state(ma->pipeline, GST_STATE_NULL);
    gst_object_unref(GST_OBJECT(ma->pipeline));

    toc();

    if (!ma->invalidate) {
        *size = ma->winsize/2 + 1;
        *frames = ma->curhop;
    } else {
        /* The run was invalidated: report no data. */
        *size = 0;
        *frames = 0;
        *ret = -2;
    }

    g_mutex_unlock(ma->decoding_mutex);

    g_print("libmirageaudio: frames=%d (maxhops=%d), size=%d\n", *frames, ma->hops, *size);
    return ma->out;
}
Esempio n. 28
0
/*
 * Sets up all data structures needed.
 * Replace by codegen
 */
pwork* ECOS_setup(idxint n, idxint m, idxint p, idxint l, idxint ncones, idxint* q,
                   pfloat* Gpr, idxint* Gjc, idxint* Gir,
                   pfloat* Apr, idxint* Ajc, idxint* Air,
                   pfloat* c, pfloat* h, pfloat* b)
{
    idxint i, j, k, cidx, conesize, lnz, amd_result, nK, *Ljc, *Lir, *P, *Pinv, *Sign;
    pwork* mywork;
	double Control [AMD_CONTROL], Info [AMD_INFO];		
	pfloat rx, ry, rz, *Lpr;
	spmat *At, *Gt, *KU;

#if PROFILING > 0
	timer tsetup;
#endif

#if PROFILING > 1
	timer tcreatekkt;
	timer tmattranspose;
	timer tordering;
#endif

#if PROFILING > 0
	tic(&tsetup);
#endif
   
#if PRINTLEVEL > 2
	PRINTTEXT("\n");		
	PRINTTEXT("  *******************************************************************************\n");
	PRINTTEXT("  * ECOS: Embedded Conic Solver - Sparse Interior Point method for SOCPs        *\n");
	PRINTTEXT("  *                                                                             *\n");
	PRINTTEXT("  * NOTE: The solver is based on L. Vandenberghe's 'The CVXOPT linear and quad- *\n");
	PRINTTEXT("  *       ratic cone program solvers', March 20, 2010. Available online:        *\n");
	PRINTTEXT("  *       [http://abel.ee.ucla.edu/cvxopt/documentation/coneprog.pdf]           *\n");
	PRINTTEXT("  *                                                                             *\n");
	PRINTTEXT("  *       This code uses T.A. Davis' sparse LDL package and AMD code.           *\n");
	PRINTTEXT("  *       [http://www.cise.ufl.edu/research/sparse]                             *\n");
	PRINTTEXT("  *                                                                             *\n");
	PRINTTEXT("  *       Written during a summer visit at Stanford University with S. Boyd.    *\n");
	PRINTTEXT("  *                                                                             *\n");
	PRINTTEXT("  * (C) Alexander Domahidi, Automatic Control Laboratory, ETH Zurich, 2012-13.  *\n");
	PRINTTEXT("  *                     Email: [email protected]                      *\n");
	PRINTTEXT("  *******************************************************************************\n");
	PRINTTEXT("\n\n");
    PRINTTEXT("PROBLEM SUMMARY:\n");
    PRINTTEXT("    Primal variables (n): %d\n", (int)n);
	PRINTTEXT("Equality constraints (p): %d\n", (int)p);
	PRINTTEXT("     Conic variables (m): %d\n", (int)m);
	PRINTTEXT("- - - - - - - - - - - - - - -\n");
    PRINTTEXT("         Size of LP cone: %d\n", (int)l);
    PRINTTEXT("          Number of SOCs: %d\n", (int)ncones);
    for( i=0; i<ncones; i++ ){
        PRINTTEXT("    Size of SOC #%02d: %d\n", (int)(i+1), (int)q[i]);
    }
#endif
	
	/* get work data structure */
    mywork = (pwork *)MALLOC(sizeof(pwork));
#if PRINTLEVEL > 2
    PRINTTEXT("Memory allocated for WORK struct\n");
#endif

	/* dimensions */
	mywork->n = n;
	mywork->m = m;
	mywork->p = p;
    mywork->D = l + ncones;
#if PRINTLEVEL > 2
    PRINTTEXT("Set dimensions\n");
#endif

	/* variables */
    mywork->x = (pfloat *)MALLOC(n*sizeof(pfloat));
    mywork->y = (pfloat *)MALLOC(p*sizeof(pfloat));
    mywork->z = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->s = (pfloat *)MALLOC(m*sizeof(pfloat));
  	mywork->lambda = (pfloat *)MALLOC(m*sizeof(pfloat));
	mywork->dsaff_by_W = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->dsaff = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->dzaff = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->saff = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->zaff = (pfloat *)MALLOC(m*sizeof(pfloat));
	mywork->W_times_dzaff = (pfloat *)MALLOC(m*sizeof(pfloat));
#if PRINTLEVEL > 2
    PRINTTEXT("Memory allocated for variables\n");
#endif
    
    /* best iterates so far */
    mywork->best_x = (pfloat *)MALLOC(n*sizeof(pfloat));
    mywork->best_y = (pfloat *)MALLOC(p*sizeof(pfloat));
    mywork->best_z = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->best_s = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->best_info = (stats *)MALLOC(sizeof(stats));

	/* cones */
	mywork->C = (cone *)MALLOC(sizeof(cone));
#if PRINTLEVEL > 2
    PRINTTEXT("Memory allocated for cone struct\n");
#endif

	/* LP cone */
	mywork->C->lpc = (lpcone *)MALLOC(sizeof(lpcone));
	mywork->C->lpc->p = l;
	if( l > 0 ){
		mywork->C->lpc->w = (pfloat *)MALLOC(l*sizeof(pfloat));
		mywork->C->lpc->v = (pfloat *)MALLOC(l*sizeof(pfloat));
		mywork->C->lpc->kkt_idx = (idxint *)MALLOC(l*sizeof(idxint));
#if PRINTLEVEL > 2
        PRINTTEXT("Memory allocated for LP cone\n");
#endif
	} else {
		mywork->C->lpc->w = NULL;
		mywork->C->lpc->v = NULL;
		mywork->C->lpc->kkt_idx = NULL;
#if PRINTLEVEL > 2
        PRINTTEXT("No LP cone present, pointers filled with NULL\n");
#endif
	}


	/* Second-order cones */
	mywork->C->soc = (socone *)MALLOC(ncones*sizeof(socone));
	mywork->C->nsoc = ncones;
    cidx = 0;
    for( i=0; i<ncones; i++ ){
        conesize = (idxint)q[i];
        mywork->C->soc[i].p = conesize;
        mywork->C->soc[i].a = 0;
		mywork->C->soc[i].eta = 0;
        mywork->C->soc[i].q = (pfloat *)MALLOC((conesize-1)*sizeof(pfloat));
		mywork->C->soc[i].skbar = (pfloat *)MALLOC((conesize)*sizeof(pfloat));
		mywork->C->soc[i].zkbar = (pfloat *)MALLOC((conesize)*sizeof(pfloat));
#if CONEMODE == 0
        mywork->C->soc[i].Didx = (idxint *)MALLOC((conesize)*sizeof(idxint));
#endif 
#if CONEMODE > 0
        mywork->C->soc[i].colstart = (idxint *)MALLOC((conesize)*sizeof(idxint));
#endif
        cidx += conesize;
    }
#if PRINTLEVEL > 2
    PRINTTEXT("Memory allocated for second-order cones\n");
#endif

	/* info struct */
    mywork->info = (stats *)MALLOC(sizeof(stats));
#if PROFILING > 1
	mywork->info->tfactor = 0;
	mywork->info->tkktsolve = 0;
    mywork->info->tfactor_t1 = 0;
    mywork->info->tfactor_t2 = 0;
#endif
#if PRINTLEVEL > 2
    PRINTTEXT("Memory allocated for info struct\n");
#endif

    
#if defined EQUILIBRATE && EQUILIBRATE > 0
    /* equilibration vector */
    mywork->xequil = (pfloat *)MALLOC(n*sizeof(pfloat));
    mywork->Aequil = (pfloat *)MALLOC(p*sizeof(pfloat));
    mywork->Gequil = (pfloat *)MALLOC(m*sizeof(pfloat));
    
#if PRINTLEVEL > 2
    PRINTTEXT("Memory allocated for equilibration vectors\n");
#endif
#endif

	/* settings */
	mywork->stgs = (settings *)MALLOC(sizeof(settings));
	mywork->stgs->maxit = MAXIT;
	mywork->stgs->gamma = GAMMA;	
	mywork->stgs->delta = DELTA;
    mywork->stgs->eps = EPS;
	mywork->stgs->nitref = NITREF;
	mywork->stgs->abstol = ABSTOL;	
	mywork->stgs->feastol = FEASTOL;
	mywork->stgs->reltol = RELTOL;
    mywork->stgs->abstol_inacc = ATOL_INACC;
	mywork->stgs->feastol_inacc = FTOL_INACC;
	mywork->stgs->reltol_inacc = RTOL_INACC;
    mywork->stgs->verbose = VERBOSE;
#if PRINTLEVEL > 2
    PRINTTEXT("Written settings\n");
#endif

    mywork->c = c;
    mywork->h = h;
    mywork->b = b;
#if PRINTLEVEL > 2
    PRINTTEXT("Hung pointers for c, h and b into WORK struct\n");
#endif

    /* Store problem data */
  if(Apr && Ajc && Air) {
    mywork->A = createSparseMatrix(p, n, Ajc[n], Ajc, Air, Apr);
  } else {
    mywork->A = NULL;
  }
  if (Gpr && Gjc && Gir) {
	  mywork->G = createSparseMatrix(m, n, Gjc[n], Gjc, Gir, Gpr);
  } else {
    /* create an empty sparse matrix */
	mywork->G = createSparseMatrix(m, n, 0, Gjc, Gir, Gpr);
  }

#if defined EQUILIBRATE && EQUILIBRATE > 0
    set_equilibration(mywork);
    #if PRINTLEVEL > 2
        PRINTTEXT("Done equilibrating\n");
    #endif
#endif

#if PROFILING > 1
	mywork->info->ttranspose = 0;
	tic(&tmattranspose);
#endif
  if(mywork->A)
	  At = transposeSparseMatrix(mywork->A);
  else
    At = NULL;
#if PROFILING > 1	
	mywork->info->ttranspose += toc(&tmattranspose);
#endif
#if PRINTLEVEL > 2
    PRINTTEXT("Transposed A\n");
#endif
    
    
#if PROFILING > 1	
	tic(&tmattranspose);
#endif
	Gt = transposeSparseMatrix(mywork->G);    	
#if PROFILING > 1	
	mywork->info->ttranspose += toc(&tmattranspose);
#endif
#if PRINTLEVEL > 2
    PRINTTEXT("Transposed G\n");
#endif
    


     
  
    /* set up KKT system */
#if PROFILING > 1
	tic(&tcreatekkt);
#endif
	createKKT_U(Gt, At, mywork->C, &Sign, &KU);
#if PROFILING > 1
	mywork->info->tkktcreate = toc(&tcreatekkt);
#endif
#if PRINTLEVEL > 2
    PRINTTEXT("Created upper part of KKT matrix K\n");
#endif
    
    
	/* 
     * Set up KKT system related data
     * (L comes later after symbolic factorization) 
     */
    nK = KU->n;
    
#if DEBUG > 0
    dumpSparseMatrix(KU, "KU0.txt");
#endif
#if PRINTLEVEL > 2
    PRINTTEXT("Dimension of KKT matrix: %d\n", (int)nK);
    PRINTTEXT("Non-zeros in KKT matrix: %d\n", (int)KU->nnz);
#endif
    
    
    
    /* allocate memory in KKT system */
	mywork->KKT = (kkt *)MALLOC(sizeof(kkt));
	mywork->KKT->D = (pfloat *)MALLOC(nK*sizeof(pfloat));
	mywork->KKT->Parent = (idxint *)MALLOC(nK*sizeof(idxint));
	mywork->KKT->Pinv = (idxint *)MALLOC(nK*sizeof(idxint));
	mywork->KKT->work1 = (pfloat *)MALLOC(nK*sizeof(pfloat));
	mywork->KKT->work2 = (pfloat *)MALLOC(nK*sizeof(pfloat));
    mywork->KKT->work3 = (pfloat *)MALLOC(nK*sizeof(pfloat));
    mywork->KKT->work4 = (pfloat *)MALLOC(nK*sizeof(pfloat));
    mywork->KKT->work5 = (pfloat *)MALLOC(nK*sizeof(pfloat));
    mywork->KKT->work6 = (pfloat *)MALLOC(nK*sizeof(pfloat));
	mywork->KKT->Flag = (idxint *)MALLOC(nK*sizeof(idxint));	
	mywork->KKT->Pattern = (idxint *)MALLOC(nK*sizeof(idxint));
	mywork->KKT->Lnz = (idxint *)MALLOC(nK*sizeof(idxint));	
	mywork->KKT->RHS1 = (pfloat *)MALLOC(nK*sizeof(pfloat));
	mywork->KKT->RHS2 = (pfloat *)MALLOC(nK*sizeof(pfloat));
	mywork->KKT->dx1 = (pfloat *)MALLOC(mywork->n*sizeof(pfloat));
	mywork->KKT->dx2 = (pfloat *)MALLOC(mywork->n*sizeof(pfloat));
	mywork->KKT->dy1 = (pfloat *)MALLOC(mywork->p*sizeof(pfloat));
	mywork->KKT->dy2 = (pfloat *)MALLOC(mywork->p*sizeof(pfloat));
	mywork->KKT->dz1 = (pfloat *)MALLOC(mywork->m*sizeof(pfloat));
	mywork->KKT->dz2 = (pfloat *)MALLOC(mywork->m*sizeof(pfloat));
    mywork->KKT->Sign = (idxint *)MALLOC(nK*sizeof(idxint));
    mywork->KKT->PKPt = newSparseMatrix(nK, nK, KU->nnz);
	mywork->KKT->PK = (idxint *)MALLOC(KU->nnz*sizeof(idxint));

#if PRINTLEVEL > 2
    PRINTTEXT("Created memory for KKT-related data\n");    
#endif
    
    
    /* calculate ordering of KKT matrix using AMD */
	P = (idxint *)MALLOC(nK*sizeof(idxint));
#if PROFILING > 1
	tic(&tordering);
#endif
	AMD_defaults(Control);	
	amd_result = AMD_order(nK, KU->jc, KU->ir, P, Control, Info);	
#if PROFILING > 1	
	mywork->info->torder = toc(&tordering);
#endif

	if( amd_result == AMD_OK ){
#if PRINTLEVEL > 2
		PRINTTEXT("AMD ordering successfully computed.\n");
		AMD_info(Info);
#endif
	} else {
#if PRINTLEVEL > 2
		PRINTTEXT("Problem in AMD ordering, exiting.\n");
        AMD_info(Info);
#endif
        return NULL;
	}
	
	/* calculate inverse permutation and permutation mapping of KKT matrix */
	pinv(nK, P, mywork->KKT->Pinv);		
	Pinv = mywork->KKT->Pinv;
#if DEBUG > 0
    dumpDenseMatrix_i(P, nK, 1, "P.txt");
    dumpDenseMatrix_i(mywork->KKT->Pinv, nK, 1, "PINV.txt");
#endif
	permuteSparseSymmetricMatrix(KU, mywork->KKT->Pinv, mywork->KKT->PKPt, mywork->KKT->PK);

	/* permute sign vector */
    for( i=0; i<nK; i++ ){ mywork->KKT->Sign[Pinv[i]] = Sign[i]; }
#if PRINTLEVEL > 3
    PRINTTEXT("P = [");
    for( i=0; i<nK; i++ ){ PRINTTEXT("%d ", (int)P[i]); }
    PRINTTEXT("];\n");
    PRINTTEXT("Pinv = [");
    for( i=0; i<nK; i++ ){ PRINTTEXT("%d ", (int)Pinv[i]); }
    PRINTTEXT("];\n");
    PRINTTEXT("Sign = [");
    for( i=0; i<nK; i++ ){ PRINTTEXT("%+d ", (int)Sign[i]); }
    PRINTTEXT("];\n");
    PRINTTEXT("SignP = [");
    for( i=0; i<nK; i++ ){ PRINTTEXT("%+d ", (int)mywork->KKT->Sign[i]); }
    PRINTTEXT("];\n");
#endif
	
    
	
	/* symbolic factorization */	
	Ljc = (idxint *)MALLOC((nK+1)*sizeof(idxint));
#if PRINTLEVEL > 2
    PRINTTEXT("Allocated memory for cholesky factor L\n");
#endif    
	LDL_symbolic2(
		mywork->KKT->PKPt->n,    /* A and L are n-by-n, where n >= 0 */
		mywork->KKT->PKPt->jc,   /* input of size n+1, not modified */
		mywork->KKT->PKPt->ir,	 /* input of size nz=Ap[n], not modified */
		Ljc,					 /* output of size n+1, not defined on input */
		mywork->KKT->Parent,	 /* output of size n, not defined on input */
		mywork->KKT->Lnz,		 /* output of size n, not defined on input */
		mywork->KKT->Flag		 /* workspace of size n, not defn. on input or output */
	);
	

	/* assign memory for L */
	lnz = Ljc[nK];
#if PRINTLEVEL > 2
	PRINTTEXT("Nonzeros in L, excluding diagonal: %d\n", (int)lnz) ;
#endif
	Lir = (idxint *)MALLOC(lnz*sizeof(idxint));
	Lpr = (pfloat *)MALLOC(lnz*sizeof(pfloat));
	mywork->KKT->L = createSparseMatrix(nK, nK, lnz, Ljc, Lir, Lpr);
#if PRINTLEVEL > 2
	PRINTTEXT("Created Cholesky factor of K in KKT struct\n");
#endif
    

	/* permute KKT matrix - we work on this one from now on */
	permuteSparseSymmetricMatrix(KU, mywork->KKT->Pinv, mywork->KKT->PKPt, NULL);
#if DEBUG > 0
    dumpSparseMatrix(mywork->KKT->PKPt, "PKPt.txt");
#endif
    
#if CONEMODE > 0
    /* zero any off-diagonal elements in (permuted) scalings in KKT matrix */
    for (i=0; i<mywork->C->nsoc; i++) {
        for (j=1; j<mywork->C->soc[i].p; j++) {
            for (k=0; k<j; k++) {
                mywork->KKT->PKPt->pr[mywork->KKT->PK[mywork->C->soc[i].colstart[j]+k]] = 0;
            }
        }
    }
#endif
#if DEBUG > 0
     dumpSparseMatrix(mywork->KKT->PKPt, "PKPt0.txt");
#endif

	/* set up RHSp for initialization */
	k = 0; j = 0;
	for( i=0; i<n; i++ ){ mywork->KKT->RHS1[Pinv[k++]] = 0; }
	for( i=0; i<p; i++ ){ mywork->KKT->RHS1[Pinv[k++]] = b[i]; }
	for( i=0; i<l; i++ ){ mywork->KKT->RHS1[Pinv[k++]] = h[i]; j++; }
	for( l=0; l<ncones; l++ ){ 
		for( i=0; i < mywork->C->soc[l].p; i++ ){ mywork->KKT->RHS1[Pinv[k++]] = h[j++]; }
#if CONEMODE == 0
		mywork->KKT->RHS1[Pinv[k++]] = 0;
        mywork->KKT->RHS1[Pinv[k++]] = 0;
#endif
	}
#if PRINTLEVEL > 2
    PRINTTEXT("Written %d entries of RHS1\n", (int)k);
#endif
	
	/* set up RHSd for initialization */
	for( i=0; i<n; i++ ){ mywork->KKT->RHS2[Pinv[i]] = -c[i]; }
	for( i=n; i<nK; i++ ){ mywork->KKT->RHS2[Pinv[i]] = 0; }

	/* get scalings of problem data */
	rx = norm2(c, n); mywork->resx0 = MAX(1, rx);
	ry = norm2(b, p); mywork->resy0 = MAX(1, ry);
	rz = norm2(h, m); mywork->resz0 = MAX(1, rz);

	/* get memory for residuals */
	mywork->rx = (pfloat *)MALLOC(n*sizeof(pfloat));
	mywork->ry = (pfloat *)MALLOC(p*sizeof(pfloat));
	mywork->rz = (pfloat *)MALLOC(m*sizeof(pfloat));
	
    /* clean up */
    mywork->KKT->P = P;
	FREE(Sign);
  if(At) freeSparseMatrix(At);
	freeSparseMatrix(Gt);
	freeSparseMatrix(KU);
    
#if PROFILING > 0
	mywork->info->tsetup = toc(&tsetup);
#endif

    return mywork;
}
Esempio n. 29
0
void lingot_signal_test() {

	int N = 16;
	int i = 0;
	int n = 5;
	FLT* spd = malloc(N * sizeof(FLT));
	FLT* noise = malloc(N * sizeof(FLT));

	for (i = 0; i < N; i++) {
		spd[i] = i + 1;
		noise[i] = -1.0;
	}

	lingot_signal_compute_noise_level(spd, N, n, noise);

	printf("S = [");
	for (i = 0; i < N; i++) {
		printf(" %f ", spd[i]);
	}
	printf("] \n");
	printf("N = [");
	for (i = 0; i < N; i++) {
		printf(" %f ", noise[i]);
	}
	printf("] \n");

	puts("done.");

	printf("S = [");
	for (i = 0; i < N; i++) {
		printf(" %f ", spd[i]);
	}
	printf("] \n");
//	assert(lingot_signal_quick_select(spd, 1) == 1.0);
//	assert(lingot_signal_quick_select(spd, 2) == 1.0);
//	assert(lingot_signal_quick_select(spd, 3) == 2.0);
//	assert(lingot_signal_quick_select(spd, 4) == 2.0);
//	assert(lingot_signal_quick_select(spd, 5) == 3.0);
//	assert(lingot_signal_quick_select(spd, 6) == 3.0);
	printf("S = [");
	for (i = 0; i < N; i++) {
		printf(" %f ", spd[i]);
	}
	printf("] \n");

	free(spd);
	free(noise);

	N = 512;
	i = 0;
	n = 30;
	spd = malloc(N * sizeof(FLT));
	noise = malloc(N * sizeof(FLT));

	for (i = 0; i < N; i++) {
		spd[i] = N - i;
		noise[i] = -1.0;
	}

	printf("S = [");
	for (i = 0; i < N; i++) {
		printf(" %f ", spd[i]);
	}
	printf("] \n");

	double m;

	tic();
//	m = lingot_signal_quick_select(spd, 512);
	toc();

	printf("m = %f\n", m);

	printf("S = [");
	for (i = 0; i < N; i++) {
		printf(" %f ", spd[i]);
	}
	printf("] \n");

	tic();
//	m = lingot_signal_quick_select(spd, 512);
	toc();

	printf("m = %f\n", m);

	printf("S = [");
	for (i = 0; i < N; i++) {
		printf(" %f ", spd[i]);
	}
	printf("] \n");

	// -----------------

	for (i = 0; i < N; i++) {
		spd[i] = N - i;
		noise[i] = -1.0;
	}

	tic();
	lingot_signal_compute_noise_level(spd, N, n, noise);
	toc();

	printf("N = [");
	for (i = 0; i < N; i++) {
		printf(" %f ", noise[i]);
	}
	printf("] \n");

	tic();
	for (i = 0; i < 10000; i++) {
		lingot_signal_compute_noise_level(spd, N, n, noise);
	}
	toc();

	free(spd);
	free(noise);
}
Esempio n. 30
0
// Default constructor: calls tic() as soon as the Timer is created.
// NOTE(review): presumably this starts the timing interval so a freshly
// constructed Timer is already running — confirm against tic()'s definition.
Timer::Timer()
{
    tic();
}