/**
 * Factory for the wrap implementation selected by @p t.
 *
 * Stores @p t in m_wrapImplType, prints the name of the selected mode to
 * stdout, instantiates the matching implementation class, calls tic() and
 * stores getTime() in totalWrapTime.
 *
 * @param t  requested implementation type.
 * @return   newly allocated implementation object, or NULL when @p t matches
 *           none of the types handled here.
 */
WrapImpl *WrapImpl::getWrapImpl(WrapImplType t)
{
    m_wrapImplType = t;

    FILE *out = stdout;
    WrapImpl *impl = NULL;

    // All of these modes share the plain WrapImpl base implementation.
    const bool usesBaseImpl = (t == MemCheck) || (t == NoAtomicity) || (t == NoSCM)
                           || (t == NoGuarantee) || (t == Wrap_Hardware);
    if (usesBaseImpl)
    {
        if (t == MemCheck)
        {
            fprintf(out, "MemCheck\n");
        }
        else if (t == NoAtomicity)
        {
            fprintf(out, "NoAtomicity\n");
        }
        else if (t == NoSCM)
        {
            fprintf(out, "NoSCM\n");
        }
        else if (t == NoGuarantee)
        {
            fprintf(out, "NoGuarantee\n");
        }
        else if (t == Wrap_Hardware)
        {
            fprintf(out, "Wrap_Hardware\n");
        }
        else
        {
            // Cannot happen: usesBaseImpl already restricts t to the cases above.
            assert(0);
        }
        impl = new WrapImpl();
    }

    // Deliberately independent checks (not else-if), matching the original flow.
    if (t == UndoLog)
    {
        fprintf(out, "UndoLog\n");
        impl = new WrapImplUndoLog();
    }

    if (t == Wrap_Software)
    {
        fprintf(out, "Software\n");
        impl = new WrapImplSoftware();
    }

    tic();
    totalWrapTime = getTime();
    return impl;
}
/*
 * Solves the linear system M x = b for x; the result overwrites b.
 *
 *   A, stgs, p : problem data, settings and workspace handed on to the helpers
 *   b          : right-hand side on entry, solution on exit (the entries from
 *                index A->n onwards form the second part of the vector)
 *   s          : warm-start (if available) for the conjugate-gradient solve
 *   iter       : outer iteration number; a negative value selects the
 *                tightest tolerance and is excluded from the CG statistics
 *
 * Always returns 0. The elapsed time is added to totalSolveTime.
 */
scs_int solveLinSys(const AMatrix * A, const Settings * stgs, Priv * p, scs_float * b, const scs_float * s, scs_int iter) {
	scs_int itersUsed;
	/* second part of b, starting right after the first A->n entries */
	scs_float * const bLower = &(b[A->n]);
	/* tolerance scales with ||b||; computed before the timer is started */
	scs_float cgTol = calcNorm(b, A->n) * (iter < 0 ? CG_BEST_TOL : CG_MIN_TOL / POWF((scs_float) iter + 1, stgs->cg_rate));

	tic(&linsysTimer);

	/* fold the lower part into the upper part through A' */
	accumByAtrans(A, p, bLower, b);

	/* solves (I+A'A)x = b, s warm start, solution stored in b */
	itersUsed = pcg(A, stgs, p, s, b, A->n, MAX(cgTol, CG_BEST_TOL));

	/* recover the lower part from the freshly computed upper part */
	scaleArray(bLower, -1, A->m);
	accumByA(A, p, b, bLower);

	if (iter >= 0) {
		totCgIts += itersUsed;
	}

	totalSolveTime += tocq(&linsysTimer);
#if EXTRAVERBOSE > 0
	scs_printf("linsys solve time: %1.2es\n", tocq(&linsysTimer) / 1e3);
#endif
	return 0;
}
/*
 * Debug variant of the reference preprocessing driver.
 *
 * Command line (from the check_syntax usage string):
 *     preprocess_debug ref_file output_dir s_ratio nucleotides
 *
 *   argv[1]  reference file handed to encode_reference()
 *   argv[2]  output directory passed to every save_* call
 *   argv[3]  s_ratio, parsed with atoi() and passed to compress_SR()
 *   argv[4]  nucleotides, passed to init_replace_table()
 *
 * The function computes each structure, prints it, saves it under a short
 * name ("X", "B", "C", "C1", "O", "S", "R", "Scomp", "Rcomp" and the
 * matching "...i" names for the second pass) and frees it straight away.
 * Statement order matters: every vector is freed right after its last use.
 *
 * NOTE(review): timevars()/tic()/toc() look like timing macros that share
 * local state declared by timevars() -- confirm before moving any of them.
 */
int main(int argc, char **argv)
{
	ref_vector X, B, Bi;
	vector C, C1;
	comp_vector S, Si, Scomp, Scompi;
	comp_vector R, Ri, Rcomp, Rcompi;
	comp_matrix O, Oi;
	int s_ratio;
	exome ex;

	check_syntax(argc, 5, "preprocess_debug ref_file output_dir s_ratio nucleotides");

	timevars();

	init_replace_table(argv[4]);
	s_ratio = atoi(argv[3]);

	/* Load the reference into X (and the exome description into ex). */
	encode_reference(&X, &ex, true, argv[1]);
	save_exome_file(&ex, argv[2]);

	/* ---- first pass: B and S from X (last argument 0) ---- */
	tic("Calculating BWT");
	calculateBWTdebug(&B, &S, &X, 0);
	toc();

	save_ref_vector(&X, argv[2], "X");

	print_vector(S.vector, S.n);
	print_vector(B.vector, B.n);

	tic("Calculating prefix-trie matrices C and O");
	calculate_C(&C, &C1, &B);
	calculate_O(&O, &B);
	toc();

	print_vector(C.vector, C.n);
	print_vector(C1.vector, C1.n);
	print_comp_matrix(O);

	/* B, C, C1 and O are no longer needed once written out. */
	save_ref_vector(&B, argv[2], "B");
	free(B.vector);
	save_vector(&C, argv[2], "C");
	free(C.vector);
	save_vector(&C1, argv[2], "C1");
	free(C1.vector);
	save_comp_matrix(&O, argv[2], "O");
	free_comp_matrix(NULL, &O);

	tic("Calculating R");
	calculate_R(&R, &S);
	toc();

	print_vector(R.vector, R.n);

	/* Compressed versions of S and R, controlled by s_ratio. */
	tic("Calculating Scomp Rcomp");
	compress_SR(&S, &Scomp, s_ratio);
	print_vector(Scomp.vector, Scomp.n);
	compress_SR(&R, &Rcomp, s_ratio);
	print_vector(Rcomp.vector, Rcomp.n);
	toc();

	save_comp_vector(&S, argv[2], "S");
	free(S.vector);
	save_comp_vector(&R, argv[2], "R");
	free(R.vector);
	save_comp_vector(&Scomp, argv[2], "Scomp");
	free(Scomp.vector);
	save_comp_vector(&Rcomp, argv[2], "Rcomp");
	free(Rcomp.vector);

	/* ---- second pass: same pipeline with last argument 1 ---- */
	/* NOTE(review): X is saved again as "Xi" afterwards, so this call
	 * presumably reverses X in place -- confirm in calculateBWTdebug. */
	tic("Calculating BWT of reverse reference");
	calculateBWTdebug(&Bi, &Si, &X, 1);
	toc();

	save_ref_vector(&X, argv[2], "Xi");

	print_vector(Bi.vector, Bi.n);
	print_vector(Si.vector, Si.n);

	tic("Calculating inverted prefix-trie matrix Oi");
	calculate_O(&Oi, &Bi);
	toc();

	/* Last use of X was the save above. */
	free(X.vector);

	print_comp_matrix(Oi);

	save_ref_vector(&Bi, argv[2], "Bi");
	free(Bi.vector);

	save_comp_matrix(&Oi, argv[2], "Oi");
	free_comp_matrix(NULL, &Oi);

	tic("Calculating Ri");
	calculate_R(&Ri, &Si);
	toc();

	print_vector(Ri.vector, Ri.n);

	tic("Calculating Scompi Rcompi");
	compress_SR(&Si, &Scompi, s_ratio);
	print_vector(Scompi.vector, Scompi.n);
	compress_SR(&Ri, &Rcompi, s_ratio);
	print_vector(Rcompi.vector, Rcompi.n);
	toc();

	save_comp_vector(&Si, argv[2], "Si");
	free(Si.vector);
	save_comp_vector(&Ri, argv[2], "Ri");
	free(Ri.vector);
	save_comp_vector(&Scompi, argv[2], "Scompi");
	free(Scompi.vector);
	save_comp_vector(&Rcompi, argv[2], "Rcompi");
	free(Rcompi.vector);

	return 0;
}
/* Destroys every non-NULL array held in OUTPUT and resets the slots to NULL. */
static void destroyOutputs( mxArray *OUTPUT[3] ){
  int o;
  for( o = 0 ; o < 3 ; o++ ){
    if( OUTPUT[o] != NULL ){ mxDestroyArray( OUTPUT[o] ); OUTPUT[o] = NULL; }
  }
}

/* Destroys the 1x3 coordinate argument INPUT[1] plus every OUTPUT slot.
 * INPUT[0] is borrowed from prhs and is therefore left untouched. */
static void destroyTemporaries( mxArray *INPUT[2] , mxArray *OUTPUT[3] ){
  if( INPUT[1] != NULL ){ mxDestroyArray( INPUT[1] ); INPUT[1] = NULL; }
  destroyOutputs( OUTPUT );
}

/*
 * MEX gateway. Usage (from the error message below):
 *     max_min_multiples_erodes( V , F , volumes [, 'verbose'] )
 *
 *   prhs[0]  V        array with at most 3 dimensions; NaN entries are skipped
 *   prhs[1]  F        function handle, called through feval with a 1x3
 *                     one-based [i j k] index. Its first output (DIST) must be
 *                     double. If F also returns a second output it is used as
 *                     the visiting order; otherwise "sort" is called on DIST.
 *   prhs[2]  volumes  element counts; the last one is treated as the largest
 *   plhs[0]           NVOLS x 1 result (MAXs)
 *
 * For every non-NaN voxel the offsets described by DIST are visited in the
 * given order. While doing so the running minimum of V and of -V over the
 * first volumes[n] offsets (extended over ties in DIST) is tracked, and
 * MAXs[n] keeps the largest such minimum seen so far. A voxel is abandoned as
 * soon as it can no longer beat the last entry of MAXs, or when an offset
 * falls outside V or onto a NaN.
 *
 * NOTE(review): an all-NaN V (nV == 0) or an empty volumes argument is not
 * handled; both lead to out-of-range indexing further down.
 */
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){
  ALLOCATES();
  CreateTicTacToc( CallMatlab );
  CreateTicTacToc( callSort );

  int       I, J, K, ii, jj, kk;
  int       IJ, IJK;
  int       DI, DJ, DK, DIJK, DIJK_1;
  int       CDI, CDJ, CDK;
  int       result, fevals = 0;
  int       NVOLS, NVOLS_1, n, s, s_end, v_init;
  real      *volumes, *V, x, y, *DIST, *order, last_distance;
  int       *VV=NULL, nV, v, vv;
  char      skip;
  triplet   *TS=NULL, *DTS=NULL, T;
  mxArray   *INPUT[2]={NULL,NULL}, *OUTPUT[3]={NULL,NULL,NULL};
  double    *MAXs, LAST_MAX;
  double    thisMINx, thisMINy;
  double    *ijk;
  char      callSort;
  mwSize    toVec[2]={1,1};
  char      VERBOSE = 0;
  char      STR[1024];

  if( nlhs > 1 ){ mxErrMsgTxt("too much outputs"); }

  /* Optional trailing 'verbose' flag. The nrhs > 0 guard keeps a call
   * without arguments from reading prhs[-1]. */
  if( nrhs > 0 && mxIsChar( prhs[nrhs-1] ) ){
    mxGetString( prhs[nrhs-1], STR, 100 );
    if( ! myStrcmpi(STR,"verbose") ){
      VERBOSE = 1;
    } else {
      mxErrMsgTxt("only 'verbose' option allowed.");
    }
    nrhs = nrhs-1;
  }

  if( nrhs != 3 ){
    mxErrMsgTxt("sintax error. max_min_multiples_erodes( V , F , volumes )");
  }
  if( mxGetClassID( prhs[1] ) != mxFUNCTION_CLASS ){
    mxErrMsgTxt("F have to be a function_handle.");
  }
  if( myNDims( prhs[0] ) > 3 ){
    mxErrMsgTxt("bigger than 3d arrays is not allowed.");
  }

  NVOLS   = myNumel( prhs[2] );
  NVOLS_1 = NVOLS - 1;
  volumes = myGetPr( prhs[2] );

  I = mySize( prhs[0] , 0 );
  J = mySize( prhs[0] , 1 );
  K = mySize( prhs[0] , 2 );
  IJ  = I*J;
  IJK = IJ*K;

  VV = (int     *) mxMalloc( IJK*sizeof( int ) );
  TS = (triplet *) mxMalloc( IJK*sizeof( triplet ) );
  V  = myGetPr( prhs[0] );

  /* VV collects the linear indices of the non-NaN voxels; TS stores the
   * (i,j,k) coordinates and a NaN flag for every voxel. */
  v = 0; nV = 0;
  for( kk = 0 ; kk < K ; kk++ ){
  for( jj = 0 ; jj < J ; jj++ ){
  for( ii = 0 ; ii < I ; ii++ ){
    x = V[ v ];
    if( x == x ){            /* false only for NaN */
      VV[ nV ] = v;
      TS[ v ].isnan = 0;
      TS[ v ].i = ii;
      TS[ v ].j = jj;
      TS[ v ].k = kk;
      nV++;
    } else {
      TS[ v ].isnan = 1;
    }
    v++;
  }}}

  INPUT[0] = prhs[1];
  INPUT[1] = mxCreateNumericMatrix( 1 , 3 , mxDOUBLE_CLASS , mxREAL );
  ijk = (double *) mxGetData( INPUT[1] );

  /* Probe F once, at the middle non-NaN voxel, asking for two outputs.
   * The trapped exception (if any) is parked in OUTPUT[2]. */
  ijk[0] = TS[ VV[ nV/2] ].i + 1;
  ijk[1] = TS[ VV[ nV/2] ].j + 1;
  ijk[2] = TS[ VV[ nV/2] ].k + 1;

  OUTPUT[2] = mexCallMATLABWithTrap( 2 , OUTPUT , 2 , INPUT , "feval" );

  if( OUTPUT[2] == NULL ){
    /* F delivers the order itself. */
    callSort = 0;
    if( mxGetClassID( OUTPUT[0] ) != mxDOUBLE_CLASS ){
      destroyTemporaries( INPUT , OUTPUT );
      mxErrMsgTxt("F debe retornar un double en el primer output.");
    }
    if( mxGetClassID( OUTPUT[1] ) != mxDOUBLE_CLASS ){
      destroyTemporaries( INPUT , OUTPUT );
      mxErrMsgTxt("F debe retornar un double en el segundo output.");
    }
  } else {
    /* Two outputs failed: fall back to one output and sort it ourselves. */
    callSort = 1;
    if( VERBOSE ){ mexPrintf("sort has to be called\n"); }
    mxDestroyArray( OUTPUT[2] ); OUTPUT[2] = NULL;

    result = mexCallMATLAB( 1 , OUTPUT , 2 , INPUT , "feval" );
    if( result ){ mxErrMsgTxt("error computing la funcion."); }
    if( mxGetClassID( OUTPUT[0] ) != mxDOUBLE_CLASS ){
      destroyTemporaries( INPUT , OUTPUT );
      mxErrMsgTxt("F debe retornar un double en el primer output.");
    }
  }

  DI = mySize( OUTPUT[0] , 0 );
  DJ = mySize( OUTPUT[0] , 1 );
  DK = mySize( OUTPUT[0] , 2 );
  DTS = (triplet *) mxMalloc( 2*DI*DJ*DK*sizeof( triplet ) );

  plhs[0] = mxCreateNumericMatrix( NVOLS , 1 , mxREAL_CLASS , mxREAL );
  MAXs = (real *) mxGetData( plhs[0] );
  for( n = 0 ; n < NVOLS ; n++ ){ MAXs[n] = -10000; }
  LAST_MAX = MAXs[ NVOLS_1 ];

  /* The non-NaN voxels are visited in EVERY interleaved passes. */
  for( v_init = 0 ; v_init < EVERY ; v_init++ ){
    if( utIsInterruptPending() ){
      destroyTemporaries( INPUT , OUTPUT );
      mexPrintf("USER INTERRUP!!!\n");
      mxErrMsgTxt("USER INTERRUP!!!");
    }

    if( VERBOSE ){
      mexPrintf("v_init: %d  (%g)  of %d\n", v_init , LAST_MAX , EVERY );
    }

    for( v = v_init ; v < nV ; v += EVERY ){
      vv = VV[ v ];

      thisMINx =  V[ vv ];
      thisMINy = -thisMINx;
      /* Minima only decrease, so this voxel cannot improve the last MAX. */
      if( ( thisMINx < LAST_MAX ) && ( thisMINy < LAST_MAX ) ){ continue; }

      T = TS[ vv ];
      ijk[0] = T.i + 1;
      ijk[1] = T.j + 1;
      ijk[2] = T.k + 1;

      destroyOutputs( OUTPUT );

      if( !callSort ){
        tic( CallMatlab );
        result = mexCallMATLAB( 2 , OUTPUT , 2 , INPUT , "feval" ); fevals++;
        tac( CallMatlab );
      } else {
        tic( CallMatlab );
        result = mexCallMATLAB( 1 , OUTPUT , 2 , INPUT , "feval" ); fevals++;
        tac( CallMatlab );
      }

      DI = mySize( OUTPUT[0] , 0 );
      DJ = mySize( OUTPUT[0] , 1 );
      DK = mySize( OUTPUT[0] , 2 );
      DIJK = DI*DJ*DK;
      if( volumes[ NVOLS_1 ] > DIJK ){
        destroyTemporaries( INPUT , OUTPUT );
        mxErrMsgTxt("el maximo volumen debe ser menor que numel(DIST)");
      }
      DIJK_1 = DIJK - 1;

      DIST = (double *) mxGetData( OUTPUT[0] );

      /* DTS maps a linear index of DIST to its (i,j,k) coordinates. */
      DTS = (triplet *) mxRealloc( DTS , DIJK*sizeof( triplet ) );
      s = 0;
      for( kk = 0 ; kk < DK ; kk++ ){
      for( jj = 0 ; jj < DJ ; jj++ ){
      for( ii = 0 ; ii < DI ; ii++ ){
        DTS[ s ].i = ii;
        DTS[ s ].j = jj;
        DTS[ s ].k = kk;
        s++;
      }}}

      if( !callSort ){
        order = (double *) mxGetData( OUTPUT[1] );
      } else {
        /* Flatten DIST to a row vector, then [sorted,order] = sort(DIST). */
        toVec[0] = mxGetNumberOfElements( OUTPUT[0] );
        mxSetDimensions( OUTPUT[0] , toVec , 2 );

        tic( callSort );
        result = mexCallMATLAB( 2 , OUTPUT+1 , 1 , OUTPUT , "sort" );
        tac( callSort );

        order = (double *) mxGetData( OUTPUT[2] );
      }

      /* The first element in the order is taken as the centre of DIST. */
      CDI = DTS[ (int) ( order[0] - 1 ) ].i;
      CDJ = DTS[ (int) ( order[0] - 1 ) ].j;
      CDK = DTS[ (int) ( order[0] - 1 ) ].k;

      skip = 0;
      s = 0;
      for( n = 0 ; n < NVOLS ; n++ ){
        /* Extend the n-th volume over all elements tied with its last one. */
        s_end = (int) ( volumes[n] - 1 );
        last_distance = DIST[ (int) order[ s_end ] - 1 ];
        while( s_end < DIJK_1 && DIST[ (int) ( order[ s_end + 1 ] - 1 ) ] == last_distance ){
          s_end++;
        }
        s_end++;

        for( ; s < s_end ; s++ ){
          vv = (int) ( order[ s ] - 1 );

          /* Valid coordinates are 0..I-1, 0..J-1 and 0..K-1. The upper tests
           * must be >=: a coordinate equal to the extent would wrap into the
           * next row/slice or index past the end of TS and V. */
          ii = T.i + DTS[ vv ].i - CDI;   if( ii < 0 || ii >= I ){ skip = 1; break; }
          jj = T.j + DTS[ vv ].j - CDJ;   if( jj < 0 || jj >= J ){ skip = 1; break; }
          kk = T.k + DTS[ vv ].k - CDK;   if( kk < 0 || kk >= K ){ skip = 1; break; }

          vv = ii + jj*I + kk*IJ;
          if( TS[ vv ].isnan ){ skip = 1; break; }

          x = V[ vv ];
          if( x < thisMINx ){ thisMINx = x; }
          y = -x;
          if( y < thisMINy ){ thisMINy = y; }

          if( ( thisMINx < LAST_MAX ) && ( thisMINy < LAST_MAX ) ){ skip = 1; break; }
        }
        if( skip ){ break; }

        if( thisMINx > MAXs[n] ){ MAXs[n] = thisMINx; }
        if( thisMINy > MAXs[n] ){ MAXs[n] = thisMINy; }
      }
      LAST_MAX = MAXs[ NVOLS_1 ];
    }
  }

  destroyTemporaries( INPUT , OUTPUT );

  if( VERBOSE ){
    mexPrintf( "\nfevals: %d en tiempo: CallMatlab: %20.30g sorting: %20.30g\n" , fevals , toc( CallMatlab ) , toc( callSort ) );
  }

  if(  VV != NULL ){ mxFree(  VV ); }
  if(  TS != NULL ){ mxFree(  TS ); }
  if( DTS != NULL ){ mxFree( DTS ); }

  myFreeALLOCATES();
}
/**
 * Applies the preconditioner to X and writes the result to Y.
 *
 * X is first copied into the member U. Then, depending on this->type():
 *  - AS:     the three contributions A, B and C described below are
 *            computed and summed into *A, which is copied to *M_uout;
 *  - SIMPLE: the work is delegated to SimpleOp->applyInverse(X, Y);
 *  - other:  Y is set to U (a plain copy of X).
 * In all cases *M_uout is finally copied into Y.
 *
 * @param X  input vector (marked R in the signature)
 * @param Y  output vector (marked W in the signature)
 * @return   always 0
 *
 * NOTE(review): the method is const yet assigns to U, A, B, C and the M_*
 * work vectors, so these are presumably mutable members or pointers --
 * confirm in the class declaration.
 */
int
PreconditionerAS<space_type,coef_space_type>::applyInverse ( const vector_type& X /*R*/, vector_type& Y /*W*/) const
{
    /*
     * We solve Here P_v w = r
     * With P_v^-1 = diag(P_m)^-1 (=A)
     *              + P (\bar L + g \bar Q) P^t (=B)
     *              + C (L^-1) C^T (=C)
     */

    U = X;
    U.close();

    // solve equ (12)
    if ( this->type() == AS )
    {
        tic();
        *M_r = U;
        M_r->close();

        // step A : diag(Pm)^-1*r
        A->pointwiseDivide(*M_r,*M_diagPm);
        A->close();

        // s = P^t r
        M_Pt->multVector(M_r,M_s);

        // Impose boundary conditions on M_s
        // (round trip through the element M_qh3_elt, zeroed on the boundary faces)
#if 1
        M_qh3_elt = *M_s;
        M_qh3_elt.close();
#if FEELPP_DIM == 3
        M_qh3_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=vec(cst(0.), cst(0.), cst(0.)) );
#else
        M_qh3_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=vec(cst(0.), cst(0.)) );
#endif
        *M_s = M_qh3_elt;
        M_s->close();
#endif

#if 1
        // Subvectors for M_s (per component) need to be updated
        M_s1 = M_s->createSubVector(M_Qh3_indices[0], true);
        M_s2 = M_s->createSubVector(M_Qh3_indices[1], true);
#if FEELPP_DIM == 3
        M_s3 = M_s->createSubVector(M_Qh3_indices[2], true);
#endif
#else
        // Disabled alternative: refresh the existing subvectors in place.
        // s = [ s1, s2, s3 ]
        M_s->updateSubVector(M_s1, M_Qh3_indices[0]);
        M_s->updateSubVector(M_s2, M_Qh3_indices[1]);
#if FEELPP_DIM == 3
        M_s->updateSubVector(M_s3, M_Qh3_indices[2]);
#endif
#endif
        M_s->close();

        /*
         * hat(L) + g Q is a (Qh,Qh) matrix
         * [[ hat(L) + g Q, 0   ,  0             ],    [ y1 ]    [ s1 ]
         * [   0,   hat(L) + g Q,  0             ], *  [ y2 ] =  [ s2 ]
         * [   0,     0   ,     hat(L) + g Q     ]]    [ y3 ]    [ s3 ]
         */
        // The same operator is applied to each component separately.
        M_lgqOp->applyInverse(M_s1,M_y1);
        M_lgqOp->applyInverse(M_s2,M_y2);
#if FEELPP_DIM == 3
        M_lgqOp->applyInverse(M_s3,M_y3);
#endif

        // y = [ y1, y2, y3 ]
        M_y->updateSubVector(M_y1, M_Qh3_indices[0]);
        M_y->updateSubVector(M_y2, M_Qh3_indices[1]);
#if FEELPP_DIM == 3
        M_y->updateSubVector(M_y3, M_Qh3_indices[2]);
#endif
        M_y->close();

        // step B : P*y
        M_P->multVector(M_y,B);

        // Impose boundary conditions on B = Py
#if 1
        M_vh_elt = *B;
        M_vh_elt.close();
#if FEELPP_DIM == 3
        M_vh_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=vec(cst(0.), cst(0.), cst(0.)) );
#else
        M_vh_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=vec(cst(0.), cst(0.)) );
#endif
        *B = M_vh_elt;
        B->close();
#endif

        // t = C^t r
        M_Ct->multVector(M_r,M_t);

        // Impose boundary conditions on M_t
#if 1
        M_qh_elt = *M_t;
        M_qh_elt.close();
        M_qh_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=cst(0.) );
        *M_t = M_qh_elt;
        M_t->close();
#endif

        // 14.b : hat(L) z = t
        M_lOp->applyInverse(M_t,M_z);
        M_z->close();

        // step C : M_C z
        // followed by a scaling of the result by 1/M_g
        M_C->multVector(M_z,C);
        C->scale(1./M_g);

        // Impose boundary conditions on C = Cz
#if 1
        M_vh_elt = *C;
        M_vh_elt.close();
#if FEELPP_DIM == 3
        M_vh_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=vec(cst(0.), cst(0.), cst(0.)) );
#else
        M_vh_elt.on( _range=boundaryfaces( M_Qh3->mesh() ), _expr=vec(cst(0.), cst(0.)) );
#endif
        *C = M_vh_elt;
        C->close();
#endif

        // Accumulate the three contributions into A.
        //if(M_g != 1.0)
        A->add(*C);
        A->add(*B);

        C->close();
        B->close();
        A->close();
        toc("assemble preconditioner AS",FLAGS_v>0);
        *M_uout = *A; // 15 : w = A + B + C
    }
    else if( this->type() == SIMPLE )
    {
        SimpleOp->applyInverse(X, Y);
        *M_uout = Y;
    }
    else
    {
        // Any other type: pass the input through unchanged.
        Y=U;
        *M_uout = Y;
    }
    M_uout->close();

    // Copy the stored result back to the caller's vector (timed separately).
    tic();
    Y=*M_uout;
    Y.close();
    toc("PreconditionerAS::applyInverse", FLAGS_v>0 );

    return 0;
}
/**
 * Builds quads from the stars in a star kd-tree and writes them, together
 * with their codes, to the given quad and code files.
 *
 * The healpixes to try are chosen in one of three ways:
 *  - scanoccupied set: every healpix that contains at least one star;
 *  - star tree has no "HEALPIX" header: all 12*Nside^2 healpixes;
 *  - otherwise: the block of small healpixes inside the big healpix named
 *    by the tree's "HEALPIX"/"HPNSIDE" headers.
 * build_quads() is then run `passes` times with reuse limit Nreuses and, if
 * Nloosen is non-zero, again on the retry list for each limit from Nreuses+1
 * up to Nloosen. After each run the new quads are merged into one sorted
 * list, which is written out at the end.
 *
 * @param starkd            star kd-tree to read stars from
 * @param codes, quads      output files; their headers are filled in here
 * @param Nside             healpix resolution (at most HP_MAX_INT_NSIDE)
 * @param scale_min_arcmin  lower quad scale, in arcminutes
 * @param scale_max_arcmin  upper quad scale, in arcminutes
 * @param dimquads          number of stars per quad
 * @param passes            number of build passes
 * @param Nreuses           reuse limit per star for the passes (at most 255)
 * @param Nloosen           upper reuse limit for the loosening rounds; 0 disables them
 * @param id                index id stored in both files when non-zero
 * @param scanoccupied      see above
 * @param sort_data, sort_func, sort_size  stored in the working struct for build_quads()
 * @param args, argc        command line, forwarded to add_headers()
 * @return 0 on success, -1 on error
 */
int hpquads(startree_t* starkd,
            codefile_t* codes,
            quadfile_t* quads,
            int Nside,
            double scale_min_arcmin,
            double scale_max_arcmin,
            int dimquads,
            int passes,
            int Nreuses,
            int Nloosen,
            int id,
            anbool scanoccupied,
            void* sort_data,
            int (*sort_func)(const void*, const void*),
            int sort_size,
            char** args, int argc) {
    hpquads_t myhpquads;
    hpquads_t* me = &myhpquads;

    int i;
    int pass;
    anbool circle = TRUE;
    double radius2;
    il* hptotry;
    int Nhptotry = 0;
    int nquads;
    double hprad;
    double quadscale;
    int skhp, sknside;
    qfits_header* qhdr;
    qfits_header* chdr;
    int N;
    int dimcodes;
    int quadsize;
    int NHP;

    memset(me, 0, sizeof(hpquads_t));

    if (Nside > HP_MAX_INT_NSIDE) {
        ERROR("Error: maximum healpix Nside = %i", HP_MAX_INT_NSIDE);
        return -1;
    }
    if (Nreuses > 255) {
        ERROR("Error, reuse (-r) must be less than 256");
        return -1;
    }

    me->Nside = Nside;
    me->dimquads = dimquads;
    NHP = 12 * Nside * Nside;
    dimcodes = dimquad2dimcode(dimquads);
    quadsize = sizeof(unsigned int) * dimquads;

    logmsg("Nside=%i. Nside^2=%i. Number of healpixes=%i. Healpix side length ~ %g arcmin.\n",
           me->Nside, me->Nside*me->Nside, NHP, healpix_side_length_arcmin(me->Nside));

    me->sort_data = sort_data;
    me->sort_func = sort_func;
    me->sort_size = sort_size;

    tic();
    me->starkd = starkd;
    N = startree_N(me->starkd);
    logmsg("Star tree contains %i objects.\n", N);

    // get the "HEALPIX" header from the skdt...
    skhp = qfits_header_getint(startree_header(me->starkd), "HEALPIX", -1);
    if (skhp == -1) {
        if (!qfits_header_getboolean(startree_header(me->starkd), "ALLSKY", FALSE)) {
            logmsg("Warning: skdt does not contain \"HEALPIX\" header. Code and quad files will not contain this header either.\n");
        }
    }
    // likewise "HPNSIDE"
    sknside = qfits_header_getint(startree_header(me->starkd), "HPNSIDE", 1);

    if (sknside && Nside % sknside) {
        logerr("Error: Nside (-n) must be a multiple of the star kdtree healpixelisation: %i\n", sknside);
        return -1;
    }

    if (!scanoccupied && (N*(skhp == -1 ? 1 : sknside*sknside*12) < NHP)) {
        logmsg("\n\n");
        logmsg("NOTE, your star kdtree is sparse (has only a fraction of the stars expected)\n");
        logmsg(" so you probably will get much faster results by setting the \"-E\" command-line\n");
        logmsg(" flag.\n");
        logmsg("\n\n");
    }

    quads->dimquads = me->dimquads;
    codes->dimcodes = dimcodes;
    quads->healpix = skhp;
    codes->healpix = skhp;
    quads->hpnside = sknside;
    codes->hpnside = sknside;
    if (id) {
        quads->indexid = id;
        codes->indexid = id;
    }

    qhdr = quadfile_get_header(quads);
    chdr = codefile_get_header(codes);

    add_headers(qhdr, args, argc, startree_header(me->starkd), circle, passes);
    add_headers(chdr, args, argc, startree_header(me->starkd), circle, passes);

    if (quadfile_write_header(quads)) {
        ERROR("Couldn't write headers to quad file");
        return -1;
    }
    if (codefile_write_header(codes)) {
        ERROR("Couldn't write headers to code file");
        return -1;
    }

    quads->numstars = codes->numstars = N;
    me->quad_dist2_upper = arcmin2distsq(scale_max_arcmin);
    me->quad_dist2_lower = arcmin2distsq(scale_min_arcmin);
    codes->index_scale_upper = quads->index_scale_upper = distsq2rad(me->quad_dist2_upper);
    codes->index_scale_lower = quads->index_scale_lower = distsq2rad(me->quad_dist2_lower);

    // one reuse counter per star
    me->nuses = calloc(N, sizeof(unsigned char));
    if (N > 0 && !me->nuses) {
        ERROR("Failed to allocate reuse counters for %i stars", N);
        return -1;
    }

    // hprad = sqrt(2) * (healpix side length / 2.)
    hprad = arcmin2dist(healpix_side_length_arcmin(Nside)) * M_SQRT1_2;
    quadscale = 0.5 * sqrt(me->quad_dist2_upper);
    // 1.01 for a bit of safety.  we'll look at a few extra stars.
    radius2 = square(1.01 * (hprad + quadscale));
    me->radius2 = radius2;

    logmsg("Healpix radius %g arcsec, quad scale %g arcsec, total %g arcsec\n",
           distsq2arcsec(hprad*hprad),
           distsq2arcsec(quadscale*quadscale),
           distsq2arcsec(radius2));

    hptotry = il_new(1024);

    if (scanoccupied) {
        logmsg("Scanning %i input stars...\n", N);
        for (i=0; i<N; i++) {
            double xyz[3];
            int j;
            if (startree_get(me->starkd, i, xyz)) {
                ERROR("Failed to get star %i", i);
                // release what has been allocated so far
                il_free(hptotry);
                free(me->nuses);
                return -1;
            }
            j = xyzarrtohealpix(xyz, Nside);
            il_insert_unique_ascending(hptotry, j);
            if (log_get_level() > LOG_VERB) {
                double ra,dec;
                if (startree_get_radec(me->starkd, i, &ra, &dec)) {
                    ERROR("Failed to get RA,Dec for star %i\n", i);
                    il_free(hptotry);
                    free(me->nuses);
                    return -1;
                }
                logdebug("star %i: RA,Dec %g,%g; xyz %g,%g,%g; hp %i\n",
                         i, ra, dec, xyz[0], xyz[1], xyz[2], j);
            }
        }
        logmsg("Will check %zu healpixes.\n", il_size(hptotry));
        if (log_get_level() > LOG_VERB) {
            logdebug("Checking healpixes: [ ");
            for (i=0; i<il_size(hptotry); i++)
                logdebug("%i ", il_get(hptotry, i));
            logdebug("]\n");
        }

    } else {
        if (skhp == -1) {
            // Try all healpixes.
            il_free(hptotry);
            hptotry = NULL;
            Nhptotry = NHP;
        } else {
            // The star kdtree may itself be healpixed
            int starhp, starx, stary;
            // In that case, the healpixes we are interested in form a rectangle
            // within a big healpix.  These are the coords (in [0, Nside)) of
            // that rectangle.
            int x0, x1, y0, y1;
            int x, y;

            healpix_decompose_xy(skhp, &starhp, &starx, &stary, sknside);
            x0 =  starx    * (Nside / sknside);
            x1 = (starx+1) * (Nside / sknside);
            y0 =  stary    * (Nside / sknside);
            y1 = (stary+1) * (Nside / sknside);

            for (y=y0; y<y1; y++) {
                for (x=x0; x<x1; x++) {
                    int j = healpix_compose_xy(starhp, x, y, Nside);
                    il_append(hptotry, j);
                }
            }
            assert(il_size(hptotry) == (Nside/sknside) * (Nside/sknside));
        }
    }
    if (hptotry)
        Nhptotry = il_size(hptotry);

    me->quadlist = bl_new(65536, quadsize);

    if (Nloosen)
        me->retryhps = il_new(1024);

    for (pass=0; pass<passes; pass++) {
        char key[64];
        int nthispass;

        logmsg("Pass %i of %i.\n", pass+1, passes);
        logmsg("Trying %i healpixes.\n", Nhptotry);

        nthispass = build_quads(me, Nhptotry, hptotry, Nreuses);

        logmsg("Made %i quads (out of %i healpixes) this pass.\n", nthispass, Nhptotry);
        logmsg("Made %i quads so far.\n", (me->bigquadlist ? bt_size(me->bigquadlist) : 0) + (int)bl_size(me->quadlist));

        sprintf(key, "PASS%i", pass+1);
        fits_header_mod_int(chdr, key, nthispass, "quads created in this pass");
        fits_header_mod_int(qhdr, key, nthispass, "quads created in this pass");

        logmsg("Merging quads...\n");
        if (!me->bigquadlist)
            me->bigquadlist = bt_new(quadsize, 256);
        for (i=0; i<bl_size(me->quadlist); i++) {
            void* q = bl_access(me->quadlist, i);
            bt_insert2(me->bigquadlist, q, FALSE, compare_quads, &me->dimquads);
        }
        bl_remove_all(me->quadlist);
    }

    il_free(hptotry);
    hptotry = NULL;

    // With passes <= 0 the loop above never created the merged list, yet it
    // is used unconditionally below; make sure it exists (possibly empty).
    if (!me->bigquadlist)
        me->bigquadlist = bt_new(quadsize, 256);

    if (Nloosen) {
        int R;
        for (R=Nreuses+1; R<=Nloosen; R++) {
            il* trylist;
            int nthispass;

            logmsg("Loosening reuse maximum to %i...\n", R);
            logmsg("Trying %zu healpixes.\n", il_size(me->retryhps));
            if (!il_size(me->retryhps))
                break;

            // build_quads() refills me->retryhps while working through trylist
            trylist = me->retryhps;
            me->retryhps = il_new(1024);
            nthispass = build_quads(me, il_size(trylist), trylist, R);
            logmsg("Made %i quads (out of %zu healpixes) this pass.\n", nthispass, il_size(trylist));
            il_free(trylist);
            for (i=0; i<bl_size(me->quadlist); i++) {
                void* q = bl_access(me->quadlist, i);
                bt_insert2(me->bigquadlist, q, FALSE, compare_quads, &me->dimquads);
            }
            bl_remove_all(me->quadlist);
        }
    }
    if (me->retryhps)
        il_free(me->retryhps);

    kdtree_free_query(me->res);
    me->res = NULL;
    me->inds = NULL;
    me->stars = NULL;
    free(me->nuses);
    me->nuses = NULL;

    logmsg("Writing quads...\n");

    // add the quads from the big-quadlist
    nquads = bt_size(me->bigquadlist);
    for (i=0; i<nquads; i++) {
        unsigned int* q = bt_access(me->bigquadlist, i);
        quad_write(codes, quads, q, me->starkd, me->dimquads, dimcodes);
    }
    // add the quads that were made during the final round.
    for (i=0; i<bl_size(me->quadlist); i++) {
        unsigned int* q = bl_access(me->quadlist, i);
        quad_write(codes, quads, q, me->starkd, me->dimquads, dimcodes);
    }

    // fix output file headers.
    if (quadfile_fix_header(quads)) {
        ERROR("Failed to fix quadfile headers");
        bl_free(me->quadlist);
        bt_free(me->bigquadlist);
        return -1;
    }
    if (codefile_fix_header(codes)) {
        ERROR("Failed to fix codefile headers");
        bl_free(me->quadlist);
        bt_free(me->bigquadlist);
        return -1;
    }

    bl_free(me->quadlist);
    bt_free(me->bigquadlist);

    toc();
    logmsg("Done.\n");
    return 0;
}
int main(int argc, char **argv) { tic(); char *conf_file = NULL; socket_fd = -1; for (int i=1; i<argc; i++) { if (!strcmp(argv[i], "-c")) { if (argc <= i) { print_usage(); } conf_file = argv[++i]; } else if (!strcmp(argv[i], "-fd")) { if (argc <= i) { print_usage(); } socket_fd = atoi(argv[++i]); } else { printf(" >> unknown option: %s\n", argv[i]); } } if (!conf_file) conf_file = "cbot.conf"; load_config(conf_file); // Set rand seed srand(time(NULL)); // Set up cURL curl_global_init(CURL_GLOBAL_ALL); // Set up db connection for logging if (config->enabled_modules & MODULE_LOG) { log_init(); } // Parse markov corpus if (config->enabled_modules & MODULE_MARKOV) { markov_init(config->markovcorpus); } irc_init(); if (socket_fd == -1) { printf(" - Connecting to %s:%s with nick %s, joining channels...\n", config->host, config->port, config->nick); net_connect(); } else { // In-place upgrade yo printf(" >> Already connected, upgraded in-place!\n"); join_channels(); } struct recv_data *irc = malloc(sizeof(struct recv_data)); patterns = malloc(sizeof(*patterns)); compile_patterns(patterns); // Select param fd_set socket_set; FD_ZERO(&socket_set); FD_SET(STDIN_FILENO, &socket_set); FD_SET(socket_fd, &socket_set); int recv_size; char buffer[BUFFER_SIZE]; char input[BUFFER_SIZE]; memset(buffer, 0, BUFFER_SIZE); size_t buffer_length = 0; while (1) { int ret = select(socket_fd+1, &socket_set, 0, 0, 0); if (ret == -1) { printf(" >> Disconnected, reconnecting...\n"); close(socket_fd); net_connect(); } if (FD_ISSET(STDIN_FILENO, &socket_set)) { if (fgets(input, BUFFER_SIZE, stdin) == NULL) { printf(" >> Error while reading from stdin!\n"); continue; } if (strcmp(input, "quit\n") == 0) { printf(" >> Bye!\n"); break; } else if (strcmp(input, "reload\n") == 0) { terminate(); free(irc); free_patterns(patterns); free(patterns); // Set up arguments char * arguments[6]; arguments[0] = argv[0]; arguments[1] = "-c"; arguments[2] = conf_file; arguments[3] = "-fd"; char 
fdstring[snprintf(NULL, 0, "%d", socket_fd)]; sprintf(fdstring, "%d", socket_fd); arguments[4] = fdstring; arguments[5] = NULL; printf(" >> Upgrading...\n"); execvp(argv[0], arguments); printf(" !!! Execvp failing, giving up...\n"); exit(-1); } else if (strncmp(input, "say ", 4) == 0) { int offsets[30]; int offsetcount = pcre_exec(patterns->command_say, 0, input, strlen(input), 0, 0, offsets, 30); if (offsetcount > 0) { char channel[BUFFER_SIZE]; char message[BUFFER_SIZE]; pcre_copy_substring(input, offsets, offsetcount, 1, channel, BUFFER_SIZE); pcre_copy_substring(input, offsets, offsetcount, 2, message, BUFFER_SIZE); char sendbuf[strlen("PRIVMSG : ") + strlen(channel) + strlen(message)]; sprintf(sendbuf, "PRIVMSG %s :%s\n", channel, message); irc_send_str(sendbuf); } } else if (strncmp(input, "kick ", 5) == 0) { int offsets[30]; int offsetcount = pcre_exec(patterns->command_kick, 0, input, strlen(input), 0, 0, offsets, 30); if (offsetcount > 0) { char channel[BUFFER_SIZE]; char user[BUFFER_SIZE]; pcre_copy_substring(input, offsets, offsetcount, 1, channel, BUFFER_SIZE); pcre_copy_substring(input, offsets, offsetcount, 2, user, BUFFER_SIZE); char sendbuf[strlen("KICK :Gene police! You! Out of the pool, now!\n") + strlen(channel) + strlen(user)]; sprintf(sendbuf, "KICK %s %s :Gene police! You! Out of the pool, now!\n", channel, user); irc_send_str(sendbuf); } } else { printf(" >> Unrecognized command. Try 'quit'\n"); } FD_SET(socket_fd, &socket_set); } else { if (buffer_length >= BUFFER_SIZE - 1) { printf(" >> what the f**k, IRCd, a line longer than 4k? 
dropping some buffer\n"); memset(buffer, 0, BUFFER_SIZE); buffer_length = 0; continue; } recv_size = recv(socket_fd, buffer + buffer_length, BUFFER_SIZE - buffer_length - 1, 0); buffer_length += recv_size; buffer[buffer_length] = '\0'; if (recv_size == 0) { printf(" >> recv_size is 0, assuming closed remote socket, reconnecting\n"); close(socket_fd); printf("closed\n"); net_connect(); printf("reconnected\n"); } char *newlinepos = 0; char *bufbegin = buffer; while ((newlinepos = strchr(bufbegin, '\n'))) { *newlinepos = 0; printf(" ~ %s\n", bufbegin); // Only handle privmsg if (irc_parse_input(bufbegin, irc, patterns)) { irc_handle_input(irc, patterns); } bufbegin = newlinepos + 1; } size_t bytes_removed = bufbegin - buffer; memmove(buffer, bufbegin, buffer_length - bytes_removed); buffer_length -= bytes_removed; memset(buffer + buffer_length, 0, BUFFER_SIZE - buffer_length); FD_SET(STDIN_FILENO, &socket_set); } } printf(" >> Socket closed, quitting...\n"); close(socket_fd); free(irc); free_patterns(patterns); free(patterns); terminate(); return 0; }
void run_benchmark( void *vargs, cl_context& context, cl_command_queue& commands, cl_program& program, cl_kernel& kernel ) { struct bench_args_t *args = (struct bench_args_t *)vargs; int num_jobs = 1 << 16; char* seqA_batch = (char *)malloc(sizeof(args->seqA) * num_jobs); char* seqB_batch = (char *)malloc(sizeof(args->seqB) * num_jobs); char* alignedA_batch = (char *)malloc(sizeof(args->alignedA) * num_jobs); char* alignedB_batch = (char *)malloc(sizeof(args->alignedB) * num_jobs); int i; for (i=0; i<num_jobs; i++) { memcpy(seqA_batch + i*sizeof(args->seqA), args->seqA, sizeof(args->seqA)); memcpy(seqB_batch + i*sizeof(args->seqB), args->seqB, sizeof(args->seqB)); memcpy(alignedA_batch + i*sizeof(args->alignedA), args->alignedA, sizeof(args->alignedA)); memcpy(alignedB_batch + i*sizeof(args->alignedB), args->alignedB, sizeof(args->alignedB)); } // 0th: initialize the timer at the beginning of the program timespec timer = tic(); // Create device buffers // cl_mem seqA_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(args->seqA)*num_jobs, NULL, NULL); cl_mem seqB_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(args->seqB)*num_jobs, NULL, NULL); cl_mem alignedA_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(args->alignedA)*num_jobs, NULL, NULL); cl_mem alignedB_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(args->alignedB)*num_jobs, NULL, NULL); if (!seqA_buffer || !seqB_buffer || !alignedA_buffer || !alignedB_buffer) { printf("Error: Failed to allocate device memory!\n"); printf("Test failed\n"); exit(1); } // 1st: time of buffer allocation toc(&timer, "buffer allocation"); // Write our data set into device buffers // int err; err = clEnqueueWriteBuffer(commands, seqA_buffer, CL_TRUE, 0, sizeof(args->seqA)*num_jobs, seqA_batch, 0, NULL, NULL); err |= clEnqueueWriteBuffer(commands, seqB_buffer, CL_TRUE, 0, sizeof(args->seqB)*num_jobs, seqB_batch, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to write to 
device memory!\n"); printf("Test failed\n"); exit(1); } // 2nd: time of pageable-pinned memory copy toc(&timer, "memory copy"); // Set the arguments to our compute kernel // err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &seqA_buffer); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &seqB_buffer); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &alignedA_buffer); err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &alignedB_buffer); err |= clSetKernelArg(kernel, 4, sizeof(int), &num_jobs); if (err != CL_SUCCESS) { printf("Error: Failed to set kernel arguments! %d\n", err); printf("Test failed\n"); exit(1); } // 3rd: time of setting arguments toc(&timer, "set arguments"); // Execute the kernel over the entire range of our 1d input data set // using the maximum number of work group items for this device // #ifdef C_KERNEL err = clEnqueueTask(commands, kernel, 0, NULL, NULL); #else printf("Error: OpenCL kernel is not currently supported!\n"); exit(1); #endif if (err) { printf("Error: Failed to execute kernel! %d\n", err); printf("Test failed\n"); exit(1); } // 4th: time of kernel execution clFinish(commands); toc(&timer, "kernel execution"); // Read back the results from the device to verify the output // err = clEnqueueReadBuffer( commands, alignedA_buffer, CL_TRUE, 0, sizeof(args->alignedA)*num_jobs, alignedA_batch, 0, NULL, NULL ); err |= clEnqueueReadBuffer( commands, alignedB_buffer, CL_TRUE, 0, sizeof(args->alignedB)*num_jobs, alignedB_batch, 0, NULL, NULL ); if (err != CL_SUCCESS) { printf("Error: Failed to read output array! 
%d\n", err); printf("Test failed\n"); exit(1); } // 5th: time of data retrieving (PCIe + memcpy) toc(&timer, "data retrieving"); // memcpy(args->alignedA, alignedA_batch, sizeof(args->alignedA)); // memcpy(args->alignedB, alignedB_batch, sizeof(args->alignedB)); for (i=0; i<sizeof(args->alignedA); i++) { args->alignedA[i] = 'a'; } for (i=0; i<sizeof(args->alignedB); i++) { args->alignedB[i] = 'b'; } free(seqA_batch); free(seqB_batch); free(alignedA_batch); free(alignedB_batch); }
/*
 * Benchmark driver for the `divergence` stencil.
 *
 * Usage: <prog> <x_max> <y_max> <z_max> [<# of iterations>]
 *
 * Allocates two identical sets of grids, times `divergence` on the first set
 * and `divergence_cpu` on the second set, then compares the two output grids
 * element by element with an absolute tolerance of 0.001.
 *
 * Exits with -1 on bad arguments, 1 on allocation failure or when more than
 * 30 mismatches are found; returns EXIT_SUCCESS otherwise.
 */
int main (int argc, char** argv)
{
	int i;
	int iterations = 100;

	// prepare grids
	// declare_grids -->
	float * u_0_0_out;
	float * u_0_0;
	float * ux_1_0;
	float * uy_2_0;
	float * uz_3_0;
	float * u_0_0_out_cpu;
	float * u_0_0_cpu;
	float * ux_1_0_cpu;
	float * uy_2_0_cpu;
	float * uz_3_0_cpu;

	if ((argc<4))
	{
		printf("Wrong number of parameters. Syntax:\n%s <x_max> <y_max> <z_max> <# of iterations>\n", argv[0]);
		exit(-1);
	}

	int x_max = atoi(argv[1]);
	int y_max = atoi(argv[2]);
	int z_max = atoi(argv[3]);
	if(argc==5) iterations = atoi(argv[4]);
	// <--

	// atoi yields 0 for non-numeric input; a non-positive extent would give
	// zero-sized (or negative-sized) allocations below.
	if (x_max<=0 || y_max<=0 || z_max<=0)
	{
		printf("Wrong number of parameters. Syntax:\n%s <x_max> <y_max> <z_max> <# of iterations>\n", argv[0]);
		exit(-1);
	}

	// Element counts are computed in size_t so that large grids do not
	// overflow int arithmetic before the multiplication by sizeof(float).
	const size_t nCells   = (size_t)x_max * (size_t)y_max * (size_t)z_max;
	const size_t nCellsUx = (size_t)(x_max+2) * (size_t)y_max * (size_t)z_max;
	const size_t nCellsUy = (size_t)x_max * (size_t)(y_max+2) * (size_t)z_max;
	const size_t nCellsUz = (size_t)x_max * (size_t)y_max * (size_t)(z_max+2);

	// allocate_grids -->
	u_0_0=((float * )malloc(nCells*sizeof (float)));
	ux_1_0=((float * )malloc(nCellsUx*sizeof (float)));
	uy_2_0=((float * )malloc(nCellsUy*sizeof (float)));
	uz_3_0=((float * )malloc(nCellsUz*sizeof (float)));
	u_0_0_cpu=((float * )malloc(nCells*sizeof (float)));
	ux_1_0_cpu=((float * )malloc(nCellsUx*sizeof (float)));
	uy_2_0_cpu=((float * )malloc(nCellsUy*sizeof (float)));
	uz_3_0_cpu=((float * )malloc(nCellsUz*sizeof (float)));
	// <--

	if (!u_0_0 || !ux_1_0 || !uy_2_0 || !uz_3_0 ||
	    !u_0_0_cpu || !ux_1_0_cpu || !uy_2_0_cpu || !uz_3_0_cpu)
	{
		printf("Failed to allocate the grids\n");
		exit(1);
	}

	// initialize
	// initialize_grids -->
	initialize(u_0_0, ux_1_0, uy_2_0, uz_3_0, 0.1, 0.2, 0.30000000000000004, x_max, y_max, z_max);
	initialize(u_0_0_cpu, ux_1_0_cpu, uy_2_0_cpu, uz_3_0_cpu, 0.1, 0.2, 0.30000000000000004, x_max, y_max, z_max);
	// <--

	long nFlopsPerStencil = 8;
	long nGridPointsCount = (long)iterations * (long)nCells;
	long nBytesTransferred = (long)iterations * (long)((nCellsUx + nCellsUy + nCellsUz + nCells) * sizeof (float));

	/* *************************** PGI GPU-acc benchmark ********************* */
	// warm up
	{
		// compute_stencil -->
		divergence(( & u_0_0_out), u_0_0, ux_1_0, uy_2_0, uz_3_0, 0.4, 0.5, 0.6, x_max, y_max, z_max,iterations);
		// <--
	}

	// run the benchmark
	tic ();
	{
		// compute_stencil -->
		divergence(( & u_0_0_out), u_0_0, ux_1_0, uy_2_0, uz_3_0, 0.7, 0.7999999999999999, 0.8999999999999999, x_max, y_max, z_max,iterations);
		// <--
	}
	toc (nFlopsPerStencil, nGridPointsCount, nBytesTransferred);
	/* *************************** ******************** ********************* */

	/* *************************** Naive CPU Comparison ********************* */
	// warm up cpu comparison
	// NOTE(review): the warm-up runs `divergence`, not `divergence_cpu`, on the
	// CPU grid set. Left unchanged because it may be deliberate (same call
	// sequence as the run above) -- confirm.
	{
		// compute_stencil -->
		divergence(( & u_0_0_out_cpu), u_0_0_cpu, ux_1_0_cpu, uy_2_0_cpu, uz_3_0_cpu, 0.4, 0.5, 0.6, x_max, y_max, z_max,iterations);
		// <--
	}

	// run the benchmark
	tic ();
	{
		// compute_stencil -->
		divergence_cpu(( & u_0_0_out_cpu), u_0_0_cpu, ux_1_0_cpu, uy_2_0_cpu, uz_3_0_cpu, 0.7, 0.7999999999999999, 0.8999999999999999, x_max, y_max, z_max,iterations);
		// <--
	}
	toc (nFlopsPerStencil, nGridPointsCount, nBytesTransferred);

	// checking "correctness" (assuming cpu version is correct)
	int error_count=0;
	int halo = 0;
	int x,y,z;
	// Each loop runs over its own extent (the previous bounds mixed up
	// x_max/y_max/z_max). z is outermost so that i increases monotonically.
	for(z=0;z<z_max;z++)
	{
		for(y=0;y<y_max;y++)
		{
			for(x=0;x<x_max;x++)
			{
				i = x + (x_max+halo)*y + (x_max+halo)*(y_max+halo)*z;
				if(fabs(u_0_0_out[i] - u_0_0_out_cpu[i])>0.001)
				{
					error_count++;
					printf("%dth error encountered at u[%d]: |%f-%f|=%5.16f\n",error_count,i,u_0_0_out[i],u_0_0_out_cpu[i],fabs(u_0_0_out[i] - u_0_0_out_cpu[i]));
					if(error_count>30)
					{
						printf("too many errors\n");
						printf("print some solutions\n");
						// Dump at most the first 100 values, but never more
						// than the grid actually holds.
						int k;
						int dump_count = (nCells < 100) ? (int)nCells : 100;
						for(k=0;k<dump_count;k++)
						{
							printf("u_pgi[%d]=%2.2f ?? u_cpu[%d]=%2.2f\n",k,u_0_0_out[k],k,u_0_0_out_cpu[k]);
						}
						exit(1);
					}
				}
			}
		}
	}
	if(error_count==0)
	{
		printf("Error Check Successful. No errors encountered.\n");
	}

	// free memory
	// deallocate_grids -->
	// The *_out pointers are set by divergence()/divergence_cpu(); who owns
	// them is not visible here, so they are intentionally not freed.
	free(u_0_0);
	free(ux_1_0);
	free(uy_2_0);
	free(uz_3_0);
	free(u_0_0_cpu);
	free(ux_1_0_cpu);
	free(uy_2_0_cpu);
	free(uz_3_0_cpu);
	// <--

	return EXIT_SUCCESS;
}
int main(int argc, char **argv) { struct timespec timer_1, timer_2; hsa_status_t err; err = hsa_init(); check(Initializing the hsa runtime, err); /* * Iterate over the agents and pick the gpu agent using * the get_gpu_agent callback. */ hsa_agent_t agent; err = hsa_iterate_agents(get_gpu_agent, &agent); if(err == HSA_STATUS_INFO_BREAK) { err = HSA_STATUS_SUCCESS; } check(Getting a gpu agent, err); /* * Query the name of the agent. */ char name[64] = { 0 }; err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name); check(Querying the agent name, err); printf("The agent name is %s.\n", name); /* * Query the maximum size of the queue. */ uint32_t queue_size = 0; err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size); check(Querying the agent maximum queue size, err); printf("The maximum queue size is %u.\n", (unsigned int) queue_size); /* * Create a queue using the maximum size. */ hsa_queue_t* queue; err = hsa_queue_create(agent, queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); check(Creating the queue, err); /* * Load the BRIG binary. */ hsa_ext_module_t module; load_module_from_file("vector_copy.brig",&module); /* * Create hsa program. */ hsa_ext_program_t program; memset(&program,0,sizeof(hsa_ext_program_t)); err = hsa_ext_program_create(HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, &program); check(Create the program, err); /* * Add the BRIG module to hsa program. */ err = hsa_ext_program_add_module(program, module); check(Adding the brig module to the program, err); /* * Determine the agents ISA. */ hsa_isa_t isa; err = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa); check(Query the agents isa, err); /* * Finalize the program and extract the code object. 
*/ hsa_ext_control_directives_t control_directives; memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); hsa_code_object_t code_object; err = hsa_ext_program_finalize(program, isa, 0, control_directives, "-O0", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object); check(Finalizing the program, err); /* * Destroy the program, it is no longer needed. */ err=hsa_ext_program_destroy(program); check(Destroying the program, err); /* * Create the empty executable. */ hsa_executable_t executable; err = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, "", &executable); check(Create the executable, err); /* * Load the code<F3> object. */ err = hsa_executable_load_code_object(executable, agent, code_object, ""); check(Loading the code object, err); /* * Freeze the executable; it can now be queried for symbols. */ err = hsa_executable_freeze(executable, ""); check(Freeze the executable, err); /* * Extract the symbol from the executable. */ hsa_executable_symbol_t symbol; err = hsa_executable_get_symbol(executable, "", "&__OpenCL_vector_copy_kernel", agent, 0, &symbol); check(Extract the symbol from the executable, err); /* * Extract dispatch information from the symbol */ uint64_t kernel_object; uint32_t kernarg_segment_size; uint32_t group_segment_size; uint32_t private_segment_size; err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel_object); check(Extracting the symbol from the executable, err); err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kernarg_segment_size); check(Extracting the kernarg segment size from the executable, err); err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &group_segment_size); check(Extracting the group segment size from the executable, err); err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &private_segment_size); 
check(Extracting the private segment from the executable, err); /* * Create a signal to wait for the dispatch to finish. */ hsa_signal_t signal; err=hsa_signal_create(1, 0, NULL, &signal); check(Creating a HSA signal, err); /* * Allocate and initialize the kernel arguments and data. */ int* in=(int*)malloc(SIZE); int i; for(i=0;i<ELEMENT;i++) in[i]=(rand()%50000+1); err=hsa_memory_register(in, SIZE); check(Registering argument memory for input parameter, err); int* out=(int*)malloc(SIZE); memset(out, 0, SIZE); err=hsa_memory_register(out, SIZE); check(Registering argument memory for output parameter, err); int element = ELEMENT; int iter = ITER; struct __attribute__ ((aligned(16))) args_t { uint64_t global_offset_0; uint64_t global_offset_1; uint64_t global_offset_2; uint64_t printf_buffer; uint64_t vqueue_pointer; uint64_t aqlwrap_pointer; void* in; void* out; int iter; int element; } args; memset(&args, 0, sizeof(args)); args.in=in; args.out=out; args.element=element; args.iter=iter; /* * Find a memory region that supports kernel arguments. */ hsa_region_t kernarg_region; kernarg_region.handle=(uint64_t)-1; hsa_agent_iterate_regions(agent, get_kernarg_memory_region, &kernarg_region); err = (kernarg_region.handle == (uint64_t)-1) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS; check(Finding a kernarg memory region, err); void* kernarg_address = NULL; /* * Allocate the kernel argument buffer from the correct region. */ err = hsa_memory_allocate(kernarg_region, kernarg_segment_size, &kernarg_address); check(Allocating kernel argument memory buffer, err); memcpy(kernarg_address, &args, sizeof(args)); /* * Obtain the current queue write index. */ uint64_t index = hsa_queue_load_write_index_relaxed(queue); /* * Write the aql packet at the calculated queue index address. 
*/ const uint32_t queueMask = queue->size - 1; hsa_kernel_dispatch_packet_t* dispatch_packet = &(((hsa_kernel_dispatch_packet_t*)(queue->base_address))[index&queueMask]); dispatch_packet->header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; dispatch_packet->header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; dispatch_packet->setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; dispatch_packet->workgroup_size_x = (uint16_t)LOCAL_SIZE; dispatch_packet->workgroup_size_y = (uint16_t)1; dispatch_packet->workgroup_size_z = (uint16_t)1; dispatch_packet->grid_size_x = (uint32_t) (GLOBAL_SIZE); dispatch_packet->grid_size_y = 1; dispatch_packet->grid_size_z = 1; dispatch_packet->completion_signal = signal; dispatch_packet->kernel_object = kernel_object; dispatch_packet->kernarg_address = (void*) kernarg_address; dispatch_packet->private_segment_size = private_segment_size; dispatch_packet->group_segment_size = group_segment_size; __atomic_store_n((uint8_t*)(&dispatch_packet->header), (uint8_t)HSA_PACKET_TYPE_KERNEL_DISPATCH, __ATOMIC_RELEASE); /* * Increment the write index and ring the doorbell to dispatch the kernel. */ tic(&timer_1); hsa_queue_store_write_index_relaxed(queue, index+1); hsa_signal_store_relaxed(queue->doorbell_signal, index); check(Dispatching the kernel, err); /* * Wait on the dispatch completion signal until the kernel is finished. */ hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); toc("Execution Period", &timer_1, &timer_2); /* * Validate the data in the output buffer. */ int temp = 0; for(i=0;i<element;i++) { if(temp<in[i]) temp = in[i]; } if(temp==out[GLOBAL_SIZE]) printf("PASS \n"); else printf("FAIL out=%d in=%d \n",out[GLOBAL_SIZE],temp); /* * Cleanup all allocated resources. 
*/ err=hsa_signal_destroy(signal); check(Destroying the signal, err); err=hsa_executable_destroy(executable); check(Destroying the executable, err); err=hsa_code_object_destroy(code_object); check(Destroying the code object, err); err=hsa_queue_destroy(queue); check(Destroying the queue, err); err=hsa_shut_down(); check(Shutting down the runtime, err); free(in); free(out); //printf("kernarg_segment_size:%d group_segment_size:%d private_segment_size:%d",kernarg_segment_size,group_segment_size,private_segment_size); return 0; }
int main(int argc, char **argv) { u16 (*bayer)[WAMI_DEBAYER_IMG_NUM_COLS] = NULL; rgb_pixel (*debayer)[WAMI_DEBAYER_IMG_NUM_COLS-2*PAD] = NULL; char *input_directory = NULL; #ifdef ENABLE_CORRECTNESS_CHECKING rgb_pixel (*gold_debayer)[WAMI_DEBAYER_IMG_NUM_COLS-2*PAD] = NULL; #endif const size_t num_bayer_pixels = WAMI_DEBAYER_IMG_NUM_ROWS * WAMI_DEBAYER_IMG_NUM_COLS; const size_t num_debayer_pixels = (WAMI_DEBAYER_IMG_NUM_ROWS-2*PAD) * (WAMI_DEBAYER_IMG_NUM_COLS-2*PAD); if (argc != 2) { fprintf(stderr, "%s <directory-containing-input-files>\n", argv[0]); exit(EXIT_FAILURE); } input_directory = argv[1]; bayer = XMALLOC(sizeof(u16) * num_bayer_pixels); debayer = XMALLOC(sizeof(rgb_pixel) * num_debayer_pixels); #ifdef ENABLE_CORRECTNESS_CHECKING gold_debayer = XMALLOC(sizeof(rgb_pixel) * num_debayer_pixels); #endif read_image_file( (char *) bayer, input_filename, input_directory, sizeof(u16) * num_bayer_pixels); memset(debayer, 0, sizeof(u16) * num_debayer_pixels); printf("WAMI kernel 1 parameters:\n\n"); printf("Input image width = %u pixels\n", WAMI_DEBAYER_IMG_NUM_COLS); printf("Input image height = %u pixels\n", WAMI_DEBAYER_IMG_NUM_ROWS); printf("Output image width = %u pixels\n", WAMI_DEBAYER_IMG_NUM_COLS-2*PAD); printf("Output image height = %u pixels\n", WAMI_DEBAYER_IMG_NUM_ROWS-2*PAD); printf("\nStarting WAMI kernel 1 (debayer).\n"); tic(); accept_roi_begin(); wami_debayer( debayer, bayer); accept_roi_end(); PRINT_STAT_DOUBLE("CPU time using func toc - ", toc()); #ifdef ENABLE_CORRECTNESS_CHECKING read_image_file( (char *) gold_debayer, golden_output_filename, input_directory, sizeof(rgb_pixel) * num_debayer_pixels); /* * An exact match is expected for the debayer kernel, so we check * each pixel individually and report either the first failure or * a success message. 
*/ { /* // original error metric int r, c, success = 1; for (r = 0; success && r < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++r) { for (c = 0; c < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++c) { if (ENDORSE(debayer[r][c].r != gold_debayer[r][c].r)) { printf("Validation error: red pixel mismatch at row=%d, col=%d : " "test value = %u, golden value = %u\n\n", r, c, debayer[r][c].r, gold_debayer[r][c].r); success = 0; break; } if (ENDORSE(debayer[r][c].g != gold_debayer[r][c].g)) { printf("Validation error: green pixel mismatch at row=%d, col=%d : " "test value = %u, golden value = %u\n\n", r, c, debayer[r][c].g, gold_debayer[r][c].g); success = 0; break; } if (ENDORSE(debayer[r][c].b != gold_debayer[r][c].b)) { printf("Validation error: blue pixel mismatch at row=%d, col=%d : " "test value = %u, golden value = %u\n\n", r, c, debayer[r][c].b, gold_debayer[r][c].b); success = 0; break; } } } if (success) { printf("\nValidation checks passed -- the test output matches the golden output.\n\n"); } */ // new error metric int r, c; double err; for (r = 0; r < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++r) { for (c = 0; c < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++c) { double pixel_error = 0.0; pixel_error += ENDORSE(((double) abs(debayer[r][c].r - gold_debayer[r][c].r)) / ((double) 65535)); pixel_error += ENDORSE(((double) abs(debayer[r][c].g - gold_debayer[r][c].g)) / ((double) 65535)); pixel_error += ENDORSE(((double) abs(debayer[r][c].b - gold_debayer[r][c].b)) / ((double) 65535)); err += (pixel_error / ((double) 3)) / ((double) ((WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD) * (WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD))); } } FILE *fp = fopen("err.txt", "wb"); assert(fp != NULL); fprintf(fp, "%.2f\n", err); fclose(fp); } #endif #ifdef WRITE_OUTPUT_TO_DISK printf("Writing output to %s/%s.\n", output_directory, output_filename); { const u16 output_channels = 3; write_image_file( (char *) debayer, output_filename, output_directory, WAMI_DEBAYER_IMG_NUM_COLS - 2*PAD, WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD, 
output_channels); } #endif FREE_AND_NULL(bayer); FREE_AND_NULL(debayer); #ifdef ENABLE_CORRECTNESS_CHECKING FREE_AND_NULL(gold_debayer); #endif return 0; }
/**
 * Builds the block preconditioner.
 *
 * The constructor stores the matrix \p AA, extracts its (0,0) sub-block into
 * M_11, assembles the mass matrix on M_Vh into M_mass and the
 * M_er * (grad, grad) matrix on M_Qh into M_L. The "Dirichlet" conditions the
 * model declares for "u" and "phi" are applied to those two matrices with
 * symmetric elimination. The whole setup is timed with tic()/toc().
 *
 * @param Xh    product space; sub-space 0 is used for M_Vh, sub-space 1 for M_Qh
 * @param model model properties, queried for the boundary conditions
 * @param p     prefix; also used to derive the ".11" and ".22" sub-prefixes
 * @param AA    the matrix handed to setMatrix() and from which M_11 is extracted
 * @param relax value stored in M_relax
 */
PreconditionerBlockMS<space_type>::PreconditionerBlockMS(space_ptrtype Xh,
                                                         ModelProperties model,
                                                         std::string const& p,
                                                         sparse_matrix_ptrtype AA,
                                                         value_type relax )
    :
    M_backend(backend()),
    M_Xh( Xh ),
    M_Vh( Xh->template functionSpace<0>() ),
    M_Qh( Xh->template functionSpace<1>() ),
    M_Vh_indices( M_Vh->nLocalDofWithGhost() ),
    M_Qh_indices( M_Qh->nLocalDofWithGhost() ),
    M_uin( M_backend->newVector( M_Vh ) ),
    M_uout( M_backend->newVector( M_Vh ) ),
    M_pin( M_backend->newVector( M_Qh ) ),
    M_pout( M_backend->newVector( M_Qh ) ),
    U( M_Xh, "U" ),
    M_mass(M_backend->newMatrix(M_Vh,M_Vh)),
    M_L(M_backend->newMatrix(M_Qh,M_Qh)),
    M_er( 1. ),
    M_model( model ),
    M_prefix( p ),
    M_prefix_11( p+".11" ),
    M_prefix_22( p+".22" ),
    u(M_Vh, "u"),
    ozz(M_Vh, "ozz"),
    zoz(M_Vh, "zoz"),
    zzo(M_Vh, "zzo"),
    M_ozz(M_backend->newVector( M_Vh )),
    M_zoz(M_backend->newVector( M_Vh )),
    M_zzo(M_backend->newVector( M_Vh )),
    X(M_Qh, "X"),
    Y(M_Qh, "Y"),
    Z(M_Qh, "Z"),
    M_X(M_backend->newVector( M_Qh )),
    M_Y(M_backend->newVector( M_Qh )),
    M_Z(M_backend->newVector( M_Qh )),
    phi(M_Qh, "phi"),
    M_relax(relax)
{
    tic();
    LOG(INFO) << "[PreconditionerBlockMS] setup starts";
    this->setMatrix( AA );
    this->setName(M_prefix);

    // Index sets used to extract sub-matrices: the M_Vh indices start at 0,
    // the M_Qh indices start right after them.
    std::iota( M_Vh_indices.begin(), M_Vh_indices.end(), 0 );
    std::iota( M_Qh_indices.begin(), M_Qh_indices.end(), M_Vh->nLocalDofWithGhost() );
    M_11 = AA->createSubMatrix( M_Vh_indices, M_Vh_indices, true, true);

    // Dirichlet conditions declared by the model for "u" and for "phi".
    BoundaryConditions bcs = M_model.boundaryConditions();
    map_vector_field<FEELPP_DIM,1,2> dirichletU { bcs.getVectorFields<FEELPP_DIM> ( "u", "Dirichlet" ) };
    map_scalar_field<2> dirichletPhi { bcs.getScalarFields<2> ( "phi", "Dirichlet" ) };

    // First block: mass matrix on M_Vh, assembled into M_mass.
    auto massForm = form2(_test=M_Vh, _trial=M_Vh, _matrix=M_mass);
    auto massRhs = form1(_test=M_Vh);
    massForm = integrate(_range=elements(M_Vh->mesh()), _expr=inner(idt(u),id(u)));
    for(auto const & bc : dirichletU )
    {
        LOG(INFO) << "Applying " << bc.second << " on " << bc.first << " for "<<M_prefix_11<<"\n";
        massForm += on(_range=markedfaces(M_Vh->mesh(),bc.first), _expr=bc.second,_rhs=massRhs, _element=u, _type="elimination_symmetric");
    }

    // Second block: L = er * (grad, grad) on M_Qh, assembled into M_L.
    auto laplaceForm = form2(_test=M_Qh,_trial=M_Qh, _matrix=M_L);
#if 0
    //If you want to manage the relative permittivity materials per material,
    //here is the entry to deal with.
    for(auto it : M_model.materials() )
    {
        laplaceForm += integrate(_range=markedelements(M_Qh->mesh(),marker(it)), _expr=M_er*inner(gradt(phi), grad(phi)));
    }
#else
    laplaceForm += integrate(_range=elements(M_Qh->mesh()), _expr=M_er*inner(gradt(phi), grad(phi)));
#endif
    auto laplaceRhs = form1(_test=M_Qh);
    for(auto const & bc : dirichletPhi)
    {
        LOG(INFO) << "Applying " << bc.second << " on " << bc.first << " for "<<M_prefix_22<<"\n";
        laplaceForm += on(_range=markedfaces(M_Qh->mesh(),bc.first),_element=phi, _expr=bc.second, _rhs=laplaceRhs, _type="elimination_symmetric");
    }

    toc( "[PreconditionerBlockMS] setup done ", FLAGS_v > 0 );
}
int main(int argc, char *argv[]) { if (argc < 4) { fprintf(stderr, "[ERROR] Invalid arguments provided.\n\n"); fprintf(stderr, "Usage: %s [NUMBER OF THREADS] [WORDS] [INPUT FILE]\n\n", argv[0]); exit(0); } /* Timing */ STATS_INIT("kernel", "pthread_porter_stemming"); PRINT_STAT_STRING("abrv", "pthread_stemmer"); NTHREADS = atoi(argv[1]); int WORDS = atoi(argv[2]); PRINT_STAT_INT("threads", NTHREADS); FILE *f = fopen(argv[3], "r"); if (f == 0) { fprintf(stderr, "File %s not found\n", argv[1]); exit(1); } stem_list = (struct stemmer **)sirius_malloc(WORDS * sizeof(struct stemmer *)); int words = load_data(WORDS, stem_list, f); fclose(f); if (words < 0) goto out; PRINT_STAT_INT("words", words); tic(); int start, tids[NTHREADS]; pthread_t threads[NTHREADS]; pthread_attr_t attr; iterations = words / NTHREADS; sirius_pthread_attr_init(&attr); sirius_pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); for (int i = 0; i < NTHREADS; i++) { tids[i] = i; sirius_pthread_create(&threads[i], &attr, stem_thread, (void *)&tids[i]); } for (int i = 0; i < NTHREADS; i++) { sirius_pthread_join(threads[i], NULL); } PRINT_STAT_DOUBLE("pthread_stemmer", toc()); STATS_END(); #ifdef TESTING f = fopen("../input/stem_porter.pthread", "w"); for (int i = 0; i < words; ++i) fprintf(f, "%s\n", stem_list[i]->b); fclose(f); #endif out: sirius_free(s); // free up allocated data for (int i = 0; i < words; i++) { sirius_free(stem_list[i]->b); sirius_free(stem_list[i]); } return 0; }
int main(void) { std::string filePath = "CNN-DocTermCountMatrix.txt"; Matrix& X_Ori = loadMatrix(filePath); int NSample = min(20, X_Ori.getRowDimension()); Matrix& X = X_Ori.getSubMatrix(0, NSample - 1, 0, X_Ori.getColumnDimension() - 1); // disp(X.getSubMatrix(0, 10, 0, 100)); println(sprintf("%d samples loaded", X.getRowDimension())); GraphOptions& options = *new GraphOptions(); options.graphType = "nn"; std::string type = options.graphType; double NN = options.graphParam; fprintf("Graph type: %s with NN: %d\n", type.c_str(), (int)NN); // Parameter setting for text data options.kernelType = "cosine"; options.graphDistanceFunction = "cosine"; // Parameter setting for image data /*options.kernelType = "rbf"; options.graphDistanceFunction = "euclidean";*/ options.graphNormalize = true; options.graphWeightType = "heat"; bool show = true && !false; // Test adjacency function - pass tic(); std::string DISTANCEFUNCTION = options.graphDistanceFunction; Matrix& A = adjacency(X, type, NN, DISTANCEFUNCTION); fprintf("Elapsed time: %.2f seconds.\n", toc()); std::string adjacencyFilePath = "adjacency.txt"; saveMatrix(adjacencyFilePath, A); if (show) disp(A.getSubMatrix(0, 4, 0, 4)); // Test laplacian function - pass tic(); Matrix& L = laplacian(X, type, options); fprintf("Elapsed time: %.2f seconds.\n", toc()); std::string LaplacianFilePath = "Laplacian.txt"; saveMatrix(LaplacianFilePath, L); if (show) disp(L.getSubMatrix(0, 4, 0, 4)); // Test local learning regularization - pass NN = options.graphParam; std::string DISTFUNC = options.graphDistanceFunction; std::string KernelType = options.kernelType; double KernelParam = options.kernelParam; double lambda = 0.001; tic(); Matrix& LLR_text = calcLLR(X, NN, DISTFUNC, KernelType, KernelParam, lambda); fprintf("Elapsed time: %.2f seconds.\n", toc()); std::string LLRFilePath = "localLearningRegularization.txt"; saveMatrix(LLRFilePath, LLR_text); if (show) display(LLR_text.getSubMatrix(0, 4, 0, 4)); return EXIT_SUCCESS; }
int main(int argc, char** argv) { int err; // error code returned from api calls int* a = NULL; // input pointer int* results = NULL; // output pointer unsigned int correct; // number of correct results returned size_t global[2]; // global domain size for our calculation size_t local[2]; // local domain size for our calculation cl_platform_id platform_id; // platform id cl_device_id device_id; // compute device id cl_context context; // compute context cl_command_queue commands; // compute command queue cl_program program; // compute program cl_kernel kernel; // compute kernel char cl_platform_vendor[1001]; char cl_platform_name[1001]; cl_mem input_a; // device memory used for the input array //cl_mem input_b; // device memory used for the input array cl_mem output; // device memory used for the output array int inc; double t_start, t_end; if (argc != 2) { printf("%s <inputfile>\n", argv[0]); return EXIT_FAILURE; } // Connect to first platform // err = clGetPlatformIDs(1,&platform_id,NULL); if (err != CL_SUCCESS) { printf("Error: Failed to find an OpenCL platform!\n"); printf("Test failed\n"); return EXIT_FAILURE; } err = clGetPlatformInfo(platform_id,CL_PLATFORM_VENDOR,1000,(void *)cl_platform_vendor,NULL); if (err != CL_SUCCESS) { printf("Error: clGetPlatformInfo(CL_PLATFORM_VENDOR) failed!\n"); printf("Test failed\n"); return EXIT_FAILURE; } printf("CL_PLATFORM_VENDOR %s\n",cl_platform_vendor); err = clGetPlatformInfo(platform_id,CL_PLATFORM_NAME,1000,(void *)cl_platform_name,NULL); if (err != CL_SUCCESS) { printf("Error: clGetPlatformInfo(CL_PLATFORM_NAME) failed!\n"); printf("Test failed\n"); return EXIT_FAILURE; } printf("CL_PLATFORM_NAME %s\n",cl_platform_name); // Connect to a compute device // int fpga = 0; #if defined (FPGA_DEVICE) fpga = 1; #endif err = clGetDeviceIDs(platform_id, fpga ? 
CL_DEVICE_TYPE_ACCELERATOR : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to create a device group!\n"); printf("Test failed\n"); return EXIT_FAILURE; } // Create a compute context // context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); if (!context) { printf("Error: Failed to create a compute context!\n"); printf("Test failed\n"); return EXIT_FAILURE; } // Create a command commands // commands = clCreateCommandQueue(context, device_id, 0, &err); if (!commands) { printf("Error: Failed to create a command commands!\n"); printf("Error: code %i\n",err); printf("Test failed\n"); return EXIT_FAILURE; } int status; // Create Program Objects // // Load binary from disk unsigned char *kernelbinary; char *xclbin=argv[1]; printf("loading %s\n", xclbin); int n_i = load_file_to_memory(xclbin, (char **) &kernelbinary); if (n_i < 0) { printf("failed to load kernel from xclbin: %s\n", xclbin); printf("Test failed\n"); return EXIT_FAILURE; } else { printf("Succeed to load kernel from xclbin: %s\n", xclbin); } size_t n = n_i; // Create the compute program from offline program = clCreateProgramWithBinary(context, 1, &device_id, &n, (const unsigned char **) &kernelbinary, &status, &err); if ((!program) || (err!=CL_SUCCESS)) { printf("Error: Failed to create compute program from binary %d!\n", err); printf("Test failed\n"); return EXIT_FAILURE; } else { printf("Succeed to create compute program from binary %d!\n", err); } // Build the program executable // err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { size_t len; char buffer[2048]; printf("Error: Failed to build program executable!\n"); clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); printf("%s\n", buffer); printf("Test failed\n"); return EXIT_FAILURE; } else { printf("Succeed to build program executable!\n"); } // Create the compute kernel in the program we wish to run // kernel = 
clCreateKernel(program, "mmult", &err); if (!kernel || err != CL_SUCCESS) { printf("Error: Failed to create compute kernel!\n"); printf("Test failed\n"); return EXIT_FAILURE; } else { printf("Succeed to create compute kernel!\n"); } // Create the input and output arrays in device memory for our calculation // input_a = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int) * DATA_SIZE, NULL, NULL); output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * RESULT_SIZE, NULL, NULL); if (!input_a || !output) { printf("Error: Failed to allocate device memory!\n"); printf("Test failed\n"); return EXIT_FAILURE; } else { printf("Succeed to allocate device memory!\n"); } // set up socket printf("\n************* Welcome to UCLA FPGA agent! **********\n"); struct sockaddr_in stSockAddr; int SocketFD = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); if(-1 == SocketFD) { perror("can not create socket"); exit(EXIT_FAILURE); } memset(&stSockAddr, 0, sizeof(stSockAddr)); stSockAddr.sin_family = AF_INET; stSockAddr.sin_port = htons(7000); stSockAddr.sin_addr.s_addr = htonl(INADDR_ANY); if(-1 == bind(SocketFD,(struct sockaddr *)&stSockAddr, sizeof(stSockAddr))) { perror("error bind failed"); close(SocketFD); exit(EXIT_FAILURE); } if(-1 == listen(SocketFD, 10)) { perror("error listen failed"); close(SocketFD); exit(EXIT_FAILURE); } int taskNum = -1; // polling setting timespec deadline; deadline.tv_sec = 0; deadline.tv_nsec = 100; // Get the start time timespec timer = tic( ); timespec socListenTime = diff(timer, timer); timespec socSendTime = diff(timer, timer); timespec socRecvTime = diff(timer, timer); timespec exeTime = diff(timer, timer); bool broadcastFlag = false; int packet_buf[PACKET_SIZE]; int time_buf[TIME_BUF_SIZE]; while (true) { //printf("\n************* Got a new task! 
*************\n"); timer = tic(); int ConnectFD = accept(SocketFD, NULL, NULL); if (!broadcastFlag) { broadcastFlag = true; timer = tic(); } // For profiling only //struct timeval tv; //gettimeofday(&tv, NULL); //double time_in_mill = (tv.tv_sec) * 1000 + (tv.tv_usec) / 1000 ; // convert tv_sec & tv_usec to millisecond //printf("Receive time (ms): %lf\n", time_in_mill); accTime (&socListenTime, &timer); if(0 > ConnectFD) { perror("error accept failed"); close(SocketFD); exit(EXIT_FAILURE); } read(ConnectFD, &packet_buf, PACKET_SIZE * sizeof(int)); // send FPGA stats back to java application if(packet_buf[0] == -1) { // for profiling use collect_timer_stats(ConnectFD, &socListenTime, &socSendTime, &socRecvTime, &exeTime, &timer); broadcastFlag = false; continue; } char* shm_addr; int shmid = -1; int data_size = -1; // data sent to FPGA (unit: int) shmid = packet_buf[0]; data_size = packet_buf[1]; printf("Shmid: %d, Data size (# of int): %d\n", shmid, data_size); // shared memory if((shm_addr = (char *) shmat(shmid, NULL, 0)) == (char *) -1) { perror("Server: shmat failed."); exit(1); } //else //printf("Server: attach shared memory: %p\n", shm_addr); int done = 0; while(done == 0) { done = (int) *((int*)shm_addr); clock_nanosleep(CLOCK_REALTIME, 0, &deadline, NULL); } //printf("Copy data to the array in the host\n"); a = (int *)(shm_addr + FLAG_NUM * sizeof(int)); results = (int *)(shm_addr + FLAG_NUM * sizeof(int)); accTime (&socSendTime, &timer); taskNum = a[2]; for (int i=0; i<taskNum; i++) { int tmp = *(a+8+i*8+7); assert(tmp >=0 && tmp < TOTAL_TASK_NUMS); } printf("Task Num: %d\n", taskNum); //printf("\nparameter recieved --- \n"); //Write our data set into the input array in device memory //printf("Write data from host to FPGA\n"); err = clEnqueueWriteBuffer(commands, input_a, CL_TRUE, 0, sizeof(int) * data_size, a, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to write to source array a!\n"); printf("Test failed\n"); return EXIT_FAILURE; } // 
Set the arguments to our compute kernel // err = 0; err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_a); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output); err |= clSetKernelArg(kernel, 2, sizeof(int), &taskNum); if (err != CL_SUCCESS) { printf("Error: Failed to set kernel arguments! %d\n", err); printf("Test failed\n"); return EXIT_FAILURE; } // Execute the kernel over the entire range of our 1d input data set // using the maximum number of work group items for this device // //printf("Enqueue Task\n"); err = clEnqueueTask(commands, kernel, 0, NULL, NULL); if (err) { printf("Error: Failed to execute kernel! %d\n", err); printf("Test failed\n"); return EXIT_FAILURE; } // Read back the results from the device to verify the output // cl_event readevent; //printf("Enqueue read buffer\n"); err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(int) * FPGA_RET_PARAM_NUM * taskNum, results, 0, NULL, &readevent ); if (err != CL_SUCCESS) { printf("Error: Failed to read output array! %d\n", err); printf("Test failed\n"); return EXIT_FAILURE; } //printf("Wait for FPGA results\n"); clWaitForEvents(1, &readevent); accTime(&exeTime, &timer); // Get the execution time //toc(&timer); // put data back to shared memory //printf("Put data back to the shared memory\n"); *((int*)(shm_addr + sizeof(int))) = DONE; //printf("\n************* Task finished! 
*************\n"); if (-1 == shutdown(ConnectFD, SHUT_RDWR)) { perror("can not shutdown socket"); close(ConnectFD); close(SocketFD); exit(EXIT_FAILURE); } close(ConnectFD); //printf("done\n"); // free the shared memory shmdt(shm_addr); //shmctl(shmid, IPC_RMID, 0); accTime(&socRecvTime, &timer); printf("**********timing begin**********\n"); printTimeSpec(socListenTime); printTimeSpec(socSendTime); printTimeSpec(socRecvTime); printTimeSpec(exeTime); printf("**********timing end**********\n\n"); } close(SocketFD); // Shutdown and cleanup // clReleaseMemObject(input_a); clReleaseMemObject(output); clReleaseProgram(program); clReleaseKernel(kernel); clReleaseCommandQueue(commands); clReleaseContext(context); return EXIT_SUCCESS; }
int main(int argc, char** argv) { cl_context context = 0; cl_command_queue commandQueue = 0; cl_program program = 0; cl_device_id device = 0; cl_kernel kernel = 0; cl_int status; char filename[] = "../../kernels/VectorUpdate_vec_kernel.cl"; char filename2[] = "../../common/types_kernel.h"; int profiling_info = 0; cl_event myEvent, myEvent2; if( argc != 4 ) { printf("Usage: %s vector_file1 vector_file2 alpha\n", argv[0]); return EXIT_FAILURE; } char xfilename[50]; char yfilename[50]; real alpha; strcpy(xfilename, argv[1]); strcpy(yfilename, argv[2]); alpha = strtod(argv[3], NULL); #ifdef PROFILE cl_ulong startTime, endTime, startTime2, endTime2; cl_ulong kernelExecTimeNs, readFromGpuTime; profiling_info = 1; #endif /* READING DATA FROM FILE */ real *x; real *y; real *ref_x; int N, M, N4; std::ifstream xfile; xfile.open (xfilename, std::ios::in); if (!xfile.is_open()) { printf("Error: cannot open file\n"); return EXIT_FAILURE; } xfile >> N; // it must be N%4 == 0 N4 = ((N & (4-1)) == 0) ? N : N+(4-(N&3)); HANDLE_ALLOC_ERROR(x = (real*)malloc(N4*sizeof(real))); for( int i = 0; i < N; i++) xfile >> x[i]; for(int i = N; i < N4; ++i) x[i] = 0; xfile.close(); // needed for checking result HANDLE_ALLOC_ERROR(ref_x = (real*)malloc(N*sizeof(real))); memcpy(ref_x, x, N*sizeof(real)); std::ifstream yfile; yfile.open (yfilename, std::ios::in); if (!yfile.is_open()) { printf("Error: cannot open file\n"); return EXIT_FAILURE; } yfile >> M; assert(N==M); HANDLE_ALLOC_ERROR(y = (real*)malloc(N4*sizeof(real))); for( int i = 0; i < N; i++) yfile >> y[i]; for(int i = N; i < N4; ++i) y[i] = 0; yfile.close(); int Ndev4 = N4/4; TIME start = tic(); TIME init = tic(); // Create an OpenCL context context = CreateContext(); if(context == NULL) { std::cerr << "Failed to create OpenCL context." 
<< std::endl; Cleanup(context, commandQueue, program, kernel); return EXIT_FAILURE; } // Create a command queue commandQueue = CreateCommandQueue(context, &device, profiling_info); if(commandQueue == NULL) { std::cerr << "Failed to create OpenCL command queue." << std::endl; Cleanup(context, commandQueue, program, kernel); return EXIT_FAILURE; } // Create OpenCL program program = CreateProgram(context, device, filename, filename2); if (program == NULL) { Cleanup(context, commandQueue, program, kernel); return EXIT_FAILURE; } // Create OpenCL kernel kernel = clCreateKernel(program, "VectorUpdate", NULL); if(kernel == NULL) { std::cerr << "Failed to create kernel." << std::endl; Cleanup(context, commandQueue, program, kernel); return EXIT_FAILURE; } printf("%lf\n",toc(init)); /* QUERYING DEVICE INFO */ size_t kernelWorkGroupSize; // maximum work-group size that can be used to execute a kernel size_t sizeOfWarp; // the preferred multiple of workgroup size for launch cl_ulong localMemSize; // the amount of local memory in bytes being used by a kernel cl_ulong privateMemSize; // the minimum amount of private memory, in bytes, used by each workitem in the kernel. 
HANDLE_OPENCL_ERROR(clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, NULL)); HANDLE_OPENCL_ERROR(clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &sizeOfWarp, NULL)); #ifdef PRINT_INFO HANDLE_OPENCL_ERROR(clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(cl_ulong), &localMemSize, NULL)); HANDLE_OPENCL_ERROR(clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_PRIVATE_MEM_SIZE, sizeof(cl_ulong), &privateMemSize, NULL)); #endif #ifdef PRINT_INFO printf("------------ Some info: --------------\n"); printf("kernelWorkGroupSize = %lu \n", kernelWorkGroupSize); printf("sizeOfWarp = %lu \n", sizeOfWarp); printf("localMemSize = %lu \n", localMemSize); printf("privateMemSize = %lu \n", privateMemSize); printf("------------------------ --------------\n"); #endif if( WORK_GROUP_SIZE > kernelWorkGroupSize ) { printf("Error: wrong work group size\n"); return EXIT_FAILURE; } size_t localWorkSize[1] = {WORK_GROUP_SIZE}; int numWorkGroups = (Ndev4-1)/WORK_GROUP_SIZE+1; size_t globalWorkSize[1] = {numWorkGroups*WORK_GROUP_SIZE}; TIME t = tic(); cl_mem DEV_x = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(real)*N4, x, &status); HANDLE_OPENCL_ERROR(status); cl_mem DEV_y = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(real)*N4, y, &status); HANDLE_OPENCL_ERROR(status); int n = 0; status = clSetKernelArg(kernel, n++, sizeof(cl_mem), (void*)&DEV_x); status |= clSetKernelArg(kernel, n++, sizeof(cl_mem), (void*)&DEV_y); status |= clSetKernelArg(kernel, n++, sizeof(real), (void*)&alpha); status |= clSetKernelArg(kernel, n++, sizeof(int), (void*)&Ndev4); HANDLE_OPENCL_ERROR(status); printf("%lf\n",toc(t)); // Queue the kernel HANDLE_OPENCL_ERROR(clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, &myEvent)); // Read the output buffer back to the Host 
HANDLE_OPENCL_ERROR(clEnqueueReadBuffer(commandQueue, DEV_x, CL_TRUE, 0, N4*sizeof(real), x, 0, NULL, &myEvent2)); clFinish(commandQueue); // wait for all events to finish double elapsed_time = toc(start); /* CHECK RESULT */ TIME start_seq = tic(); for (int i = 0; i < N; i++) ref_x[i] += alpha*y[i]; double elapsed_time_seq = toc(start_seq); assert(ref_x[10] < 1000000); //std::cout << ref_x[0] << " " << x[0] << std::endl; // for (int i = 0; i < N; i++) // assert( abs(x[i] - ref_x[i]) < TOL ); //std::cout << "Verified..." << std::endl; #ifdef PROFILE clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &startTime, NULL); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL); clGetEventProfilingInfo(myEvent2, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &startTime2, NULL); clGetEventProfilingInfo(myEvent2, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime2, NULL); kernelExecTimeNs = endTime-startTime; readFromGpuTime = endTime2-startTime2; printf(/*"Kernel execution time: %lf\n"*/"%lf\n", (double)readFromGpuTime/1000000000.0); printf(/*"Kernel execution time: %lf\n"*/"%lf\n", (double)kernelExecTimeNs/1000000000.0); #endif printf(/*"Total execution time: %lf\n"*/"%lf\n", elapsed_time); printf(/*"Total execution time (seq.):*/"%lf\n", elapsed_time_seq); Cleanup(context, commandQueue, program, kernel); free(x); free(y); clReleaseMemObject(DEV_x); clReleaseMemObject(DEV_y); return EXIT_SUCCESS; }
//Main tracking Algorithm void AnalysisModule::larvaFind(uchar * img, int imWidth, int imHeight, int frameInd){ input = cv::Mat(imHeight,imWidth,CV_8UC1,NULL); input.data = img; if(output.rows != imHeight | output.cols != imWidth) output.create(imHeight,imWidth,CV_8UC1); int nextInd = (index+1)%sampleInd.size(); //for Profiling tic(); sampleInd[nextInd] = frameInd; sampleTime[nextInd] = frameInd * frameIntervalMS; //On first image, automatically determine threshold level using the Otsu method // Minimizes within group variance of thresholded classes. Should land on the best boundary between backlight and larva if(index == -1) threshold = otsuThreshold(img,imWidth*imHeight); //Can speed this up by applying to a roi bounding box a bit larger than the previous one //Simple inverted binary threshold of the image cv::threshold(input,output,threshold,255,CV_THRESH_BINARY_INV); profile[0] = toctic(); //Detect Contours in the binary image cv::findContours(output,contours,CV_RETR_EXTERNAL,CV_CHAIN_APPROX_NONE); profile[1] = toctic(); //No contours detected if (contours.size() == 0) { return; } //find contour with largest perimeter length double maxLen = 0; int maxInd = -1; double cLen; for(int i=0; i<contours.size(); i++){ cLen = cv::arcLength(cv::Mat(contours[i]), false); if(cLen >= maxLen){ maxLen = cLen; maxInd = i; }; } //Check to make sure that the perimeter is a larva by simple size analysis //(larva should have a certain perimeter length at 8.1um/pixel) cLarva[nextInd] = contours[maxInd]; //calculate bounding box bBox[nextInd] = cv::boundingRect(cv::Mat(cLarva[nextInd])); profile[2] = toctic(); //Calculate fourier coefficients fourierDecompose(cLarva[nextInd],nFourier,fourier[nextInd]); centroid[nextInd] = cv::Point2f(fourier[nextInd][0][AX],fourier[nextInd][0][AY]); profile[3] = toctic(); //Reconstruct the estimated boundary fourierReconstruct(fourier[nextInd],cFit,fitRes); profile[4] = toctic(); //Calculate Curvature perimeterCurvature(cFit,curve,fitRes/8); 
profile[5] = toctic(); //Find head and tail based on curvature minimums (small angle = sharp region) findHeadTail(cFit,curve,headTail); head[nextInd] = headTail[0]; tail[nextInd] = headTail[1]; profile[6] = toctic(); //Calculate Skeleton skeletonCalc(cFit,skeleton,headTail,length[nextInd],neck[nextInd]); profile[7] = toctic(); //Calculate bearing and head angle to bearing bodyAngles(tailBearingAngle[nextInd], headToBodyAngle[nextInd], head[nextInd], neck[nextInd], tail[nextInd]); profile[8] = toctic(); //Capture stage position stagePos[nextInd] = cv::Point(gui->stageThread->xpos,gui->stageThread->ypos); //Keep track of entire history with a sample every 30 frames if((nextInd % 30) == 0){ fullTrack[(fullTrackInd+1)%fullTrack.size()].x = stagePos[nextInd].x/gui->stageThread->tickPerMM_X+centroid[nextInd].x*gui->camThread->umPerPixel/1000.0; fullTrack[(fullTrackInd+1)%fullTrack.size()].y = stagePos[nextInd].y/gui->stageThread->tickPerMM_Y+centroid[nextInd].y*gui->camThread->umPerPixel/1000.0; fullTrackStim[(fullTrackInd+1)%fullTrack.size()] = binStimMax; binStimMax = 0; //updated from stimThread fullTrackInd++; } //Calculate Velocities of head and tail calcVelocities(nextInd); //Spew out profiling info //for(int i=0; i<9; i++) qDebug("%d: %.4fms",i,profile[i]*1000); //qDebug("\n"); index++; };
int main(int argc, char*argv[]) { // 6 config, each has three files to read char *files[] = { "../resources/config_32N32M_B.txt", "../resources/config_32N32M_A.txt", "../resources/config_32N32M_prior.txt", "../resources/config_64N64M_B.txt", "../resources/config_64N64M_A.txt", "../resources/config_64N64M_prior.txt", "../resources/config_128N128M_B.txt", "../resources/config_128N128M_A.txt", "../resources/config_128N128M_prior.txt", "../resources/config_256N256M_B.txt", "../resources/config_256N256M_A.txt", "../resources/config_256N256M_prior.txt", "../resources/config_512N512M_B.txt", "../resources/config_512N512M_A.txt", "../resources/config_512N512M_prior.txt", "../resources/config_1024N1024M_B.txt", "../resources/config_1024N1024M_A.txt", "../resources/config_1024N1024M_prior.txt", }; // variables int i,j,k; int Len; int debug=0; int job=0; // select job frome commmand line int argi; if (argc == 1) { puts("Please specify an option.\nUsage: \"./ocl_fo -job number(0-5) \"\n"); exit(1); } for (argi = 1; argi < argc; ++argi) { if (!strcmp(argv[argi], "-job")) { need_argument(argc, argv,argi); job = atoi(argv[++argi]) ; continue; } if (argv[argi][0] == '-') { fatal("'%s' is not a valid command-line option.\n",argv[argi]); } } //printf("job = %d\n", job); if( job > 5) { printf("Job number exceeds the limit 5! 
Exit Programm!\n"); exit(1); } HMM *word; word = (HMM*)malloc(sizeof(HMM)); Len = getLineNum(files[job*3+2]); printf("config_%dN_%dM\n",Len, Len); //read B,A,prior printf("Read the following files..."); //read_config(files,job,B,A,prior,Len); read_config(word,files,job,Len,Len); printf("Done!\n"); if( debug && job == 0 ) { puts("a"); check_a(word); puts("b"); check_b(word); puts("pri"); check_pri(word); } //---------------------- // run forward algorithm //---------------------- //--------------------------- // GPU Version //--------------------------- run_opencl_fo(word); //--------------------------- // CPU Version //--------------------------- puts("\n=>CPU"); struct timeval cpu_timer; int N = word->nstates; int T = word->len; float *B = word->b; float *A = word->a; float *prior = word->pri; double tmp, alpha_sum; double log_likelihood; float *alpha; // NxT alpha = (float*)malloc(sizeof(float)*N*T); float *A_t; // NxN A_t = (float*)malloc(sizeof(float)*N*N); log_likelihood = 0.0; // start timing tic(&cpu_timer); transpose(A, A_t, N, T); for(j=0;j<T;++j) { alpha_sum = 0.0; if(j==0){ // initialize for(i=0;i<N;++i){ alpha[i*T + 0] = B[i*T + 0] * prior[i]; alpha_sum += alpha[i*T + 0]; } }else{ // move forward for(i=0;i<N;++i) { // go through each state tmp = 0.0; for(k=0;k<N;++k){ tmp += A_t[i*N + k] * alpha[k*T + j-1]; } alpha[i*T + j] = (float)tmp * B[i*T + j]; alpha_sum += alpha[i*T + j]; } } // scaling for(i=0;i<N;++i){ alpha[i*T + j] /= alpha_sum; } log_likelihood += log(alpha_sum); } // end timing toc(&cpu_timer); printf("log_likelihood = %lf\n", log_likelihood); // free memory free_hmm(word); free(A_t); free(alpha); return 0; }
int main(int argc, char* argv[]) { #if 0 Stack *stack = Read_Stack("../data/binimg.tif"); Set_Matlab_Path("/Applications/MATLAB74/bin/matlab"); Stack *dist = Stack_Bwdist(stack); Stack* seeds = Stack_Local_Max(dist, NULL, STACK_LOCMAX_ALTER1); Stack *out = Scale_Double_Stack((double *) dist->array, stack->width, stack->height, stack->depth, GREY); Translate_Stack(out, COLOR, 1); Rgb_Color color; Set_Color(&color, 255, 0, 0); Stack_Label_Bwc(out, seeds, color); Print_Stack_Info(dist); Write_Stack("../data/test.tif", out); #endif #if 0 Stack *stack = Read_Stack("../data/benchmark/sphere_bw.tif"); //Stack *stack = Read_Stack("../data/sphere_data.tif"); //Stack_Not(stack, stack); int i; /* uint8 *array = stack->array + 512 * 600; for (i = 1; i < 512; i++) { array[i] = 1; } */ //stack->depth = 50; /* long int *label = (long int *) malloc(sizeof(long int) * Stack_Voxel_Number(stack)); */ tic(); Stack *out = Stack_Bwdist_L_U16(stack, NULL, 0); uint16 *out_array = (uint16 *) out->array; printf("%llu\n", toc()); //int *hist = Stack_Hist(out); //Print_Int_Histogram(hist); Stack *out2 = Stack_Bwdist_L(stack, NULL, NULL); float *out2_array = (float *) out2->array; int n = Stack_Voxel_Number(out); int t = 0; int x, y, z; for (i = 0; i < n; i++) { uint16 d2 = (uint16) out2_array[i]; if (out_array[i] != d2){ int area = stack->width * stack->height; STACK_UTIL_COORD(i, stack->width, area, x, y, z); printf("(%d %d %d)", x, y, z); printf("%d %d %d\n", out_array[i], d2, stack->array[i]); t++; } } printf("%d error\n", t); # if 0 //Translate_Stack(out, GREY, 1); float *out_array = (float *) out->array; int i; int n = Stack_Voxel_Number(out); /* for (i = 0; i < n; i++) { out_array[i] = sqrt(out_array[i]); } Stack *out2 = Scale_Float_Stack((float *)out->array, out->width, out->height, out->depth, GREY); */ Stack *out2 = Make_Stack(GREY, out->width, out->height, out->depth); for (i = 0; i < n; i++) { out2->array[i] = (uint8) round(sqrt(out_array[i])); } Write_Stack("../data/test.tif", 
out2); # endif Write_Stack("../data/test.tif", out); Kill_Stack(out); Kill_Stack(out2); #endif #if 1 Stack *stack = Read_Stack("../data/system/29.tif"); Print_Stack_Info(stack); tic(); Stack *out = Stack_Bwdist_L_U16P(stack, NULL, 0); ptoc(); Stack *golden = Read_Stack("../data/system/29_dist2.tif"); printf("Checking result ...\n"); if (Stack_Identical(out, golden) == FALSE) { printf("Result unmatched.\n"); } else { printf("Good.\n"); } #endif return 0; }
int test_7(){ idxint n = 15; idxint m = 29; pfloat feas_Gx[120] = {9999,-9999,9999,-9999,9999,-9999,9999,-9999,9999,-9999,-3.5008,3.5008,-0.4504,0.4504,-0.8764999999999999,0.8764999999999999,-0.1088,0.1088,1,1,-1,-8.4095,8.4095,-1.0107,1.0107,-1.686,1.686,-0.3525,0.3525,1,1,-1,-15.1987,15.1987,-2.0203,2.0203,-2.3932,2.3932,-0.6233,0.6233,1,1,-1,-22.5405,22.5405,-3.1862,3.1862,-2.8749,2.8749,-0.7923,0.7923,1,1,-1,-29.2639,29.2639,-4.3096,4.3096,-3.0189,3.0189,-0.8116,0.8116,1,1,-1,3.5008,-3.5008,0.4504,-0.4504,0.8764999999999999,-0.8764999999999999,0.1088,-0.1088,1,1,-1,8.4095,-8.4095,1.0107,-1.0107,1.686,-1.686,0.3525,-0.3525,1,1,-1,15.1987,-15.1987,2.0203,-2.0203,2.3932,-2.3932,0.6233,-0.6233,1,1,-1,22.5405,-22.5405,3.1862,-3.1862,2.8749,-2.8749,0.7923,-0.7923,1,1,-1,29.2639,-29.2639,4.3096,-4.3096,3.0189,-3.0189,0.8116,-0.8116,1,1,-1}; idxint feas_Gp[16] = {0,2,4,6,8,10,21,32,43,54,65,76,87,98,109,120}; idxint feas_Gi[120] = {8,9,10,11,12,13,14,15,16,17,0,1,2,3,4,5,6,7,8,18,19,0,1,2,3,4,5,6,7,10,18,20,0,1,2,3,4,5,6,7,12,18,21,0,1,2,3,4,5,6,7,14,18,22,0,1,2,3,4,5,6,7,16,18,23,0,1,2,3,4,5,6,7,9,18,24,0,1,2,3,4,5,6,7,11,18,25,0,1,2,3,4,5,6,7,13,18,26,0,1,2,3,4,5,6,7,15,18,27,0,1,2,3,4,5,6,7,17,18,28}; pfloat feas_c[15] = {0,0,0,0,0,0.127,0.9134,0.6324,0.0975,0.2785,0.873,0.0866,0.3676,0.9025,0.7215}; pfloat feas_h[29] = {-729.9349999999999,789.9349999999999,-71.015,131.015,-89.66,149.66,-1.165,61.165,9999,0,9999,0,9999,0,9999,0,9999,0,150,0,0,0,0,0,0,0,0,0,0}; idxint bool_idx[5] = {0,1,2,3,4}; /* Answer: */ pfloat x[15] = {0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,32.383266,0.00,0.00,0.00, 0.00,0.00,0.00}; pfloat x2[15] = {0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,86.798858, 0.000000,0.000000,0.000000}; idxint i, ret_code, pass; timer t; ecos_bb_pwork* prob = ECOS_BB_setup( n, m, 0, m, 0, NULL, 0, feas_Gx, feas_Gp, feas_Gi, NULL, NULL, NULL, feas_c, feas_h, NULL, 5, bool_idx, 0, NULL, NULL); prob->stgs->verbose = 0; tic(&t); ret_code = 
ECOS_BB_solve(prob); pfloat msRuntime = toc(&t); pass = 1; printf("Soln: "); for (i=5; i<n; ++i){ pass &= float_eqls(x[i] ,prob->x[i], prob->stgs->integer_tol ); printf("%f ", prob->x[i]); } printf("\nRuntime: %f\n", msRuntime); updateDataEntry_h(prob, 0, 789.935); updateDataEntry_h(prob, 1, -729.935); updateDataEntry_h(prob, 2, 131.015); updateDataEntry_h(prob, 3, -71.015); updateDataEntry_h(prob, 4, 149.66); updateDataEntry_h(prob, 5, -89.66); updateDataEntry_h(prob, 6, 61.165); updateDataEntry_h(prob, 7, -1.165); tic(&t); ret_code = ECOS_BB_solve(prob); msRuntime = toc(&t); printf("Soln2: "); for (i=5; i<n; ++i){ pass &= float_eqls(x2[i] ,prob->x[i], prob->stgs->integer_tol ); printf("%f ", prob->x[i]); } printf("\nRuntime: %f\n", msRuntime); ECOS_BB_cleanup(prob, 0); return pass; }
/// Starts timing via tic() and stores the clock's tick rate, computed as the
/// denominator of the duration period divided by its numerator.
Timer()
{
    tic();

    const auto periodDen = clock::duration::period::den;
    const auto periodNum = clock::duration::period::num;
    TicksPerSeconds = periodDen / periodNum;
}
/*
 * Cholesky update/downdate demo.
 *
 * Steps, each timed with tic/toc and followed by a residual print:
 *   1. symbolic + numeric Cholesky of C, then solve;
 *   2. build a one-column matrix W with the pattern of column n/2 of L and
 *      random values, update L so that it factors C + W*W', then solve;
 *   3. form E = C + (P'W)*(P'W)' explicitly, refactor it, then solve;
 *   4. downdate L by W and solve against C again.
 *
 * Returns 0 if Prob is NULL, not flagged symmetric, or empty, or if any step
 * fails; otherwise the value of done3 (1, ...), which also releases the
 * workspace.
 */
int demo3 (problem *Prob)
{
    cs *A, *C, *W = NULL, *WW, *WT, *E = NULL, *W2 ;
    int n, k, *Li, *Lp, *Wi, *Wp, p1, p2, *p = NULL, ok ;
    double *b, *x, *resid, *y = NULL, *Lx, *Wx, s, t, t1 ;
    css *S = NULL ;
    csn *N = NULL ;
    if (!Prob || !Prob->sym || Prob->A->n == 0) return (0) ;
    A = Prob->A ; C = Prob->C ; b = Prob->b ; x = Prob->x ; resid = Prob->resid;
    n = A->n ;
    /* A second "!Prob->sym || n == 0" test used to follow here; it could
     * never be true after the guard above and has been removed. */
    rhs (x, b, n) ;                             /* compute right-hand side */
    printf ("\nchol then update/downdate ") ;
    print_order (1) ;
    y = cs_malloc (n, sizeof (double)) ;
    t = tic () ;
    S = cs_schol (1, C) ;                       /* symbolic Chol, amd(A+A') */
    printf ("\nsymbolic chol time %8.2f\n", toc (t)) ;
    t = tic () ;
    N = cs_chol (C, S) ;                        /* numeric Cholesky */
    printf ("numeric chol time %8.2f\n", toc (t)) ;
    if (!S || !N || !y) return (done3 (0, S, N, y, W, E, p)) ;
    t = tic () ;
    cs_ipvec (S->pinv, b, y, n) ;               /* y = P*b */
    cs_lsolve (N->L, y) ;                       /* y = L\y */
    cs_ltsolve (N->L, y) ;                      /* y = L'\y */
    cs_pvec (S->pinv, y, x, n) ;                /* x = P'*y */
    printf ("solve chol time %8.2f\n", toc (t)) ;
    printf ("original: ") ;
    print_resid (1, C, x, b, resid) ;           /* print residual */
    k = n/2 ;                                   /* construct W */
    W = cs_spalloc (n, 1, n, 1, 0) ;
    if (!W) return (done3 (0, S, N, y, W, E, p)) ;
    Lp = N->L->p ; Li = N->L->i ; Lx = N->L->x ;
    Wp = W->p ; Wi = W->i ; Wx = W->x ;
    Wp [0] = 0 ;
    p1 = Lp [k] ;
    Wp [1] = Lp [k+1] - p1 ;
    s = Lx [p1] ;                               /* first stored entry of column k of L scales W */
    srand (1) ;                                 /* fixed seed: reproducible W */
    for ( ; p1 < Lp [k+1] ; p1++)
    {
        p2 = p1 - Lp [k] ;
        Wi [p2] = Li [p1] ;
        Wx [p2] = s * rand () / ((double) RAND_MAX) ;
    }
    t = tic () ;
    ok = cs_updown (N->L, +1, W, S->parent) ;   /* update: L*L'+W*W' */
    t1 = toc (t) ;
    printf ("update: time: %8.2f\n", t1) ;
    if (!ok) return (done3 (0, S, N, y, W, E, p)) ;
    t = tic () ;
    cs_ipvec (S->pinv, b, y, n) ;               /* y = P*b */
    cs_lsolve (N->L, y) ;                       /* y = L\y */
    cs_ltsolve (N->L, y) ;                      /* y = L'\y */
    cs_pvec (S->pinv, y, x, n) ;                /* x = P'*y */
    t = toc (t) ;
    p = cs_pinv (S->pinv, n) ;
    W2 = cs_permute (W, p, NULL, 1) ;           /* E = C + (P'W)*(P'W)' */
    WT = cs_transpose (W2,1) ;
    WW = cs_multiply (W2, WT) ;
    cs_spfree (WT) ;
    cs_spfree (W2) ;
    E = cs_add (C, WW, 1, 1) ;
    cs_spfree (WW) ;
    /* failures in the chain above are detected here through E and p */
    if (!E || !p) return (done3 (0, S, N, y, W, E, p)) ;
    printf ("update: time: %8.2f (incl solve) ", t1+t) ;
    print_resid (1, E, x, b, resid) ;           /* print residual */
    cs_nfree (N) ;                              /* clear N */
    t = tic () ;
    N = cs_chol (E, S) ;                        /* numeric Cholesky */
    if (!N) return (done3 (0, S, N, y, W, E, p)) ;
    cs_ipvec (S->pinv, b, y, n) ;               /* y = P*b */
    cs_lsolve (N->L, y) ;                       /* y = L\y */
    cs_ltsolve (N->L, y) ;                      /* y = L'\y */
    cs_pvec (S->pinv, y, x, n) ;                /* x = P'*y */
    t = toc (t) ;
    printf ("rechol: time: %8.2f (incl solve) ", t) ;
    print_resid (1, E, x, b, resid) ;           /* print residual */
    t = tic () ;
    ok = cs_updown (N->L, -1, W, S->parent) ;   /* downdate: L*L'-W*W' */
    t1 = toc (t) ;
    if (!ok) return (done3 (0, S, N, y, W, E, p)) ;
    printf ("downdate: time: %8.2f\n", t1) ;
    t = tic () ;
    cs_ipvec (S->pinv, b, y, n) ;               /* y = P*b */
    cs_lsolve (N->L, y) ;                       /* y = L\y */
    cs_ltsolve (N->L, y) ;                      /* y = L'\y */
    cs_pvec (S->pinv, y, x, n) ;                /* x = P'*y */
    t = toc (t) ;
    printf ("downdate: time: %8.2f (incl solve) ", t1+t) ;
    print_resid (1, C, x, b, resid) ;           /* print residual */
    return (done3 (1, S, N, y, W, E, p)) ;
}
/**
 * Builds the AS preconditioner.
 *
 * Sets up the work vectors, the vectorial Lagrange space M_Qh3, the
 * interpolation matrix M_P (Qh3 -> Vh) and the discrete gradient M_C
 * (Qh -> Vh) together with their transposes, the index sets used to extract
 * the Vh / Qh blocks and the components of Qh3, and the per-component
 * sub-vectors of M_y and M_s.
 *
 * @param t        preconditioner type name, forwarded to setType()
 * @param Xh       product space; sub-space 0 is stored as M_Vh, sub-space 1 as M_Qh
 * @param Mh       space for the coefficient fields M_mu and M_er
 * @param bcFlags  boundary conditions, stored in M_bcFlags
 * @param p        prefix, stored in M_prefix
 * @param Pm       matrix forwarded to setMatrix()
 * @param k        scalar stored in M_k; M_g is initialised to 1 - k*k
 */
PreconditionerAS<space_type,coef_space_type>::PreconditionerAS( std::string t,
                                                                space_ptrtype Xh,
                                                                coef_space_ptrtype Mh,
                                                                BoundaryConditions bcFlags,
                                                                std::string const& p,
                                                                sparse_matrix_ptrtype Pm,
                                                                double k )
    :
    M_type( AS ),
    M_Xh( Xh ),
    M_Vh(Xh->template functionSpace<0>() ),
    M_Qh(Xh->template functionSpace<1>() ),
    M_Mh( Mh ),
    M_Vh_indices( M_Vh->nLocalDofWithGhost() ),
    M_Qh_indices( M_Qh->nLocalDofWithGhost() ),
    M_Qh3_indices( Dim ),
    A(backend()->newVector(M_Vh)),
    B(backend()->newVector(M_Vh)),
    C(backend()->newVector(M_Vh)),
    M_r(backend()->newVector(M_Vh)),
    M_r_t(backend()->newVector(M_Vh)),
    M_uout(backend()->newVector(M_Vh)),
    M_diagPm(backend()->newVector(M_Vh)),
    //M_t(backend()->newVector(M_Vh)),
    U( M_Vh, "U" ),
    M_mu(M_Mh, "mu"),
    M_er(M_Mh, "er"),
    M_bcFlags( bcFlags ),
    M_prefix( p ),
    M_k(k),
    M_g(1.-k*k)
{
    tic();
    LOG(INFO) << "[PreconditionerAS] setup starts";
    this->setMatrix( Pm ); // Needed only if worldComm > 1

    // QH3 : Lagrange vectorial space type
    M_Qh3 = lag_v_space_type::New(Xh->mesh());

    M_qh3_elt = M_Qh3->element();
    M_qh_elt = M_Qh->element();
    M_vh_elt = M_Vh->element();

    // Block 11.1
    M_s = backend()->newVector(M_Qh3);
    M_y = backend()->newVector(M_Qh3);

    // Block 11.2
    M_z = backend()->newVector(M_Qh);
    M_t = backend()->newVector(M_Qh);

    // Create the interpolation and keep only the matrix
    auto pi_curl = I(_domainSpace=M_Qh3, _imageSpace=M_Vh);
    auto Igrad = Grad( _domainSpace=M_Qh, _imageSpace=M_Vh);

    M_P = pi_curl.matPtr();
    M_C = Igrad.matPtr();

    M_Pt = backend()->newMatrix(M_Qh3,M_Vh);
    // NOTE(review): M_C maps Qh -> Vh, yet its transpose is allocated with
    // M_Qh3 like M_Pt - confirm whether M_Qh was intended here.
    M_Ct = backend()->newMatrix(M_Qh3,M_Vh);

    M_P->transpose(M_Pt,MATRIX_TRANSPOSE_UNASSEMBLED);
    M_C->transpose(M_Ct,MATRIX_TRANSPOSE_UNASSEMBLED);

    LOG(INFO) << "size of M_C = " << M_C->size1() << ", " << M_C->size2() << std::endl;
    LOG(INFO) << "size of M_P = " << M_P->size1() << ", " << M_P->size2() << std::endl;

    // Create vector of indices to create subvectors/matrices
    std::iota( M_Vh_indices.begin(), M_Vh_indices.end(), 0 ); // Vh indices in Xh
    std::iota( M_Qh_indices.begin(), M_Qh_indices.end(), M_Vh->nLocalDofWithGhost() ); // Qh indices in Xh

    // "Components" of Qh3: bucket every dof index by the component it belongs to
    auto Qh3_dof_begin = M_Qh3->dof()->dofPointBegin();
    auto Qh3_dof_end = M_Qh3->dof()->dofPointEnd();

    int dof_comp, dof_idx;
    for( auto it = Qh3_dof_begin; it!= Qh3_dof_end; it++ )
    {
        dof_comp = it->template get<2>(); //Component
        dof_idx = it->template get<1>(); //Global index
        M_Qh3_indices[dof_comp].push_back( dof_idx );
    }

    // Subvectors for M_y (per component)
    M_y1 = M_y->createSubVector(M_Qh3_indices[0], true);
    M_y2 = M_y->createSubVector(M_Qh3_indices[1], true);
#if FEELPP_DIM == 3
    M_y3 = M_y->createSubVector(M_Qh3_indices[2], true);
#endif

    // Subvectors for M_s (per component).
    // These were previously created from M_y (copy-paste of the block above);
    // they are now taken from M_s as the names and the comment indicate.
    M_s1 = M_s->createSubVector(M_Qh3_indices[0], true);
    M_s2 = M_s->createSubVector(M_Qh3_indices[1], true);
#if FEELPP_DIM == 3
    M_s3 = M_s->createSubVector(M_Qh3_indices[2], true);
#endif

    this->setType ( t );
    toc( "[PreconditionerAS] setup done ", FLAGS_v > 0 );
}
/* Return the difference between a fresh tic () reading and the earlier
 * reading t, clamped so that the result is never negative. */
static double toc (double t)
{
    double now, elapsed ;
    now = tic () ;
    elapsed = now - t ;
    return (CS_MAX (0, elapsed)) ;
}
/** * Iterates the ACWE for several iterations using 1 or more bands * @param numIterations * @param useAllBands */ void ActiveContours::iterate(int numIterations, bool useAllBands) { //Default origin and region to copy the entire region of the 3D texture dout << "Initializing origin and region with " << width << "," << height << "," << depth << endl; origin.push_back(0); origin.push_back(0); origin.push_back(0); region.push_back(width); region.push_back(height); region.push_back(depth); cl::CommandQueue* queue = clMan.getQueue(); // Only used if we are printing the buffers. It defines the slides that we are going to print int* slidesToPrint = new int[3]; slidesToPrint[0] = 9; slidesToPrint[1] = 10; slidesToPrint[2] = 11; int sizeOfArray = 3; try { err = queue->enqueueAcquireGLObjects(&cl_textures, NULL, &evAcOGL); queue->finish(); if (currIter == 0) { //Copying img_in_gl to buf_img_in cl::Event evCopyInGlToIn; tic(tm_copyGlToBuffer); dout << "Copying input texture (img_in_gl) to cl_buffer buf_img_in" << endl; vecEvPrevTextToBuffer.push_back(evAcOGL); queue->enqueueCopyImageToBuffer(img_in_gl, buf_img_in, origin, region, (size_t)0, &vecEvPrevTextToBuffer,&evCopyInGlToIn); toc(tm_copyGlToBuffer); if (WRITE) {//Writes the init image on the temporal folder // Sets the precision of cout to 2 cout << std::setprecision(3) << endl; vecEvPrevPrinting.push_back(evCopyInGlToIn); res = queue->enqueueReadBuffer(buf_img_in, CL_TRUE, 0, sizeof (float) *width*height*depth, (void*) arr_img_out, &vecEvPrevPrinting, 0); bool normalized_values = 1; dout << "Done copying texture to buffer.... 
writing result to images/temp_results/InputImage/" << endl; ImageManager::write3DImage((char*) "images/temp_results/InputImage/", arr_img_out, width, height,depth, normalized_values); /* Just to test that the TEXTURE is being copied to img_in_gl correctly*/ /* int rowSize = sizeof(float)*width; res = queue->enqueueReadImage(img_in_gl, CL_FALSE, origin, region, (size_t) rowSize , (size_t) (rowSize*height), (void*) arr_img_out, &vecEvPrevPrinting, 0); queue->finish(); //Finish everything before the iterations ImageManager::write3DImage((char*) "images/temp_results/3dTexture/", arr_img_out, width, height,depth, normalized_values); queue->finish(); //Finish everything before the iterations dout << "Writing done!!!!" << endl; */ } vecEvPrevAvgInOut.push_back(evCopyInGlToIn); //For the first iteration we need to wait to copy the texture }//If iter == 0 //Compute the last iteration of this 'round' int lastIter = min(currIter + numIterations, totalIterations); // -------------------- MAIN Active Countours iteration for (; currIter < lastIter; currIter++) { if (currIter % ITER == 0) { dout << endl << endl << "******************** Iter " << currIter << " ******************** " << endl; } tic(tm_avgInOut); evAvgInOut_SmoothPhi = compAvgInAndOut(buf_phi, buf_img_in, vecEvPrevAvgInOut); toc(tm_avgInOut); if (WRITE) {// Prints the previous values of phi cout << endl << "----------- Previous Phi ------------" << endl; vecEvPrevPrinting.clear(); vecEvPrevPrinting.push_back(evAvgInOut_SmoothPhi); printBuffer(buf_phi, width*height, width*height*9, width, height, vecEvPrevPrinting); printBuffer(buf_phi, width*height, width*height*16, width, height, vecEvPrevPrinting); printBuffer(buf_phi, width*height, width*height*28, width, height, vecEvPrevPrinting); } if (WRITE) {// Gets the final average values obtained cout << endl << "----------- Final Average (avg out, avg in, count out, count in, sum out, sum in)------------" << endl; vecEvPrevPrinting.clear(); 
vecEvPrevPrinting.push_back(evAvgInOut_SmoothPhi); printBuffer(buf_avg_in_out, 6, vecEvPrevPrinting); } //It computes the curvatue and F values, the curvature is stored on the first layer //and the F values are stored on the second layer tic(tm_curvature); evCurvature_copySmoothToPhi = compCurvature(vecEvPrevCurvature); toc(tm_curvature); if (WRITE) { cout << "--------------------Displaying the value of curvature..." << endl; vecEvPrevPrinting.clear(); vecEvPrevPrinting.push_back(evCurvature_copySmoothToPhi); //printBuffer(buf_curvature, 10, vecEvPrevPrinting); printBufferArray(buf_curvature, width*height, width, height, vecEvPrevPrinting, slidesToPrint, sizeOfArray); } // Computing the maximum F value (max value of: // pow( curr_img - avgIn, 2) - pow( curr_img - avgOut, 2)) vecEvPrevF.push_back(evAvgInOut_SmoothPhi);//Wait to compute the average in and out tic(tm_F); evF = compF(vecEvPrevF); toc(tm_F); if (WRITE) { cout << "--------------------Displaying the value of F ..." << endl; vecEvPrevPrinting.clear(); vecEvPrevPrinting.push_back(evF); //printBuffer(buf_F, width*height, 0, width, height, vecEvPrevPrinting); printBufferArray(buf_F, width*height, width, height, vecEvPrevPrinting, slidesToPrint, sizeOfArray); } //Computing maximum value of F vecEvPrevMaxF.push_back(evF); tic(tm_maxF); evMaxF = compReduce(buf_F, buf_max_F, true, vecEvPrevMaxF); // Use abs value toc(tm_maxF); if (WRITE) { cout << "--------------------Displaying max value of F ..." << endl; vecEvPrevPrinting.clear(); vecEvPrevPrinting.push_back(evF); printBuffer(buf_max_F, 1, vecEvPrevPrinting); } vecEvPrevDphiDt.push_back(evCurvature_copySmoothToPhi);// Wait for curvature vecEvPrevDphiDt.push_back(evMaxF);// Wait for max F -> and F tic(tm_DphiDt); evDphiDt_MaxDphiDt = compDphiDt(vecEvPrevDphiDt); toc(tm_DphiDt); if (WRITE) { cout << "--------------------Displaying values of Dphi/dt ..." 
<< endl; vecEvPrevPrinting.clear(); vecEvPrevPrinting.push_back(evDphiDt_MaxDphiDt); //printBuffer(buf_dphidt, width*height, 0, width, height, vecEvPrevPrinting); printBufferArray(buf_dphidt, width*height, width, height, vecEvPrevPrinting, slidesToPrint, sizeOfArray); } vecEvPrevMaxDphiDt.push_back(evDphiDt_MaxDphiDt); tic(tm_maxDphiDt); evDphiDt_MaxDphiDt = compReduce(buf_dphidt, buf_max_dphidt, false, vecEvPrevMaxDphiDt ); toc(tm_maxDphiDt); if (WRITE) { cout << "--------------------Displaying Max Dphi/dt ..." << endl; vecEvPrevPrinting.clear(); vecEvPrevPrinting.push_back(evDphiDt_MaxDphiDt); printBuffer(buf_max_dphidt, 1, vecEvPrevPrinting); } vecEvPrevNewPhi.push_back(evDphiDt_MaxDphiDt); tic(tm_phi); evSDF_newPhi = compNewPhi(vecEvPrevNewPhi); //This phi without smooth term toc(tm_phi); if (WRITE) { cout << "--------------------Displaying values of new phi ..." << endl; vecEvPrevPrinting.clear(); vecEvPrevPrinting.push_back(evSDF_newPhi); //printBuffer(buf_phi, width*height, width*height*7, width, height, vecEvPrevPrinting); printBufferArray(buf_phi, width*height, width, height, vecEvPrevPrinting, slidesToPrint, sizeOfArray); } vecEvPrevSmPhi.push_back(evSDF_newPhi); tic(tm_smoothPhi); evAvgInOut_SmoothPhi = smoothPhi(vecEvPrevSmPhi, dt_smooth); //This phi without smooth term toc(tm_smoothPhi); if (WRITE) { cout << "--------------------Displaying values of smoothed phi ..." 
<< endl; vecEvPrevPrinting.clear(); vecEvPrevPrinting.push_back(evAvgInOut_SmoothPhi); printBufferArray(buf_smooth_phi, width*height, width, height, vecEvPrevPrinting, slidesToPrint, sizeOfArray); } vecEvPrevCopySmoothToPhi.push_back(evAvgInOut_SmoothPhi); tic(tm_copySmoothPhi); res = queue->enqueueCopyBuffer(buf_smooth_phi, buf_phi, (size_t)0, (size_t) 0, (size_t) sizeof (float) *buf_size, &vecEvPrevCopySmoothToPhi, &evCurvature_copySmoothToPhi); toc(tm_copySmoothPhi); vecEvPrevAvgInOut.push_back(evCurvature_copySmoothToPhi); vecEvPrevAvgInOut.clear(); vecEvPrevAvg.clear(); vecEvPrevCurvature.clear(); vecEvPrevF.clear(); vecEvPrevMaxF.clear(); vecEvPrevDphiDt.clear(); vecEvPrevMaxDphiDt.clear(); vecEvPrevNewPhi.clear(); vecEvPrevSmPhi.clear(); vecEvPrevSDF.clear(); vecEvPrevPrinting.clear(); vecEvPrevTextToBuffer.clear(); }//Main loop queue->finish(); //Be sure we finish everything dout << "Done ..................." << endl; if (WRITE) { cout << "--------------------Writing new PHI as images in images/temp_results/newPhi/" << endl; vecEvPrevPrinting.push_back(evAvgInOut_SmoothPhi); //Reads from buf_phi (GPU) and writes to arr_img_out (Host) res = queue->enqueueReadBuffer(buf_smooth_phi, CL_TRUE, 0, sizeof (float) *buf_size, (void*) arr_img_out, &vecEvPrevPrinting, 0); // Prints image into png file ImageManager::write3DImage((char*) "images/temp_results/newPhi/", arr_img_out, width, height, depth, 0); } dout << " Copying back everything to OpenGL ... " << endl; vecEvPrevCopyPhiBackToGL.push_back(evAvgInOut_SmoothPhi); tic(tm_bufToGL); queue->enqueueCopyBufferToImage(buf_smooth_phi, img_phi_gl, (size_t)0, origin, region, &vecEvPrevCopyPhiBackToGL, &evAcOGL); toc(tm_bufToGL); queue->finish(); //Be sure we finish everything err = queue->enqueueReleaseGLObjects(&cl_textures, NULL, 0); } catch (cl::Error ex) { cout << "EXCEPTION" << endl; clMan.printError(ex); return; } }
int main(int argc, char** argv) { if(argc != 2) { printf("You should use the following format for running this program: %s <Number of Iterations>\n", argv[0]); exit(1); } int N = atoi(argv[1]); int rng = 42; srand(rng); array_number_t vec1 = vector_fill(DIM, 0.0); array_number_t vec2 = vector_fill(DIM, 0.0); array_number_t vec3 = vector_fill(DIM, 0.0); for(int i=0; i<DIM; i++) { vec1->arr[i] = dist(rng); vec2->arr[i] = dist(rng); vec3->arr[i] = dist(rng); } #ifdef HOIST storage_t s = storage_alloc(VECTOR_ALL_BYTES(DIM)); #endif timer_t t = tic(); double total = 0; for (int count = 0; count < N; ++count) { vec1->arr[0] += 1.0 / (2.0 + vec1->arr[0]); vec2->arr[10] += 1.0 / (2.0 + vec2->arr[10]); #ifdef DPS #ifndef HOIST storage_t s = storage_alloc(VECTOR_ALL_BYTES(DIM)); #endif #endif #ifdef ADD3 #ifdef DPS total += vectorSum(TOP_LEVEL_linalg_vectorAdd3_dps(s, vec1, vec2, vec3, DIM, DIM, DIM)); #else total += vectorSum(TOP_LEVEL_linalg_vectorAdd3(vec1, vec2, vec3)); #endif #elif DOT #ifdef DPS total += TOP_LEVEL_linalg_dot_prod_dps(s, vec1, vec2, DIM, DIM); #else total += TOP_LEVEL_linalg_dot_prod(vec1, vec2); #endif #elif CROSS #ifdef DPS total += vectorSum(TOP_LEVEL_linalg_cross_dps(s, vec1, vec2, DIM, DIM)); #else total += vectorSum(TOP_LEVEL_linalg_cross(vec1, vec2)); #endif #endif #ifdef DPS #ifndef HOIST storage_free(s, VECTOR_ALL_BYTES(DIM)); #endif #endif } float elapsed = toc2(t); printf("total =%f, time per call = %f ms\n", total, elapsed / (double)(N)); return 0; }
/*
 * Decode `file` through the GStreamer pipeline held in `ma`.
 *
 * Out-parameters:
 *   frames - set to ma->curhop on completion, or 0 on failure/invalidation
 *   size   - set to ma->winsize/2 + 1 on completion, or 0 on failure/invalidation
 *   ret    - 0 on EOS, -1 if ma->filerate is negative after pipeline setup
 *            or the bus reports an error, -2 if ma->invalidate is set when
 *            decoding finishes
 *
 * Returns NULL when ma->filerate is negative after mirageaudio_initgstreamer();
 * otherwise returns ma->out (also when *ret ends up as -1 or -2).
 */
float* mirageaudio_decode(MirageAudio *ma, const gchar *file, int *frames, int* size, int* ret)
{
    GstBus *msg_bus;
    gboolean keep_going;

    tic();

    // Reset the per-run decoder state.
    ma->fftwsamples = 0;
    ma->curhop = 0;
    ma->cursample = 0;
    ma->quit = FALSE;

    // The invalidate flag is only touched while holding decoding_mutex.
    g_mutex_lock(ma->decoding_mutex);
    ma->invalidate = FALSE;
    g_mutex_unlock(ma->decoding_mutex);

    // Build the pipeline for this file.
    mirageaudio_initgstreamer(ma, file);

    // A negative file rate after setup is treated as failure: report it and
    // tear the pipeline down again.
    if (ma->filerate < 0) {
        *size = 0;
        *frames = 0;
        *ret = -1;

        gst_element_set_state(ma->pipeline, GST_STATE_NULL);
        gst_object_unref(GST_OBJECT(ma->pipeline));
        return NULL;
    }

    // Configure libsamplerate for the ratio target rate / file rate.
    ma->src_data.src_ratio = (double)ma->rate/(double)ma->filerate;
    ma->src_data.input_frames = 0;
    ma->src_data.end_of_input = 0;
    src_reset(ma->src_state);
    g_print("libmirageaudio: rate=%d, resampling=%f\n", ma->filerate, ma->src_data.src_ratio);

    // Start the pipeline.
    gst_element_set_state(ma->pipeline, GST_STATE_PLAYING);
    g_print("libmirageaudio: decoding %s\n", file);

    msg_bus = gst_pipeline_get_bus(GST_PIPELINE(ma->pipeline));

    *ret = 0;
    keep_going = TRUE;

    // Poll the bus with a 100 ms timeout until an ERROR or EOS message arrives.
    do {
        GstMessage *msg = gst_bus_timed_pop_filtered(msg_bus, GST_MSECOND*100,
                GST_MESSAGE_ERROR | GST_MESSAGE_EOS);

        if (msg != NULL) {
            if (GST_MESSAGE_TYPE(msg) == GST_MESSAGE_ERROR) {
                GError *gerr;
                gchar *dbg_text;

                gst_message_parse_error(msg, &gerr, &dbg_text);
                g_print("libmirageaudio: error: %s\n", gerr->message);
                g_error_free(gerr);
                g_free(dbg_text);

                // Discard whatever was produced so far and flag the failure.
                ma->curhop = 0;
                keep_going = FALSE;
                *ret = -1;
            } else if (GST_MESSAGE_TYPE(msg) == GST_MESSAGE_EOS) {
                g_print("libmirageaudio: EOS Message received\n");
                keep_going = FALSE;
            }

            gst_message_unref(msg);
        }
    } while (keep_going);

    gst_object_unref(msg_bus);

    g_mutex_lock(ma->decoding_mutex);

    // Tear the pipeline down while holding the mutex.
    gst_element_set_state(ma->pipeline, GST_STATE_NULL);
    gst_object_unref(GST_OBJECT(ma->pipeline));

    toc();

    if (!ma->invalidate) {
        *size = ma->winsize/2 + 1;
        *frames = ma->curhop;
    } else {
        // Invalidated while decoding: report an empty result.
        *size = 0;
        *frames = 0;
        *ret = -2;
    }

    g_mutex_unlock(ma->decoding_mutex);

    g_print("libmirageaudio: frames=%d (maxhops=%d), size=%d\n", *frames, ma->hops, *size);

    return ma->out;
}
/*
 * Sets up all data structures needed.
 * Replace by codegen
 *
 * Allocates a pwork workspace, fills in dimensions, cone descriptions and
 * default settings, builds the upper part of the KKT matrix, orders it with
 * AMD, runs the symbolic LDL factorization and initialises both right-hand
 * sides.
 *
 * Parameters (as used in this function):
 *   n       length of c; stored as mywork->n
 *   m       length of h; stored as mywork->m
 *   p       length of b; stored as mywork->p
 *   l       size of the LP cone
 *   ncones  number of second-order cones
 *   q       q[i] is the size of second-order cone i
 *   Gpr, Gjc, Gir   arrays handed to createSparseMatrix() for the m-by-n
 *                   matrix G (presumably compressed-column data - confirm)
 *   Apr, Ajc, Air   same for the p-by-n matrix A; if any is NULL, A is NULL
 *   c, h, b         problem vectors; the pointers are stored in the
 *                   workspace, the data is not copied here
 *
 * Returns the new workspace, or NULL if AMD_order() does not return AMD_OK.
 *
 * NOTE(review): none of the MALLOC() results in this function are checked.
 */
pwork* ECOS_setup(idxint n, idxint m, idxint p, idxint l, idxint ncones, idxint* q,
                   pfloat* Gpr, idxint* Gjc, idxint* Gir,
                   pfloat* Apr, idxint* Ajc, idxint* Air,
                   pfloat* c, pfloat* h, pfloat* b)
{
    idxint i, j, k, cidx, conesize, lnz, amd_result, nK, *Ljc, *Lir, *P, *Pinv, *Sign;
    pwork* mywork;
    double Control [AMD_CONTROL], Info [AMD_INFO];
    pfloat rx, ry, rz, *Lpr;
    spmat *At, *Gt, *KU;

    /* timers exist only when the corresponding PROFILING level is compiled in */
#if PROFILING > 0
    timer tsetup;
#endif
#if PROFILING > 1
    timer tcreatekkt;
    timer tmattranspose;
    timer tordering;
#endif

#if PROFILING > 0
    tic(&tsetup);
#endif

    /* banner and problem summary */
#if PRINTLEVEL > 2
    PRINTTEXT("\n");
    PRINTTEXT(" *******************************************************************************\n");
    PRINTTEXT(" * ECOS: Embedded Conic Solver - Sparse Interior Point method for SOCPs *\n");
    PRINTTEXT(" * *\n");
    PRINTTEXT(" * NOTE: The solver is based on L. Vandenberghe's 'The CVXOPT linear and quad- *\n");
    PRINTTEXT(" * ratic cone program solvers', March 20, 2010. Available online: *\n");
    PRINTTEXT(" * [http://abel.ee.ucla.edu/cvxopt/documentation/coneprog.pdf] *\n");
    PRINTTEXT(" * *\n");
    PRINTTEXT(" * This code uses T.A. Davis' sparse LDL package and AMD code. *\n");
    PRINTTEXT(" * [http://www.cise.ufl.edu/research/sparse] *\n");
    PRINTTEXT(" * *\n");
    PRINTTEXT(" * Written during a summer visit at Stanford University with S. Boyd. *\n");
    PRINTTEXT(" * *\n");
    PRINTTEXT(" * (C) Alexander Domahidi, Automatic Control Laboratory, ETH Zurich, 2012-13. *\n");
    PRINTTEXT(" * Email: [email protected] *\n");
    PRINTTEXT(" *******************************************************************************\n");
    PRINTTEXT("\n\n");
    PRINTTEXT("PROBLEM SUMMARY:\n");
    PRINTTEXT(" Primal variables (n): %d\n", (int)n);
    PRINTTEXT("Equality constraints (p): %d\n", (int)p);
    PRINTTEXT(" Conic variables (m): %d\n", (int)m);
    PRINTTEXT("- - - - - - - - - - - - - - -\n");
    PRINTTEXT(" Size of LP cone: %d\n", (int)l);
    PRINTTEXT(" Number of SOCs: %d\n", (int)ncones);
    for( i=0; i<ncones; i++ ){
        PRINTTEXT(" Size of SOC #%02d: %d\n", (int)(i+1), (int)q[i]);
    }
#endif

    /* get work data structure */
    mywork = (pwork *)MALLOC(sizeof(pwork));
#if PRINTLEVEL > 2
    PRINTTEXT("Memory allocated for WORK struct\n");
#endif

    /* dimensions */
    mywork->n = n;
    mywork->m = m;
    mywork->p = p;
    mywork->D = l + ncones;
#if PRINTLEVEL > 2
    PRINTTEXT("Set dimensions\n");
#endif

    /* variables: x has n entries, y has p entries, everything else m entries */
    mywork->x = (pfloat *)MALLOC(n*sizeof(pfloat));
    mywork->y = (pfloat *)MALLOC(p*sizeof(pfloat));
    mywork->z = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->s = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->lambda = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->dsaff_by_W = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->dsaff = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->dzaff = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->saff = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->zaff = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->W_times_dzaff = (pfloat *)MALLOC(m*sizeof(pfloat));
#if PRINTLEVEL > 2
    PRINTTEXT("Memory allocated for variables\n");
#endif

    /* best iterates so far */
    mywork->best_x = (pfloat *)MALLOC(n*sizeof(pfloat));
    mywork->best_y = (pfloat *)MALLOC(p*sizeof(pfloat));
    mywork->best_z = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->best_s = (pfloat *)MALLOC(m*sizeof(pfloat));
    mywork->best_info = (stats *)MALLOC(sizeof(stats));

    /* cones */
    mywork->C = (cone *)MALLOC(sizeof(cone));
#if PRINTLEVEL > 2
    PRINTTEXT("Memory allocated for cone struct\n");
#endif

    /* LP cone: arrays are only allocated when l > 0 */
    mywork->C->lpc = (lpcone *)MALLOC(sizeof(lpcone));
    mywork->C->lpc->p = l;
    if( l > 0 ){
        mywork->C->lpc->w = (pfloat *)MALLOC(l*sizeof(pfloat));
        mywork->C->lpc->v = (pfloat *)MALLOC(l*sizeof(pfloat));
        mywork->C->lpc->kkt_idx = (idxint *)MALLOC(l*sizeof(idxint));
#if PRINTLEVEL > 2
        PRINTTEXT("Memory allocated for LP cone\n");
#endif
    } else {
        mywork->C->lpc->w = NULL;
        mywork->C->lpc->v = NULL;
        mywork->C->lpc->kkt_idx = NULL;
#if PRINTLEVEL > 2
        PRINTTEXT("No LP cone present, pointers filled with NULL\n");
#endif
    }

    /* Second-order cones: one socone per entry of q */
    mywork->C->soc = (socone *)MALLOC(ncones*sizeof(socone));
    mywork->C->nsoc = ncones;
    /* cidx accumulates the cone sizes but is not read again in this function */
    cidx = 0;
    for( i=0; i<ncones; i++ ){
        conesize = (idxint)q[i];
        mywork->C->soc[i].p = conesize;
        mywork->C->soc[i].a = 0;
        mywork->C->soc[i].eta = 0;
        mywork->C->soc[i].q = (pfloat *)MALLOC((conesize-1)*sizeof(pfloat));
        mywork->C->soc[i].skbar = (pfloat *)MALLOC((conesize)*sizeof(pfloat));
        mywork->C->soc[i].zkbar = (pfloat *)MALLOC((conesize)*sizeof(pfloat));
        /* which index array exists depends on the compile-time CONEMODE */
#if CONEMODE == 0
        mywork->C->soc[i].Didx = (idxint *)MALLOC((conesize)*sizeof(idxint));
#endif
#if CONEMODE > 0
        mywork->C->soc[i].colstart = (idxint *)MALLOC((conesize)*sizeof(idxint));
#endif
        cidx += conesize;
    }
#if PRINTLEVEL > 2
    PRINTTEXT("Memory allocated for second-order cones\n");
#endif

    /* info struct */
    mywork->info = (stats *)MALLOC(sizeof(stats));
#if PROFILING > 1
    mywork->info->tfactor = 0;
    mywork->info->tkktsolve = 0;
    mywork->info->tfactor_t1 = 0;
    mywork->info->tfactor_t2 = 0;
#endif
#if PRINTLEVEL > 2
    PRINTTEXT("Memory allocated for info struct\n");
#endif

#if defined EQUILIBRATE && EQUILIBRATE > 0
    /* equilibration vector */
    mywork->xequil = (pfloat *)MALLOC(n*sizeof(pfloat));
    mywork->Aequil = (pfloat *)MALLOC(p*sizeof(pfloat));
    mywork->Gequil = (pfloat *)MALLOC(m*sizeof(pfloat));
#if PRINTLEVEL > 2
    PRINTTEXT("Memory allocated for equilibration vectors\n");
#endif
#endif

    /* settings: initialised from the compile-time default macros */
    mywork->stgs = (settings *)MALLOC(sizeof(settings));
    mywork->stgs->maxit = MAXIT;
    mywork->stgs->gamma = GAMMA;
    mywork->stgs->delta = DELTA;
    mywork->stgs->eps = EPS;
    mywork->stgs->nitref = NITREF;
    mywork->stgs->abstol = ABSTOL;
    mywork->stgs->feastol = FEASTOL;
    mywork->stgs->reltol = RELTOL;
    mywork->stgs->abstol_inacc = ATOL_INACC;
    mywork->stgs->feastol_inacc = FTOL_INACC;
    mywork->stgs->reltol_inacc = RTOL_INACC;
    mywork->stgs->verbose = VERBOSE;
#if PRINTLEVEL > 2
    PRINTTEXT("Written settings\n");
#endif

    /* the caller's vectors are referenced, not copied */
    mywork->c = c;
    mywork->h = h;
    mywork->b = b;
#if PRINTLEVEL > 2
    PRINTTEXT("Hung pointers for c, h and b into WORK struct\n");
#endif

    /* Store problem data; Ajc[n] / Gjc[n] is passed as the non-zero count */
    if(Apr && Ajc && Air) {
        mywork->A = createSparseMatrix(p, n, Ajc[n], Ajc, Air, Apr);
    } else {
        mywork->A = NULL;
    }
    if (Gpr && Gjc && Gir) {
        mywork->G = createSparseMatrix(m, n, Gjc[n], Gjc, Gir, Gpr);
    } else {
        /* create an empty sparse matrix */
        mywork->G = createSparseMatrix(m, n, 0, Gjc, Gir, Gpr);
    }

#if defined EQUILIBRATE && EQUILIBRATE > 0
    set_equilibration(mywork);
#if PRINTLEVEL > 2
    PRINTTEXT("Done equilibrating\n");
#endif
#endif

    /* transpose A (only if present) and G; both timings add up in ttranspose */
#if PROFILING > 1
    mywork->info->ttranspose = 0;
    tic(&tmattranspose);
#endif
    if(mywork->A)
        At = transposeSparseMatrix(mywork->A);
    else
        At = NULL;
#if PROFILING > 1
    mywork->info->ttranspose += toc(&tmattranspose);
#endif
#if PRINTLEVEL > 2
    PRINTTEXT("Transposed A\n");
#endif

#if PROFILING > 1
    tic(&tmattranspose);
#endif
    Gt = transposeSparseMatrix(mywork->G);
#if PROFILING > 1
    mywork->info->ttranspose += toc(&tmattranspose);
#endif
#if PRINTLEVEL > 2
    PRINTTEXT("Transposed G\n");
#endif

    /* set up KKT system; createKKT_U() hands back Sign and KU through its out-pointers */
#if PROFILING > 1
    tic(&tcreatekkt);
#endif
    createKKT_U(Gt, At, mywork->C, &Sign, &KU);
#if PROFILING > 1
    mywork->info->tkktcreate = toc(&tcreatekkt);
#endif
#if PRINTLEVEL > 2
    PRINTTEXT("Created upper part of KKT matrix K\n");
#endif

    /*
     * Set up KKT system related data
     * (L comes later after symbolic factorization)
     */
    nK = KU->n;

#if DEBUG > 0
    dumpSparseMatrix(KU, "KU0.txt");
#endif
#if PRINTLEVEL > 2
    PRINTTEXT("Dimension of KKT matrix: %d\n", (int)nK);
    PRINTTEXT("Non-zeros in KKT matrix: %d\n", (int)KU->nnz);
#endif

    /* allocate memory in KKT system */
    mywork->KKT = (kkt *)MALLOC(sizeof(kkt));
    mywork->KKT->D = (pfloat *)MALLOC(nK*sizeof(pfloat));
    mywork->KKT->Parent = (idxint *)MALLOC(nK*sizeof(idxint));
    mywork->KKT->Pinv = (idxint *)MALLOC(nK*sizeof(idxint));
    mywork->KKT->work1 = (pfloat *)MALLOC(nK*sizeof(pfloat));
    mywork->KKT->work2 = (pfloat *)MALLOC(nK*sizeof(pfloat));
    mywork->KKT->work3 = (pfloat *)MALLOC(nK*sizeof(pfloat));
    mywork->KKT->work4 = (pfloat *)MALLOC(nK*sizeof(pfloat));
    mywork->KKT->work5 = (pfloat *)MALLOC(nK*sizeof(pfloat));
    mywork->KKT->work6 = (pfloat *)MALLOC(nK*sizeof(pfloat));
    mywork->KKT->Flag = (idxint *)MALLOC(nK*sizeof(idxint));
    mywork->KKT->Pattern = (idxint *)MALLOC(nK*sizeof(idxint));
    mywork->KKT->Lnz = (idxint *)MALLOC(nK*sizeof(idxint));
    mywork->KKT->RHS1 = (pfloat *)MALLOC(nK*sizeof(pfloat));
    mywork->KKT->RHS2 = (pfloat *)MALLOC(nK*sizeof(pfloat));
    mywork->KKT->dx1 = (pfloat *)MALLOC(mywork->n*sizeof(pfloat));
    mywork->KKT->dx2 = (pfloat *)MALLOC(mywork->n*sizeof(pfloat));
    mywork->KKT->dy1 = (pfloat *)MALLOC(mywork->p*sizeof(pfloat));
    mywork->KKT->dy2 = (pfloat *)MALLOC(mywork->p*sizeof(pfloat));
    mywork->KKT->dz1 = (pfloat *)MALLOC(mywork->m*sizeof(pfloat));
    mywork->KKT->dz2 = (pfloat *)MALLOC(mywork->m*sizeof(pfloat));
    mywork->KKT->Sign = (idxint *)MALLOC(nK*sizeof(idxint));
    mywork->KKT->PKPt = newSparseMatrix(nK, nK, KU->nnz);
    mywork->KKT->PK = (idxint *)MALLOC(KU->nnz*sizeof(idxint));
#if PRINTLEVEL > 2
    PRINTTEXT("Created memory for KKT-related data\n");
#endif

    /* calculate ordering of KKT matrix using AMD */
    P = (idxint *)MALLOC(nK*sizeof(idxint));
#if PROFILING > 1
    tic(&tordering);
#endif
    AMD_defaults(Control);
    amd_result = AMD_order(nK, KU->jc, KU->ir, P, Control, Info);
#if PROFILING > 1
    mywork->info->torder = toc(&tordering);
#endif

    if( amd_result == AMD_OK ){
#if PRINTLEVEL > 2
        PRINTTEXT("AMD ordering successfully computed.\n");
        AMD_info(Info);
#endif
    } else {
#if PRINTLEVEL > 2
        PRINTTEXT("Problem in AMD ordering, exiting.\n");
        AMD_info(Info);
#endif
        /* NOTE(review): this path returns without releasing mywork, P, Sign,
         * At, Gt or KU, all of which were obtained above - looks like a leak;
         * confirm whether callers can recover them. */
        return NULL;
    }

    /* calculate inverse permutation and permutation mapping of KKT matrix */
    pinv(nK, P, mywork->KKT->Pinv);
    Pinv = mywork->KKT->Pinv;
#if DEBUG > 0
    dumpDenseMatrix_i(P, nK, 1, "P.txt");
    dumpDenseMatrix_i(mywork->KKT->Pinv, nK, 1, "PINV.txt");
#endif
    /* first permutation pass: also fills the PK index mapping */
    permuteSparseSymmetricMatrix(KU, mywork->KKT->Pinv, mywork->KKT->PKPt, mywork->KKT->PK);

    /* permute sign vector */
    for( i=0; i<nK; i++ ){
        mywork->KKT->Sign[Pinv[i]] = Sign[i];
    }
#if PRINTLEVEL > 3
    PRINTTEXT("P = [");
    for( i=0; i<nK; i++ ){
        PRINTTEXT("%d ", (int)P[i]);
    }
    PRINTTEXT("];\n");
    PRINTTEXT("Pinv = [");
    for( i=0; i<nK; i++ ){
        PRINTTEXT("%d ", (int)Pinv[i]);
    }
    PRINTTEXT("];\n");
    PRINTTEXT("Sign = [");
    for( i=0; i<nK; i++ ){
        PRINTTEXT("%+d ", (int)Sign[i]);
    }
    PRINTTEXT("];\n");
    PRINTTEXT("SignP = [");
    for( i=0; i<nK; i++ ){
        PRINTTEXT("%+d ", (int)mywork->KKT->Sign[i]);
    }
    PRINTTEXT("];\n");
#endif

    /* symbolic factorization */
    Ljc = (idxint *)MALLOC((nK+1)*sizeof(idxint));
#if PRINTLEVEL > 2
    PRINTTEXT("Allocated memory for cholesky factor L\n");
#endif
    LDL_symbolic2(
        mywork->KKT->PKPt->n,    /* A and L are n-by-n, where n >= 0 */
        mywork->KKT->PKPt->jc,   /* input of size n+1, not modified */
        mywork->KKT->PKPt->ir,   /* input of size nz=Ap[n], not modified */
        Ljc,                     /* output of size n+1, not defined on input */
        mywork->KKT->Parent,     /* output of size n, not defined on input */
        mywork->KKT->Lnz,        /* output of size n, not defined on input */
        mywork->KKT->Flag        /* workspace of size n, not defn. on input or output */
    );

    /* assign memory for L; the last entry of Ljc is used as its non-zero count */
    lnz = Ljc[nK];
#if PRINTLEVEL > 2
    PRINTTEXT("Nonzeros in L, excluding diagonal: %d\n", (int)lnz) ;
#endif
    Lir = (idxint *)MALLOC(lnz*sizeof(idxint));
    Lpr = (pfloat *)MALLOC(lnz*sizeof(pfloat));
    mywork->KKT->L = createSparseMatrix(nK, nK, lnz, Ljc, Lir, Lpr);
#if PRINTLEVEL > 2
    PRINTTEXT("Created Cholesky factor of K in KKT struct\n");
#endif

    /* permute KKT matrix - we work on this one from now on */
    /* (second pass over the same inputs, this time without a PK mapping) */
    permuteSparseSymmetricMatrix(KU, mywork->KKT->Pinv, mywork->KKT->PKPt, NULL);
#if DEBUG > 0
    dumpSparseMatrix(mywork->KKT->PKPt, "PKPt.txt");
#endif

#if CONEMODE > 0
    /* zero any off-diagonal elements in (permuted) scalings in KKT matrix */
    for (i=0; i<mywork->C->nsoc; i++) {
        for (j=1; j<mywork->C->soc[i].p; j++) {
            for (k=0; k<j; k++) {
                mywork->KKT->PKPt->pr[mywork->KKT->PK[mywork->C->soc[i].colstart[j]+k]] = 0;
            }
        }
    }
#endif
#if DEBUG > 0
    dumpSparseMatrix(mywork->KKT->PKPt, "PKPt0.txt");
#endif

    /* set up RHSp for initialization */
    /* k walks the unpermuted KKT rows, j walks h; layout is n zeros, then b,
     * then the LP part of h, then each cone's part of h */
    k = 0; j = 0;
    for( i=0; i<n; i++ ){
        mywork->KKT->RHS1[Pinv[k++]] = 0;
    }
    for( i=0; i<p; i++ ){
        mywork->KKT->RHS1[Pinv[k++]] = b[i];
    }
    for( i=0; i<l; i++ ){
        mywork->KKT->RHS1[Pinv[k++]] = h[i];
        j++;
    }
    /* NOTE: parameter l is reused as the cone counter here, overwriting the
     * LP cone size; l is not read again after this loop */
    for( l=0; l<ncones; l++ ){
        for( i=0; i < mywork->C->soc[l].p; i++ ){
            mywork->KKT->RHS1[Pinv[k++]] = h[j++];
        }
#if CONEMODE == 0
        /* two extra zero rows per cone in this cone mode */
        mywork->KKT->RHS1[Pinv[k++]] = 0;
        mywork->KKT->RHS1[Pinv[k++]] = 0;
#endif
    }
#if PRINTLEVEL > 2
    PRINTTEXT("Written %d entries of RHS1\n", (int)k);
#endif

    /* set up RHSd for initialization: -c in the first n rows, zero elsewhere */
    for( i=0; i<n; i++ ){
        mywork->KKT->RHS2[Pinv[i]] = -c[i];
    }
    for( i=n; i<nK; i++ ){
        mywork->KKT->RHS2[Pinv[i]] = 0;
    }

    /* get scalings of problem data; each is clamped from below at 1 */
    rx = norm2(c, n); mywork->resx0 = MAX(1, rx);
    ry = norm2(b, p); mywork->resy0 = MAX(1, ry);
    rz = norm2(h, m); mywork->resz0 = MAX(1, rz);

    /* get memory for residuals */
    mywork->rx = (pfloat *)MALLOC(n*sizeof(pfloat));
    mywork->ry = (pfloat *)MALLOC(p*sizeof(pfloat));
    mywork->rz = (pfloat *)MALLOC(m*sizeof(pfloat));

    /* clean up: P is kept in the workspace, the temporaries are released */
    mywork->KKT->P = P;
    FREE(Sign);
    if(At) freeSparseMatrix(At);
    freeSparseMatrix(Gt);
    freeSparseMatrix(KU);

#if PROFILING > 0
    mywork->info->tsetup = toc(&tsetup);
#endif

    return mywork;
}
/* Prints `count` values as "<label> = [ v0  v1 ... ] " followed by a newline. */
static void lingot_signal_test_print(const char* label, const FLT* values, int count) {
	int idx;

	printf("%s = [", label);
	for (idx = 0; idx < count; idx++) {
		printf(" %f ", values[idx]);
	}
	printf("] \n");
}

/*
 * Manual smoke test / timing harness for lingot_signal_compute_noise_level().
 *
 * Runs the noise-level computation on a small ascending ramp (N = 16) and
 * on a larger descending ramp (N = 512), printing the signal ("S") and the
 * computed noise ("N") vectors and bracketing the calls with tic()/toc().
 * Results are only printed, nothing is asserted; the quick-select checks
 * are currently commented out.
 */
void lingot_signal_test() {

	int N = 16;
	int i = 0;
	int n = 5;

	FLT* spd = malloc(N * sizeof(FLT));
	FLT* noise = malloc(N * sizeof(FLT));
	if (spd == NULL || noise == NULL) {
		fprintf(stderr, "lingot_signal_test: out of memory\n");
		free(spd);
		free(noise);
		return;
	}

	/* ascending ramp 1..N; noise pre-filled with a sentinel value */
	for (i = 0; i < N; i++) {
		spd[i] = i + 1;
		noise[i] = -1.0;
	}

	lingot_signal_compute_noise_level(spd, N, n, noise);

	lingot_signal_test_print("S", spd, N);
	lingot_signal_test_print("N", noise, N);

	puts("done.");

	lingot_signal_test_print("S", spd, N);

	// assert(lingot_signal_quick_select(spd, 1) == 1.0);
	// assert(lingot_signal_quick_select(spd, 2) == 1.0);
	// assert(lingot_signal_quick_select(spd, 3) == 2.0);
	// assert(lingot_signal_quick_select(spd, 4) == 2.0);
	// assert(lingot_signal_quick_select(spd, 5) == 3.0);
	// assert(lingot_signal_quick_select(spd, 6) == 3.0);

	lingot_signal_test_print("S", spd, N);

	free(spd);
	free(noise);

	N = 512;
	i = 0;
	n = 30;
	spd = malloc(N * sizeof(FLT));
	noise = malloc(N * sizeof(FLT));
	if (spd == NULL || noise == NULL) {
		fprintf(stderr, "lingot_signal_test: out of memory\n");
		free(spd);
		free(noise);
		return;
	}

	/* descending ramp N..1 */
	for (i = 0; i < N; i++) {
		spd[i] = N - i;
		noise[i] = -1.0;
	}

	lingot_signal_test_print("S", spd, N);

	/* Initialised because the quick-select calls that would assign it are
	 * commented out; printing it uninitialised is undefined behaviour. */
	double m = 0.0;

	tic();
	// m = lingot_signal_quick_select(spd, 512);
	toc();

	printf("m = %f\n", m);

	lingot_signal_test_print("S", spd, N);

	tic();
	// m = lingot_signal_quick_select(spd, 512);
	toc();

	printf("m = %f\n", m);

	lingot_signal_test_print("S", spd, N);

	// -----------------

	/* reset the inputs before timing the noise-level computation */
	for (i = 0; i < N; i++) {
		spd[i] = N - i;
		noise[i] = -1.0;
	}

	/* single timed call */
	tic();
	lingot_signal_compute_noise_level(spd, N, n, noise);
	toc();

	lingot_signal_test_print("N", noise, N);

	/* 10000 timed calls */
	tic();
	for (i = 0; i < 10000; i++) {
		lingot_signal_compute_noise_level(spd, N, n, noise);
	}
	toc();

	free(spd);
	free(noise);
}
/// Default constructor: calls tic() immediately on construction.
/// NOTE(review): presumably this starts the measurement so a freshly
/// created Timer is already running - confirm against tic().
Timer::Timer()
{
    tic();
}