THREADABLE_FUNCTION_END

//evolve the configuration with the momenta
//
// For every local site and every direction the momentum H is scaled by dt,
// passed through safe_hermitian_exact_i_exponentiate and the result is
// multiplied from the left onto the corresponding link of lx_conf (in place).
// The borders of lx_conf are flagged as invalid afterwards.
THREADABLE_FUNCTION_3ARG(evolve_lx_conf_with_momenta, quad_su3*,lx_conf, quad_su3*,H, double,dt)
{
  GET_THREAD_ID();
  
  verbosity_lv2_master_printf("Evolving conf with momenta, dt=%lg\n",dt);
  
  START_TIMING(conf_evolve_time,nconf_evolve);
  
  //loop over all local sites and all directions
  NISSA_PARALLEL_LOOP(ivol,0,loc_vol)
    for(int mu=0;mu<NDIM;mu++)
      {
	//momentum times the step, and the link update built out of it
	su3 scaled_mom,link_update;
	
	su3_prod_double(scaled_mom,H[ivol][mu],dt);
	safe_hermitian_exact_i_exponentiate(link_update,scaled_mom);
	
	//left-multiply the link, source and destination coincide
	safe_su3_prod_su3(lx_conf[ivol][mu],link_update,lx_conf[ivol][mu]);
      }
  
  //links changed: whatever was communicated before is stale
  set_borders_invalid(lx_conf);
  
  STOP_TIMING(conf_evolve_time);
}
/**
 * Timing driver for PLASMA_zgetrf_nopiv_Tile (tile LU factorization without
 * pivoting).
 *
 * @param iparam  integer run parameters; unpacked into locals (M, N, NRHS,
 *                LDA, LDB, MT, check, ...) by PASTE_CODE_IPARAM_LOCALS.
 * @param dparam  double run parameters; on a checked run IPARAM_RES,
 *                IPARAM_ANORM, IPARAM_BNORM and IPARAM_XNORM are written.
 * @param t_      timing output; not touched directly here (presumably
 *                written by the START_TIMING/STOP_TIMING macros -- confirm).
 * @return always 0.
 *
 * The solution check is only possible for square systems; for M != N it is
 * switched off with a message on stderr.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PASTE_CODE_IPARAM_LOCALS( iparam );

    if ( M != N && check ) {
        fprintf(stderr, "Check cannot be perfomed with M != N\n");
        check = 0;
    }

    /* Allocate Data */
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDA, M, N );
    PLASMA_zplrnt_Tile(descA, 3456);

    /* Add max(M,N) to the diagonal entries of every diagonal tile
     * (presumably to make the matrix safe to factorize without pivoting). */
    {
        PLASMA_Complex64_t *Amat;
        int m, i, ldam;

        for(m=0; m<MT; m++) {
            ldam = BLKLDD( *descA, m );
            Amat = (PLASMA_Complex64_t*)plasma_getaddr(*descA, m, m);
            for(i=0; i<ldam; i++) {
                Amat[i*ldam+i] += max(M,N);
            }
        }
    }

    /* Save AT in lapack layout for check */
    PASTE_TILE_TO_LAPACK( descA, A, check, PLASMA_Complex64_t, LDA, N );

    START_TIMING();
    PLASMA_zgetrf_nopiv_Tile( descA );
    STOP_TIMING();

    /* Check the solution */
    if ( check )
    {
        PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDB, N, NRHS );
        PLASMA_zplrnt_Tile( descB, 7732 );
        PASTE_TILE_TO_LAPACK( descB, b, check, PLASMA_Complex64_t, LDB, NRHS );

        /* Solve with the factors held in descA: unit lower triangle first,
         * then the non-unit upper triangle. */
        PLASMA_ztrsm_Tile( PlasmaLeft, PlasmaLower, PlasmaNoTrans, PlasmaUnit,
                           1.0, descA, descB );
        PLASMA_ztrsm_Tile( PlasmaLeft, PlasmaUpper, PlasmaNoTrans, PlasmaNonUnit,
                           1.0, descA, descB );

        PASTE_TILE_TO_LAPACK( descB, x, check, PLASMA_Complex64_t, LDB, NRHS );

        dparam[IPARAM_RES] = z_check_solution(M, N, NRHS, A, LDA, b, x, LDB,
                                              &(dparam[IPARAM_ANORM]),
                                              &(dparam[IPARAM_BNORM]),
                                              &(dparam[IPARAM_XNORM]));

        /* Release the tile right-hand side as well; it was leaked before. */
        PASTE_CODE_FREE_MATRIX( descB );
        free(A);
        free(b);
        free(x);
    }

    PASTE_CODE_FREE_MATRIX( descA );

    return 0;
}
/**
 * Timing driver for PLASMA_zgeqrf_Tile (tile QR factorization).
 *
 * Only the factorization is timed.  When a check is requested and the
 * matrix is square, a random right-hand side is solved with
 * PLASMA_zgeqrs_Tile and the residual is stored in dparam[IPARAM_RES].
 *
 * @return always 0.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_desc *descT;
    PASTE_CODE_IPARAM_LOCALS( iparam );

    /* The verification needs a square system */
    const int verify = ( check && M == N );

    /* Matrix to factorize, filled with random data */
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDA, M, N );
    PLASMA_zplrnt_Tile( descA, 5373 );

    /* Keep an untouched lapack-layout copy of A for the verification */
    PASTE_TILE_TO_LAPACK( descA, A, verify, PLASMA_Complex64_t, LDA, N );

    /* Right-hand side, only needed for the verification */
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, verify, PLASMA_Complex64_t, PlasmaComplexDouble, LDB, M, NRHS );

    /* Workspace for the factorization */
    PLASMA_Alloc_Workspace_zgels_Tile(M, N, &descT);

    /* Timed section */
    START_TIMING();
    PLASMA_zgeqrf_Tile( descA, descT );
    STOP_TIMING();

    if ( verify )
    {
        /* Random right-hand side, saved before it gets overwritten */
        PLASMA_zplrnt_Tile( descB, 2264 );
        PASTE_TILE_TO_LAPACK( descB, B, 1, PLASMA_Complex64_t, LDB, NRHS );

        /* Solve using the computed factors; descB is passed as the in/out argument */
        PLASMA_zgeqrs_Tile( descA, descT, descB );

        /* Lapack-layout copy of descB after the solve */
        PASTE_TILE_TO_LAPACK( descB, X, 1, PLASMA_Complex64_t, LDB, NRHS );

        dparam[IPARAM_RES] = z_check_solution(M, N, NRHS, A, LDA, B, X, LDB,
                                              &dparam[IPARAM_ANORM],
                                              &dparam[IPARAM_BNORM],
                                              &dparam[IPARAM_XNORM]);

        /* Release everything that was only needed for the verification */
        PASTE_CODE_FREE_MATRIX( descB );
        free( A );
        free( B );
        free( X );
    }

    /* Release workspace and matrix */
    PLASMA_Dealloc_Handle_Tile(&descT);
    PASTE_CODE_FREE_MATRIX( descA );

    return 0;
}
void cWorld3D::Update(float afTimeStep) { START_TIMING(Physics); if(mpPhysicsWorld) mpPhysicsWorld->Update(afTimeStep); STOP_TIMING(Physics); START_TIMING(Entities); UpdateEntities(afTimeStep); STOP_TIMING(Entities); START_TIMING(Bodies); UpdateBodies(afTimeStep); STOP_TIMING(Bodies); START_TIMING(Particles); UpdateParticles(afTimeStep); STOP_TIMING(Particles); START_TIMING(Lights); UpdateLights(afTimeStep); STOP_TIMING(Lights); START_TIMING(SoundEntities); UpdateSoundEntities(afTimeStep); STOP_TIMING(SoundEntities); }
/**
 * Timing driver for PLASMA_zgeqrf (QR factorization, lapack layout).
 *
 * Only the factorization is timed.  On a checked run a random right-hand
 * side is solved with PLASMA_zgeqrs and the residual is stored in
 * dparam[IPARAM_RES].  The check is disabled (with a message on stderr)
 * when the matrix is not square.
 *
 * @return always 0.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_desc *descT;
    PASTE_CODE_IPARAM_LOCALS( iparam );

    if ( M != N && check ) {
        fprintf(stderr, "Check cannot be perfomed with M != N\n");
        check = 0;
    }

    /* Matrix to factorize, filled with random data */
    PASTE_CODE_ALLOCATE_MATRIX( A, 1, PLASMA_Complex64_t, LDA, N );
    PLASMA_zplrnt(M, N, A, LDA, 3456);

    /* Workspace for the factorization */
    PLASMA_Alloc_Workspace_zgels(M, N, &descT);

    /* Untouched copy of A, only made when the result is checked */
    PASTE_CODE_ALLOCATE_COPY( Aref, check, PLASMA_Complex64_t, A, LDA, N );

    /* Timed section */
    START_TIMING();
    PLASMA_zgeqrf( M, N, A, LDA, descT );
    STOP_TIMING();

    if ( check )
    {
        /* Random right-hand side X and a copy of it (Bref); X is then
         * handed to the solver */
        PASTE_CODE_ALLOCATE_MATRIX( X, 1, PLASMA_Complex64_t, LDB, NRHS );
        PLASMA_zplrnt( N, NRHS, X, LDB, 5673 );
        PASTE_CODE_ALLOCATE_COPY( Bref, 1, PLASMA_Complex64_t, X, LDB, NRHS );

        PLASMA_zgeqrs(M, N, NRHS, A, LDA, descT, X, LDB);

        dparam[IPARAM_RES] = z_check_solution(M, N, NRHS, Aref, LDA, Bref, X, LDB,
                                              &dparam[IPARAM_ANORM],
                                              &dparam[IPARAM_BNORM],
                                              &dparam[IPARAM_XNORM]);

        free( Aref );
        free( Bref );
        free( X );
    }

    /* Release workspace and matrix */
    PLASMA_Dealloc_Handle_Tile( &descT );
    free( A );

    return 0;
}
/**
 * Timing driver for PLASMA_zgemm_Tile: C = alpha * A * B + beta * C with
 * A of size M x K, B of size K x N and C of size M x N (no transposition).
 *
 * Only the product is timed.  On a checked run the result is compared by
 * z_check_gemm against the saved initial C and the residual is stored in
 * dparam[IPARAM_RES].
 *
 * @return always 0.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_Complex64_t alpha, beta;
    PASTE_CODE_IPARAM_LOCALS( iparam );

    /* Leading dimensions must cover the row counts of B (K) and C (M) */
    LDB = max(K, iparam[IPARAM_LDB]);
    LDC = max(M, iparam[IPARAM_LDC]);

    /* The three operands, in tile layout */
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDA, M, K );
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDB, K, N );
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descC, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDC, M, N );

    /* Random matrices and random scalars */
    PLASMA_zplrnt_Tile( descA, 5373 );
    PLASMA_zplrnt_Tile( descB, 7672 );
    PLASMA_zplrnt_Tile( descC, 6387 );
    LAPACKE_zlarnv_work(1, ISEED, 1, &alpha);
    LAPACKE_zlarnv_work(1, ISEED, 1, &beta);

    /* Initial C in lapack layout, needed as reference by the check */
    PASTE_TILE_TO_LAPACK( descC, Cinit, check, PLASMA_Complex64_t, LDC, N );

    /* Timed section */
    START_TIMING();
    PLASMA_zgemm_Tile( PlasmaNoTrans, PlasmaNoTrans, alpha, descA, descB, beta, descC );
    STOP_TIMING();

    if (check)
    {
        /* Bring operands and result to lapack layout for the checker */
        PASTE_TILE_TO_LAPACK( descA, A, check, PLASMA_Complex64_t, LDA, K );
        PASTE_TILE_TO_LAPACK( descB, B, check, PLASMA_Complex64_t, LDB, N );
        PASTE_TILE_TO_LAPACK( descC, C, check, PLASMA_Complex64_t, LDC, N );

        dparam[IPARAM_RES] = z_check_gemm( PlasmaNoTrans, PlasmaNoTrans, M, N, K,
                                           alpha, A, LDA, B, LDB, beta, C, Cinit, LDC,
                                           &dparam[IPARAM_ANORM],
                                           &dparam[IPARAM_BNORM],
                                           &dparam[IPARAM_XNORM]);

        free(A);
        free(B);
        free(C);
        free(Cinit);
    }

    PASTE_CODE_FREE_MATRIX( descA );
    PASTE_CODE_FREE_MATRIX( descB );
    PASTE_CODE_FREE_MATRIX( descC );

    return 0;
}
/**
 * Timing driver for PLASMA_zgesv_incpiv (linear solve, lapack layout).
 *
 * Works for square systems only: for M != N a message is printed on stderr
 * and -1 is returned before anything is allocated.  On a checked run the
 * residual is stored in dparam[IPARAM_RES].
 *
 * @return 0 on a completed run, -1 when M != N.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_desc *descL;
    int *ipiv;
    PASTE_CODE_IPARAM_LOCALS( iparam );

    if ( M != N ) {
        fprintf(stderr, "This timing works only with M == N\n");
        return -1;
    }

    /* System matrix and right-hand side (X is passed to the solver) */
    PASTE_CODE_ALLOCATE_MATRIX( A, 1, PLASMA_Complex64_t, LDA, N    );
    PASTE_CODE_ALLOCATE_MATRIX( X, 1, PLASMA_Complex64_t, LDB, NRHS );

    /* Random data */
    PLASMA_zplrnt( N, N,    A, LDA,   51 );
    PLASMA_zplrnt( N, NRHS, X, LDB, 5673 );

    /* Workspace and pivot array for the incremental-pivoting solver */
    PLASMA_Alloc_Workspace_zgesv_incpiv(N, &descL, &ipiv);

    /* Untouched copies of A and of the right-hand side for the check */
    PASTE_CODE_ALLOCATE_COPY( Aref, check, PLASMA_Complex64_t, A, LDA, N    );
    PASTE_CODE_ALLOCATE_COPY( Bref, check, PLASMA_Complex64_t, X, LDB, NRHS );

    /* Timed section */
    START_TIMING();
    PLASMA_zgesv_incpiv( N, NRHS, A, LDA, descL, ipiv, X, LDB );
    STOP_TIMING();

    if (check)
    {
        dparam[IPARAM_RES] = z_check_solution(N, N, NRHS, Aref, LDA, Bref, X, LDB,
                                              &dparam[IPARAM_ANORM],
                                              &dparam[IPARAM_BNORM],
                                              &dparam[IPARAM_XNORM]);
        free(Aref);
        free(Bref);
    }

    PLASMA_Dealloc_Handle_Tile( &descL );
    free( ipiv );
    free( X );
    free( A );

    return 0;
}
/**
 * Timing driver for PLASMA_zgetrf_tntpiv_Tile (tile LU factorization,
 * "tntpiv" variant).
 *
 * Only the factorization is timed.  On a checked run a random right-hand
 * side is solved with PLASMA_zgetrs_Tile and the residual is stored in
 * dparam[IPARAM_RES].  The check is disabled (with a message on stderr)
 * when the matrix is not square.
 *
 * @return always 0.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PASTE_CODE_IPARAM_LOCALS( iparam );

    if ( M != N && check ) {
        fprintf(stderr, "Check cannot be perfomed with M != N\n");
        check = 0;
    }

    /* Matrix to factorize and the pivot vector (min(M,N) entries) */
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDA, M, N );
    PASTE_CODE_ALLOCATE_MATRIX( piv, 1, int, min(M, N), 1 );

    PLASMA_zplrnt_Tile(descA, 3456);

    /* Untouched lapack-layout copy of A for the check */
    PASTE_TILE_TO_LAPACK( descA, A, check, PLASMA_Complex64_t, LDA, N );

    /* Timed section */
    START_TIMING();
    PLASMA_zgetrf_tntpiv_Tile( descA, piv );
    STOP_TIMING();

    if ( check )
    {
        /* Random right-hand side, saved before the solve overwrites it */
        PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDB, N, NRHS );
        PLASMA_zplrnt_Tile( descB, 7732 );
        PASTE_TILE_TO_LAPACK( descB, rhs, check, PLASMA_Complex64_t, LDB, NRHS );

        /* Solve with the computed factors */
        PLASMA_zgetrs_Tile( PlasmaNoTrans, descA, piv, descB );

        /* Lapack-layout copy of descB after the solve */
        PASTE_TILE_TO_LAPACK( descB, sol, check, PLASMA_Complex64_t, LDB, NRHS );

        dparam[IPARAM_RES] = z_check_solution(M, N, NRHS, A, LDA, rhs, sol, LDB,
                                              &dparam[IPARAM_ANORM],
                                              &dparam[IPARAM_BNORM],
                                              &dparam[IPARAM_XNORM]);

        PASTE_CODE_FREE_MATRIX( descB );
        free(A);
        free(rhs);
        free(sol);
    }

    PASTE_CODE_FREE_MATRIX( descA );
    free( piv );

    return 0;
}
/**
 * Timing driver for the task-based double precision QR factorization
 * (plasma_pdgeqrf_quark) on an N x N matrix.
 *
 * @param t_      timing output; not touched directly here (presumably
 *                written by the START_TIMING/STOP_TIMING macros -- confirm).
 * @param params  run parameters: matrix_size, iblocksize, blocksize, check.
 * @return the value of d_check_solution when params->check is set, else 0.
 */
static double
RunTest(real_Double_t *t_, struct user_parameters* params)
{
    double t;
    PLASMA_desc *descT;
    int64_t N  = params->matrix_size;
    int64_t IB = params->iblocksize;
    int64_t NB = params->blocksize;
    int check  = params->check;
    double residual = 0;

    const size_t matrix_bytes = N * N * sizeof(double);
    const size_t vector_bytes = N * sizeof(double);

    /* N x N matrix in NB x NB tiles */
    PLASMA_desc *descA = NULL;
    double *ptr = (double*)malloc(matrix_bytes);
    PLASMA_Desc_Create(&descA, ptr, PlasmaRealDouble, NB, NB, NB*NB, N, N, 0, 0, N, N);

    /* Fill the matrix; the generator is launched from a single thread
     * inside a parallel region */
#pragma omp parallel
    {
#pragma omp single
        {
            plasma_pdpltmg_quark(*descA, 5373 );
        }
    }

    /* Lapack-layout copy of the unfactorized matrix for the check */
    double *A_saved = NULL;
    if ( check ) {
        A_saved = (double*)malloc(matrix_bytes);
        plasma_pdtile_to_lapack_quark(*descA, (void*)A_saved, N);
    }

    /* Workspace for the factorization */
    plasma_alloc_ibnb_tile(N, N, PlasmaRealDouble, &descT, IB, NB);

    /* Timed section */
    START_TIMING();
#pragma omp parallel
    {
#pragma omp single
        {
            plasma_pdgeqrf_quark( *descA, *descT , IB);
        }
    }
    STOP_TIMING();

    if ( check )
    {
        /* Single-column right-hand side in tile layout */
        PLASMA_desc *descB = NULL;
        double* ptr = (double*)malloc(vector_bytes);
        PLASMA_Desc_Create(&descB, ptr, PlasmaRealDouble, NB, NB, NB*NB, N, 1, 0, 0, N, 1);

        /* Fill it and keep a lapack-layout copy */
        plasma_pdpltmg_seq(*descB, 2264 );
        double *rhs = (double*)malloc(vector_bytes);
        plasma_pdtile_to_lapack_quark(*descB, (void*)rhs, N);

        /* Solve using the computed factors */
        PLASMA_dgeqrs_Tile( descA, descT, descB , IB);

        /* Lapack-layout copy of descB after the solve */
        double *sol = (double*)malloc(vector_bytes);
        plasma_pdtile_to_lapack_quark(*descB, (void*)sol, N);

        residual = d_check_solution(N, N, 1, A_saved, N, rhs, sol, N);

        /* Release everything that was only needed for the check */
        PASTE_CODE_FREE_MATRIX( descB );
        free( A_saved );
        free( rhs );
        free( sol );
    }

    /* Release workspace and matrix */
    PLASMA_Dealloc_Handle_Tile(&descT);
    PASTE_CODE_FREE_MATRIX( descA );

    return residual;
}
/**
 * Timing driver for the task-based double precision Cholesky factorization
 * (plasma_pdpotrf_quark, upper triangle) on an N x N matrix.
 *
 * @param t_      timing output; not touched directly here (presumably
 *                written by the START_TIMING/STOP_TIMING macros -- confirm).
 * @param params  run parameters: matrix_size, blocksize, check.
 * @return the value of d_check_solution when params->check is set, else 0.
 */
static double
RunTest(real_Double_t *t_, struct user_parameters* params)
{
    double t;
    int64_t N  = params->matrix_size;
    int64_t NB = params->blocksize;
    int check  = params->check;
    int uplo   = PlasmaUpper;
    double residual = 0;

    const size_t matrix_bytes = N * N * sizeof(double);
    const size_t vector_bytes = N * sizeof(double);

    /* N x N matrix in NB x NB tiles */
    PLASMA_desc *descA = NULL;
    double* ptr = (double*)malloc(matrix_bytes);
    PLASMA_Desc_Create(&descA, ptr, PlasmaRealDouble, NB, NB, NB*NB, N, N, 0, 0, N, N);

    /* Fill the matrix; the generator is launched from a single thread
     * inside a parallel region */
#pragma omp parallel
    {
#pragma omp single
        {
            plasma_pdplgsy_quark( (double)N, *descA, 51 );
        }
    }

    /* Lapack-layout copy of the unfactorized matrix for the check */
    double *A_saved = NULL;
    if(check) {
        A_saved = (double*)malloc(matrix_bytes);
        plasma_pdtile_to_lapack_quark(*descA, (void*)A_saved, N);
    }

    /* Timed section: the factorization */
    START_TIMING();
#pragma omp parallel
    {
#pragma omp single
        {
            plasma_pdpotrf_quark(uplo, *descA);
        }
    }
    STOP_TIMING();

    if ( check )
    {
        /* Single-column right-hand side in tile layout */
        PLASMA_desc *descB = NULL;
        double* ptr = (double*)malloc(vector_bytes);
        PLASMA_Desc_Create(&descB, ptr, PlasmaRealDouble, NB, NB, NB*NB, N, 1, 0, 0, N, 1);

        /* Fill it and keep a lapack-layout copy */
        plasma_pdpltmg_seq(* descB, 7672 );
        double* rhs = (double*)malloc(vector_bytes);
        plasma_pdtile_to_lapack_quark(*descB, (void*)rhs, N);

        /* Solve using the computed factor */
        PLASMA_dpotrs_Tile( uplo, descA, descB );

        /* Lapack-layout copy of descB after the solve */
        double* sol = (double*)malloc(vector_bytes);
        plasma_pdtile_to_lapack_quark(*descB, (void*)sol, N);

        residual = d_check_solution(N, N, 1, A_saved, N, rhs, sol, N);

        PASTE_CODE_FREE_MATRIX( descB );
        free( A_saved );
        free( rhs );
        free( sol );
    }

    PASTE_CODE_FREE_MATRIX( descA );

    return residual;
}
/**
 * Timing driver for PLASMA_zgesvd_Tile (singular value decomposition).
 *
 * Both job flags are fixed to PlasmaNoVec, so with the current settings
 * neither U nor VT is allocated, initialised or freed.  No numerical check
 * is implemented: the two check branches are empty.  A non-zero return code
 * of the solver is reported on stdout.
 *
 * @return always 0.
 */
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PASTE_CODE_IPARAM_LOCALS( iparam );
    PLASMA_desc *descT;
    int jobu  = PlasmaNoVec;
    int jobvt = PlasmaNoVec;
    int info;

    /* Matrix, optional singular-vector storage, and the singular values */
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDA, M, N );
    PASTE_CODE_ALLOCATE_MATRIX( VT, (jobvt == PlasmaVec), PLASMA_Complex64_t, N, N );
    PASTE_CODE_ALLOCATE_MATRIX( U,  (jobu  == PlasmaVec), PLASMA_Complex64_t, M, M );
    PASTE_CODE_ALLOCATE_MATRIX( S, 1, double, N, 1 );

    /* Random input matrix */
    PLASMA_zplrnt_Tile(descA, 51 );

    if ( check )
    {
        /* placeholder: nothing is saved for a check */
    }

    /* Workspace */
    PLASMA_Alloc_Workspace_zgesvd(N, N, &descT);

    /* Requested singular-vector matrices start out as the identity */
    if ( jobu == PlasmaVec ) {
        LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', M, M, 0., 1., U, M);
    }
    if ( jobvt == PlasmaVec ) {
        LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', N, N, 0., 1., VT, N);
    }

    /* Timed section */
    START_TIMING();
    info = PLASMA_zgesvd_Tile(jobu, jobvt, descA, S, descT, U, M, VT, N);
    STOP_TIMING();

    if( info != 0 ) {
        printf(" ERROR OCCURED INFO %d\n",info);
    }

    if ( check )
    {
        /* placeholder: no check implemented */
    }

    /* Release workspace, optional vectors, matrix and singular values */
    PLASMA_Dealloc_Handle_Tile(&descT);
    if ( jobu == PlasmaVec ) {
        free( U );
    }
    if ( jobvt == PlasmaVec ) {
        free( VT );
    }
    PASTE_CODE_FREE_MATRIX( descA );
    free( S );

    return 0;
}
//take also the TA
//
// Computes the gluonic force F for the configuration conf: the bulk of the
// work is delegated to compute_gluonic_force_lx_conf_do_not_finish, followed
// by gluonic_force_finish_computation.
// When DEBUG is defined, a finite-difference estimate of the force on link
// conf[0][0] is built beforehand (one-sided differences of the gluonic action
// along every Gell-Mann generator, then averaged) and printed next to the
// analytic value F[0][0].
// With VERBOSITY_LV2 the average norm of the force is printed.
THREADABLE_FUNCTION_3ARG(compute_gluonic_force_lx_conf, quad_su3*,F, quad_su3*,conf, theory_pars_t*,physics)
{
  GET_THREAD_ID();
  
  START_TIMING(gluon_force_time,ngluon_force);
  
#ifdef DEBUG
  vector_reset(F);
  double eps=1e-5;
  
  //backup of the link that gets perturbed, and action of the unperturbed conf
  su3 link_bak;
  su3_copy(link_bak,conf[0][0]);
  double act_ori;
  gluonic_action(&act_ori,conf,physics->gauge_action_name,physics->beta);
  
  //accumulators for the forward and backward estimates
  su3 nu_plus,nu_minus;
  su3_put_to_zero(nu_plus);
  su3_put_to_zero(nu_minus);
  
  for(int igen=0;igen<NCOL*NCOL-1;igen++)
    {
      //generator scaled by eps/2, and the link modifier built from it
      su3 gen_step;
      su3_prod_double(gen_step,gell_mann_matr[igen],eps/2);
      su3 modif;
      safe_hermitian_exact_i_exponentiate(modif,gen_step);
      
      //backward change (dagger of the modifier), then action
      unsafe_su3_dag_prod_su3(conf[0][0],modif,link_bak);
      double act_minus;
      gluonic_action(&act_minus,conf,physics->gauge_action_name,physics->beta);
      
      //forward change, then action
      unsafe_su3_prod_su3(conf[0][0],modif,link_bak);
      double act_plus;
      gluonic_action(&act_plus,conf,physics->gauge_action_name,physics->beta);
      
      //restore the original link
      su3_copy(conf[0][0],link_bak);
      
      //printf("plus: %+016.016le, ori: %+016.016le, minus: %+016.016le, eps: %lg\n",act_plus,act_ori,act_minus,eps);
      
      //one-sided difference quotients of the action
      double gr_plus=-(act_plus-act_ori)/eps;
      double gr_minus=-(act_ori-act_minus)/eps;
      su3_summ_the_prod_idouble(nu_plus,gell_mann_matr[igen],gr_plus);
      su3_summ_the_prod_idouble(nu_minus,gell_mann_matr[igen],gr_minus);
    }
  
  //average of forward and backward estimates
  su3 nu;
  su3_summ(nu,nu_plus,nu_minus);
  su3_prodassign_double(nu,0.5);
  
  vector_reset(F);
#endif
  
  //analytic force, first the bulk...
  compute_gluonic_force_lx_conf_do_not_finish(F,conf,physics);
  
  //...then the final step
  gluonic_force_finish_computation(F,conf);
  
#ifdef DEBUG
  master_printf("checking pure gauge force\n");
  master_printf("an\n");
  su3_print(F[0][0]);
  master_printf("nu\n");
  su3_print(nu);
  master_printf("nu_plus\n");
  su3_print(nu_plus);
  master_printf("nu_minus\n");
  su3_print(nu_minus);
  //crash("anna");
#endif
  
  //report the size of the force
  if(VERBOSITY_LV2)
    {
      double norm=0;
      norm+=double_vector_glb_norm2(F,loc_vol);
      master_printf("  Gluonic force average norm: %lg\n",sqrt(norm/glb_vol));
    }
  
  STOP_TIMING(gluon_force_time);
}
///////////////////////////////////////////////////////// // snapMess // ///////////////////////////////////////////////////////// void pix_snap :: snapMess(void) { if(getState()==INIT) { verbose(0, "not initialized yet with a valid context"); return; } if(!GLEW_VERSION_1_1 && !GLEW_EXT_texture_object) { return; } if (m_cache&&m_cache->m_magic!=GEMCACHE_MAGIC) { m_cache=NULL; } if (m_width <= 0 || m_height <= 0) { error("Illegal size"); return; } // do we need to remake the data? bool makeNew = false; bool makePbo = false; // release previous data if (m_originalImage) { if (m_originalImage->xsize != m_width || m_originalImage->ysize != m_height) { m_originalImage->clear(); delete m_originalImage; m_originalImage = NULL; makeNew = true; } } else { makeNew = true; } if (makeNew) { m_originalImage = new imageStruct; m_originalImage->xsize = m_width; m_originalImage->ysize = m_height; m_originalImage->setCsizeByFormat(GL_RGBA_GEM); // FIXXXME: upsidedown should default be 'true' m_originalImage->upsidedown = false; m_originalImage->allocate(m_originalImage->xsize * m_originalImage->ysize * m_originalImage->csize); makePbo=true; } if(m_numPbo>0 && !m_pbo) { makePbo=true; } else if(m_numPbo<=0) { makePbo=false; } /* FIXXME */ if(makePbo) { if(m_pbo) { delete[]m_pbo; m_pbo=NULL; } if(GLEW_ARB_pixel_buffer_object) { m_pbo=new GLuint[m_numPbo]; glGenBuffersARB(m_numPbo, m_pbo); int i=0; for(i=0; i<m_numPbo; i++) { glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, m_pbo[i]); glBufferDataARB(GL_PIXEL_PACK_BUFFER_ARB, m_originalImage->xsize*m_originalImage->ysize*m_originalImage->csize, 0, GL_STREAM_READ_ARB); } glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0); } else { verbose(1, "PBOs not supported! 
disabling"); m_numPbo=0; } } if(m_pbo) { START_TIMING(); m_curPbo=(m_curPbo+1)%m_numPbo; int index=m_curPbo; int nextIndex=(m_curPbo+1)%m_numPbo; glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, m_pbo[index]); glReadPixels(m_x, m_y, m_width, m_height, m_originalImage->format, m_originalImage->type, 0); glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, m_pbo[nextIndex]); GLubyte* src = (GLubyte*)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB); if(src) { m_originalImage->fromRGBA(src); glUnmapBufferARB( GL_PIXEL_PACK_BUFFER_ARB); // release pointer to the mapped buffer } glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0); STOP_TIMING(m_numPbo); } else { START_TIMING(); glFinish(); glPixelStorei(GL_PACK_ALIGNMENT, 4); glPixelStorei(GL_PACK_ROW_LENGTH, 0); glPixelStorei(GL_PACK_SKIP_ROWS, 0); glPixelStorei(GL_PACK_SKIP_PIXELS, 0); glReadPixels(m_x, m_y, m_width, m_height, m_originalImage->format, m_originalImage->type, m_originalImage->data); STOP_TIMING(-1); } if (m_cache) { m_cache->resendImage = 1; } }
int MeanShift(const IplImage* img, int **labels) { DECLARE_TIMING(timer); START_TIMING(timer); int level = 1; double color_radius2 = color_radius*color_radius; int minRegion = 50; // use Lab rather than L*u*v! // since Luv may produce noise points IplImage *result = cvCreateImage(cvGetSize(img), img->depth, img->nChannels); cvCvtColor(img, result, CV_RGB2Lab); // Step One. Filtering stage of meanshift segmentation // http://rsbweb.nih.gov/ij/plugins/download/Mean_Shift.java for (int i = 0; i<img->height; i++) for (int j = 0; j<img->width; j++) { int ic = i; int jc = j; int icOld, jcOld; float LOld, UOld, VOld; float L = (float)((uchar *)(result->imageData + i*img->widthStep))[j*result->nChannels + 0]; float U = (float)((uchar *)(result->imageData + i*img->widthStep))[j*result->nChannels + 1]; float V = (float)((uchar *)(result->imageData + i*img->widthStep))[j*result->nChannels + 2]; // in the case of 8-bit and 16-bit images R, G and B are converted to floating-point format and scaled to fit 0 to 1 range // http://opencv.willowgarage.com/documentation/c/miscellaneous_image_transformations.html L = L * 100 / 255; U = U - 128; V = V - 128; double shift = 5; for (int iters = 0; shift > 3 && iters < 100; iters++) { icOld = ic; jcOld = jc; LOld = L; UOld = U; VOld = V; float mi = 0; float mj = 0; float mL = 0; float mU = 0; float mV = 0; int num = 0; int i2from = max(0, i - spatial_radius), i2to = min(img->height, i + spatial_radius + 1); int j2from = max(0, j - spatial_radius), j2to = min(img->width, j + spatial_radius + 1); for (int i2 = i2from; i2 < i2to; i2++) { for (int j2 = j2from; j2 < j2to; j2++) { float L2 = (float)((uchar *)(result->imageData + i2*img->widthStep))[j2*result->nChannels + 0], U2 = (float)((uchar *)(result->imageData + i2*img->widthStep))[j2*result->nChannels + 1], V2 = (float)((uchar *)(result->imageData + i2*img->widthStep))[j2*result->nChannels + 2]; L2 = L2 * 100 / 255; U2 = U2 - 128; V2 = V2 - 128; double dL = L2 - L; double dU = U2 - U; 
double dV = V2 - V; if (dL*dL + dU*dU + dV*dV <= color_radius2) { mi += i2; mj += j2; mL += L2; mU += U2; mV += V2; num++; } } } float num_ = 1.f / num; L = mL*num_; U = mU*num_; V = mV*num_; ic = (int)(mi*num_ + 0.5); jc = (int)(mj*num_ + 0.5); int di = ic - icOld; int dj = jc - jcOld; double dL = L - LOld; double dU = U - UOld; double dV = V - VOld; shift = di*di + dj*dj + dL*dL + dU*dU + dV*dV; } L = L * 255 / 100; U = U + 128; V = V + 128; ((uchar *)(result->imageData + i*img->widthStep))[j*result->nChannels + 0] = (uchar)L; ((uchar *)(result->imageData + i*img->widthStep))[j*result->nChannels + 1] = (uchar)U; ((uchar *)(result->imageData + i*img->widthStep))[j*result->nChannels + 2] = (uchar)V; } IplImage *tobeshow = cvCreateImage(cvGetSize(img), img->depth, img->nChannels); cvCvtColor(result, tobeshow, CV_Lab2RGB); cvSaveImage("filtered.png", tobeshow); cvReleaseImage(&tobeshow); // Step Two. Cluster // Connect int regionCount = 0; int *modePointCounts = new int[img->height*img->width]; memset(modePointCounts, 0, img->width*img->height*sizeof(int)); float *mode = new float[img->height*img->width * 3]; { int label = -1; for (int i = 0; i<img->height; i++) for (int j = 0; j<img->width; j++) labels[i][j] = -1; for (int i = 0; i<img->height; i++) for (int j = 0; j<img->width; j++) if (labels[i][j]<0) { labels[i][j] = ++label; float L = (float)((uchar *)(result->imageData + i*img->widthStep))[j*result->nChannels + 0], U = (float)((uchar *)(result->imageData + i*img->widthStep))[j*result->nChannels + 1], V = (float)((uchar *)(result->imageData + i*img->widthStep))[j*result->nChannels + 2]; mode[label * 3 + 0] = L * 100 / 255; mode[label * 3 + 1] = 354 * U / 255 - 134; mode[label * 3 + 2] = 256 * V / 255 - 140; // Fill std::stack<CvPoint> neighStack; neighStack.push(cvPoint(i, j)); const int dxdy[][2] = { { -1, -1 }, { -1, 0 }, { -1, 1 }, { 0, -1 }, { 0, 1 }, { 1, -1 }, { 1, 0 }, { 1, 1 } }; while (!neighStack.empty()) { CvPoint p = neighStack.top(); 
neighStack.pop(); for (int k = 0; k<8; k++) { int i2 = p.x + dxdy[k][0], j2 = p.y + dxdy[k][1]; if (i2 >= 0 && j2 >= 0 && i2<img->height && j2<img->width && labels[i2][j2]<0 && color_distance(result, i, j, i2, j2)<color_radius2) { labels[i2][j2] = label; neighStack.push(cvPoint(i2, j2)); modePointCounts[label]++; L = (float)((uchar *)(result->imageData + i2*img->widthStep))[j2*result->nChannels + 0]; U = (float)((uchar *)(result->imageData + i2*img->widthStep))[j2*result->nChannels + 1]; V = (float)((uchar *)(result->imageData + i2*img->widthStep))[j2*result->nChannels + 2]; mode[label * 3 + 0] += L * 100 / 255; mode[label * 3 + 1] += 354 * U / 255 - 134; mode[label * 3 + 2] += 256 * V / 255 - 140; } } } mode[label * 3 + 0] /= modePointCounts[label]; mode[label * 3 + 1] /= modePointCounts[label]; mode[label * 3 + 2] /= modePointCounts[label]; } //current Region count regionCount = label + 1; } std::cout << "Mean Shift(Connect):" << regionCount << std::endl; int oldRegionCount = regionCount; // TransitiveClosure for (int counter = 0, deltaRegionCount = 1; counter<5 && deltaRegionCount>0; counter++) { // 1.Build RAM using classifiction structure RAList *raList = new RAList[regionCount], *raPool = new RAList[10 * regionCount]; //10 is hard coded! for (int i = 0; i < regionCount; i++) { raList[i].label = i; raList[i].next = NULL; } for (int i = 0; i < regionCount * 10 - 1; i++) { raPool[i].next = &raPool[i + 1]; } raPool[10 * regionCount - 1].next = NULL; RAList *raNode1, *raNode2, *oldRAFreeList, *freeRAList = raPool; for (int i = 0; i<img->height; i++) for (int j = 0; j<img->width; j++) { if (i>0 && labels[i][j] != labels[i - 1][j]) { // Get 2 free node raNode1 = freeRAList; raNode2 = freeRAList->next; oldRAFreeList = freeRAList; freeRAList = freeRAList->next->next; // connect the two region raNode1->label = labels[i][j]; raNode2->label = labels[i - 1][j]; if (raList[labels[i][j]].Insert(raNode2)) //already exists! 
freeRAList = oldRAFreeList; else raList[labels[i - 1][j]].Insert(raNode1); } if (j>0 && labels[i][j] != labels[i][j - 1]) { // Get 2 free node raNode1 = freeRAList; raNode2 = freeRAList->next; oldRAFreeList = freeRAList; freeRAList = freeRAList->next->next; // connect the two region raNode1->label = labels[i][j]; raNode2->label = labels[i][j - 1]; if (raList[labels[i][j]].Insert(raNode2)) freeRAList = oldRAFreeList; else raList[labels[i][j - 1]].Insert(raNode1); } } // 2.Treat each region Ri as a disjoint set for (int i = 0; i < regionCount; i++) { RAList *neighbor = raList[i].next; while (neighbor) { if (color_distance(&mode[3 * i], &mode[3 * neighbor->label])<color_radius2) { int iCanEl = i, neighCanEl = neighbor->label; while (raList[iCanEl].label != iCanEl) iCanEl = raList[iCanEl].label; while (raList[neighCanEl].label != neighCanEl) neighCanEl = raList[neighCanEl].label; if (iCanEl<neighCanEl) raList[neighCanEl].label = iCanEl; else { //raList[raList[iCanEl].label].label = iCanEl; raList[iCanEl].label = neighCanEl; } } neighbor = neighbor->next; } } // 3. Union Find for (int i = 0; i < regionCount; i++) { int iCanEl = i; while (raList[iCanEl].label != iCanEl) iCanEl = raList[iCanEl].label; raList[i].label = iCanEl; } // 4. Traverse joint sets, relabeling image. 
int *modePointCounts_buffer = new int[regionCount]; memset(modePointCounts_buffer, 0, regionCount*sizeof(int)); float *mode_buffer = new float[regionCount * 3]; int *label_buffer = new int[regionCount]; for (int i = 0; i<regionCount; i++) { label_buffer[i] = -1; mode_buffer[i * 3 + 0] = 0; mode_buffer[i * 3 + 1] = 0; mode_buffer[i * 3 + 2] = 0; } for (int i = 0; i<regionCount; i++) { int iCanEl = raList[i].label; modePointCounts_buffer[iCanEl] += modePointCounts[i]; for (int k = 0; k<3; k++) mode_buffer[iCanEl * 3 + k] += mode[i * 3 + k] * modePointCounts[i]; } int label = -1; for (int i = 0; i < regionCount; i++) { int iCanEl = raList[i].label; if (label_buffer[iCanEl] < 0) { label_buffer[iCanEl] = ++label; for (int k = 0; k < 3; k++) mode[label * 3 + k] = (mode_buffer[iCanEl * 3 + k]) / (modePointCounts_buffer[iCanEl]); modePointCounts[label] = modePointCounts_buffer[iCanEl]; } } regionCount = label + 1; for (int i = 0; i < img->height; i++) for (int j = 0; j < img->width; j++) labels[i][j] = label_buffer[raList[labels[i][j]].label]; delete[] mode_buffer; delete[] modePointCounts_buffer; delete[] label_buffer; //Destroy RAM delete[] raList; delete[] raPool; deltaRegionCount = oldRegionCount - regionCount; oldRegionCount = regionCount; std::cout << "Mean Shift(TransitiveClosure):" << regionCount << std::endl; } // Prune { int *modePointCounts_buffer = new int[regionCount]; float *mode_buffer = new float[regionCount * 3]; int *label_buffer = new int[regionCount]; int minRegionCount; do{ minRegionCount = 0; // Build RAM again RAList *raList = new RAList[regionCount], *raPool = new RAList[10 * regionCount]; //10 is hard coded! 
for (int i = 0; i < regionCount; i++) { raList[i].label = i; raList[i].next = NULL; } for (int i = 0; i < regionCount * 10 - 1; i++) { raPool[i].next = &raPool[i + 1]; } raPool[10 * regionCount - 1].next = NULL; RAList *raNode1, *raNode2, *oldRAFreeList, *freeRAList = raPool; for (int i = 0; i<img->height; i++) for (int j = 0; j<img->width; j++) { if (i>0 && labels[i][j] != labels[i - 1][j]) { // Get 2 free node raNode1 = freeRAList; raNode2 = freeRAList->next; oldRAFreeList = freeRAList; freeRAList = freeRAList->next->next; // connect the two region raNode1->label = labels[i][j]; raNode2->label = labels[i - 1][j]; if (raList[labels[i][j]].Insert(raNode2)) //already exists! freeRAList = oldRAFreeList; else raList[labels[i - 1][j]].Insert(raNode1); } if (j>0 && labels[i][j] != labels[i][j - 1]) { // Get 2 free node raNode1 = freeRAList; raNode2 = freeRAList->next; oldRAFreeList = freeRAList; freeRAList = freeRAList->next->next; // connect the two region raNode1->label = labels[i][j]; raNode2->label = labels[i][j - 1]; if (raList[labels[i][j]].Insert(raNode2)) freeRAList = oldRAFreeList; else raList[labels[i][j - 1]].Insert(raNode1); } } // Find small regions for (int i = 0; i < regionCount; i++) if (modePointCounts[i] < minRegion) { minRegionCount++; RAList *neighbor = raList[i].next; int candidate = neighbor->label; float minDistance = color_distance(&mode[3 * i], &mode[3 * candidate]); neighbor = neighbor->next; while (neighbor) { float minDistance2 = color_distance(&mode[3 * i], &mode[3 * neighbor->label]); if (minDistance2<minDistance) { minDistance = minDistance2; candidate = neighbor->label; } neighbor = neighbor->next; } int iCanEl = i, neighCanEl = candidate; while (raList[iCanEl].label != iCanEl) iCanEl = raList[iCanEl].label; while (raList[neighCanEl].label != neighCanEl) neighCanEl = raList[neighCanEl].label; if (iCanEl < neighCanEl) raList[neighCanEl].label = iCanEl; else { //raList[raList[iCanEl].label].label = neighCanEl; raList[iCanEl].label = 
neighCanEl; } } for (int i = 0; i < regionCount; i++) { int iCanEl = i; while (raList[iCanEl].label != iCanEl) iCanEl = raList[iCanEl].label; raList[i].label = iCanEl; } memset(modePointCounts_buffer, 0, regionCount*sizeof(int)); for (int i = 0; i < regionCount; i++) { label_buffer[i] = -1; mode_buffer[3 * i + 0] = 0; mode_buffer[3 * i + 1] = 0; mode_buffer[3 * i + 2] = 0; } for (int i = 0; i<regionCount; i++) { int iCanEl = raList[i].label; modePointCounts_buffer[iCanEl] += modePointCounts[i]; for (int k = 0; k<3; k++) mode_buffer[iCanEl * 3 + k] += mode[i * 3 + k] * modePointCounts[i]; } int label = -1; for (int i = 0; i < regionCount; i++) { int iCanEl = raList[i].label; if (label_buffer[iCanEl] < 0) { label_buffer[iCanEl] = ++label; for (int k = 0; k < 3; k++) mode[label * 3 + k] = (mode_buffer[iCanEl * 3 + k]) / (modePointCounts_buffer[iCanEl]); modePointCounts[label] = modePointCounts_buffer[iCanEl]; } } regionCount = label + 1; for (int i = 0; i < img->height; i++) for (int j = 0; j < img->width; j++) labels[i][j] = label_buffer[raList[labels[i][j]].label]; //Destroy RAM delete[] raList; delete[] raPool; std::cout << "Mean Shift(Prune):" << regionCount << std::endl; } while (minRegionCount > 0); delete[] mode_buffer; delete[] modePointCounts_buffer; delete[] label_buffer; } // Output STOP_TIMING(timer); std::cout << "Mean Shift(ms):" << GET_TIMING(timer) << std::endl; cvReleaseImage(&result); delete[]mode; delete[]modePointCounts; return regionCount; }