// compute nOrients gradient histograms per bin x bin block of pixels void gradHist( float *M, float *O, float *H, int h, int w, int bin, int nOrients, int softBin, bool full ) { const int hb=h/bin, wb=w/bin, h0=hb*bin, w0=wb*bin, nb=wb*hb; const float s=(float)bin, sInv=1/s, sInv2=1/s/s; float *H0, *H1, *M0, *M1; int x, y; int *O0, *O1; float xb, init; O0=(int*)alMalloc(h*sizeof(int),16); M0=(float*) alMalloc(h*sizeof(float),16); O1=(int*)alMalloc(h*sizeof(int),16); M1=(float*) alMalloc(h*sizeof(float),16); // main loop for( x=0; x<w0; x++ ) { // compute target orientation bins for entire column - very fast gradQuantize(O+x*h,M+x*h,O0,O1,M0,M1,nb,h0,sInv2,nOrients,full,softBin>=0); if( softBin<0 && softBin%2==0 ) { // no interpolation w.r.t. either orienation or spatial bin H1=H+(x/bin)*hb; #define GH H1[O0[y]]+=M0[y]; y++; if( bin==1 ) for(y=0; y<h0;) { GH; H1++; } else if( bin==2 ) for(y=0; y<h0;) { GH; GH; H1++; } else if( bin==3 ) for(y=0; y<h0;) { GH; GH; GH; H1++; } else if( bin==4 ) for(y=0; y<h0;) { GH; GH; GH; GH; H1++; } else for( y=0; y<h0;) { for( int y1=0; y1<bin; y1++ ) { GH; } H1++; } #undef GH } else if( softBin%2==0 || bin==1 ) { // interpolate w.r.t. orientation only, not spatial bin H1=H+(x/bin)*hb; #define GH H1[O0[y]]+=M0[y]; H1[O1[y]]+=M1[y]; y++; if( bin==1 ) for(y=0; y<h0;) { GH; H1++; } else if( bin==2 ) for(y=0; y<h0;) { GH; GH; H1++; } else if( bin==3 ) for(y=0; y<h0;) { GH; GH; GH; H1++; } else if( bin==4 ) for(y=0; y<h0;) { GH; GH; GH; GH; H1++; } else for( y=0; y<h0;) { for( int y1=0; y1<bin; y1++ ) { GH; } H1++; } #undef GH } else { // interpolate using trilinear interpolation float ms[4], xyd, yb, xd, yd; __m128 _m, _m0, _m1; bool hasLf, hasRt; int xb0, yb0; if( x==0 ) { init=(0+.5f)*sInv-0.5f; xb=init; } hasLf = xb>=0; xb0 = hasLf?(int)xb:-1; hasRt = xb0 < wb-1; xd=xb-xb0; xb+=sInv; yb=init; y=0; // macros for code conciseness #define GHinit yd=yb-yb0; yb+=sInv; H0=H+xb0*hb+yb0; xyd=xd*yd; \ ms[0]=1-xd-yd+xyd; ms[1]=yd-xyd; ms[2]=xd-xyd; ms[3]=xyd; #define GH(H,ma,mb) H1=H; STRu(*H1,ADD(LDu(*H1),MUL(ma,mb))); // leading rows, no top bin for( ; y<bin/2; y++ ) { yb0=-1; GHinit; if(hasLf) { H0[O0[y]+1]+=ms[1]*M0[y]; H0[O1[y]+1]+=ms[1]*M1[y]; } if(hasRt) { H0[O0[y]+hb+1]+=ms[3]*M0[y]; H0[O1[y]+hb+1]+=ms[3]*M1[y]; } } // main rows, has top and bottom bins, use SSE for minor speedup if( softBin<0 ) for( ; ; y++ ) { yb0 = (int) yb; if(yb0>=hb-1) break; GHinit; _m0=SET(M0[y]); if(hasLf) { _m=SET(0,0,ms[1],ms[0]); GH(H0+O0[y],_m,_m0); } if(hasRt) { _m=SET(0,0,ms[3],ms[2]); GH(H0+O0[y]+hb,_m,_m0); } } else for( ; ; y++ ) { yb0 = (int) yb; if(yb0>=hb-1) break; GHinit; _m0=SET(M0[y]); _m1=SET(M1[y]); if(hasLf) { _m=SET(0,0,ms[1],ms[0]); GH(H0+O0[y],_m,_m0); GH(H0+O1[y],_m,_m1); } if(hasRt) { _m=SET(0,0,ms[3],ms[2]); GH(H0+O0[y]+hb,_m,_m0); GH(H0+O1[y]+hb,_m,_m1); } } // final rows, no bottom bin for( ; y<h0; y++ ) { yb0 = (int) yb; GHinit; if(hasLf) { H0[O0[y]]+=ms[0]*M0[y]; H0[O1[y]]+=ms[0]*M1[y]; } if(hasRt) { H0[O0[y]+hb]+=ms[2]*M0[y]; H0[O1[y]+hb]+=ms[2]*M1[y]; } } #undef GHinit #undef GH } } alFree(O0); alFree(O1); alFree(M0); alFree(M1); // normalize boundary bins which only get 7/8 of weight of interior bins if( softBin%2!=0 ) for( int o=0; o<nOrients; o++ ) { x=0; for( y=0; y<hb; y++ ) H[o*nb+x*hb+y]*=8.f/7.f; y=0; for( x=0; x<wb; x++ ) H[o*nb+x*hb+y]*=8.f/7.f; x=wb-1; for( y=0; y<hb; y++ ) H[o*nb+x*hb+y]*=8.f/7.f; y=hb-1; for( x=0; x<wb; x++ ) H[o*nb+x*hb+y]*=8.f/7.f; } }
void pcl::people::HOG::gradHist( float *M, float *O, int h, int w, int bin_size, int n_orients, bool soft_bin, float *H ) const { const int hb=h/bin_size, wb=w/bin_size, h0=hb*bin_size, w0=wb*bin_size, nb=wb*hb; const float s=(float)bin_size, sInv=1/s, sInv2=1/s/s; float *H0, *H1, *M0, *M1; int x, y; int *O0, *O1; O0=(int*)alMalloc(h*sizeof(int),16); M0=(float*) alMalloc(h*sizeof(float),16); O1=(int*)alMalloc(h*sizeof(int),16); M1=(float*) alMalloc(h*sizeof(float),16); // main loop float xb = 0; float init = 0; for( x=0; x<w0; x++ ) { // compute target orientation bins for entire column - very fast gradQuantize( O+x*h, M+x*h, O0, O1, M0, M1, n_orients, nb, h0, sInv2 ); if( !soft_bin || bin_size==1 ) { // interpolate w.r.t. orientation only, not spatial bin_size H1=H+(x/bin_size)*hb; #define GH H1[O0[y]]+=M0[y]; H1[O1[y]]+=M1[y]; y++; if( bin_size==1 ) for(y=0; y<h0;) { GH; H1++; } else if( bin_size==2 ) for(y=0; y<h0;) { GH; GH; H1++; } else if( bin_size==3 ) for(y=0; y<h0;) { GH; GH; GH; H1++; } else if( bin_size==4 ) for(y=0; y<h0;) { GH; GH; GH; GH; H1++; } else for( y=0; y<h0;) { for( int y1=0; y1<bin_size; y1++ ) { GH; } H1++; } #undef GH } else { // interpolate using trilinear interpolation #if defined(__SSE2__) float ms[4], xyd, yb, xd, yd; __m128 _m, _m0, _m1; bool hasLf, hasRt; int xb0, yb0; if( x==0 ) { init=(0+.5f)*sInv-0.5f; xb=init; } hasLf = xb>=0; xb0 = hasLf?(int)xb:-1; hasRt = xb0 < wb-1; xd=xb-xb0; xb+=sInv; yb=init; y=0; // macros for code conciseness #define GHinit yd=yb-yb0; yb+=sInv; H0=H+xb0*hb+yb0; xyd=xd*yd; \ ms[0]=1-xd-yd+xyd; ms[1]=yd-xyd; ms[2]=xd-xyd; ms[3]=xyd; #define GH(H,ma,mb) H1=H; pcl::sse_stru(*H1,pcl::sse_add(pcl::sse_ldu(*H1),pcl::sse_mul(ma,mb))); // leading rows, no top bin_size for( ; y<bin_size/2; y++ ) { yb0=-1; GHinit; if(hasLf) { H0[O0[y]+1]+=ms[1]*M0[y]; H0[O1[y]+1]+=ms[1]*M1[y]; } if(hasRt) { H0[O0[y]+hb+1]+=ms[3]*M0[y]; H0[O1[y]+hb+1]+=ms[3]*M1[y]; } } // main rows, has top and bottom bins, use SSE for minor speedup for( ; ; y++ ) { yb0 = (int) yb; if(yb0>=hb-1) break; GHinit; _m0=pcl::sse_set(M0[y]); _m1=pcl::sse_set(M1[y]); if(hasLf) { _m=pcl::sse_set(0,0,ms[1],ms[0]); GH(H0+O0[y],_m,_m0); GH(H0+O1[y],_m,_m1); } if(hasRt) { _m=pcl::sse_set(0,0,ms[3],ms[2]); GH(H0+O0[y]+hb,_m,_m0); GH(H0+O1[y]+hb,_m,_m1); } } // final rows, no bottom bin_size for( ; y<h0; y++ ) { yb0 = (int) yb; GHinit; if(hasLf) { H0[O0[y]]+=ms[0]*M0[y]; H0[O1[y]]+=ms[0]*M1[y]; } if(hasRt) { H0[O0[y]+hb]+=ms[2]*M0[y]; H0[O1[y]+hb]+=ms[2]*M1[y]; } } #undef GHinit #undef GH #else float ms[4], xyd, yb, xd, yd; bool hasLf, hasRt; int xb0, yb0; if( x==0 ) { init=(0+.5f)*sInv-0.5f; xb=init; } hasLf = xb>=0; xb0 = hasLf?(int)xb:-1; hasRt = xb0 < wb-1; xd=xb-xb0; xb+=sInv; yb=init; y=0; // macros for code conciseness #define GHinit yd=yb-yb0; yb+=sInv; H0=H+xb0*hb+yb0; xyd=xd*yd; \ ms[0]=1-xd-yd+xyd; ms[1]=yd-xyd; ms[2]=xd-xyd; ms[3]=xyd; // leading rows, no top bin_size for( ; y<bin_size/2; y++ ) { yb0=-1; GHinit; if(hasLf) { H0[O0[y]+1]+=ms[1]*M0[y]; H0[O1[y]+1]+=ms[1]*M1[y]; } if(hasRt) { H0[O0[y]+hb+1]+=ms[3]*M0[y]; H0[O1[y]+hb+1]+=ms[3]*M1[y]; } } // main rows, has top and bottom bins for( ; ; y++ ) { yb0 = (int) yb; if(yb0>=hb-1) break; GHinit; if(hasLf) { H0[O0[y]+1]+=ms[1]*M0[y]; H0[O1[y]+1]+=ms[1]*M1[y]; H0[O0[y]]+=ms[0]*M0[y]; H0[O1[y]]+=ms[0]*M1[y]; } if(hasRt) { H0[O0[y]+hb+1]+=ms[3]*M0[y]; H0[O1[y]+hb+1]+=ms[3]*M1[y]; H0[O0[y]+hb]+=ms[2]*M0[y]; H0[O1[y]+hb]+=ms[2]*M1[y]; } } // final rows, no bottom bin_size for( ; y<h0; y++ ) { yb0 = (int) yb; GHinit; if(hasLf) { H0[O0[y]]+=ms[0]*M0[y]; H0[O1[y]]+=ms[0]*M1[y]; } if(hasRt) { H0[O0[y]+hb]+=ms[2]*M0[y]; H0[O1[y]+hb]+=ms[2]*M1[y]; } } #undef GHinit #endif } } alFree(O0); alFree(O1); alFree(M0); alFree(M1); }
#undef GH #undef GHT #undef GHT_MAX #if HEAP32 #define GH(x) x##_32 #define GHT ut32 #define GHT_MAX UT32_MAX #else #define GH(x) x##_64 #define GHT ut64 #define GHT_MAX UT64_MAX #endif static void GH(update_main_arena)(RCore *core, GHT m_arena, GH(RHeap_MallocState) *main_arena) { (void)r_core_read_at (core, m_arena, (ut8 *)main_arena, sizeof (GH(RHeap_MallocState))); } static void GH(get_brks)(RCore *core, GHT *brk_start, GHT *brk_end) { RListIter *iter; RDebugMap *map; r_debug_map_sync (core->dbg); r_list_foreach (core->dbg->maps, iter, map) { if (strstr (map->name, "[heap]")) { *brk_start = map->addr; *brk_end = map->addr_end; break; } } }
#undef GH #undef GHT #undef GHT_MAX #if HEAP32 #define GH(x) x##_32 #define GHT ut32 #define GHT_MAX UT32_MAX #else #define GH(x) x##_64 #define GHT ut64 #define GHT_MAX UT64_MAX #endif static void GH(update_arena_with_tc)(GH(RHeap_MallocState_tcache) *cmain_arena, MallocState *main_arena) { int i = 0; main_arena->mutex = cmain_arena->mutex; main_arena->flags = cmain_arena->flags; for (i = 0; i <= BINMAPSIZE; i++ ) { main_arena->binmap[i] = cmain_arena->binmap[i]; } main_arena->have_fast_chunks = cmain_arena->have_fast_chunks; main_arena->attached_threads = cmain_arena->attached_threads; for (i = 0; i <= NFASTBINS; i++) { main_arena->GH(fastbinsY)[i] = cmain_arena->fastbinsY[i]; } main_arena->GH(top) = cmain_arena->top; main_arena->GH(last_remainder) = cmain_arena->last_remainder; for (i = 0; i <= NBINS * 2 - 2; i++) { main_arena->GH(bins)[i] = cmain_arena->bins[i];