int main(int ac, char **argv) { init_global(ac, argv); if (init_env(&g_env, &g_lenv) == 1 || (a_init() == -1)) return (1); g_hash = hash_table(get_path(g_env, g_lenv)); xmalloc(100); if (sh21() == 1) { hash_del(&(g_hash)); return (1); } hash_del(&(g_hash)); return (0); }
void Permutohedral::init ( const float* feature, int feature_size, int N ) { // Compute the lattice coordinates for each feature [there is going to be a lot of magic here N_ = N; d_ = feature_size; HashTable hash_table( d_, N_/**(d_+1)*/ ); const int blocksize = sizeof(__m128) / sizeof(float); const __m128 invdplus1 = _mm_set1_ps( 1.0f / (d_+1) ); const __m128 dplus1 = _mm_set1_ps( d_+1 ); const __m128 Zero = _mm_set1_ps( 0 ); const __m128 One = _mm_set1_ps( 1 ); // Allocate the class memory if (offset_) delete [] offset_; offset_ = new int[ (d_+1)*(N_+16) ]; memset( offset_, 0, (d_+1)*(N_+16)*sizeof(int) ); if (barycentric_) delete [] barycentric_; barycentric_ = new float[ (d_+1)*(N_+16) ]; memset( barycentric_, 0, (d_+1)*(N_+16)*sizeof(float) ); // Allocate the local memory __m128 * scale_factor = (__m128*) _mm_malloc( (d_ )*sizeof(__m128) , 16 ); __m128 * f = (__m128*) _mm_malloc( (d_ )*sizeof(__m128) , 16 ); __m128 * elevated = (__m128*) _mm_malloc( (d_+1)*sizeof(__m128) , 16 ); __m128 * rem0 = (__m128*) _mm_malloc( (d_+1)*sizeof(__m128) , 16 ); __m128 * rank = (__m128*) _mm_malloc( (d_+1)*sizeof(__m128), 16 ); float * barycentric = new float[(d_+2)*blocksize]; short * canonical = new short[(d_+1)*(d_+1)]; short * key = new short[d_+1]; // Compute the canonical simplex for( int i=0; i<=d_; i++ ){ for( int j=0; j<=d_-i; j++ ) canonical[i*(d_+1)+j] = i; for( int j=d_-i+1; j<=d_; j++ ) canonical[i*(d_+1)+j] = i - (d_+1); } // Expected standard deviation of our filter (p.6 in [Adams etal 2010]) float inv_std_dev = sqrt(2.0 / 3.0)*(d_+1); // Compute the diagonal part of E (p.5 in [Adams etal 2010]) for( int i=0; i<d_; i++ ) scale_factor[i] = _mm_set1_ps( 1.0 / sqrt( float((i+2)*(i+1) ) * inv_std_dev) ); // Setup the SSE rounding #ifndef __SSE4_1__ const unsigned int old_rounding = _mm_getcsr(); _mm_setcsr( (old_rounding&~_MM_ROUND_MASK) | _MM_ROUND_NEAREST ); #endif // Compute the simplex each feature lies in for( int k=0; k<N_; k+=blocksize ){ // Load the feature from memory float * ff = (float*)f; for( int j=0; j<d_; j++ ) for( int i=0; i<blocksize; i++ ) ff[ j*blocksize + i ] = k+i < N_ ? feature[ (k+i)*d_+j ] : 0.0; // Elevate the feature ( y = Ep, see p.5 in [Adams etal 2010]) // sm contains the sum of 1..n of our faeture vector __m128 sm = Zero; for( int j=d_; j>0; j-- ){ __m128 cf = f[j-1]*scale_factor[j-1]; elevated[j] = sm - _mm_set1_ps(j)*cf; sm += cf; } elevated[0] = sm; // Find the closest 0-colored simplex through rounding __m128 sum = Zero; for( int i=0; i<=d_; i++ ){ __m128 v = invdplus1 * elevated[i]; #ifdef __SSE4_1__ v = _mm_round_ps( v, _MM_FROUND_TO_NEAREST_INT ); #else v = _mm_cvtepi32_ps( _mm_cvtps_epi32( v ) ); #endif rem0[i] = v*dplus1; sum += v; } // Find the simplex we are in and store it in rank (where rank describes what position coorinate i has in the sorted order of the features values) for( int i=0; i<=d_; i++ ) rank[i] = Zero; for( int i=0; i<d_; i++ ){ __m128 di = elevated[i] - rem0[i]; for( int j=i+1; j<=d_; j++ ){ __m128 dj = elevated[j] - rem0[j]; __m128 c = _mm_and_ps( One, _mm_cmplt_ps( di, dj ) ); rank[i] += c; rank[j] += One-c; } } // If the point doesn't lie on the plane (sum != 0) bring it back for( int i=0; i<=d_; i++ ){ rank[i] += sum; __m128 add = _mm_and_ps( dplus1, _mm_cmplt_ps( rank[i], Zero ) ); __m128 sub = _mm_and_ps( dplus1, _mm_cmpge_ps( rank[i], dplus1 ) ); rank[i] += add-sub; rem0[i] += add-sub; } // Compute the barycentric coordinates (p.10 in [Adams etal 2010]) for( int i=0; i<(d_+2)*blocksize; i++ ) barycentric[ i ] = 0; for( int i=0; i<=d_; i++ ){ __m128 v = (elevated[i] - rem0[i])*invdplus1; // Didn't figure out how to SSE this float * fv = (float*)&v; float * frank = (float*)&rank[i]; for( int j=0; j<blocksize; j++ ){ int p = d_-frank[j]; barycentric[j*(d_+2)+p ] += fv[j]; barycentric[j*(d_+2)+p+1] -= fv[j]; } } // The rest is not SSE'd for( int j=0; j<blocksize; j++ ){ // Wrap around barycentric[j*(d_+2)+0]+= 1 + barycentric[j*(d_+2)+d_+1]; float * frank = (float*)rank; float * frem0 = (float*)rem0; // Compute all vertices and their offset for( int remainder=0; remainder<=d_; remainder++ ){ for( int i=0; i<d_; i++ ){ key[i] = frem0[i*blocksize+j] + canonical[ remainder*(d_+1) + (int)frank[i*blocksize+j] ]; } offset_[ (j+k)*(d_+1)+remainder ] = hash_table.find( key, true ); barycentric_[ (j+k)*(d_+1)+remainder ] = barycentric[ j*(d_+2)+remainder ]; } } } _mm_free( scale_factor ); _mm_free( f ); _mm_free( elevated ); _mm_free( rem0 ); _mm_free( rank ); delete [] barycentric; delete [] canonical; delete [] key; // Reset the SSE rounding #ifndef __SSE4_1__ _mm_setcsr( old_rounding ); #endif // This is normally fast enough so no SSE needed here // Find the Neighbors of each lattice point // Get the number of vertices in the lattice M_ = hash_table.size(); // Create the neighborhood structure if(blur_neighbors_) delete[] blur_neighbors_; blur_neighbors_ = new Neighbors[ (d_+1)*M_ ]; short * n1 = new short[d_+1]; short * n2 = new short[d_+1]; // For each of d+1 axes, for( int j = 0; j <= d_; j++ ){ for( int i=0; i<M_; i++ ){ const short * key = hash_table.getKey( i ); for( int k=0; k<d_; k++ ){ n1[k] = key[k] - 1; n2[k] = key[k] + 1; } n1[j] = key[j] + d_; n2[j] = key[j] - d_; blur_neighbors_[j*M_+i].n1 = hash_table.find( n1 ); blur_neighbors_[j*M_+i].n2 = hash_table.find( n2 ); } } delete[] n1; delete[] n2; }
void Permutohedral::init ( const float* feature, int feature_size, int N ) { // Compute the lattice coordinates for each feature [there is going to be a lot of magic here N_ = N; d_ = feature_size; HashTable hash_table( d_, N_*(d_+1) ); // Allocate the class memory if (offset_) delete [] offset_; offset_ = new int[ (d_+1)*N_ ]; if (barycentric_) delete [] barycentric_; barycentric_ = new float[ (d_+1)*N_ ]; // Allocate the local memory float * scale_factor = new float[d_]; float * elevated = new float[d_+1]; float * rem0 = new float[d_+1]; float * barycentric = new float[d_+2]; short * rank = new short[d_+1]; short * canonical = new short[(d_+1)*(d_+1)]; short * key = new short[d_+1]; // Compute the canonical simplex for( int i=0; i<=d_; i++ ){ for( int j=0; j<=d_-i; j++ ) canonical[i*(d_+1)+j] = i; for( int j=d_-i+1; j<=d_; j++ ) canonical[i*(d_+1)+j] = i - (d_+1); } // Expected standard deviation of our filter (p.6 in [Adams etal 2010]) float inv_std_dev = sqrt(2.0 / 3.0)*(d_+1); // Compute the diagonal part of E (p.5 in [Adams etal 2010]) for( int i=0; i<d_; i++ ) scale_factor[i] = 1.0 / sqrt(float( (i+2)*(i+1) )) * inv_std_dev; // Compute the simplex each feature lies in for( int k=0; k<N_; k++ ){ // Elevate the feature ( y = Ep, see p.5 in [Adams etal 2010]) const float * f = feature + k*feature_size; // sm contains the sum of 1..n of our faeture vector float sm = 0; for( int j=d_; j>0; j-- ){ float cf = f[j-1]*scale_factor[j-1]; elevated[j] = sm - j*cf; sm += cf; } elevated[0] = sm; // Find the closest 0-colored simplex through rounding float down_factor = 1.0f / (d_+1); float up_factor = (d_+1); int sum = 0; for( int i=0; i<=d_; i++ ){ /* int rd = round(double( down_factor * elevated[i]));*/ int rd=floor(down_factor * elevated[i]+0.5f); rem0[i] = rd*up_factor; sum += rd; } // Find the simplex we are in and store it in rank (where rank describes what position coorinate i has in the sorted order of the features values) for( int i=0; i<=d_; i++ ) rank[i] = 0; for( int i=0; i<d_; i++ ){ double di = elevated[i] - rem0[i]; for( int j=i+1; j<=d_; j++ ) if ( di < elevated[j] - rem0[j]) rank[i]++; else rank[j]++; } // If the point doesn't lie on the plane (sum != 0) bring it back for( int i=0; i<=d_; i++ ){ rank[i] += sum; if ( rank[i] < 0 ){ rank[i] += d_+1; rem0[i] += d_+1; } else if ( rank[i] > d_ ){ rank[i] -= d_+1; rem0[i] -= d_+1; } } // Compute the barycentric coordinates (p.10 in [Adams etal 2010]) for( int i=0; i<=d_+1; i++ ) barycentric[i] = 0; for( int i=0; i<=d_; i++ ){ float v = (elevated[i] - rem0[i])*down_factor; barycentric[d_-rank[i] ] += v; barycentric[d_-rank[i]+1] -= v; } // Wrap around barycentric[0] += 1.0 + barycentric[d_+1]; // Compute all vertices and their offset for( int remainder=0; remainder<=d_; remainder++ ){ for( int i=0; i<d_; i++ ) key[i] = rem0[i] + canonical[ remainder*(d_+1) + rank[i] ]; offset_[ k*(d_+1)+remainder ] = hash_table.find( key, true ); barycentric_[ k*(d_+1)+remainder ] = barycentric[ remainder ]; } } delete [] scale_factor; delete [] elevated; delete [] rem0; delete [] barycentric; delete [] rank; delete [] canonical; delete [] key; // Find the Neighbors of each lattice point // Get the number of vertices in the lattice M_ = hash_table.size(); // Create the neighborhood structure if(blur_neighbors_) delete[] blur_neighbors_; blur_neighbors_ = new Neighbors[ (d_+1)*M_ ]; short * n1 = new short[d_+1]; short * n2 = new short[d_+1]; // For each of d+1 axes, for( int j = 0; j <= d_; j++ ){ for( int i=0; i<M_; i++ ){ const short * key = hash_table.getKey( i ); for( int k=0; k<d_; k++ ){ n1[k] = key[k] - 1; n2[k] = key[k] + 1; } n1[j] = key[j] + d_; n2[j] = key[j] - d_; blur_neighbors_[j*M_+i].n1 = hash_table.find( n1 ); blur_neighbors_[j*M_+i].n2 = hash_table.find( n2 ); } } delete[] n1; delete[] n2; }
hash_table &operator = (const hash_table &other) { hash_table(other).swap(*this); return *this; }