Example No. 1
// 2D segment tree range-sum query over [lx..rx] x [ly..ry];
// called from the root as sum_x(1, 0, n-1, lx, rx, ly, ry).
int sum_x(int vx, int tlx, int trx, int lx, int rx, int ly, int ry) {
  if (lx > rx) return 0;
  if (lx == tlx && trx == rx) return sum_y(vx, 1, 0, m-1, ly, ry);
  int tmx = (tlx + trx) / 2;
  return sum_x(vx*2, tlx, tmx, lx, min(rx,tmx), ly, ry)
    + sum_x(vx*2+1, tmx+1, trx, max(lx,tmx+1), rx, ly, ry);
}
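The companion sum_y that descends the inner (y-dimension) trees is not shown above. A minimal sketch consistent with the call sum_y(vx, 1, 0, m-1, ly, ry), assuming the usual 2D segment tree layout with a global tree t[4*n][4*m] (the names t, n, and m are assumed here, not taken from the original):

// Hypothetical companion for the snippet above (layout assumed):
int sum_y(int vx, int vy, int tly, int tr_y, int ly, int ry) {
  if (ly > ry) return 0;
  if (ly == tly && tr_y == ry) return t[vx][vy];
  int tmy = (tly + tr_y) / 2;
  return sum_y(vx, vy*2, tly, tmy, ly, min(ry, tmy))
       + sum_y(vx, vy*2+1, tmy+1, tr_y, max(ly, tmy+1), ry);
}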
Example No. 2
  __device__ inline
  void contributeResidualJacobian( const unsigned ielem ) const
  {
    extern __shared__ WorkSpace work_data[] ;

    sum_x_clear(); // Make sure summation scratch is zero

    // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$ 
    // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$ 

    const unsigned iInt = threadIdx.x ;

    if ( iInt < IntegrationCount ) {

      const double value_at_integ = work_data->value_at_integ[ iInt ] ;
      const double gradx_at_integ = work_data->gradx_at_integ[ iInt ] ;
      const double grady_at_integ = work_data->grady_at_integ[ iInt ] ;
      const double gradz_at_integ = work_data->gradz_at_integ[ iInt ] ;

      const float detJweight     = work_data->detJweight[ iInt ] ;
      const float coeff_K_detJweight = coeff_K * detJweight ;

      for ( unsigned iRow = threadIdx.y ;
                     iRow < FunctionCount ; iRow += blockDim.y ) {

        const float value_row  = elem_data.values[ iInt ][ iRow ] * detJweight ;
        const float dpsidx_row = work_data->dpsidx[ iRow ][ iInt ] * coeff_K_detJweight ;
        const float dpsidy_row = work_data->dpsidy[ iRow ][ iInt ] * coeff_K_detJweight ;
        const float dpsidz_row = work_data->dpsidz[ iRow ][ iInt ] * coeff_K_detJweight ;

        const double res_del = dpsidx_row * gradx_at_integ +
                               dpsidy_row * grady_at_integ +
                               dpsidz_row * gradz_at_integ ;

        const double res_val = value_at_integ * value_at_integ * value_row ;
        const double jac_val_row = 2 * value_at_integ * value_row ;

        sum_x( element_vectors( ielem , iRow ) , res_del + res_val );

        for ( unsigned iCol = 0 ; iCol < FunctionCount ; ++iCol ) {

          const float jac_del = 
            dpsidx_row * work_data->dpsidx[iCol][iInt] +
            dpsidy_row * work_data->dpsidy[iCol][iInt] +
            dpsidz_row * work_data->dpsidz[iCol][iInt] ;

          const double jac_val =
            jac_val_row * elem_data.values[ iInt ][ iCol ] ;

          sum_x( element_matrices( ielem , iRow , iCol ) , jac_del + jac_val );
        }
      }
    }

    __syncthreads(); // All warps finish before refilling shared data 
  }
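The sum_x(destination, value) and sum_x_clear() helpers used above are defined elsewhere in the kernel class; sum_x_clear() zeroes the summation scratch and sum_x() accumulates a contribution into its destination across threads. A minimal hypothetical stand-in that preserves the accumulate-into-destination semantics (not the original's shared-memory reduction, under which sum_x_clear() would become a no-op), assuming a GPU with native double-precision atomicAdd (compute capability 6.0+):

// Hypothetical stand-in: accumulate 'value' into 'dst' atomically.
__device__ inline void sum_x( double & dst , const double value )
{ atomicAdd( &dst , value ); }  // double atomicAdd requires sm_60 or newer

__device__ inline void sum_x( float & dst , const float value )
{ atomicAdd( &dst , value ); }  // float overload for float destinations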
Example No. 3
void fill_dp_matrix(const std::vector<double> & x,
                    std::vector< std::vector< double > > & S,
                    std::vector< std::vector< size_t > > & J)
  /*
   x: One-dimensional vector to be clustered; must be sorted (ascending
   or descending).
   S: K x N matrix. S[k][i] is the minimal sum of squared distances from
   each point in x[0..i] to its cluster mean when x[0..i] is partitioned
   into k+1 clusters and x[i] is the last point in cluster k.
   J: K x N backtrack matrix

   NOTE: All vector indices in this program start at position 0
   */
{
  const int K = S.size();
  const int N = S[0].size();

  std::vector<double> sum_x(N), sum_x_sq(N);

  double shift = x[N/2]; // median, used to shift the values of x
                         // to improve numerical stability

  for(int i = 0; i < N; ++i) {
    if(i == 0) {
      sum_x[0] = x[0] - shift;
      sum_x_sq[0] = (x[0] - shift) * (x[0] - shift);
    } else {
      sum_x[i] = sum_x[i-1] + x[i] - shift;
      sum_x_sq[i] = sum_x_sq[i-1] + (x[i] - shift) * (x[i] - shift);
    }

    // Initialize for k = 0
    S[0][i] = ssq(0, i, sum_x, sum_x_sq);
    J[0][i] = 0;
  }

  for(int k = 1; k < K; ++k) {
    int imin;
    if(k < K - 1) {
      imin = std::max(1, k);
    } else {
      // No need to compute S[K-1][0] ... S[K-1][N-2]
      imin = N-1;
    }

#ifdef DEBUG
    std::cout << std::endl << "k=" << k << ":";
#endif
    fill_row_k(imin, N-1, k, S, J, sum_x, sum_x_sq);
  }
}
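fill_dp_matrix relies on two helpers not shown here, ssq and fill_row_k. A plausible sketch of ssq, assuming it returns the within-cluster sum of squared deviations of x[j..i] from their mean, computed from the shifted prefix sums (this mirrors the standard Ckmeans.1d.dp formulation and may differ from the exact original):

// Sum of squared deviations of x[j..i] about their mean, via prefix sums:
//   ssq(j,i) = sum_x_sq[j..i] - (sum_x[j..i])^2 / (i - j + 1)
inline double ssq(size_t j, size_t i,
                  const std::vector<double> & sum_x,
                  const std::vector<double> & sum_x_sq)
{
  double sji;
  if (j > 0) {
    const double muji = (sum_x[i] - sum_x[j-1]) / (i - j + 1);
    sji = sum_x_sq[i] - sum_x_sq[j-1] - (i - j + 1) * muji * muji;
  } else {
    sji = sum_x_sq[i] - sum_x[i] * sum_x[i] / (i + 1);
  }
  return sji < 0 ? 0 : sji; // clamp tiny negative round-off to zero
}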
Example No. 4
  __device__ inline
  void evaluateFunctions( const unsigned ielem ) const
  {
    extern __shared__ WorkSpace work_data[] ;

    // Each warp (threadIdx.y) computes an integration point
    // Each thread is responsible for a node / function.

    const unsigned iFunc = threadIdx.x ;
    const bool     hasFunc = iFunc < FunctionCount ;

    //------------------------------------
    // Each warp gathers a different variable into 'elem_mat' shared memory.

    if ( hasFunc ) {

      const unsigned node = elem_node_ids( ielem , iFunc );

      for ( unsigned iy = threadIdx.y ; iy < 4 ; iy += blockDim.y ) {
        switch( iy ) {
        case 0 : work_data->sum[0][iFunc] = node_coords(node,0); break ;
        case 1 : work_data->sum[1][iFunc] = node_coords(node,1); break ;
        case 2 : work_data->sum[2][iFunc] = node_coords(node,2); break ;
        case 3 : work_data->sum[3][iFunc] = nodal_values(node); break ;
        default: break ;
        }
      }
    }

    __syncthreads(); // Wait for all warps to finish gathering

    // now get local 'const' copies in register space:

    const double x       = work_data->sum[0][ iFunc ];
    const double y       = work_data->sum[1][ iFunc ];
    const double z       = work_data->sum[2][ iFunc ];
    const double dof_val = work_data->sum[3][ iFunc ];

    __syncthreads(); // Wait for all warps to finish extracting

    sum_x_clear(); // Make sure summation scratch is zero

    //------------------------------------
    // Each warp is now on its own computing an integration point
    // so no further explicit synchronizations are required.

    if ( hasFunc ) {

      float * const J    = work_data->spaceJac[    threadIdx.y ];
      float * const invJ = work_data->spaceInvJac[ threadIdx.y ];

      for ( unsigned iInt = threadIdx.y ;
                     iInt < IntegrationCount ; iInt += blockDim.y ) {

        const float val = elem_data.values[iInt][iFunc] ;
        const float gx  = elem_data.gradients[iInt][0][iFunc] ;
        const float gy  = elem_data.gradients[iInt][1][iFunc] ;
        const float gz  = elem_data.gradients[iInt][2][iFunc] ;

        sum_x( J[j11], gx * x );
        sum_x( J[j12], gx * y );
        sum_x( J[j13], gx * z );

        sum_x( J[j21], gy * x );
        sum_x( J[j22], gy * y );
        sum_x( J[j23], gy * z );

        sum_x( J[j31], gz * x );
        sum_x( J[j32], gz * y );
        sum_x( J[j33], gz * z );

        // Inverse jacobian, only enough parallel work for 9 threads in the warp

        if ( iFunc < TensorDim ) {

          invJ[ iFunc ] =
            J[ invJacIndex[iFunc][0] ] * J[ invJacIndex[iFunc][1] ] -
            J[ invJacIndex[iFunc][2] ] * J[ invJacIndex[iFunc][3] ] ;

          // Let all threads in the warp compute determinant into a register

          const float detJ = J[j11] * invJ[j11] +
                             J[j21] * invJ[j12] +
                             J[j31] * invJ[j13] ;

          invJ[ iFunc ] /= detJ ;

          if ( 0 == iFunc ) {
            work_data->detJweight[ iInt ] = detJ * elem_data.weights[ iInt ] ;
          }
        }

        // Transform bases gradients and compute value and gradient

        const float dx = gx * invJ[j11] + gy * invJ[j12] + gz * invJ[j13];
        const float dy = gx * invJ[j21] + gy * invJ[j22] + gz * invJ[j23];
        const float dz = gx * invJ[j31] + gy * invJ[j32] + gz * invJ[j33];

        work_data->dpsidx[iFunc][iInt] = dx ;
        work_data->dpsidy[iFunc][iInt] = dy ;
        work_data->dpsidz[iFunc][iInt] = dz ;

        sum_x( work_data->gradx_at_integ[iInt] , dof_val * dx );
        sum_x( work_data->grady_at_integ[iInt] , dof_val * dy );
        sum_x( work_data->gradz_at_integ[iInt] , dof_val * dz );
        sum_x( work_data->value_at_integ[iInt] , dof_val * val );
      }
    }

    __syncthreads(); // All shared data must be populated at return.
  }
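The index constants j11..j33, TensorDim, and the invJacIndex table are defined outside this snippet. A hypothetical layout consistent with the code above (assumed here, not copied from the original), with the 3x3 Jacobian stored row-major and invJ built as the transposed cofactor matrix:

enum { j11 = 0, j12, j13, j21, j22, j23, j31, j32, j33 };
enum { TensorDim = 9 }; // 3x3

// invJ[k] = J[a]*J[b] - J[c]*J[d] yields the (unscaled) inverse entry k,
// i.e. the transposed cofactor matrix of J:
const unsigned invJacIndex[9][4] = {
  { j22, j33, j23, j32 },   // invJ[j11]
  { j13, j32, j12, j33 },   // invJ[j12]
  { j12, j23, j13, j22 },   // invJ[j13]
  { j23, j31, j21, j33 },   // invJ[j21]
  { j11, j33, j13, j31 },   // invJ[j22]
  { j13, j21, j11, j23 },   // invJ[j23]
  { j21, j32, j22, j31 },   // invJ[j31]
  { j12, j31, j11, j32 },   // invJ[j32]
  { j11, j22, j12, j21 },   // invJ[j33]
};
// With this layout, detJ = J[j11]*invJ[j11] + J[j21]*invJ[j12] + J[j31]*invJ[j13]
// is exactly the first-column cofactor expansion used in the kernel above.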
Example No. 5
/*****************************************************************************************
 *  vector< int > K_MeansPredict::Train( const vector< vector< float > >& Data, const float stopDist, const int stopIter, const int fast )
 *
 *  Purpose: Train predictor
 *  input:
 *	Data: vector of data
 *	stopDist: Distance stopping criteria
 *	stopIter: Max Iteration stopping criteria
 *
 *  return:
 *	vector of cluster membership
 *
 *  01.07.2006 	djh	added stopping criterion parameters
 *  			stopDist minimum euclidean distance
 * 			stopIter maximum iterations
 *			extra error output
 * 03.06.2006	djh	replaced _totalUpper/_totalLowerConfBound with _totalBoundStub
 *
 ******************************************************************************************/
 vector< int > K_MeansPredict::Train( const vector< vector< float > >& Data, const float stopDist, const int stopIter, const int fast ){
   // create vector of example coordinates
   vector< Coord< float > > coordData( Data.size() );
   // create vector of example key values
   vector< float > dataKey( Data.size() );
   //
   for( int i=0; i<Data.size(); i++)
   {
     vector< float > tempCoords( Data[i].size()-1 );
     dataKey[i]=Data[i][0];
     for( int j=1; j<Data[i].size(); j++ )
     {
       tempCoords[j-1] = Data[i][j];
     }
     
     coordData[i] = Coord< float >( tempCoords );
   }
   //  calculate clusters
   float dist;
   int numIter;
   vector<int> clusterMap = CreateClusters( coordData, stopDist, stopIter, dist, numIter );
   if( fast == 1 ){
     return( clusterMap );
   }
   cout << "# Training:\n";
   cout << "#   Training required " << numIter << " rounds, the max Euclid. Dist. is: " << dist << endl;
   //  calculate cluster stats
   vector< float > sum_x( _k, 0. );
   vector< float > sum_x2( _k, 0. );
   _key_supports = vector< int >( _k, 0);
   // find n and sums
   for( int i=0; i<Data.size(); i++ ){
     _key_supports[ clusterMap[i] ]++;
     sum_x[ clusterMap[i] ] += dataKey[i];           // accumulate example i's key
     sum_x2[ clusterMap[i] ] += pow( dataKey[i], 2); // into its cluster's sums
   }
   // compute mean and variance
   _key_means = vector< float >(_k,0.);
   _key_variances = vector< float >(_k,0.);
   for( int i=0; i<_k; i++ ){
     _key_means[i]=sum_x[i]/_key_supports[i];
     _key_variances[i] = ( sum_x2[i] - sum_x[i]*sum_x[i]/float(_key_supports[i]) )/float( _key_supports[i]-1 );
   }
   //
   // Calc error means and variances
   sum_x = vector<float>( _k, 0.);
   sum_x2 = vector<float>( _k, 0.);
   float tot_sum_x = 0.0;
   float tot_sum_x2 = 0.0;
   for( int i=0; i<coordData.size(); i++ ){
     int clusterIdx = FindClusterIdx( coordData[i] );
     float err = _key_means[ clusterIdx ] - dataKey[i];
     sum_x[ clusterIdx ] += err;
     sum_x2[ clusterIdx ] += pow( err, 2 );
     tot_sum_x += err;
     tot_sum_x2 += pow( err, 2 );
   }
   _errMean = vector< float >( _k );
   _lowerConfBound = vector< float >( _k );
   _upperConfBound = vector< float >( _k );
   for( int i=0; i< _k; i++ ){
     _errMean[i] = sum_x[i]/( float( _key_supports[i] ) );
     float errVar = ( sum_x2[i] - sum_x[i]*sum_x[i]/float(_key_supports[i]) )/float( _key_supports[i]-1 );
     float t_val = TDist( _key_supports[i] );
     _lowerConfBound[i] = _errMean[i] - t_val*sqrt( errVar * (1.0+( 1.0/float(_key_supports[i]) )) );
     _upperConfBound[i] = _errMean[i] + t_val*sqrt( errVar * (1.0+( 1.0/float(_key_supports[i]) )) );
   }
   //
   _totalErrMean = tot_sum_x / coordData.size();
   float totalErrVar = ( tot_sum_x2 - tot_sum_x*tot_sum_x/float(coordData.size()) )/float( coordData.size()-1 );
   _totalBoundStub = sqrt( totalErrVar * (1.0+( 1.0/float(coordData.size()) )) );
   //  return labels
   cout << "#   Error:\n";
   cout << "#     Mean Squared Error (MSE) is: " << tot_sum_x2/float(coordData.size() ) << endl;
   cout << "#     Error Mean is : " << _totalErrMean << endl;
   cout << "#     Error Variance is : " << totalErrVar << endl;
   //
   return( clusterMap );
 }  
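The statistics above use the one-pass shortcut form of the sample variance; for reference:

$$ s^2 = \frac{\sum_i x_i^2 - \left(\sum_i x_i\right)^2 / n}{n - 1} $$

The per-cluster _lowerConfBound/_upperConfBound then form the usual prediction interval $\bar{e} \pm t \sqrt{s^2 \left(1 + 1/n\right)}$, with $t$ supplied by TDist.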
Example No. 6
extern double test_points() {
  point a, b;
  a.x = 40;
  b.x = 1;
  return sum_x(&a, &b);
}
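The point type and the sum_x under test are not included with this example. A minimal hypothetical pair (names and fields assumed) that makes the test self-contained:

// Hypothetical definitions, for illustration only:
struct point { double x; };

double sum_x(const point *a, const point *b) {
  return a->x + b->x; // test_points() would then return 41.0
}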
Example No. 7
void CalcCorr( const int nvar, const int tgtIdx, const vector< int >& delay, const vector< int >& nlags, vector< vector< float > >& Examples, vector< vector< float > >& CorrelationVect )
{
  for(int k=0; k<nvar; k++)
  {
    if(k != tgtIdx && nlags[k] != (nlags[tgtIdx]+1) )
    {
      cout << "Assert: Error!\n";
      exit( -1 );
    }
  }
  int nl = nlags[tgtIdx]+1;
  vector< vector< float > > tempData( Examples.size() );
  vector< float > target( Examples.size() );
  for(int i=0; i<tempData.size(); i++)
  {
    target[i] = Examples[i][0];
    tempData[i]=vector< float >( Examples[i].size() );
    int aCtr = 0;
    for(int j=1; j<Examples[i].size(); j++)
    {
      if( aCtr == tgtIdx*nl)
      {
        tempData[i][aCtr] = target[i];
        aCtr++;
      }
      tempData[i][aCtr] = Examples[i][j];
      aCtr++;
    }
  }

  CorrelationVect = vector< vector< float > >(nvar);
  for( int k=0; k<nvar; k++)
  {
    CorrelationVect[k] = vector< float >( nl );
    
    cout << "# Calculate correlation for all lags" << endl;
    int n = Examples.size();
    vector<float> sum_x(nl);
    vector<float> sum_y(nl);
    vector<float> sum_xsq(nl);
    vector<float> sum_ysq(nl);
    vector<float> sum_xy(nl);
    for(int j=0; j<nl; j++)
    {
      sum_x[j] = 0.0;
      sum_y[j] = 0.0;
      sum_xsq[j] = 0.0;
      sum_ysq[j] = 0.0;
      sum_xy[j] = 0.0;
      for(int i=0; i<Examples.size(); i++)
      {
        sum_x[j] += target[i];
        sum_y[j] += tempData[i][k*nl+j];
        sum_xsq[j] += target[i]*target[i];
        sum_ysq[j] += tempData[i][k*nl+j]*tempData[i][k*nl+j];
        sum_xy[j] += target[i]*tempData[i][k*nl+j];
      }
      float numerator = n*sum_xy[j]-sum_x[j]*sum_y[j];
      float denominator = sqrt(n*sum_xsq[j] - sum_x[j]*sum_x[j] ) * sqrt(n*sum_ysq[j] - sum_y[j]*sum_y[j] );
      CorrelationVect[k][j] = numerator/denominator;
    }
  }
  vector< vector< float > > temp( nl );
  for(int i=0; i< nl; i++)
  {
    temp[i] = vector< float >(nvar);
    for(int j=0; j<nvar; j++)
    {
      temp[i][j] = CorrelationVect[j][i];
    }
  }
  CorrelationVect = temp;
}
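For reference, the per-lag statistic computed above is the sample Pearson correlation coefficient between the target series $x$ and the candidate variable $y$ at lag $j$:

$$ r = \frac{n \sum_i x_i y_i - \sum_i x_i \sum_i y_i}{\sqrt{n \sum_i x_i^2 - \left(\sum_i x_i\right)^2}\,\sqrt{n \sum_i y_i^2 - \left(\sum_i y_i\right)^2}} $$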