Example #1
0
/*------------------------------------------------------------------*/
void AzOptOnTree::updateTreeWeights(AzRgfTreeEnsemble *ens) const
{
  int dtree_num = ens->size(); 
  int tx; 
  for (tx = 0; tx < dtree_num; ++tx) {
    ens->tree_u(tx)->resetWeights(); 
  }

  const double *weight = weights()->point(); 
  double const_val = constant(); 
  ens->set_constant(const_val); 

  int num = tree_feat->featNum(); 
  int fx; 
  
  for (fx = 0; fx < num; ++fx) {
    if (weight[fx] != 0) {
      const AzTrTreeFeatInfo *fp = tree_feat->featInfo(fx); 
      ens->tree_u(fp->tx)->setWeight(fp->nx, weight[fx]); 
    }
  }
}
Example #2
0
// This is inherently inefficient as this function will be called n times (up to 4 times)
// for every overlapping pixel. I'm doing this as to reduce memory use. Since it will
// only be done once, it may not matter too much.
float IdMap::calculateWeights(int x, int y, FrameStack* frame, 
			      std::vector<FrameStack*> stacks, float pixelFactor){
  std::vector<float> weights(stacks.size());
  std::vector<int> border_distances(stacks.size());
  int maxBorderDistance = 0;
  for(uint i=0; i < stacks.size(); ++i){
    border_distances[i] = stacks[i]->nearestBorderGlobal(x, y);
    maxBorderDistance = border_distances[i] > maxBorderDistance ? border_distances[i] : maxBorderDistance;
  }
  // override pixelFactor with MaxBorderDistance
  pixelFactor = maxBorderDistance ? (float)maxBorderDistance : pixelFactor;
  int offset = -1;
  float w_sum = 0;
  for(uint i=0; i < stacks.size(); ++i){
    if(stacks[i] == frame)
      offset = i;
    int border_distance = stacks[i]->nearestBorderGlobal(x, y);
    weights[i] = border_distance > (int)pixelFactor ? 1.0 : (float)(1 + border_distance) / (1.00 + pixelFactor);
    weights[i] = weights[i] < 0 ? 0 : weights[i];
    w_sum += weights[i];
  }
  // float w_sum = 0;
  // for(uint i=0; i < weights.size(); ++i)
  //   w_sum += weights[i];
  if(!w_sum){
    std::cerr << "idMap::calculateWeights: w_sum is 0" << std::endl;
    std::cerr << "\tx,y: " << x << "," << y << " offset : " << offset
	      << "\tstacks.size() " << stacks.size() << std::endl;
    exit(1);
  }
  for(uint i=0; i < weights.size(); ++i)
    weights[i] /= w_sum;
  if(offset < 0){
    std::cerr << "idMap::calculateWeights: offset not defined returning 0 weighting" << std::endl;
    return(0);
  }
  return(weights[offset]);
}
	//! Static function to calculate the weights.
	static std::vector<double> calculate_weights(const std::vector< std::vector<double> >& acceptance_probabilities_optimization)
	{
	  // Abbreviation for the number of optimization steps
	  unsigned int optimization_steps = acceptance_probabilities_optimization.size();

	  // Calculate the weights
	  std::vector<double> weights(optimization_steps, 0.0);
	  double weights_sum = 0.0;
	  for (unsigned int r = 0; r < optimization_steps; ++r)
	  {
	    double sigma_squared = 0.0;
	    for (unsigned int i = 0; i < acceptance_probabilities_optimization[r].size(); ++i)
	      sigma_squared += 1.0/pow(acceptance_probabilities_optimization[r][i], 2);
	  
	    weights[r] = 1.0 / sqrt(sigma_squared);
	    weights_sum += weights[r];
	  }
	  
	  // Normalize the weights
	  for (unsigned int r = 0; r < optimization_steps; ++r) weights[r] /= weights_sum;

	  return weights;
	}
std::vector<double> MultiSequence::ComputePositionBasedSequenceWeights() const
{
    std::vector<double> weights(sequences.size(), 0.0);
    std::vector<int> counts(256);

    for (int i = 1; i <= sequences[0]->GetLength(); i++)
    {
        int diversity = 0;
        std::fill(counts.begin(), counts.end(), 0);
        
        for (size_t j = 0; j < sequences.size(); j++)
        {
            if (counts[BYTE(sequences[j]->GetData()[i])] == 0) diversity++;
            ++(counts[BYTE(sequences[j]->GetData()[i])]);
        }
        
        for (size_t j = 0; j < sequences.size(); j++)
            weights[j] += 1.0 / (diversity * counts[BYTE(sequences[j]->GetData()[i])]);
    }

    weights /= Sum(weights);
    return weights;
}
Example #5
0
void LandmarkSelectionBase::selectLandmarks( MultiReferenceBase* myframes ) {
  // Select landmarks
  myframes->clearFrames(); select( myframes );
  plumed_assert( myframes->getNumberOfReferenceFrames()==nlandmarks );

  // Now calculate voronoi weights
  if( !novoronoi ) {
    unsigned rank=action->comm.Get_rank();
    unsigned size=action->comm.Get_size();
    std::vector<double> weights( nlandmarks, 0.0 );
    for(unsigned i=rank; i<action->data.size(); i+=size) {
      unsigned closest=0;
      double mindist=distance( action->getPbc(), action->getArguments(), action->data[i], myframes->getFrame(0), false );
      for(unsigned j=1; j<nlandmarks; ++j) {
        double dist=distance( action->getPbc(), action->getArguments(), action->data[i], myframes->getFrame(j), false );
        if( dist<mindist ) { mindist=dist; closest=j; }
      }
      weights[closest] += getWeightOfFrame(i);
    }
    action->comm.Sum( &weights[0], weights.size() );
    myframes->setWeights( weights );
  }
}
Example #6
0
void
testTranslationRotationMatrix (const IMATH_INTERNAL_NAMESPACE::M44d& mat)
{
    std::cout << "Testing known translate/rotate matrix:\n " << mat;
    typedef IMATH_INTERNAL_NAMESPACE::Vec3<T> Vec;

    static IMATH_INTERNAL_NAMESPACE::Rand48 rand (2047);

    size_t numPoints = 7;
    std::vector<Vec> from;  from.reserve (numPoints);
    std::vector<Vec> to;    to.reserve (numPoints);
    for (size_t i = 0; i < numPoints; ++i)
    {
        IMATH_INTERNAL_NAMESPACE::V3d a (rand.nextf(), rand.nextf(), rand.nextf());
        IMATH_INTERNAL_NAMESPACE::V3d b = a * mat;

        from.push_back (Vec(a));
        to.push_back (Vec(b));
    }

    std::vector<T> weights (numPoints, T(1));
    const IMATH_INTERNAL_NAMESPACE::M44d m1 = procrustesRotationAndTranslation (&from[0], &to[0], &weights[0], numPoints);
    const IMATH_INTERNAL_NAMESPACE::M44d m2 = procrustesRotationAndTranslation (&from[0], &to[0], numPoints);

    const T eps = sizeof(T) == 8 ? 1e-8 : 1e-4;
    for (size_t i = 0; i < numPoints; ++i)
    {
        const IMATH_INTERNAL_NAMESPACE::V3d a = from[i];
        const IMATH_INTERNAL_NAMESPACE::V3d b = to[i];
        const IMATH_INTERNAL_NAMESPACE::V3d b1 = a * m1;
        const IMATH_INTERNAL_NAMESPACE::V3d b2 = a * m2;

        assert ((b - b1).length() < eps);
        assert ((b - b2).length() < eps);
    }
    std::cout << "  OK\n";
}
Example #7
0
void move_particles (std::vector <Particle> *particles,
        std::vector <double> *field)
{
    auto num_particles = particles->size();
    std::vector <double> weights (2);
    std::vector <int> points (2);

    for (auto& particle : *particles)
    {
        if (CIC){
            weighing (&particle, &weights[0]);
        }
        else if (ZERO_ORDER){
            zero_order_weighing (&particle, &weights[0]);
        }

        adjacent_points (&particle, &points[0]);
        auto left_field = field->at (points [0]) * weights [0];
        auto right_field = field->at (points [1]) * weights [1];
        auto accel_0 = find_accel(left_field + right_field, &particle);

        particle.inc_pos (accel_0);
        if (CIC){
            weighing (&particle, &weights[0]);
        }
        else if (ZERO_ORDER){
            zero_order_weighing (&particle, &weights[0]);
        }

        adjacent_points (&particle, &points[0]);
        left_field = field->at (points [0]) * weights [0];
        right_field = field->at (points [1]) * weights [1];
        auto accel_1 = find_accel(left_field + right_field, &particle);

        particle.inc_vel (accel_0, accel_1);
    }
}
void Foam::domainDecomposition::distributeCells()
{
    Info<< "\nCalculating distribution of cells" << endl;

    cpuTime decompositionTime;

    autoPtr<decompositionMethod> decomposePtr = decompositionMethod::New
    (
        decompositionDict_
    );

    scalarField cellWeights;
    if (decompositionDict_.found("weightField"))
    {
        word weightName = decompositionDict_.lookup("weightField");

        volScalarField weights
        (
            IOobject
            (
                weightName,
                time().timeName(),
                *this,
                IOobject::MUST_READ,
                IOobject::NO_WRITE
            ),
            *this
        );
        cellWeights = weights.primitiveField();
    }

    cellToProc_ = decomposePtr().decompose(*this, cellWeights);

    Info<< "\nFinished decomposition in "
        << decompositionTime.elapsedCpuTime()
        << " s" << endl;
}
void TestWeightMatrix_Forward() {
  int n_outputs = 2;
  InputLayer input_layer(3);
  OutputLayer output_layer(n_outputs);

  WeightMatrix weights(input_layer, output_layer);
  weights.set(0, 0, 0.5);
  weights.set(1, 0, -2.0);
  weights.set(2, 0, 1.5);
  weights.set(0, 1, 1.0);
  weights.set(1, 1, 0.7);
  weights.set(2, 1, -1.0);
  weights.setBias(0, 0.8);
  weights.setBias(1, -0.3);

  std::vector<double> inputs;
  inputs.push_back(-2.0);
  inputs.push_back(1.0);
  inputs.push_back(3.0);
  input_layer.receiveInput(inputs);

  std::vector<double> transition = weights.fire(input_layer);

  assert(transition.size() == 2);
  assert(transition[0] == 2.3);
  assert(transition[1] == -4.6);

  output_layer.receiveInput(transition);

  assert(output_layer.getInput(0) == 2.3);
  assert(output_layer.getInput(1) == -4.6);

  assert(output_layer.getOutput(0) == 2.3);
  assert(output_layer.getOutput(1) == -4.6);

  printPass("TestWeightMatrix_Forward()");
}
Example #10
0
Double KMAlgo::ComputeMssc(IPartition const & x, KMInstance const & instance) {
	RectMatrix centers(x.maxNbLabels(), instance.nbAtt());
	centers.assign(0);
	DoubleVector weights(x.maxNbLabels(), 0);
	for (auto const & l : x.usedLabels()) {
		for (auto const & i : x.observations(l)) {
			weights[l] += instance.weight(i);
			for (size_t d(0); d < instance.nbAtt(); ++d)
				centers.plus(l, d, instance.get(i, d) * instance.weight(i));
		}
	}
	Double result(0);
	for (size_t i(0); i < instance.nbObs(); ++i) {
		size_t const l(x.label(i));
		for (size_t d(0); d < instance.nbAtt(); ++d)
			result += instance.weight(i)
					* std::pow(
							instance.get(i, d) - centers.get(l, d) / weights[l],
							2);

	}

	return result;
}
Example #11
0
    double eval(const std::vector<double> &at, bool &valid){
        //Allow for the weights to be part of the optimization
		//by assuming theyre tacked on to the end of the at vector
		//NOTE: the last weight is the one implied by the 1-sum(others)

		//Determine where the weights start
		int offset = this->P->dimDesign;

		//Separate the set of "Problem" variables
		const Homotopy::designVars_t vars(at.begin(), at.begin() + offset);
        Homotopy::designVars_t weights( at.begin() + offset, at.end() );

        //Evaluate the objectives with the designVars
        Homotopy::objVars_t result( this->e.eval( vars, valid ) );
    
        //Return immediately if result is unavailable
        if(!valid) return 0.0;
        else {
            //Calculate the remaining weight and add it
            weights.push_back( 1.0 - std::accumulate(weights.begin(), weights.end(), 0.0) );
            //Return the weighted sum
            return std::inner_product( weights.begin(), weights.end(), result.begin(), 0.0 );
        }
	}
std::vector<double>  computeInterpolationWeights2d(const GenericPoint&  thepoint, 
                                                   const Container&     pt_v) 
{
   if (pt_v.size() !=3)
   {
      std::cout << " Compute interpolation weights.. error.. wrong number of points: " << pt_v.size() << std::endl;
      return std::vector<double>();
   }
   double  c1[2], c2[2];

   c1[0] = pt_v[1][0] - pt_v[0][0];
   c1[1] = pt_v[1][1] - pt_v[0][1];
        
   c2[0] = pt_v[2][0] - pt_v[0][0];
   c2[1] = pt_v[2][1] - pt_v[0][1];
                
   double  det(det2x2(c1, c2));

   if (det == 0.0)
   {
      throw gsse::numerical_calculation_error(":: interpolation :: 2D :: coefficients.. determinant to small.. ");
   }
        
   double  rhs[2];
   std::vector<double> weights(3);
        
   rhs[0] = thepoint[0] - pt_v[0][0];
   rhs[1] = thepoint[1] - pt_v[0][1];

   weights[1] = det2x2(rhs, c2)  / det;
   weights[2] = det2x2(c1,  rhs) / det;
   weights[0] = 1.0 - weights[1] - weights[2];

   return weights;   // here .. copy constructor // think about it.. [RH]

}
Example #13
0
Layer::Matrix Layer::getFlattenedWeights() const
{
	Matrix weights(1, totalWeights());
	
	size_t position = 0;

	for(auto matrix = begin(); matrix != end(); ++matrix)
	{
		std::memcpy(&weights.data()[position], &matrix->data()[0],
			matrix->size() * sizeof(float));

		position += matrix->size();
	}

	for(auto matrix = begin_bias(); matrix != end_bias(); ++matrix)
	{
		std::memcpy(&weights.data()[position], &matrix->data()[0],
			matrix->size() * sizeof(float));

		position += matrix->size();
	}
	
	return weights;
}
Example #14
0
void ScoreIndexManager::Debug_PrintLabeledScores(std::ostream& os, const ScoreComponentCollection& scc) const
{
	std::vector<float> weights(scc.m_scores.size(), 1.0f);
	Debug_PrintLabeledWeightedScores(os, scc, weights);
}
void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_S_G_D(
        float *inp_ptr, float *out_ptr, float *wei_ptr, float *bias_ptr,
        const memory_tracking::grantor_t &scratchpad) const {
    const auto &jcp = kernel_->jcp;
    const auto &p_ops = attr_->post_ops_;

    const int inph = is_fwd ? jcp.ih : jcp.oh;
    const int inpw = is_fwd ? jcp.iw : jcp.ow;
    const int outh = is_fwd ? jcp.oh : jcp.ih;
    const int outw = is_fwd ? jcp.ow : jcp.iw;

    /* Notation:
       FWD: dimM:oc, dimN:ntiles, dimK:ic,
       BWD: dimM:ic, dimN:ntiles, dimK:oc,
       FWD/BWD: V: src/diff_dst transform, U:weight transform,
                M:dst/diff_src transform  */
    array_offset_calculator<float, 5> input(inp_ptr,
            jcp.mb, jcp.dimK/jcp.dimK_reg_block, inph, inpw,
            jcp.dimK_reg_block);
    array_offset_calculator<float, 5> output(out_ptr,
            jcp.mb, jcp.dimM/jcp.dimM_simd_block, outh, outw,
            jcp.dimM_simd_block);
    array_offset_calculator<float, 6> weights(wei_ptr,
            jcp.oc/jcp.oc_simd_block, jcp.ic/jcp.ic_simd_block, jcp.kh, jcp.kw,
            jcp.ic_simd_block, jcp.oc_simd_block);
    array_offset_calculator<float, 2> bias(bias_ptr,
            jcp.dimM/jcp.dimM_simd_block, jcp.dimM_simd_block);

    array_offset_calculator<float, 8> M(is_fwd
            ? scratchpad.template get<float>(key_wino_M)
            : scratchpad.template get<float>(key_wino_V),
            jcp.dimN_nb_block, jcp.dimM_nb_block,
            alpha, alpha,
            jcp.dimN_block, jcp.dimM_block * jcp.dimM_reg_block,
            jcp.dimN_reg_block, jcp.dimM_simd_block);

    auto wino_wei = (jcp.prop_kind == prop_kind::forward_inference)
            ? wei_ptr
            : scratchpad.template get<float>(key_wino_U);

    array_offset_calculator<float, 8> U(wino_wei,
            jcp.dimM_nb_block,
            alpha, alpha,
            jcp.dimK_nb_block,
            jcp.dimM_block * jcp.dimM_reg_block, jcp.dimK_block,
            jcp.dimK_reg_block, jcp.dimM_simd_block);
    array_offset_calculator<float, 8> V(is_fwd
            ? scratchpad.template get<float>(key_wino_V)
            : scratchpad.template get<float>(key_wino_M),
            jcp.dimN_nb_block, alpha, alpha,
            jcp.dimN_block, jcp.dimK_nb_block,
            jcp.dimK_block, jcp.dimN_reg_block, jcp.dimK_reg_block);

    const bool wants_padded_bias = jcp.with_bias
        && jcp.oc_without_padding != jcp.oc;
    float last_slice_bias[simd_w] = {0};
    if (wants_padded_bias) {
        for (int oc = 0; oc < jcp.oc_without_padding % jcp.oc_simd_block; ++oc)
            last_slice_bias[oc] = bias(jcp.dimM / jcp.dimM_simd_block - 1, oc);
    }

PRAGMA_OMP(parallel)
    {

        parallel_nd_in_omp(jcp.mb, jcp.dimK_nb_block, jcp.dimK_block,
                [&](int img, int K_blk1, int K_blk2) {
                input_transform_data(img, jcp,
                    &(input(img, K_blk1 * jcp.dimK_block + K_blk2,
                            0, 0, 0)),
                        &(V(0, 0, 0, 0, K_blk1, K_blk2, 0, 0)));
                });

        if (jcp.prop_kind != prop_kind::forward_inference) {
            parallel_nd_in_omp(jcp.nb_oc, jcp.nb_ic, (jcp.oc_block * jcp.oc_reg_block),
                (jcp.ic_block * jcp.ic_reg_block),
                [&](int ofm1, int ifm1, int ofm2, int ifm2) {
                    float *U_base_ptr = is_fwd
                        ? &(U(ofm1, 0, 0, ifm1, ofm2, ifm2, 0, 0))
                        : &(U(ifm1, 0, 0, ofm1, ifm2, ofm2, 0, 0));
                    weight_transform_data(jcp,
                        &(weights(
                                ofm1 * jcp.oc_block * jcp.oc_reg_block + ofm2,
                                ifm1 * jcp.ic_block * jcp.ic_reg_block + ifm2,
                                0, 0, 0, 0)),
                        U_base_ptr);
            });
        }

PRAGMA_OMP(barrier)

        parallel_nd_in_omp(jcp.dimN_nb_block, alpha, alpha, jcp.dimM_nb_block,
            [&](int N_blk1, int oj, int oi, int M_blk1) {
            for (int K_blk1 = 0; K_blk1 < jcp.dimK_nb_block;
                 K_blk1++)
            for (int N_blk2 = 0; N_blk2 < jcp.dimN_block; N_blk2++)
                kernel_->gemm_loop_ker(
                        (float *)&(M(N_blk1, M_blk1, oj, oi,
                            N_blk2, 0, 0, 0)),
                        (const float *)&(U(M_blk1, oj, oi,
                            K_blk1, 0, 0, 0, 0)),
                        (const float *)&(V(N_blk1, oj, oi,
                            N_blk2, K_blk1, 0, 0, 0)), K_blk1);
        });

PRAGMA_OMP(barrier)

        parallel_nd_in_omp(jcp.mb, jcp.dimM_nb_block, (jcp.dimM_block * jcp.dimM_reg_block),
                    [&](int img, int M_blk1, int M_blk2) {
            const int M_blk =
                M_blk1 * jcp.dimM_block  * jcp.dimM_reg_block + M_blk2;

            float *bias_ptr = wants_padded_bias
                && M_blk == jcp.dimM / jcp.dimM_simd_block - 1
                ? last_slice_bias : &bias(M_blk, 0);
            output_transform_data(img, jcp, p_ops,
                    &(M(0, M_blk1, 0, 0, 0, M_blk2, 0, 0)),
                    &(output(img, M_blk, 0, 0, 0)), bias_ptr);
        });

    }
}
Example #16
0
// GET INTERPOLATOR
//-------------------------------------------------------------------------
GridInterface::Interpolator RectilinearGrid::getInterpolator(Index elem, const Vector &point, DataBase::Mapping mapping, GridInterface::InterpolationMode mode) const {

   vassert(inside(elem, point));

#ifdef INTERPOL_DEBUG
   if (!inside(elem, point)) {
      return Interpolator();
   }
#endif

   if (mapping == DataBase::Element) {
       std::vector<Scalar> weights(1, 1.);
       std::vector<Index> indices(1, elem);

       return Interpolator(weights, indices);
   }

   std::array<Index,3> n = cellCoordinates(elem, m_numDivisions);
   std::array<Index,8> cl = cellVertices(elem, m_numDivisions);

   Vector corner0(m_coords[0][n[0]], m_coords[1][n[1]], m_coords[2][n[2]]);
   Vector corner1(m_coords[0][n[0]+1], m_coords[1][n[1]+1], m_coords[2][n[2]+1]);
   const Vector diff = point-corner0;
   const Vector size = corner1-corner0;

   const Index nvert = 8;
   std::vector<Index> indices((mode==Linear || mode==Mean) ? nvert : 1);
   std::vector<Scalar> weights((mode==Linear || mode==Mean) ? nvert : 1);

   if (mode == Mean) {
       const Scalar w = Scalar(1)/nvert;
       for (Index i=0; i<nvert; ++i) {
           indices[i] = cl[i];
           weights[i] = w;
       }
   } else if (mode == Linear) {
       vassert(nvert == 8);
       for (Index i=0; i<nvert; ++i) {
           indices[i] = cl[i];
       }
       Vector ss = diff;
       for (int c=0; c<3; ++c) {
           ss[c] /= size[c];
       }
       weights[0] = (1-ss[0])*(1-ss[1])*(1-ss[2]);
       weights[1] = ss[0]*(1-ss[1])*(1-ss[2]);
       weights[2] = ss[0]*ss[1]*(1-ss[2]);
       weights[3] = (1-ss[0])*ss[1]*(1-ss[2]);
       weights[4] = (1-ss[0])*(1-ss[1])*ss[2];
       weights[5] = ss[0]*(1-ss[1])*ss[2];
       weights[6] = ss[0]*ss[1]*ss[2];
       weights[7] = (1-ss[0])*ss[1]*ss[2];
   } else {
      weights[0] = 1;

      if (mode == First) {
         indices[0] = cl[0];
      } else if(mode == Nearest) {
          Vector ss = diff;
          int nearest=0;
          for (int c=0; c<3; ++c) {
              nearest <<= 1;
              ss[c] /= size[c];
              if (ss[c] < 0.5)
                  nearest |= 1;
          }
          indices[0] = cl[nearest];
      }
   }

   return Interpolator(weights, indices);

}
int Dmc_method::calcBranch() { 
  int totwalkers=mpi_info.nprocs*nconfig;
  Array1 <doublevar> weights(totwalkers);
  Array1 <doublevar> my_weights(nconfig);
  
  for(int walker=0; walker < nconfig; walker++)
    my_weights(walker)=pts(walker).weight;
#ifdef USE_MPI
  MPI_Allgather(my_weights.v,nconfig, MPI_DOUBLE, weights.v,nconfig,MPI_DOUBLE, MPI_Comm_grp);
#else
  weights=my_weights;
#endif
  Array1 <int> my_branch(nconfig);
  Array1 <int> nwalkers(mpi_info.nprocs);
  nwalkers=0;
  int root=0;
  if(mpi_info.node==root) {  //this if/else clause may be refactored out

    Array1 <int> branch(totwalkers);
    //----Find which walkers branch/die
    //we do it on one node since otherwise we'll have different random numbers!
    //we'll assign the weight for each copy that will be produced
    //this is the core of the branching algorithm..
    //my homegrown algo, based on Umrigar, Nightingale, and Runge
    branch=-1;

    long int time_a=clock();
    match_walkers(weights,branch);
    long int time_b=clock();
    single_write(cout,"matching: ",double(time_b-time_a)/CLOCKS_PER_SEC,"\n");
    for(int w=0; w< totwalkers; w++) {
      if(branch(w)==-1) branch(w)=1;
    }
    //----end homegrown algo
    //count how many walkers each node will have
    //without balancing
    int walk=0;
    for(int n=0; n< mpi_info.nprocs; n++) { 
      for(int i=0; i< nconfig; i++) {
        nwalkers(n)+=branch(walk);
        walk++;
      }
      //cout << "nwalkers " << n << " " << nwalkers(n) << endl;
    }
    //now send nwalkers and which to branch to all processors
    for(int i=0; i< nconfig; i++) { 
      my_branch(i)=branch(i);
      my_weights(i)=weights(i);
    }
    time_a=clock();
#ifdef USE_MPI
    MPI_Bcast(nwalkers.v, mpi_info.nprocs, MPI_INT, mpi_info.node, MPI_Comm_grp);
    for(int i=1; i< mpi_info.nprocs; i++) {
      MPI_Send(branch.v+i*nconfig,nconfig,MPI_INT,i,0,MPI_Comm_grp);
      MPI_Send(weights.v+i*nconfig, nconfig, MPI_DOUBLE, i,0,MPI_Comm_grp);
    }
#endif
    time_b=clock();
    single_write(cout,"sending branch: ",double(time_b-time_a)/CLOCKS_PER_SEC,"\n");

               
  }
  else { 
#ifdef USE_MPI
    MPI_Bcast(nwalkers.v, mpi_info.nprocs, MPI_INT, root, MPI_Comm_grp);
    MPI_Status status;
    MPI_Recv(my_branch.v,nconfig, MPI_INT,root,0,MPI_Comm_grp, &status);
    MPI_Recv(my_weights.v,nconfig, MPI_DOUBLE,root,0,MPI_Comm_grp, &status);
#endif	
  }
  //--end if/else clause

  long int time_a=clock();
  for(int i=0; i< nconfig; i++) { 
    pts(i).weight=my_weights(i);
  }
  
  //Now we all have my_branch and nwalkers..we need to figure out who 
  //needs to send walkers to whom--after this, nwalkers should be a flat array equal to 
  //nconfig(so don't try to use it for anything useful afterwards)
  vector <Queue_element> send_queue;
  int curr_needs_walker=0;
  int nnwalkers=nwalkers(mpi_info.node); //remember how many total we should have
  for(int i=0; i< mpi_info.nprocs; i++) { 
    while(nwalkers(i) > nconfig) {
      if(nwalkers(curr_needs_walker) < nconfig) { 
        nwalkers(curr_needs_walker)++;
        nwalkers(i)--;
        send_queue.push_back(Queue_element(i,curr_needs_walker));
        //cout << mpi_info.node << ":nwalkers " << nwalkers(i) << "  " << nwalkers(curr_needs_walker) << endl;
        //cout << mpi_info.node << ":send " << i << "  " << curr_needs_walker << endl;
      }
      else { 
        curr_needs_walker++;
      }
    }
  }
  
  for(int i=0; i< mpi_info.nprocs; i++) assert(nwalkers(i)==nconfig);
  int killsize=0;
  for(int i=0; i< nconfig; i++) {
    //cout << mpi_info.node << ":branch " << i << "  " << my_branch(i) << " weight " << pts(i).weight <<  endl;
    if(my_branch(i)==0) killsize++;
  }
  //cout << mpi_info.node << ": send queue= " << send_queue.size() << endl;
  //now do branching for the walkers that we get to keep
  Array1 <Dmc_point> savepts=pts;
  int curr=0; //what walker we're currently copying from
  int curr_copy=0; //what walker we're currently copying to
  while(curr_copy < min(nnwalkers,nconfig)) { 
    if(my_branch(curr)>0) { 
      //cout << mpi_info.node << ": copying " << curr << " to " << curr_copy << " branch " << my_branch(curr) << endl;
      my_branch(curr)--;
      pts(curr_copy)=savepts(curr);
      //pts(curr_copy).weight=1;
      curr_copy++;
    }
    else curr++;
  }
  long int time_b=clock();
  single_write(cout,"Finding out where to send: ",double(time_b-time_a)/CLOCKS_PER_SEC,"\n");
  
  time_a=clock(); 
  //Finally, send or receive spillover walkers 
  if(nnwalkers > nconfig) { 
    vector<Queue_element>::iterator queue_pos=send_queue.begin();
    while(curr < nconfig) { 
      if(my_branch(curr) > 0) { 
        my_branch(curr)--;
        while(queue_pos->from_node != mpi_info.node) { 
          queue_pos++;
        }
        //cout << mpi_info.node << ":curr " << curr << " my_branch " << my_branch(curr) << endl;
        //cout << mpi_info.node << ":sending " << queue_pos->from_node << " to " << queue_pos->to_node << endl;
        savepts(curr).mpiSend(queue_pos->to_node);
        queue_pos++;
      }
      else curr++;
    }
    
  }
  else { //if nnwalkers == nconfig, then this will just get skipped immediately
    vector <Queue_element>::iterator queue_pos=send_queue.begin();
    while(curr_copy < nconfig) { 
      while(queue_pos->to_node != mpi_info.node) queue_pos++;
      //cout << mpi_info.node <<":receiving from " << queue_pos->from_node << " to " << curr_copy << endl;
      pts(curr_copy).mpiReceive(queue_pos->from_node);
      //pts(curr_copy).weight=1;
      curr_copy++;
      queue_pos++;
    }
  }
  time_b=clock();
  single_write(cout,"sending walkers:",double(time_b-time_a)/CLOCKS_PER_SEC,"\n");
  
  return killsize;
  //exit(0);
}
Example #18
0
int 
DispBeamColumn2d::getResponse(int responseID, Information &eleInfo)
{
  double V;
  double L = crdTransf->getInitialLength();

  if (responseID == 1)
    return eleInfo.setVector(this->getResistingForce());

  else if (responseID == 12) {
    P.Zero();
    P.addVector(1.0, this->getRayleighDampingForces(), 1.0);
    return eleInfo.setVector(P);

  } else if (responseID == 2) {
      P(3) =  q(0);
      P(0) = -q(0)+p0[0];
      P(2) = q(1);
      P(5) = q(2);
      V = (q(1)+q(2))/L;
      P(1) =  V+p0[1];
      P(4) = -V+p0[2];
      return eleInfo.setVector(P);
  }

  else if (responseID == 9) {
    return eleInfo.setVector(q);
  }

  else if (responseID == 19) {
    static Matrix kb(3,3);
    this->getBasicStiff(kb);
    return eleInfo.setMatrix(kb);
  }

  // Chord rotation
  else if (responseID == 3) {
    return eleInfo.setVector(crdTransf->getBasicTrialDisp());
  }

  // Plastic rotation
  else if (responseID == 4) {
    static Vector vp(3);
    static Vector ve(3);
    const Matrix &kb = this->getInitialBasicStiff();
    kb.Solve(q, ve);
    vp = crdTransf->getBasicTrialDisp();
    vp -= ve;
    return eleInfo.setVector(vp);
  }

  // Curvature sensitivity
  else if (responseID == 5) {
    /*
      Vector curv(numSections);
      const Vector &v = crdTransf->getBasicDispGradient(1);
      
      double L = crdTransf->getInitialLength();
      double oneOverL = 1.0/L;
      //const Matrix &pts = quadRule.getIntegrPointCoords(numSections);
      double pts[2];
      pts[0] = 0.0;
      pts[1] = 1.0;
      
      // Loop over the integration points
      for (int i = 0; i < numSections; i++) {
	int order = theSections[i]->getOrder();
	const ID &code = theSections[i]->getType();
	//double xi6 = 6.0*pts(i,0);
	double xi6 = 6.0*pts[i];
	curv(i) = oneOverL*((xi6-4.0)*v(1) + (xi6-2.0)*v(2));
      }
      
      return eleInfo.setVector(curv);
    */

    Vector curv(numSections);

    /*
    // Loop over the integration points
    for (int i = 0; i < numSections; i++) {
      int order = theSections[i]->getOrder();
      const ID &code = theSections[i]->getType();
      const Vector &dedh = theSections[i]->getdedh();
      for (int j = 0; j < order; j++) {
	if (code(j) == SECTION_RESPONSE_MZ)
	  curv(i) = dedh(j);
      }
    }
    */

    return eleInfo.setVector(curv);
  }

  // Basic deformation sensitivity
  else if (responseID == 6) {  
    const Vector &dvdh = crdTransf->getBasicDisplSensitivity(1);
    return eleInfo.setVector(dvdh);
  }

  else if (responseID == 7) {
    //const Matrix &pts = quadRule.getIntegrPointCoords(numSections);
    double xi[maxNumSections];
    beamInt->getSectionLocations(numSections, L, xi);
    Vector locs(numSections);
    for (int i = 0; i < numSections; i++)
      locs(i) = xi[i]*L;
    return eleInfo.setVector(locs);
  }

  else if (responseID == 8) {
    //const Vector &wts = quadRule.getIntegrPointWeights(numSections);
    double wt[maxNumSections];
    beamInt->getSectionWeights(numSections, L, wt);
    Vector weights(numSections);
    for (int i = 0; i < numSections; i++)
      weights(i) = wt[i]*L;
    return eleInfo.setVector(weights);
  }

  else
    return Element::getResponse(responseID, eleInfo);
}
	void withoutReplacementImpl(withoutReplacementArgs& args)
	{
		boost::numeric::ublas::matrix<double>& matrix = args.matrix;
		int n = args.n;
		int seed = args.seed;
		double alpha = args.alpha;
		if(matrix.size1() != matrix.size2())
		{
			throw std::runtime_error("Matrix must be square");
		}
		if(n < 1)
		{
			throw std::runtime_error("Input n must be at least 1");
		}
		int dimension = matrix.size1();
		boost::mt19937 randomSource;
		randomSource.seed(seed);
		std::vector<double> columnSums(dimension,0);
		for(int row = 0; row < dimension; row++)
		{
			for(int column = 0; column < dimension; column++)
			{
				columnSums[column] += std::fabs(matrix(row, column));
			}
		}
		
		std::vector<double> currentColumnSums(dimension), newCurrentColumnSums;
		std::vector<int> availableColumns(dimension), newAvailableColumns;
		std::vector<int> availableRows(dimension), newAvailableRows;
		std::vector<int> previousEntry(1), newPreviousEntry;
		std::vector<bool> usedRows(dimension), newUsedRows;
		std::vector<mpfr_class> weights(1, 1), newWeights;
		//Get out the choices at the start. 
		std::vector<possibility> possibilities;
		for(int i = 0; i < dimension; i++)
		{
			for(int j = 0; j < dimension; j++)
			{
				possibility nextPos;
				nextPos.parent = 0;
				nextPos.previousEntry = i;
				nextPos.newEntry = j;
				possibilities.push_back(nextPos);
			}
		}
		//These choices are all derived from a single particle at the start
		std::copy(columnSums.begin(), columnSums.end(), currentColumnSums.begin());
		for(int i = 0; i < dimension; i++)
		{
			availableColumns[i] = i;
			availableRows[i] = i;
		}
		std::fill(usedRows.begin(), usedRows.end(), false);

		sampling::sampfordFromParetoNaiveArgs samplingArgs;
		samplingArgs.n = n;

		samplingArgs.weights.resize(dimension*dimension);
		for(int i = 0; i < (int)possibilities.size(); i++)
		{
			possibility& pos = possibilities[i];
			if(pos.previousEntry == pos.newEntry)
			{
				samplingArgs.weights[i] = alpha * std::fabs(matrix(pos.previousEntry, pos.newEntry)) / (dimension*(dimension - 1));
			}
			else 
			{
				samplingArgs.weights[i] = std::fabs(matrix(pos.previousEntry, pos.newEntry)) / dimension;
			}
		}

		int currentSamples = 1;
		for(int permutationCounter = 0; permutationCounter < dimension; permutationCounter++)
		{
			//Draw sample
			if((int)possibilities.size() <= n)
			{
				newCurrentColumnSums.resize(possibilities.size()*dimension);
				newAvailableColumns.resize(possibilities.size()*(dimension - permutationCounter-1));
				newAvailableRows.resize(possibilities.size()*(dimension - permutationCounter-1));
				newUsedRows.resize(possibilities.size()*dimension);
				newPreviousEntry.resize(possibilities.size());
				int newAvailableColumnsIndex = 0;
				int newAvailableRowsIndex = 0;
				for(int i = 0; i < (int)possibilities.size(); i++)
				{
					possibility& pos = possibilities[i];
					std::copy(currentColumnSums.begin() + pos.parent * dimension, currentColumnSums.begin() + (pos.parent + 1) * dimension, newCurrentColumnSums.begin() + dimension * i);
					newCurrentColumnSums[dimension*i + pos.newEntry] -= std::fabs(matrix(pos.previousEntry, pos.newEntry));
					std::copy(usedRows.begin() + pos.parent*dimension, usedRows.begin() + (pos.parent + 1) * dimension, newUsedRows.begin() + dimension*i);
					newUsedRows[dimension*i + pos.previousEntry] = true;
					for(int j = 0; j < dimension - permutationCounter; j++)
					{
						if(availableColumns[j + pos.parent*(dimension - permutationCounter)] != pos.newEntry)
						{
							newAvailableColumns.at(newAvailableColumnsIndex) = availableColumns.at(j + pos.parent*(dimension - permutationCounter));
							newAvailableColumnsIndex++;
						}
						if(availableRows[j + pos.parent*(dimension - permutationCounter)] != pos.previousEntry)
						{
							newAvailableRows.at(newAvailableRowsIndex) = availableRows.at(j + pos.parent*(dimension - permutationCounter));
							newAvailableRowsIndex++;
						}
					}
					if(newUsedRows[i*dimension + pos.newEntry])
					{
						newPreviousEntry[i] = -1;
					}
					else
					{
						newPreviousEntry[i] = pos.newEntry;
					}
				}
				newWeights.swap(samplingArgs.weights);
				currentSamples = possibilities.size();
			}
			else
			{
				sampfordFromParetoNaive(samplingArgs, randomSource);
				newCurrentColumnSums.resize(n*dimension);
				newAvailableColumns.resize(n*(dimension - permutationCounter-1));
				newAvailableRows.resize(n*(dimension - permutationCounter-1));
				newUsedRows.resize(n*dimension);
				newWeights.resize(n);
				newPreviousEntry.resize(n);
				int newAvailableColumnsIndex = 0;
				int newAvailableRowsIndex = 0;
				for(int i = 0; i < n; i++)
				{
					possibility& pos = possibilities[samplingArgs.indices[i]];
					std::copy(currentColumnSums.begin() + pos.parent * dimension, currentColumnSums.begin() + (pos.parent + 1) * dimension, newCurrentColumnSums.begin() + dimension * i);
					newCurrentColumnSums[dimension*i + pos.newEntry] -= std::fabs(matrix(pos.previousEntry, pos.newEntry));
					std::copy(usedRows.begin() + pos.parent*dimension, usedRows.begin() + (pos.parent + 1) * dimension, newUsedRows.begin() + dimension*i);
					newUsedRows[dimension*i + pos.previousEntry] = true;
					for(int j = 0; j < dimension - permutationCounter; j++)
					{
						if(availableColumns[j + pos.parent*(dimension - permutationCounter)] != pos.newEntry)
						{
							newAvailableColumns.at(newAvailableColumnsIndex) = availableColumns.at(j + pos.parent*(dimension - permutationCounter));
							newAvailableColumnsIndex++;
						}
						if(availableRows[j + pos.parent*(dimension - permutationCounter)] != pos.previousEntry)
						{
							newAvailableRows.at(newAvailableRowsIndex) = availableRows.at(j + pos.parent*(dimension - permutationCounter));
							newAvailableRowsIndex++;
						}
					}
					if(newUsedRows[i*dimension + pos.newEntry])
					{
						newPreviousEntry[i] = -1;
					}
					else
					{
						newPreviousEntry[i] = pos.newEntry;
					}
					newWeights[i] = samplingArgs.weights[samplingArgs.indices[i]] / samplingArgs.rescaledWeights[samplingArgs.indices[i]];
				}
				currentSamples = n;
			}
			previousEntry.swap(newPreviousEntry);
			currentColumnSums.swap(newCurrentColumnSums);
			availableColumns.swap(newAvailableColumns);
			availableRows.swap(newAvailableRows);
			usedRows.swap(newUsedRows);
			weights.swap(newWeights);

			//If we're not at the end, get out the list of possibilities and also weights
			if(permutationCounter != dimension - 1)
			{
				possibilities.clear();
				samplingArgs.weights.clear();
				for(int i = 0; i < currentSamples; i++)
				{
					if(previousEntry[i] == -1)
					{
						for(int j = 0; j < dimension - permutationCounter - 1; j++)
						{
							for(int k = 0; k < dimension - permutationCounter - 1; k++)
							{
								possibility pos;
								pos.parent = i;
								pos.previousEntry = availableRows[(dimension - permutationCounter - 1) * i + j];
								pos.newEntry = availableColumns[(dimension - permutationCounter - 1) * i + k];
								possibilities.push_back(pos);
								if(j == k || usedRows[pos.newEntry + dimension * i])
								{
									if(dimension - 2 != permutationCounter)
									{
										samplingArgs.weights.push_back(weights[i] * alpha * std::fabs(matrix(pos.previousEntry, pos.newEntry))/ (dimension - permutationCounter - 2));
									}
									else
									{
										samplingArgs.weights.push_back(weights[i] * alpha * std::fabs(matrix(pos.previousEntry, pos.newEntry)));
									}
								}
								else
								{
									samplingArgs.weights.push_back(weights[i] * std::fabs(matrix(pos.previousEntry, pos.newEntry)));
								}
							}
						}

					}
					else
					{
						for(int j = 0; j < dimension - permutationCounter - 1; j++)
						{
							possibility pos;
							pos.parent = i;
							pos.previousEntry = previousEntry[i];
							pos.newEntry = availableColumns[(dimension - permutationCounter - 1) * i + j];
							possibilities.push_back(pos);
							if(j == previousEntry[i] || usedRows[pos.newEntry + dimension * i])
							{
								if(dimension - 2 != permutationCounter)
								{
									samplingArgs.weights.push_back(weights[i] * alpha * std::fabs(matrix(pos.previousEntry, pos.newEntry)) / (dimension - permutationCounter - 2));
								}
								else
								{
									samplingArgs.weights.push_back(weights[i] * alpha * std::fabs(matrix(pos.previousEntry, pos.newEntry)));
								}
							}
							else
							{
								samplingArgs.weights.push_back(weights[i] * std::fabs(matrix(pos.previousEntry, pos.newEntry)));
							}
						}
					}
				}
			}
		}
		args.estimate = 0;
		for(int i = 0; i < (int)weights.size(); i++)
		{
			args.estimate += weights[i];
		}
	}
Example #20
0
int main(int argc, char** argv) {

	try {

		util::ProgramOptions::init(argc, argv);
		logger::LogManager::init();

		Hdf5CragStore   cragStore(optionProjectFile.as<std::string>());
		Hdf5VolumeStore volumeStore(optionProjectFile.as<std::string>());

		Crag crag;
		cragStore.retrieveCrag(crag);

		NodeFeatures nodeFeatures(crag);
		EdgeFeatures edgeFeatures(crag);

		LOG_USER(logger::out) << "reading features" << std::endl;
		cragStore.retrieveNodeFeatures(crag, nodeFeatures);
		cragStore.retrieveEdgeFeatures(crag, edgeFeatures);

		BundleOptimizer::Parameters parameters;
		parameters.lambda      = optionRegularizerWeight;
		parameters.epsStrategy = BundleOptimizer::EpsFromGap;
		BundleOptimizer optimizer(parameters);

		BestEffort*  bestEffort  = 0;
		OverlapLoss* overlapLoss = 0;

		if (optionBestEffortFromProjectFile) {

			LOG_USER(logger::out) << "reading best-effort" << std::endl;

			bestEffort = new BestEffort(crag);

			vigra::HDF5File project(
					optionProjectFile.as<std::string>(),
					vigra::HDF5File::OpenMode::ReadWrite);
			project.cd("best_effort");
			vigra::ArrayVector<int> beNodes;
			vigra::MultiArray<2, int> beEdges;
			project.readAndResize("nodes", beNodes);
			project.readAndResize("edges", beEdges);

			std::set<int> nodes;
			for (int n : beNodes)
				nodes.insert(n);

			std::set<std::pair<int, int>> edges;
			for (int i = 0; i < beEdges.shape(1); i++)
				edges.insert(
						std::make_pair(
							std::min(beEdges(i, 0), beEdges(i, 1)),
							std::max(beEdges(i, 0), beEdges(i, 1))));

			for (Crag::NodeIt n(crag); n != lemon::INVALID; ++n)
				bestEffort->node[n] = nodes.count(crag.id(n));

			for (Crag::EdgeIt e(crag); e != lemon::INVALID; ++e)
				bestEffort->edge[e] = edges.count(
						std::make_pair(
								std::min(crag.id(crag.u(e)), crag.id(crag.v(e))),
								std::max(crag.id(crag.u(e)), crag.id(crag.v(e)))));

		} else {

			LOG_USER(logger::out) << "reading ground-truth" << std::endl;
			ExplicitVolume<int> groundTruth;
			volumeStore.retrieveGroundTruth(groundTruth);

			LOG_USER(logger::out) << "finding best-effort solution" << std::endl;
			overlapLoss = new OverlapLoss(crag, groundTruth);
			bestEffort = new BestEffort(crag, *overlapLoss);
		}

		Loss* loss = 0;
		bool  destructLoss = false;

		if (optionLoss.as<std::string>() == "hamming") {

			LOG_USER(logger::out) << "using Hamming loss" << std::endl;

			loss = new HammingLoss(crag, *bestEffort);
			destructLoss = true;

		} else if (optionLoss.as<std::string>() == "overlap") {

			LOG_USER(logger::out) << "using overlap loss" << std::endl;

			if (!overlapLoss) {

				LOG_USER(logger::out) << "reading ground-truth" << std::endl;
				ExplicitVolume<int> groundTruth;
				volumeStore.retrieveGroundTruth(groundTruth);

				LOG_USER(logger::out) << "finding best-effort solution" << std::endl;
				overlapLoss = new OverlapLoss(crag, groundTruth);
			}

			loss = overlapLoss;

		} else {

			LOG_ERROR(logger::out)
					<< "unknown loss: "
					<< optionLoss.as<std::string>()
					<< std::endl;
			return 1;
		}

		if (optionNormalizeLoss) {

			LOG_USER(logger::out) << "normalizing loss..." << std::endl;
			loss->normalize(crag, MultiCut::Parameters());
		}

		Oracle oracle(
				crag,
				nodeFeatures,
				edgeFeatures,
				*loss,
				*bestEffort);

		std::vector<double> weights(nodeFeatures.dims() + edgeFeatures.dims(), 0);
		optimizer.optimize(oracle, weights);

		storeVector(weights, optionFeatureWeights);

		if (destructLoss && loss != 0)
			delete loss;

		if (overlapLoss)
			delete overlapLoss;

		if (bestEffort)
			delete bestEffort;

	} catch (boost::exception& e) {

		handleException(e, std::cerr);
	}
}
void QCAD::ResponseFieldIntegral<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Zero out local response
  for (typename PHX::MDField<ScalarT>::size_type i=0; 
       i<this->local_response.size(); i++)
    this->local_response[i] = 0.0;

  typename std::vector<PHX::MDField<ScalarT,Cell,QuadPoint> >::const_iterator it;

  if(opRegion->elementBlockIsInRegion(workset.EBName)) {

    ScalarT term, val; //, dbI = 0.0;
    std::size_t n, max, nExtraMinuses, nOneBits, nBits = fields.size();

    //DEBUG
    //std::size_t nContrib1 = 0, nContrib2 = 0;
    //ScalarT dbMaxRe[10], dbMaxIm[10];
    //for(std::size_t i=0; i<10; i++) dbMaxRe[i] = dbMaxIm[i] = 0.0;

    for (std::size_t cell=0; cell < workset.numCells; ++cell) {
      if(!opRegion->cellIsInRegion(cell)) continue;

      for (std::size_t qp=0; qp < numQPs; ++qp) {
	val = 0.0;

	//Loop over all possible combinations of Re/Im parts which form product terms and 
	// add the relevant ones (depending on whether we're returning the overall real or
	// imaginary part of the integral) to get the integrand value for this (cell,qp).
	// We do this by mapping the Re/Im choice onto a string of N bits, where N is the 
	// number of fields being multiplied together. (0 = RePart, 1 = ImPart)

	//nContrib1++; //DEBUG

	//for(it = fields.begin(); it != fields.end(); ++it)
	max = (std::size_t)std::pow(2.,(int)nBits);
//	max = pow(2.0,static_cast<int>(nBits));
	for(std::size_t i=0; i<max; i++) {

	  // Count the number of 1 bits, and exit early if 
	  //  there's a 1 bit for a field that is not complex
	  nOneBits = nExtraMinuses = 0;
	  for(n=0; n<nBits; n++) {
	    if( (0x1 << n) & i ) { // if n-th bit of i is set (use Im part of n-th field)
	      if(!fieldIsComplex[n]) break;
	      if(conjugateFieldFlag[n]) nExtraMinuses++;
	      nOneBits++;
	    }	
	  }
	  if(n < nBits) continue;  // we exited early, signaling this product can't contribute

	  //check if this combination of Re/Im parts contributes to the overall Re or Im part we return
	  if( (bReturnImagPart && nOneBits % 2) || (!bReturnImagPart && nOneBits % 2 == 0)) {
	    term = (nOneBits % 4 >= 2) ? -1.0 : 1.0; //apply minus sign if nOneBits % 4 == 2 (-1) or == 3 (-i)
	    if(nExtraMinuses % 2) term *= -1.0;      //apply minus sign due to conjugations
	    //nContrib2++;

	    //multiply fields together
	    for(std::size_t m=0; m<nBits; m++) { 
	      if( (0x1 << m) & i ) {
		term *= fields_Imag[m](cell,qp);
		//if( abs(fields_Imag[m](cell,qp)) > dbMaxIm[m]) dbMaxIm[m] = abs(fields_Imag[m](cell,qp));
	      }	
	      else {
		term *= fields[m](cell,qp);
		//if( abs(fields[m](cell,qp)) > dbMaxRe[m]) dbMaxRe[m] = abs(fields[m](cell,qp));
	      }
	    }

	    val += term;  //add term to overall integrand
	  }
	}
	val *= weights(cell,qp) * scaling; //multiply integrand by volume

	//dbI += val; //DEBUG
        this->local_response(cell) += val;
	this->global_response(0) += val;
      }
    }

    //DEBUG
    /*if(fieldNames.size() > 1) {
      std::cout << "DB: " << (bReturnImagPart == true ? "Im" : "Re") << " Field Integral - int(";
      for(std::size_t i=0; i<fieldNames.size(); i++) 
	std::cout << fieldNames[i] << "," << (conjugateFieldFlag[i] ? "-" : "") << (fieldIsComplex[i] ? fieldNames_Imag[i] : "X") << " * ";
      std::cout << " dV) -- I += " << dbI << "  (ebName = " << workset.EBName << 
	" contrib1=" << nContrib1 << " contrib2=" << nContrib2 << ")" << std::endl;
      std::cout << "DB MAX of Fields Re: " << dbMaxRe[0] << "," << dbMaxRe[1] << "," << dbMaxRe[2] << "," << dbMaxRe[3] << std::endl;
      std::cout << "DB MAX of Fields Im: " << dbMaxIm[0] << "," << dbMaxIm[1] << "," << dbMaxIm[2] << "," << dbMaxIm[3] << std::endl;
      }*/
  }

  // Do any local-scattering necessary
  PHAL::SeparableScatterScalarResponse<EvalT,Traits>::evaluateFields(workset);
}
void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_SGD(
        float *inp_ptr, float *out_ptr, float *wei_ptr, float *bias_ptr,
        const memory_tracking::grantor_t &scratchpad) const {
    const auto &jcp = kernel_->jcp;
    const auto &p_ops = attr_->post_ops_;

    const int inph = is_fwd ? jcp.ih : jcp.oh;
    const int inpw = is_fwd ? jcp.iw : jcp.ow;
    const int outh = is_fwd ? jcp.oh : jcp.ih;
    const int outw = is_fwd ? jcp.ow : jcp.iw;

    array_offset_calculator<float, 5> input(inp_ptr,
        jcp.mb, jcp.dimK/jcp.dimK_reg_block, inph, inpw, jcp.dimK_reg_block);
    array_offset_calculator<float, 5> output(out_ptr,
        jcp.mb, jcp.dimM/jcp.dimM_simd_block, outh, outw, jcp.dimM_simd_block);
    array_offset_calculator<float, 6> weights(wei_ptr,
        jcp.oc/jcp.oc_simd_block, jcp.ic/jcp.ic_simd_block, jcp.kh, jcp.kw,
        jcp.ic_simd_block, jcp.oc_simd_block);
    array_offset_calculator<float, 2> bias(bias_ptr,
        jcp.oc/jcp.oc_simd_block, jcp.oc_simd_block);

    auto wino_wei = (jcp.prop_kind == prop_kind::forward_inference)
                ? wei_ptr
                : scratchpad.template get<float>(key_wino_U);

    array_offset_calculator<float, 8> U(wino_wei,
            jcp.dimM_nb_block,
            alpha, alpha,
            jcp.dimK_nb_block,
            jcp.dimM_block  * jcp.dimM_reg_block, jcp.dimK_block,
            jcp.dimK_reg_block, jcp.dimM_simd_block);

    array_offset_calculator<float, 8> M(is_fwd
            ? scratchpad.template get<float>(key_wino_M)
            : scratchpad.template get<float>(key_wino_V),
            0, jcp.dimM_nb_block, alpha, alpha,
            jcp.dimN_block, jcp.dimM_block * jcp.dimM_reg_block,
            jcp.dimN_reg_block, jcp.dimM_simd_block);
    array_offset_calculator<float, 8> V(is_fwd
            ? scratchpad.template get<float>(key_wino_V)
            : scratchpad.template get<float>(key_wino_M),
            0, alpha, alpha, jcp.dimN_block,
            jcp.dimK_nb_block, jcp.dimK_block,
            jcp.dimN_reg_block, jcp.dimK_reg_block);

    const bool wants_padded_bias = jcp.with_bias
        && jcp.oc_without_padding != jcp.oc;
    float last_slice_bias[simd_w] = {0};
    if (wants_padded_bias) {
        for (int oc = 0; oc < jcp.oc_without_padding % jcp.oc_simd_block; ++oc)
            last_slice_bias[oc] = bias(jcp.dimM / jcp.dimM_simd_block - 1, oc);
    }

    if (jcp.prop_kind != prop_kind::forward_inference) {

        parallel_nd(jcp.nb_oc, jcp.nb_ic, (jcp.oc_block * jcp.oc_reg_block), (jcp.ic_block * jcp.ic_reg_block),
                    [&](int ofm1, int ifm1, int ofm2, int ifm2) {
            float *U_base_ptr = is_fwd
                              ? &(U(ofm1, 0, 0, ifm1, ofm2, ifm2, 0, 0))
                              : &(U(ifm1, 0, 0, ofm1, ifm2, ofm2, 0, 0));
            weight_transform_data(jcp,
                    &(weights(
                        ofm1 * jcp.oc_block * jcp.oc_reg_block + ofm2,
                        ifm1 * jcp.ic_block * jcp.ic_reg_block + ifm2,
                        0, 0, 0, 0)),
                    U_base_ptr);
        });
    }

PRAGMA_OMP(parallel)
    {

    int ithr = mkldnn_get_thread_num();

PRAGMA_OMP(for schedule(static))
    for (int tile_block = 0; tile_block < jcp.tile_block; tile_block++) {
        for (int K_blk1 = 0; K_blk1 < jcp.dimK_nb_block; K_blk1++) {
            for (int K_blk2 = 0; K_blk2 < jcp.dimK_block; K_blk2++) {

                input_transform_tileblock_data(
                        tile_block, jcp,
                        &(input(0, K_blk1 * jcp.dimK_block + K_blk2, 0, 0, 0)),
                        &(V(ithr, 0, 0, 0, K_blk1, K_blk2, 0, 0)));
            }
        }

        for (int oj = 0; oj < alpha; oj++) {
            for (int oi = 0; oi < alpha; oi++) {
                for (int M_blk1 = 0; M_blk1 < jcp.dimM_nb_block; M_blk1++)
                for (int K_blk1 = 0; K_blk1 < jcp.dimK_nb_block; K_blk1++)
                for (int N_blk = 0; N_blk < jcp.dimN_block; N_blk++)
                    kernel_->gemm_loop_ker(
                            (float *)&(M(ithr, M_blk1, oj, oi,
                                    N_blk, 0, 0, 0)),
                            (const float *)&(U(M_blk1, oj, oi, K_blk1,
                                    0, 0, 0, 0)),
                            (const float *)&(V(ithr, oj, oi,
                                    N_blk, K_blk1, 0, 0, 0)), K_blk1);
            }
        }

        for (int M_blk1 = 0; M_blk1 < jcp.dimM_nb_block; M_blk1++) {
            for (int M_blk2 = 0; M_blk2 < jcp.dimM_block * jcp.dimM_reg_block;
                  M_blk2++) {
                const int M_blk =
                    M_blk1 * jcp.dimM_block  * jcp.dimM_reg_block + M_blk2;

                float *bias_ptr = wants_padded_bias
                    && M_blk == jcp.dimM / jcp.dimM_simd_block - 1
                    ? last_slice_bias : &bias(M_blk, 0);

                output_transform_tileblock_data(tile_block, jcp, p_ops,
                        &(M(ithr, M_blk1, 0, 0, 0, M_blk2, 0, 0)),
                        &(output(0, M_blk, 0, 0, 0)), bias_ptr);
            }
        }
    }
    }
}
void vtkMitkThickSlicesFilterExecute(vtkMitkThickSlicesFilter *self,
                             vtkImageData *inData, T *inPtr,
                             vtkImageData *outData, T *outPtr,
                             int outExt[6], int /*id*/)
{
  int idxX, idxY;
  int maxX, maxY;
  vtkIdType inIncX, inIncY, inIncZ;
  vtkIdType outIncX, outIncY, outIncZ;
  //int axesNum;
  int *inExt = inData->GetExtent();
  int *wholeExtent;
  vtkIdType *inIncs;
  //int useYMin, useYMax, useXMin, useXMax;

  // find the region to loop over
  maxX = outExt[1] - outExt[0];
  maxY = outExt[3] - outExt[2];

//  maxZ = outExt[5] - outExt[4];

  // Get the dimensionality of the gradient.
  //axesNum = self->GetDimensionality();

  // Get increments to march through data
  inData->GetContinuousIncrements(outExt, inIncX, inIncY, inIncZ);
  outData->GetContinuousIncrements(outExt, outIncX, outIncY, outIncZ);
                   /*
  // The data spacing is important for computing the gradient.
  // central differences (2 * ratio).
  // Negative because below we have (min - max) for dx ...
  inData->GetSpacing(r);
  r[0] = -0.5 / r[0];
  r[1] = -0.5 / r[1];
  r[2] = -0.5 / r[2];
                     */
  // get some other info we need
  inIncs = inData->GetIncrements();
  wholeExtent = inData->GetExtent();

  // Move the pointer to the correct starting position.
  inPtr += (outExt[0]-inExt[0])*inIncs[0] +
           (outExt[2]-inExt[2])*inIncs[1] +
           (outExt[4]-inExt[4])*inIncs[2];

  // Loop through ouput pixels

  int _minZ = /*-5 + outExt[4];  if( _minZ  < wholeExtent[4]) _minZ=*/wholeExtent[4];
  int _maxZ = /* 5 + outExt[4];  if( _maxZ  > wholeExtent[5]) _maxZ=*/wholeExtent[5];

  if(_maxZ<_minZ)
    return;

  double invNum = 1.0 / (_maxZ-_minZ+1) ;


  switch(self->GetThickSliceMode())
  {
    default:
    case vtkMitkThickSlicesFilter::MIP:
      {
        //MIP
        for (idxY = 0; idxY <= maxY; idxY++)
        {
          //useYMin = ((idxY + outExt[2]) <= wholeExtent[2]) ? 0 : -inIncs[1];
          //useYMax = ((idxY + outExt[2]) >= wholeExtent[3]) ? 0 : inIncs[1];
          for (idxX = 0; idxX <= maxX; idxX++)
          {
            //useXMin = ((idxX + outExt[0]) <= wholeExtent[0]) ? 0 : -inIncs[0];
            //useXMax = ((idxX + outExt[0]) >= wholeExtent[1]) ? 0 : inIncs[0];

            T mip = inPtr[_minZ*inIncs[2]];

            for(int z = _minZ+1; z<= _maxZ;z++)
            {
              T value = inPtr[z*inIncs[2]];
              if(value > mip)
                mip=value;
            }

            // do X axis
            *outPtr = mip;
            outPtr++;
            inPtr++;
          }
          outPtr += outIncY;
          inPtr += inIncY;
        }
      }
      break;

    case vtkMitkThickSlicesFilter::SUM:
      {
        //MIP
        for (idxY = 0; idxY <= maxY; idxY++)
        {
          //useYMin = ((idxY + outExt[2]) <= wholeExtent[2]) ? 0 : -inIncs[1];
          //useYMax = ((idxY + outExt[2]) >= wholeExtent[3]) ? 0 : inIncs[1];
          for (idxX = 0; idxX <= maxX; idxX++)
          {
            //useXMin = ((idxX + outExt[0]) <= wholeExtent[0]) ? 0 : -inIncs[0];
            //useXMax = ((idxX + outExt[0]) >= wholeExtent[1]) ? 0 : inIncs[0];

            double sum = 0;

            for(int z = _minZ; z<= _maxZ;z++)
            {
              T value = inPtr[z*inIncs[2]];
              sum += value;
            }

            // do X axis
            *outPtr = static_cast<T>(invNum*sum);
            outPtr++;
            inPtr++;
          }
          outPtr += outIncY;
          inPtr += inIncY;
        }
      }
      break;

  case vtkMitkThickSlicesFilter::WEIGHTED:
    {
      const int size = _maxZ-_minZ;
      std::vector<double> weights(size);
      double mean = 0.5 * double(_minZ + _maxZ);
      double sigma_sq = double(size) / 6.0;
      sigma_sq *= sigma_sq;
      double sum = 0;
      int i=0;
      for(int z = _minZ+1; z<= _maxZ;z++)
      {
        double val = exp(-(((double)z-mean)/sigma_sq));
        weights[i++] = val;
        sum += val;
      }
      for(i=0; i<size; i++)
      {
        weights[i] /= sum;
      }

      for (idxY = 0; idxY <= maxY; idxY++)
      {
        //useYMin = ((idxY + outExt[2]) <= wholeExtent[2]) ? 0 : -inIncs[1];
        //useYMax = ((idxY + outExt[2]) >= wholeExtent[3]) ? 0 : inIncs[1];
        for (idxX = 0; idxX <= maxX; idxX++)
        {
          //useXMin = ((idxX + outExt[0]) <= wholeExtent[0]) ? 0 : -inIncs[0];
          //useXMax = ((idxX + outExt[0]) >= wholeExtent[1]) ? 0 : inIncs[0];

          T mip = inPtr[_minZ*inIncs[2]];
          i=0;
          double mymip = 0;
          for(int z = _minZ+1; z<= _maxZ;z++)
          {
            double value = inPtr[z*inIncs[2]];
            mymip+=value*weights[i++];
          }
          mip = static_cast<T>(mymip);
          // do X axis
          *outPtr = mip;
          outPtr++;
          inPtr++;
        }
        outPtr += outIncY;
        inPtr += inIncY;
      }
    }
    break;

  case vtkMitkThickSlicesFilter::MINIP:
    {
      for (idxY = 0; idxY <= maxY; idxY++)
      {
        for (idxX = 0; idxX <= maxX; idxX++)
        {
          T mip = inPtr[_minZ*inIncs[2]];

          for(int z = _minZ+1; z<= _maxZ;z++)
          {
            T value = inPtr[z*inIncs[2]];
            if(value < mip)
              mip=value;
          }

          // do X axis
          *outPtr = mip;
          outPtr++;
          inPtr++;
        }
        outPtr += outIncY;
        inPtr += inIncY;
      }
    }
    break;

  case vtkMitkThickSlicesFilter::MEAN:
    {
      const int size = _maxZ-_minZ;

      //MEAN
      for (idxY = 0; idxY <= maxY; idxY++)
      {
        for (idxX = 0; idxX <= maxX; idxX++)
        {
          T sum = 0;
          for(int z = _minZ; z <= _maxZ;z++)
          {
            T value = inPtr[z*inIncs[2]];
            sum += value;
          }

          T mip = sum/size;

          // do X axis
          *outPtr = mip;
          outPtr++;
          inPtr++;
        }
        outPtr += outIncY;
        inPtr += inIncY;
      }
    }
    break;


  }

}
Example #24
0
// The main program
int main(int argc, char** argv)
{
    // Initialize libMesh
    LibMeshInit init(argc, argv);

    // Parameters
    GetPot infile("fem_system_params.in");
    const Real global_tolerance          = infile("global_tolerance", 0.);
    const unsigned int nelem_target      = infile("n_elements", 400);
    const bool transient                 = infile("transient", true);
    const Real deltat                    = infile("deltat", 0.005);
    unsigned int n_timesteps             = infile("n_timesteps", 1);
    //const unsigned int coarsegridsize    = infile("coarsegridsize", 1);
    const unsigned int coarserefinements = infile("coarserefinements", 0);
    const unsigned int max_adaptivesteps = infile("max_adaptivesteps", 10);
    //const unsigned int dim               = 2;

#ifdef LIBMESH_HAVE_EXODUS_API
    const unsigned int write_interval    = infile("write_interval", 5);
#endif

    // Create a mesh, with dimension to be overridden later, distributed
    // across the default MPI communicator.
    Mesh mesh(init.comm());
    GetPot infileForMesh("convdiff_mprime.in");
    std::string find_mesh_here = infileForMesh("mesh","psiLF_mesh.xda");
    mesh.read(find_mesh_here);

    std::cout << "Read in mesh from: " << find_mesh_here << "\n\n";

    // And an object to refine it
    MeshRefinement mesh_refinement(mesh);
    mesh_refinement.coarsen_by_parents() = true;
    mesh_refinement.absolute_global_tolerance() = global_tolerance;
    mesh_refinement.nelem_target() = nelem_target;
    mesh_refinement.refine_fraction() = 0.3;
    mesh_refinement.coarsen_fraction() = 0.3;
    mesh_refinement.coarsen_threshold() = 0.1;

    //mesh_refinement.uniformly_refine(coarserefinements);

    // Print information about the mesh to the screen.
    mesh.print_info();

    // Create an equation systems object.
    EquationSystems equation_systems (mesh);

    // Name system
    ConvDiff_MprimeSys & system =
        equation_systems.add_system<ConvDiff_MprimeSys>("Diff_ConvDiff_MprimeSys");

    // Steady-state problem
    system.time_solver =
        AutoPtr<TimeSolver>(new SteadySolver(system));

    // Sanity check that we are indeed solving a steady problem
    libmesh_assert_equal_to (n_timesteps, 1);

    // Read in all the equation systems data from the LF solve (system, solutions, rhs, etc)
    std::string find_psiLF_here = infileForMesh("psiLF_file","psiLF.xda");
    std::cout << "Looking for psiLF at: " << find_psiLF_here << "\n\n";

    equation_systems.read(find_psiLF_here, READ,
                          EquationSystems::READ_HEADER |
                          EquationSystems::READ_DATA |
                          EquationSystems::READ_ADDITIONAL_DATA);

    // Check that the norm of the solution read in is what we expect it to be
    Real readin_L2 = system.calculate_norm(*system.solution, 0, L2);
    std::cout << "Read in solution norm: "<< readin_L2 << std::endl << std::endl;

    //DEBUG
    //equation_systems.write("right_back_out.xda", WRITE, EquationSystems::WRITE_DATA |
    //		 EquationSystems::WRITE_ADDITIONAL_DATA);
#ifdef LIBMESH_HAVE_GMV
    //GMVIO(equation_systems.get_mesh()).write_equation_systems(std::string("right_back_out.gmv"), equation_systems);
#endif

    // Initialize the system
    //equation_systems.init ();  //already initialized by read-in

    // And the nonlinear solver options
    NewtonSolver *solver = new NewtonSolver(system);
    system.time_solver->diff_solver() = AutoPtr<DiffSolver>(solver);
    solver->quiet = infile("solver_quiet", true);
    solver->verbose = !solver->quiet;
    solver->max_nonlinear_iterations =
        infile("max_nonlinear_iterations", 15);
    solver->relative_step_tolerance =
        infile("relative_step_tolerance", 1.e-3);
    solver->relative_residual_tolerance =
        infile("relative_residual_tolerance", 0.0);
    solver->absolute_residual_tolerance =
        infile("absolute_residual_tolerance", 0.0);

    // And the linear solver options
    solver->max_linear_iterations =
        infile("max_linear_iterations", 50000);
    solver->initial_linear_tolerance =
        infile("initial_linear_tolerance", 1.e-3);

    // Print information about the system to the screen.
    equation_systems.print_info();

    // Now we begin the timestep loop to compute the time-accurate
    // solution of the equations...not that this is transient, but eh, why not...
    for (unsigned int t_step=0; t_step != n_timesteps; ++t_step)
    {
        // A pretty update message
        std::cout << "\n\nSolving time step " << t_step << ", time = "
                  << system.time << std::endl;

        // Adaptively solve the timestep
        unsigned int a_step = 0;
        for (; a_step != max_adaptivesteps; ++a_step)
        {   // VESTIGIAL for now ('vestigial' eh ? ;) )

            std::cout << "\n\n I should be skipped what are you doing here lalalalalalala *!**!*!*!*!*!* \n\n";

            system.solve();
            system.postprocess();
            ErrorVector error;
            AutoPtr<ErrorEstimator> error_estimator;

            // To solve to a tolerance in this problem we
            // need a better estimator than Kelly
            if (global_tolerance != 0.)
            {
                // We can't adapt to both a tolerance and a mesh
                // size at once
                libmesh_assert_equal_to (nelem_target, 0);

                UniformRefinementEstimator *u =
                    new UniformRefinementEstimator;

                // The lid-driven cavity problem isn't in H1, so
                // lets estimate L2 error
                u->error_norm = L2;

                error_estimator.reset(u);
            }
            else
            {
                // If we aren't adapting to a tolerance we need a
                // target mesh size
                libmesh_assert_greater (nelem_target, 0);

                // Kelly is a lousy estimator to use for a problem
                // not in H1 - if we were doing more than a few
                // timesteps we'd need to turn off or limit the
                // maximum level of our adaptivity eventually
                error_estimator.reset(new KellyErrorEstimator);
            }

            // Calculate error
            std::vector<Real> weights(9,1.0);  // based on u, v, p, c, their adjoints, and source parameter

            // Keep the same default norm type.
            std::vector<FEMNormType>
            norms(1, error_estimator->error_norm.type(0));
            error_estimator->error_norm = SystemNorm(norms, weights);

            error_estimator->estimate_error(system, error);

            // Print out status at each adaptive step.
            Real global_error = error.l2_norm();
            std::cout << "Adaptive step " << a_step << ": " << std::endl;
            if (global_tolerance != 0.)
                std::cout << "Global_error = " << global_error
                          << std::endl;
            if (global_tolerance != 0.)
                std::cout << "Worst element error = " << error.maximum()
                          << ", mean = " << error.mean() << std::endl;

            if (global_tolerance != 0.)
            {
                // If we've reached our desired tolerance, we
                // don't need any more adaptive steps
                if (global_error < global_tolerance)
                    break;
                mesh_refinement.flag_elements_by_error_tolerance(error);
            }
            else
            {
                // If flag_elements_by_nelem_target returns true, this
                // should be our last adaptive step.
                if (mesh_refinement.flag_elements_by_nelem_target(error))
                {
                    mesh_refinement.refine_and_coarsen_elements();
                    equation_systems.reinit();
                    a_step = max_adaptivesteps;
                    break;
                }
            }

            // Carry out the adaptive mesh refinement/coarsening
            mesh_refinement.refine_and_coarsen_elements();
            equation_systems.reinit();

            std::cout << "Refined mesh to "
                      << mesh.n_active_elem()
                      << " active elements and "
                      << equation_systems.n_active_dofs()
                      << " active dofs." << std::endl;
        } // End loop over adaptive steps

        // Do one last solve if necessary
        if (a_step == max_adaptivesteps)
        {
            QoISet qois;
            std::vector<unsigned int> qoi_indices;

            qoi_indices.push_back(0);
            qois.add_indices(qoi_indices);

            qois.set_weight(0, 1.0);

            system.assemble_qoi_sides = true; //QoI doesn't involve sides

            std::cout << "\n~*~*~*~*~*~*~*~*~ adjoint solve start ~*~*~*~*~*~*~*~*~\n" << std::endl;
            std::pair<unsigned int, Real> adjsolve = system.adjoint_solve();
            std::cout << "number of iterations to solve adjoint: " << adjsolve.first << std::endl;
            std::cout << "final residual of adjoint solve: " << adjsolve.second << std::endl;
            std::cout << "\n~*~*~*~*~*~*~*~*~ adjoint solve end ~*~*~*~*~*~*~*~*~" << std::endl;

            NumericVector<Number> &dual_solution = system.get_adjoint_solution(0);
            NumericVector<Number> &primal_solution = *system.solution;

            primal_solution.swap(dual_solution);
            ExodusII_IO(mesh).write_timestep("super_adjoint.exo",
                                             equation_systems,
                                             1, /* This number indicates how many time steps
	                                       are being written to the file */
                                             system.time);
            primal_solution.swap(dual_solution);

            system.assemble(); //overwrite residual read in from psiLF solve

            // The total error estimate
            system.postprocess(); //to compute M_HF(psiLF) and M_LF(psiLF) terms
            Real QoI_error_estimate = (-0.5*(system.rhs)->dot(dual_solution)) + system.get_MHF_psiLF() - system.get_MLF_psiLF();
            std::cout << "\n\n 0.5*M'_HF(psiLF)(superadj): " << std::setprecision(17) << 0.5*(system.rhs)->dot(dual_solution) << "\n";
            std::cout << " M_HF(psiLF): " << std::setprecision(17) << system.get_MHF_psiLF() << "\n";
            std::cout << " M_LF(psiLF): " << std::setprecision(17) << system.get_MLF_psiLF() << "\n";
            std::cout << "\n\n Residual L2 norm: " << system.calculate_norm(*system.rhs, L2) << "\n";
            std::cout << " Residual discrete L2 norm: " << system.calculate_norm(*system.rhs, DISCRETE_L2) << "\n";
            std::cout << " Super-adjoint L2 norm: " << system.calculate_norm(dual_solution, L2) << "\n";
            std::cout << " Super-adjoint discrete L2 norm: " << system.calculate_norm(dual_solution, DISCRETE_L2) << "\n";
            std::cout << "\n\n QoI error estimate: " << std::setprecision(17) << QoI_error_estimate << "\n\n";

            //DEBUG
            std::cout << "\n------------ herp derp ------------" << std::endl;
            //libMesh::out.precision(16);
            //dual_solution.print();
            //system.get_adjoint_rhs().print();

            AutoPtr<NumericVector<Number> > adjresid = system.solution->clone();
            (system.matrix)->vector_mult(*adjresid,system.get_adjoint_solution(0));
            SparseMatrix<Number>& adjmat = *system.matrix;
            (system.matrix)->get_transpose(adjmat);
            adjmat.vector_mult(*adjresid,system.get_adjoint_solution(0));
            //std::cout << "******************** matrix-superadj product (libmesh) ************************" << std::endl;
            //adjresid->print();
            adjresid->add(-1.0, system.get_adjoint_rhs(0));
            //std::cout << "******************** superadjoint system residual (libmesh) ***********************" << std::endl;
            //adjresid->print();
            std::cout << "\n\nadjoint system residual (discrete L2): " << system.calculate_norm(*adjresid,DISCRETE_L2) << std::endl;
            std::cout << "adjoint system residual (L2, all): " << system.calculate_norm(*adjresid,L2) << std::endl;
            std::cout << "adjoint system residual (L2, 0): " << system.calculate_norm(*adjresid,0,L2) << std::endl;
            std::cout << "adjoint system residual (L2, 1): " << system.calculate_norm(*adjresid,1,L2) << std::endl;
            std::cout << "adjoint system residual (L2, 2): " << system.calculate_norm(*adjresid,2,L2) << std::endl;
            std::cout << "adjoint system residual (L2, 3): " << system.calculate_norm(*adjresid,3,L2) << std::endl;
            std::cout << "adjoint system residual (L2, 4): " << system.calculate_norm(*adjresid,4,L2) << std::endl;
            std::cout << "adjoint system residual (L2, 5): " << system.calculate_norm(*adjresid,5,L2) << std::endl;
            /*
            	AutoPtr<NumericVector<Number> > sadj_matlab = system.solution->clone();
            	AutoPtr<NumericVector<Number> > adjresid_matlab = system.solution->clone();
            	if(FILE *fp=fopen("superadj_matlab.txt","r")){
              	Real value;
              	int counter = 0;
              	int flag = 1;
              	while(flag != -1){
              		flag = fscanf(fp,"%lf",&value);
              		if(flag != -1){
            				sadj_matlab->set(counter, value);
            				counter += 1;
              		}
              	}
              	fclose(fp);
            	}
            	(system.matrix)->vector_mult(*adjresid_matlab,*sadj_matlab);
            	//std::cout << "******************** matrix-superadj product (matlab) ***********************" << std::endl;
            	//adjresid_matlab->print();
            	adjresid_matlab->add(-1.0, system.get_adjoint_rhs(0));
            	//std::cout << "******************** superadjoint system residual (matlab) ***********************" << std::endl;
            	//adjresid_matlab->print();
            	std::cout << "\n\nmatlab import adjoint system residual (discrete L2): " << system.calculate_norm(*adjresid_matlab,DISCRETE_L2) << "\n" << std::endl;
            */
            /*
            	AutoPtr<NumericVector<Number> > sadj_fwd_hack = system.solution->clone();
            	AutoPtr<NumericVector<Number> > adjresid_fwd_hack = system.solution->clone();
            	if(FILE *fp=fopen("superadj_forward_hack.txt","r")){
              	Real value;
              	int counter = 0;
              	int flag = 1;
              	while(flag != -1){
              		flag = fscanf(fp,"%lf",&value);
              		if(flag != -1){
            				sadj_fwd_hack->set(counter, value);
            				counter += 1;
              		}
              	}
              	fclose(fp);
            	}
            	(system.matrix)->vector_mult(*adjresid_fwd_hack,*sadj_fwd_hack);
            	//std::cout << "******************** matrix-superadj product (fwd_hack) ***********************" << std::endl;
            	//adjresid_fwd_hack->print();
            	adjresid_fwd_hack->add(-1.0, system.get_adjoint_rhs(0));
            	//std::cout << "******************** superadjoint system residual (fwd_hack) ***********************" << std::endl;
            	//adjresid_fwd_hack->print();
            	std::cout << "\n\nfwd_hack import adjoint system residual (discrete L2): " << system.calculate_norm(*adjresid_fwd_hack,DISCRETE_L2) << "\n" << std::endl;
            	std::cout << "fwd_hack adjoint system residual (L2, 0): " << system.calculate_norm(*adjresid_fwd_hack,0,L2) << std::endl;
            	std::cout << "fwd_hack adjoint system residual (L2, 1): " << system.calculate_norm(*adjresid_fwd_hack,1,L2) << std::endl;
            	std::cout << "fwd_hack adjoint system residual (L2, 2): " << system.calculate_norm(*adjresid_fwd_hack,2,L2) << std::endl;
            	std::cout << "fwd_hack adjoint system residual (L2, 3): " << system.calculate_norm(*adjresid_fwd_hack,3,L2) << std::endl;
            	std::cout << "fwd_hack adjoint system residual (L2, 4): " << system.calculate_norm(*adjresid_fwd_hack,4,L2) << std::endl;
            	std::cout << "fwd_hack adjoint system residual (L2, 5): " << system.calculate_norm(*adjresid_fwd_hack,5,L2) << std::endl;
            */
            //std::cout << "************************ system.matrix ***********************" << std::endl;
            //system.matrix->print();

            std::cout << "\n------------ herp derp ------------" << std::endl;

            // The cell wise breakdown
            ErrorVector cell_wise_error;
            cell_wise_error.resize((system.rhs)->size());
            for(unsigned int i = 0; i < (system.rhs)->size() ; i++)
            {
                if(i < system.get_mesh().n_elem())
                    cell_wise_error[i] = fabs(-0.5*((system.rhs)->el(i) * dual_solution(i))
                                              + system.get_MHF_psiLF(i) - system.get_MLF_psiLF(i));
                else
                    cell_wise_error[i] = fabs(-0.5*((system.rhs)->el(i) * dual_solution(i)));

                /*csv from 'save data' from gmv output gives a few values at each node point (value
                for every element that shares that node), yet paraview display only seems to show one
                of them -> the value in an element is given at each of the nodes that it has, hence the
                repetition; what is displayed in paraview is each element's value; even though MHF_psiLF
                and MLF_psiLF are stored by element this seems to give elemental contributions that
                agree with if we had taken the superadj-residual dot product by integrating over elements*/

                /*at higher mesh resolutions and lower k, weird-looking artifacts start to appear and
                it no longer agrees with output from manual integration of superadj-residual...*/
            }
            // Plot it
            std::ostringstream error_gmv;
            error_gmv << "error.gmv";
            cell_wise_error.plot_error(error_gmv.str(), equation_systems.get_mesh());

            //alternate element-wise breakdown, outputed as values matched to element centroids; for matlab plotz
            primal_solution.swap(dual_solution);
            system.postprocess(1);
            primal_solution.swap(dual_solution);
            system.postprocess(2);
            std::cout << "\n\n -0.5*M'_HF(psiLF)(superadj): " << std::setprecision(17) << system.get_half_adj_weighted_resid() << "\n";
            primal_solution.swap(dual_solution);

            std::string write_error_here = infileForMesh("error_est_output_file", "error_est_breakdown.dat");
            std::ofstream output(write_error_here);
            for(unsigned int i = 0 ; i < system.get_mesh().n_elem(); i++) {
                Point elem_cent = system.get_mesh().elem(i)->centroid();
                if(output.is_open()) {
                    output << elem_cent(0) << " " << elem_cent(1) << " "
                           << fabs(system.get_half_adj_weighted_resid(i) + system.get_MHF_psiLF(i) - system.get_MLF_psiLF(i)) << "\n";
                }
            }
            output.close();

        } // End if at max adaptive steps

#ifdef LIBMESH_HAVE_EXODUS_API
        // Write out this timestep if we're requested to
        if ((t_step+1)%write_interval == 0)
        {
            std::ostringstream file_name;
            /*
                // We write the file in the ExodusII format.
                file_name << "out_"
                          << std::setw(3)
                          << std::setfill('0')
                          << std::right
                          << t_step+1
                          << ".e";
            			//this should write out the primal which should be the same as what's read in...
            			ExodusII_IO(mesh).write_timestep(file_name.str(),
            							                        equation_systems,
            							                        1, //number of time steps written to file
            							                        system.time);
            */
        }
#endif // #ifdef LIBMESH_HAVE_EXODUS_API
    }

    // All done.
    return 0;

} //end main
	void potential_and_gradient(const Eigen::VectorXd& parameters, const Eigen::VectorXd& hyperparameters, View& view, double& potential, Eigen::VectorXd& gradient)
	{
		// Loop over layers to calculate weights part of potential, and non-data part of gradient
		potential = 0;
		for (size_t layer_idx = 0; layer_idx < count_weights_layers(); layer_idx++) {
			//potential -= 0.5 * (hyperparameters[layer_idx * 2] * weights(parameters, layer_idx).squaredNorm() + hyperparameters[layer_idx * 2 + 1] * biases(parameters, layer_idx).squaredNorm());
			potential -= 0.5 * (hyperparameters[0] * weights(parameters, layer_idx).squaredNorm() + hyperparameters[1] * biases(parameters, layer_idx).squaredNorm());

			// TODO: Debugging here!
			//weights(gradient, layer_idx) = (weights(parameters, layer_idx).array() * -hyperparameters[layer_idx * 2]).matrix();
			//biases(gradient, layer_idx) = (biases(parameters, layer_idx).array() * -hyperparameters[layer_idx * 2 + 1]).matrix();
			weights(gradient, layer_idx) = (weights(parameters, layer_idx).array() * -hyperparameters[0]).matrix();
			biases(gradient, layer_idx) = (biases(parameters, layer_idx).array() * -hyperparameters[1]).matrix();
		}

		/*if (std::isnan(gradient[0])) {
			std::cout << gradient[0] << std::endl;
		}*/

		// Calculate output part of potential and gradient
		for (size_t data_idx = 0; data_idx < view.size(); data_idx++) {
			// Get the class label for this observation
			size_t class_idx = get_nonzero_idx(view.second(data_idx));

			// Calculate the output for this sample, and the gradient of the output with respect to the parameters
			// gradient_and_output(size_t variable_idx, const Eigen::VectorXd& inputs, const Eigen::VectorXd& parameters, Eigen::VectorXd& outputs, Eigen::VectorXd& gradient_vector)

			/*if (std::isnan(temp_gradient_[0])) {
				std::cout << temp_gradient_[0] << std::endl;
			}*/

			log_gradient_and_output(class_idx, view.first(data_idx), parameters, outputs(), temp_gradient_);

			//if (outputs()[class_idx] != 0.)
				gradient = gradient + temp_gradient_;

			
			/*if (std::isnan(temp_gradient_[0])) {
				std::cout << temp_gradient_[0] << std::endl;
			}

			if (std::isnan(gradient[0])) {
				std::cout << gradient[0] << std::endl;
			}*/

			//if ()
			// NOTE: Does it matter here when -E[theta] = -INF?
			//potential += log(outputs()[class_idx]);
			potential += outputs()[class_idx];
		}

		// DEBUG: Check that all entries are finite and not NaN
		/*if (!std::isfinite(potential)) {
			if (std::isnan(potential))
				std::cout << "NaN: Potential" << std::endl;
			else if (std::isinf(potential))
				std::cout << "INF: Potential" << std::endl;
		}
		for (size_t idx = 0; idx < static_cast<size_t>(gradient.size()); idx++) {
			if (!std::isfinite(gradient[idx])) {
				if (std::isnan(gradient[idx]))
					std::cout << "NaN: Gradient[" << idx << "]" << std::endl;
				else if (std::isinf(gradient[idx]))
					std::cout << "NaN: Gradient[" << idx << "]" << std::endl;
			}
		}*/
	}
RooWorkspace* makeInvertedANFit(TTree* tree, float forceSigma=-1, bool constrainMu=false, float forceMu=-1) {
  RooWorkspace *ws = new RooWorkspace("ws","");

  std::vector< TString (*)(TString, RooRealVar&, RooWorkspace&) > bkgPdfList;
  bkgPdfList.push_back(makeSingleExp);
  bkgPdfList.push_back(makeDoubleExp);
#if DEBUG==0
  //bkgPdfList.push_back(makeTripleExp);
  bkgPdfList.push_back(makeModExp);
  bkgPdfList.push_back(makeSinglePow);
  bkgPdfList.push_back(makeDoublePow);
  bkgPdfList.push_back(makePoly2);
  bkgPdfList.push_back(makePoly3);
#endif



  RooRealVar mgg("mgg","m_{#gamma#gamma}",103,160,"GeV");
  mgg.setBins(38);

  mgg.setRange("sideband_low", 103,120);
  mgg.setRange("sideband_high",131,160);
  mgg.setRange("signal",120,131);

  RooRealVar MR("MR","",0,3000,"GeV");
  MR.setBins(60);
  
  RooRealVar Rsq("t1Rsq","",0,1,"GeV");
  Rsq.setBins(20);

  RooRealVar hem1_M("hem1_M","",-1,2000,"GeV");
  hem1_M.setBins(40);

  RooRealVar hem2_M("hem2_M","",-1,2000,"GeV");
  hem2_M.setBins(40);

  RooRealVar ptgg("ptgg","p_{T}^{#gamma#gamma}",0,500,"GeV");
  ptgg.setBins(50);

  RooDataSet data("data","",tree,RooArgSet(mgg,MR,Rsq,hem1_M,hem2_M,ptgg));

  RooDataSet* blind_data = (RooDataSet*)data.reduce("mgg<121 || mgg>130");

  std::vector<TString> tags;
  //fit many different background models
  for(auto func = bkgPdfList.begin(); func != bkgPdfList.end(); func++) {
    TString tag = (*func)("bonly",mgg,*ws);
    tags.push_back(tag);
    ws->pdf("bonly_"+tag+"_ext")->fitTo(data,RooFit::Strategy(0),RooFit::Extended(kTRUE),RooFit::Range("sideband_low,sideband_high"));
    RooFitResult* bres = ws->pdf("bonly_"+tag+"_ext")->fitTo(data,RooFit::Strategy(2),RooFit::Save(kTRUE),RooFit::Extended(kTRUE),RooFit::Range("sideband_low,sideband_high"));
    bres->SetName(tag+"_bonly_fitres");
    ws->import(*bres);

    //make blinded fit
    RooPlot *fmgg_b = mgg.frame();
    blind_data->plotOn(fmgg_b,RooFit::Range("sideband_low,sideband_high"));
    TBox blindBox(121,fmgg_b->GetMinimum()-(fmgg_b->GetMaximum()-fmgg_b->GetMinimum())*0.015,130,fmgg_b->GetMaximum());
    blindBox.SetFillColor(kGray);
    fmgg_b->addObject(&blindBox);
    ws->pdf("bonly_"+tag+"_ext")->plotOn(fmgg_b,RooFit::LineColor(kRed),RooFit::Range("Full"),RooFit::NormRange("sideband_low,sideband_high"));
    fmgg_b->SetName(tag+"_blinded_frame");
    ws->import(*fmgg_b);
    delete fmgg_b;
    

    //set all the parameters constant
    RooArgSet* vars = ws->pdf("bonly_"+tag)->getVariables();
    RooFIter iter = vars->fwdIterator();
    RooAbsArg* a;
    while( (a = iter.next()) ){
      if(string(a->GetName()).compare("mgg")==0) continue;
      static_cast<RooRealVar*>(a)->setConstant(kTRUE);
    }

    //make the background portion of the s+b fit
    (*func)("b",mgg,*ws);

    RooRealVar sigma(tag+"_s_sigma","",5,0,100);
    if(forceSigma!=-1) {
      sigma.setVal(forceSigma);
      sigma.setConstant(true);
    }
    RooRealVar mu(tag+"_s_mu","",126,120,132);
    if(forceMu!=-1) {
      mu.setVal(forceMu);
      mu.setConstant(true);
    }
    RooGaussian sig(tag+"_sig_model","",mgg,mu,sigma);
    RooRealVar Nsig(tag+"_sb_Ns","",5,0,100);
    RooRealVar Nbkg(tag+"_sb_Nb","",100,0,100000);
    

    RooRealVar HiggsMass("HiggsMass","",125.1);
    RooRealVar HiggsMassError("HiggsMassError","",0.24);
    RooGaussian HiggsMassConstraint("HiggsMassConstraint","",mu,HiggsMass,HiggsMassError);


    RooAddPdf fitModel(tag+"_sb_model","",RooArgList( *ws->pdf("b_"+tag), sig ),RooArgList(Nbkg,Nsig));

    RooFitResult* sbres;
    RooAbsReal* nll;
    if(constrainMu) {
      fitModel.fitTo(data,RooFit::Strategy(0),RooFit::Extended(kTRUE),RooFit::ExternalConstraints(RooArgSet(HiggsMassConstraint)));
      sbres = fitModel.fitTo(data,RooFit::Strategy(2),RooFit::Save(kTRUE),RooFit::Extended(kTRUE),RooFit::ExternalConstraints(RooArgSet(HiggsMassConstraint)));
      nll = fitModel.createNLL(data,RooFit::NumCPU(4),RooFit::Extended(kTRUE),RooFit::ExternalConstraints(RooArgSet(HiggsMassConstraint)));
    } else {
      fitModel.fitTo(data,RooFit::Strategy(0),RooFit::Extended(kTRUE));
      sbres = fitModel.fitTo(data,RooFit::Strategy(2),RooFit::Save(kTRUE),RooFit::Extended(kTRUE));
      nll = fitModel.createNLL(data,RooFit::NumCPU(4),RooFit::Extended(kTRUE));
    }
    sbres->SetName(tag+"_sb_fitres");
    ws->import(*sbres);
    ws->import(fitModel);

    RooPlot *fmgg = mgg.frame();
    data.plotOn(fmgg);
    fitModel.plotOn(fmgg);
    ws->pdf("b_"+tag+"_ext")->plotOn(fmgg,RooFit::LineColor(kRed),RooFit::Range("Full"),RooFit::NormRange("Full"));
    fmgg->SetName(tag+"_frame");
    ws->import(*fmgg);
    delete fmgg;


    RooMinuit(*nll).migrad();

    RooPlot *fNs = Nsig.frame(0,25);
    fNs->SetName(tag+"_Nsig_pll");
    RooAbsReal *pll = nll->createProfile(Nsig);
    //nll->plotOn(fNs,RooFit::ShiftToZero(),RooFit::LineColor(kRed));
    pll->plotOn(fNs);
    ws->import(*fNs);

    delete fNs;

    RooPlot *fmu = mu.frame(125,132);
    fmu->SetName(tag+"_mu_pll");
    RooAbsReal *pll_mu = nll->createProfile(mu);
    pll_mu->plotOn(fmu);
    ws->import(*fmu);

    delete fmu;

  }

  RooArgSet weights("weights");
  RooArgSet pdfs_bonly("pdfs_bonly");
  RooArgSet pdfs_b("pdfs_b");

  RooRealVar minAIC("minAIC","",1E10);
  //compute AIC stuff
  for(auto t = tags.begin(); t!=tags.end(); t++) {
    RooAbsPdf *p_bonly = ws->pdf("bonly_"+*t);
    RooAbsPdf *p_b = ws->pdf("b_"+*t);
    RooFitResult *sb = (RooFitResult*)ws->obj(*t+"_bonly_fitres");
    RooRealVar k(*t+"_b_k","",p_bonly->getParameters(RooArgSet(mgg))->getSize());
    RooRealVar nll(*t+"_b_minNll","",sb->minNll());
    RooRealVar Npts(*t+"_b_N","",blind_data->sumEntries());
    RooFormulaVar AIC(*t+"_b_AIC","2*@0+2*@1+2*@1*(@1+1)/(@2-@1-1)",RooArgSet(nll,k,Npts));
    ws->import(AIC);
    if(AIC.getVal() < minAIC.getVal()) {
      minAIC.setVal(AIC.getVal());
    }
    //aicExpSum+=TMath::Exp(-0.5*AIC.getVal()); //we will need this precomputed  for the next step
    pdfs_bonly.add(*p_bonly);
    pdfs_b.add(*p_b);
  }
  ws->import(minAIC);
  //compute the AIC weight
  float aicExpSum=0;
  for(auto t = tags.begin(); t!=tags.end(); t++) {
    RooFormulaVar *AIC = (RooFormulaVar*)ws->obj(*t+"_b_AIC");
    aicExpSum+=TMath::Exp(-0.5*(AIC->getVal()-minAIC.getVal())); //we will need this precomputed  for the next step    
  }
  std::cout << "aicExpSum: " << aicExpSum << std::endl;

  for(auto t = tags.begin(); t!=tags.end(); t++) {
    RooFormulaVar *AIC = (RooFormulaVar*)ws->obj(*t+"_b_AIC");
    RooRealVar *AICw = new RooRealVar(*t+"_b_AICWeight","",TMath::Exp(-0.5*(AIC->getVal()-minAIC.getVal()))/aicExpSum);
    if( TMath::IsNaN(AICw->getVal()) ) {AICw->setVal(0);}
    ws->import(*AICw);
    std::cout << *t << ":  " << AIC->getVal()-minAIC.getVal() << "    " << AICw->getVal() << std::endl;
    weights.add(*AICw);
  }
  RooAddPdf bonly_AIC("bonly_AIC","",pdfs_bonly,weights);
  RooAddPdf b_AIC("b_AIC","",pdfs_b,weights);

  //b_AIC.fitTo(data,RooFit::Strategy(0),RooFit::Extended(kTRUE),RooFit::Range("sideband_low,sideband_high"));
  //RooFitResult* bres = b_AIC.fitTo(data,RooFit::Strategy(2),RooFit::Save(kTRUE),RooFit::Extended(kTRUE),RooFit::Range("sideband_low,sideband_high"));
  //bres->SetName("AIC_b_fitres");
  //ws->import(*bres);

  //make blinded fit
  RooPlot *fmgg_b = mgg.frame(RooFit::Range("sideband_low,sideband_high"));
  blind_data->plotOn(fmgg_b,RooFit::Range("sideband_low,sideband_high"));
  TBox blindBox(121,fmgg_b->GetMinimum()-(fmgg_b->GetMaximum()-fmgg_b->GetMinimum())*0.015,130,fmgg_b->GetMaximum());
  blindBox.SetFillColor(kGray);
  fmgg_b->addObject(&blindBox);
  bonly_AIC.plotOn(fmgg_b,RooFit::LineColor(kRed),RooFit::Range("Full"),RooFit::NormRange("sideband_low,sideband_high"));
  fmgg_b->SetName("AIC_blinded_frame");
  ws->import(*fmgg_b);
  delete fmgg_b;
    
#if 1

  RooRealVar sigma("AIC_s_sigma","",5,0,100);
  if(forceSigma!=-1) {
    sigma.setVal(forceSigma);
    sigma.setConstant(true);
  }
  RooRealVar mu("AIC_s_mu","",126,120,132);
  if(forceMu!=-1) {
    mu.setVal(forceMu);
    mu.setConstant(true);
  }
  RooGaussian sig("AIC_sig_model","",mgg,mu,sigma);
  RooRealVar Nsig("AIC_sb_Ns","",5,0,100);
  RooRealVar Nbkg("AIC_sb_Nb","",100,0,100000);
  
  
  RooRealVar HiggsMass("HiggsMass","",125.1);
  RooRealVar HiggsMassError("HiggsMassError","",0.24);
  RooGaussian HiggsMassConstraint("HiggsMassConstraint","",mu,HiggsMass,HiggsMassError);
  
  
  RooAddPdf fitModel("AIC_sb_model","",RooArgList( b_AIC, sig ),RooArgList(Nbkg,Nsig));

  RooFitResult* sbres;
  RooAbsReal *nll;

  if(constrainMu) {
    fitModel.fitTo(data,RooFit::Strategy(0),RooFit::Extended(kTRUE),RooFit::ExternalConstraints(RooArgSet(HiggsMassConstraint)));
    sbres = fitModel.fitTo(data,RooFit::Strategy(2),RooFit::Save(kTRUE),RooFit::Extended(kTRUE),RooFit::ExternalConstraints(RooArgSet(HiggsMassConstraint)));
    nll = fitModel.createNLL(data,RooFit::NumCPU(4),RooFit::Extended(kTRUE),RooFit::ExternalConstraints(RooArgSet(HiggsMassConstraint)));
  } else {
    fitModel.fitTo(data,RooFit::Strategy(0),RooFit::Extended(kTRUE));
    sbres = fitModel.fitTo(data,RooFit::Strategy(2),RooFit::Save(kTRUE),RooFit::Extended(kTRUE));
    nll = fitModel.createNLL(data,RooFit::NumCPU(4),RooFit::Extended(kTRUE));
  }

  assert(nll!=0);
  
  sbres->SetName("AIC_sb_fitres");
  ws->import(*sbres);
  ws->import(fitModel);
  
  RooPlot *fmgg = mgg.frame();
  data.plotOn(fmgg);
  fitModel.plotOn(fmgg);
  ws->pdf("b_AIC")->plotOn(fmgg,RooFit::LineColor(kRed),RooFit::Range("Full"),RooFit::NormRange("Full"));
  fmgg->SetName("AIC_frame");
  ws->import(*fmgg);
  delete fmgg;

  RooMinuit(*nll).migrad();
  
  RooPlot *fNs = Nsig.frame(0,25);
  fNs->SetName("AIC_Nsig_pll");
  RooAbsReal *pll = nll->createProfile(Nsig);
  //nll->plotOn(fNs,RooFit::ShiftToZero(),RooFit::LineColor(kRed));
  pll->plotOn(fNs);
  ws->import(*fNs);
  delete fNs;


  RooPlot *fmu = mu.frame(125,132);
  fmu->SetName("AIC_mu_pll");
  RooAbsReal *pll_mu = nll->createProfile(mu);
  pll_mu->plotOn(fmu);
  ws->import(*fmu);

  delete fmu;

  std::cout << "min AIC: " << minAIC.getVal() << std::endl;
  for(auto t = tags.begin(); t!=tags.end(); t++) {
    RooFormulaVar *AIC = (RooFormulaVar*)ws->obj(*t+"_b_AIC");
    RooRealVar *AICw = ws->var(*t+"_b_AICWeight");
    RooRealVar* k = ws->var(*t+"_b_k");
    printf("%s & %0.0f & %0.2f & %0.2f \\\\\n",t->Data(),k->getVal(),AIC->getVal()-minAIC.getVal(),AICw->getVal());
    //std::cout << k->getVal() << " " << AIC->getVal()-minAIC.getVal() << " " << AICw->getVal() << std::endl;
  }
#endif
  return ws;
}
Example #27
0
bool CollisionModel::sampleForR(int seed, const std::string &link_name, const Eigen::Vector2d &r, Eigen::Vector3d &p, Eigen::Vector4d &q) const {
    std::map<std::string, LinkCollisionModel >::const_iterator it = link_models_map_.find( link_name );
    if (it == link_models_map_.end()) {
        return false;
    }
    const std::vector<Feature > &features = it->second.features_;
    std::mt19937 gen_(seed);

    const int n_points = features.size();
    std::vector<double > weights(n_points, 0.0);

    double result_x, result_y, result_z;
    Eigen::Vector4d result_q;

    // sample the x coordinate
    double sum = 0.0;
    for (int pidx = 0; pidx < n_points; pidx++) {
//            if (std::fabs(r(0) - features[pidx].pc1) > r_dist_max_ || std::fabs(r(1) - features[pidx].pc2) > r_dist_max_ || features[pidx].weight < 0.0000001) {
//                weights[pidx] = 0.0;
//            }
//            else {
        weights[pidx] = features[pidx].weight * biVariateIsotropicGaussianKernel(r, Eigen::Vector2d(features[pidx].pc1, features[pidx].pc2), sigma_r_);
//            }
        sum += weights[pidx];
    }

    Feature random_kernel;
    double rr = randomUniform(0.0, sum);
    for (int pidx = 0; pidx < n_points; pidx++) {
        rr -= weights[pidx];
        if (rr <= 0.0) {
            random_kernel = features[pidx];
            break;
        }
    }

    Eigen::Vector4d mean_q;
    result_x = random_kernel.T_C_F.p.x();
    result_y = random_kernel.T_C_F.p.y();
    result_z = random_kernel.T_C_F.p.z();
    random_kernel.T_C_F.M.GetQuaternion(mean_q(0), mean_q(1), mean_q(2), mean_q(3));

    std::normal_distribution<> d = std::normal_distribution<>(result_x, sigma_p_);
    result_x = d(gen_);
    d = std::normal_distribution<>(result_y, sigma_p_);
    result_y = d(gen_);
    d = std::normal_distribution<>(result_z, sigma_p_);
    result_z = d(gen_);

    int iterations = orientationNormalSample(mean_q, sigma_q_, result_q);
    if (iterations < 0) {
        std::cout << "ERROR: orientationNormalSample" << std::endl;
    }

    p(0) = result_x;
    p(1) = result_y;
    p(2) = result_z;
    q = result_q;

    return true;
}
Example #28
0
void AdaBoost::train(const vector<vector<Mat> > &features)
{
  vector <vector<Mat> > cells = features;

  int neg = 0;
  for (int i = 0; i < triplessize; i++)
    if (triples[i].label == 0)
      neg++;
//  vector<double> weights(labels.size(), 1/(double)(labels.size()));
  vector<double> weights(triplessize, 1/2.0/(triplessize-neg));
  for (int i = 0; i < triplessize; i++)
    if (triples[i].label == 0)
      weights[i] = 1/2.0/neg;

  int preferred = 0;
  double preverror = 0;
  size = 50;
  vector<int> used;

  for (int i = 0; i < size; i++)
  {
    double norm = 0;
    for (int t = 0; t < weights.size(); t++)
      norm += weights[t];

    for (int t = 0; t < weights.size(); t++)
      weights[t] = weights[t] / norm;

    double error = DBL_MAX;
    vector<int> correctness(triplessize);

    bool flag = false;
#pragma omp parallel for
    for (int k = 0; k < weak.size(); k++)
    {
      double currerror = 0;
      for (int j = 0; j < triplessize; j++)
      {
        double corr = weak[k].classify(cells[j][k]);
        currerror += weights[j] * std::fabs(corr - triples[j].label);
      }
      if (currerror < error)
      {
        error = currerror;
        preferred = k;
      }
    }
    for (int j = 0; j < triplessize; j++)
    {
      double corr = weak[preferred].classify(cells[j][preferred]);
      if (corr > 1/2.0 && triples[j].label == 1)
        correctness[j] = 1;
      else if (corr < 1/2.0 && triples[j].label == 0)
        correctness[j] = 1;
      else correctness[j] = 0;
    }
for (int g = 0; g < used.size(); g++)
      if (used[g] == preferred)
        flag = true;
    
    if (!flag) std::cout << "round " << i << " error: " << error << " preferred: " << preferred << " " << std::endl;
//    if (flag) std::cout << "not used" << std::endl;

    if (!flag)
    {
      prefclass.push_back(weak[preferred]);
      betha.push_back(error / (1 - error));
      used.push_back(preferred);
    }
    for (int j = 0; j < weights.size(); j++)
      if (correctness[j])
        weights[j] = weights[j] * (error / (1 - error));
  }
  alphas = 0;
  for (int i = 0; i < betha.size(); i++)
  {
    alpha.push_back(-std::log(betha[i]));
    alphas += alpha[i];
  }
  params.erase(params.begin(), params.end());
  for (int i = 0; i < prefclass.size(); i++)
  {
    params.push_back(prefclass[i].getParams());
//    if (i < 10)
//      prefclass[i].drawLut();
  }
  size = prefclass.size();
}
Example #29
0
int test_Matrix::serialtest3()
{
  testData* testdata = new testData(localProc_, numProcs_);
  std::vector<int>& fieldIDs = testdata->fieldIDs;
  std::vector<int>& fieldSizes = testdata->fieldSizes;
  std::vector<int>& idTypes = testdata->idTypes;
  std::vector<int>& ids = testdata->ids;

  fei::SharedPtr<fei::VectorSpace> vspc(new fei::VectorSpace(comm_, "sU_Mat3"));

  vspc->defineFields(fieldIDs.size(), &fieldIDs[0], &fieldSizes[0]);

  vspc->defineIDTypes(idTypes.size(), &idTypes[0]);

  fei::SharedPtr<fei::MatrixGraph>
    matgraph(new fei::MatrixGraph_Impl2(vspc, vspc, "sU_Mat3"));

  int numIDs = ids.size();
  int fieldID = fieldIDs[0];
  int idType = idTypes[0];

  int patternID = matgraph->definePattern(numIDs, idType, fieldID);

  CHK_ERR( matgraph->initConnectivityBlock(0, 1, patternID) );

  CHK_ERR( matgraph->initConnectivity(0, 0, &ids[0]) );

  //set up a slave constraint that defines id 2, field 0 to be equal to
  //id 1, field 0.
  int offsetOfSlave = 1;
  int offsetIntoSlaveField = 0;
  std::vector<double> weights(2);
  weights[0] = 1.0;
  weights[1] = -1.0;
  double rhsValue = 0.0;
  std::vector<int> cr_idtypes(2, idTypes[0]);
  std::vector<int> cr_fieldIDs(2, fieldIDs[0]);

  CHK_ERR( matgraph->initSlaveConstraint(2, //numIDs
					&cr_idtypes[0],
					&ids[1],
					&cr_fieldIDs[0],
					offsetOfSlave,
					offsetIntoSlaveField,
					&weights[0],
					rhsValue) );

  CHK_ERR( matgraph->initComplete() );

  fei::SharedPtr<fei::FillableMat> ssmat(new fei::FillableMat);
  int localsize = matgraph->getRowSpace()->getNumIndices_Owned();
  localsize -= 1;//subtract the slave
  fei::Matrix* matrix = new fei::Matrix_Impl<fei::FillableMat>(ssmat, matgraph, localsize);

  if (matrix == NULL) {
    ERReturn(-1);
  }

  std::vector<int> indices(numIDs);
  CHK_ERR( matgraph->getConnectivityIndices(0, 0, numIDs,
					   &indices[0], numIDs) );

  std::vector<double> data1(numIDs*numIDs);
  std::vector<double*> data2d(numIDs);

  int i;
  for(i=0; i<numIDs; ++i) {
    data2d[i] = &(data1[i*numIDs]);
  }

  for(i=0; i<numIDs*numIDs; ++i) {
    data1[i] = 1.0*i;
  }

  CHK_ERR( matrix->sumIn(numIDs, &indices[0],
			 numIDs, &indices[0], &data2d[0], 0) );

  CHK_ERR( matrix->sumIn(0, 0, &data2d[0], 0) );

  delete matrix;
  delete testdata;

  return(0);
}
void QCAD::ResponseFieldValue<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  ScalarT opVal, qpVal, cellVol;

  if(!opRegion->elementBlockIsInRegion(workset.EBName))
    return;

  for (std::size_t cell=0; cell < workset.numCells; ++cell)
  {
    if(!opRegion->cellIsInRegion(cell)) continue;

    // Get the cell volume, used for averaging over a cell
    cellVol = 0.0;
    for (std::size_t qp=0; qp < numQPs; ++qp)
      cellVol += weights(cell,qp);

    // Get the scalar value of the field being operated on which will be used
    //  in the operation (all operations just deal with scalar data so far)
    opVal = 0.0;
    for (std::size_t qp=0; qp < numQPs; ++qp) {
      qpVal = 0.0;
      if(bOpFieldIsVector) {
        if(opX) qpVal += opField(cell,qp,0) * opField(cell,qp,0);
        if(opY) qpVal += opField(cell,qp,1) * opField(cell,qp,1);
        if(opZ) qpVal += opField(cell,qp,2) * opField(cell,qp,2);
      }
      else qpVal = opField(cell,qp);
      opVal += qpVal * weights(cell,qp);
    }
    opVal /= cellVol;
    // opVal = the average value of the field operated on over the current cell


    // Check if the currently stored min/max value needs to be updated
    if( (operation == "Maximize" && opVal > this->global_response[1]) ||
        (operation == "Minimize" && opVal < this->global_response[1]) ) {
      max_nodeID = workset.wsElNodeEqID[cell];

      // set g[0] = value of return field at the current cell (avg)
      this->global_response[0]=0.0;
      if(bReturnOpField) {
        for (std::size_t qp=0; qp < numQPs; ++qp) {
          qpVal = 0.0;
          if(bOpFieldIsVector) {
            for(std::size_t i=0; i<numDims; i++) {
              qpVal += opField(cell,qp,i)*opField(cell,qp,i);
            }
          }
          else qpVal = opField(cell,qp);
          this->global_response[0] += qpVal * weights(cell,qp);
        }
      }
      else {
        for (std::size_t qp=0; qp < numQPs; ++qp) {
          qpVal = 0.0;
          if(bRetFieldIsVector) {
            for(std::size_t i=0; i<numDims; i++) {
              qpVal += retField(cell,qp,i)*retField(cell,qp,i);
            }
          }
          else qpVal = retField(cell,qp);
          this->global_response[0] += qpVal * weights(cell,qp);
        }
      }
      this->global_response[0] /= cellVol;

      // set g[1] = value of the field operated on at the current cell (avg)
      this->global_response[1] = opVal;

      // set g[2+] = average qp coordinate values of the current cell
      for(std::size_t i=0; i<numDims; i++) {
        this->global_response[i+2] = 0.0;
        for (std::size_t qp=0; qp < numQPs; ++qp)
          this->global_response[i+2] += coordVec(cell,qp,i);
        this->global_response[i+2] /= numQPs;
      }
    }

  } // end of loop over cells

  // No local scattering
}