/*------------------------------------------------------------------*/
/*
 * Writes the optimizer's current solution into the tree ensemble:
 *   - the weights of every tree in the ensemble are reset,
 *   - the ensemble constant is set from constant(),
 *   - every tree feature whose weight is non-zero has that weight
 *     stored with setWeight(nx, w) on tree number tx, where (tx, nx)
 *     come from the feature's AzTrTreeFeatInfo.
 */
void AzOptOnTree::updateTreeWeights(AzRgfTreeEnsemble *ens) const
{
  /* start from a clean slate */
  const int tree_count = ens->size();
  for (int tree_idx = 0; tree_idx < tree_count; ++tree_idx) {
    ens->tree_u(tree_idx)->resetWeights();
  }

  const double *feat_weight = weights()->point();
  double ensemble_constant = constant();
  ens->set_constant(ensemble_constant);

  /* features with a zero weight are skipped entirely */
  const int feat_count = tree_feat->featNum();
  for (int feat_idx = 0; feat_idx < feat_count; ++feat_idx) {
    if (feat_weight[feat_idx] == 0) {
      continue;
    }
    const AzTrTreeFeatInfo *info = tree_feat->featInfo(feat_idx);
    ens->tree_u(info->tx)->setWeight(info->nx, feat_weight[feat_idx]);
  }
}
// This is inherently inefficient as this function will be called n times (up to 4 times) // for every overlapping pixel. I'm doing this as to reduce memory use. Since it will // only be done once, it may not matter too much. float IdMap::calculateWeights(int x, int y, FrameStack* frame, std::vector<FrameStack*> stacks, float pixelFactor){ std::vector<float> weights(stacks.size()); std::vector<int> border_distances(stacks.size()); int maxBorderDistance = 0; for(uint i=0; i < stacks.size(); ++i){ border_distances[i] = stacks[i]->nearestBorderGlobal(x, y); maxBorderDistance = border_distances[i] > maxBorderDistance ? border_distances[i] : maxBorderDistance; } // override pixelFactor with MaxBorderDistance pixelFactor = maxBorderDistance ? (float)maxBorderDistance : pixelFactor; int offset = -1; float w_sum = 0; for(uint i=0; i < stacks.size(); ++i){ if(stacks[i] == frame) offset = i; int border_distance = stacks[i]->nearestBorderGlobal(x, y); weights[i] = border_distance > (int)pixelFactor ? 1.0 : (float)(1 + border_distance) / (1.00 + pixelFactor); weights[i] = weights[i] < 0 ? 0 : weights[i]; w_sum += weights[i]; } // float w_sum = 0; // for(uint i=0; i < weights.size(); ++i) // w_sum += weights[i]; if(!w_sum){ std::cerr << "idMap::calculateWeights: w_sum is 0" << std::endl; std::cerr << "\tx,y: " << x << "," << y << " offset : " << offset << "\tstacks.size() " << stacks.size() << std::endl; exit(1); } for(uint i=0; i < weights.size(); ++i) weights[i] /= w_sum; if(offset < 0){ std::cerr << "idMap::calculateWeights: offset not defined returning 0 weighting" << std::endl; return(0); } return(weights[offset]); }
//! Static function to calculate the weights.
/*!
 * One weight is produced per optimization step (one per inner vector).
 * For a step with acceptance probabilities p_i the unnormalized weight is
 * 1 / sqrt( sum_i 1 / p_i^2 ); the weights are then divided by their sum.
 *
 * \param acceptance_probabilities_optimization acceptance probabilities,
 *        one inner vector per optimization step
 * \return the normalized weights, in the same order as the steps
 */
static std::vector<double> calculate_weights(const std::vector< std::vector<double> >& acceptance_probabilities_optimization)
{
  std::vector<double> weights;
  weights.reserve(acceptance_probabilities_optimization.size());

  // Unnormalized weights and their running total
  double total = 0.0;
  for (const std::vector<double>& step : acceptance_probabilities_optimization)
  {
    double inverse_square_sum = 0.0;
    for (const double probability : step)
      inverse_square_sum += 1.0/pow(probability, 2);

    const double step_weight = 1.0 / sqrt(inverse_square_sum);
    weights.push_back(step_weight);
    total += step_weight;
  }

  // Normalize the weights
  for (double& step_weight : weights)
    step_weight /= total;

  return weights;
}
// Computes one weight per sequence from the symbol counts of each column.
//
// For every position 1..GetLength() of the first sequence, the number of
// distinct symbols in that column ("diversity") and the number of occurrences
// of each symbol are tallied. Each sequence then gains
// 1 / (diversity * occurrences of its own symbol). Finally the weight vector
// is divided by Sum(weights).
std::vector<double> MultiSequence::ComputePositionBasedSequenceWeights() const
{
  std::vector<double> weights(sequences.size(), 0.0);
  std::vector<int> counts(256);

  // Positions start at 1; index 0 of GetData() is never read here.
  for (int pos = 1; pos <= sequences[0]->GetLength(); pos++)
  {
    // Tally the column: a symbol seen for the first time raises the diversity.
    counts.assign(counts.size(), 0);
    int diversity = 0;
    for (size_t s = 0; s < sequences.size(); s++)
    {
      int &seen = counts[BYTE(sequences[s]->GetData()[pos])];
      if (seen == 0)
        diversity++;
      ++seen;
    }

    // Rarer symbols in a more diverse column contribute more.
    for (size_t s = 0; s < sequences.size(); s++)
    {
      const int occurrences = counts[BYTE(sequences[s]->GetData()[pos])];
      weights[s] += 1.0 / (diversity * occurrences);
    }
  }

  weights /= Sum(weights);
  return weights;
}
// Selects the landmark frames into myframes and, unless novoronoi is set,
// assigns each landmark a weight: every data frame handled by this rank adds
// getWeightOfFrame(i) to the landmark it is closest to (first landmark wins
// ties), and the per-rank totals are summed across the communicator.
void LandmarkSelectionBase::selectLandmarks( MultiReferenceBase* myframes ) {
  // Select landmarks
  myframes->clearFrames();
  select( myframes );
  plumed_assert( myframes->getNumberOfReferenceFrames()==nlandmarks );

  // Voronoi weights were switched off: the selection alone is the result
  if( novoronoi ) return;

  // Now calculate voronoi weights; frames are dealt out round-robin over ranks
  const unsigned myrank=action->comm.Get_rank();
  const unsigned nranks=action->comm.Get_size();
  std::vector<double> weights( nlandmarks, 0.0 );
  for(unsigned iframe=myrank; iframe<action->data.size(); iframe+=nranks) {
    unsigned nearest=0;
    double best=distance( action->getPbc(), action->getArguments(), action->data[iframe], myframes->getFrame(0), false );
    for(unsigned iland=1; iland<nlandmarks; ++iland) {
      const double trial=distance( action->getPbc(), action->getArguments(), action->data[iframe], myframes->getFrame(iland), false );
      if( trial<best ) {
        best=trial;
        nearest=iland;
      }
    }
    weights[nearest] += getWeightOfFrame(iframe);
  }
  action->comm.Sum( &weights[0], weights.size() );
  myframes->setWeights( weights );
}
// Generates seven random points, maps them through `mat`, and asserts that
// both the weighted (all weights 1) and the unweighted overloads of
// procrustesRotationAndTranslation return a matrix that maps every source
// point back onto its image, to within eps (1e-8 when sizeof(T) == 8,
// 1e-4 otherwise).
void
testTranslationRotationMatrix (const IMATH_INTERNAL_NAMESPACE::M44d& mat)
{
    std::cout << "Testing known translate/rotate matrix:\n " << mat;
    typedef IMATH_INTERNAL_NAMESPACE::Vec3<T> Vec;

    // Fixed seed; the generator is static, so it keeps its state across calls.
    static IMATH_INTERNAL_NAMESPACE::Rand48 rand (2047);

    size_t numPoints = 7;
    std::vector<T> weights (numPoints, T(1));
    std::vector<Vec> from;
    std::vector<Vec> to;
    from.reserve (numPoints);
    to.reserve (numPoints);

    for (size_t p = 0; p < numPoints; ++p)
    {
        IMATH_INTERNAL_NAMESPACE::V3d original (rand.nextf(), rand.nextf(), rand.nextf());
        IMATH_INTERNAL_NAMESPACE::V3d moved = original * mat;
        from.push_back (Vec(original));
        to.push_back (Vec(moved));
    }

    const IMATH_INTERNAL_NAMESPACE::M44d m1 =
        procrustesRotationAndTranslation (&from[0], &to[0], &weights[0], numPoints);
    const IMATH_INTERNAL_NAMESPACE::M44d m2 =
        procrustesRotationAndTranslation (&from[0], &to[0], numPoints);

    const T eps = sizeof(T) == 8 ? 1e-8 : 1e-4;
    for (size_t p = 0; p < numPoints; ++p)
    {
        const IMATH_INTERNAL_NAMESPACE::V3d a = from[p];
        const IMATH_INTERNAL_NAMESPACE::V3d b = to[p];
        const IMATH_INTERNAL_NAMESPACE::V3d b1 = a * m1;
        const IMATH_INTERNAL_NAMESPACE::V3d b2 = a * m2;

        assert ((b - b1).length() < eps);
        assert ((b - b2).length() < eps);
    }

    std::cout << " OK\n";
}
void move_particles (std::vector <Particle> *particles, std::vector <double> *field) { auto num_particles = particles->size(); std::vector <double> weights (2); std::vector <int> points (2); for (auto& particle : *particles) { if (CIC){ weighing (&particle, &weights[0]); } else if (ZERO_ORDER){ zero_order_weighing (&particle, &weights[0]); } adjacent_points (&particle, &points[0]); auto left_field = field->at (points [0]) * weights [0]; auto right_field = field->at (points [1]) * weights [1]; auto accel_0 = find_accel(left_field + right_field, &particle); particle.inc_pos (accel_0); if (CIC){ weighing (&particle, &weights[0]); } else if (ZERO_ORDER){ zero_order_weighing (&particle, &weights[0]); } adjacent_points (&particle, &points[0]); left_field = field->at (points [0]) * weights [0]; right_field = field->at (points [1]) * weights [1]; auto accel_1 = find_accel(left_field + right_field, &particle); particle.inc_vel (accel_0, accel_1); } }
void Foam::domainDecomposition::distributeCells() { Info<< "\nCalculating distribution of cells" << endl; cpuTime decompositionTime; autoPtr<decompositionMethod> decomposePtr = decompositionMethod::New ( decompositionDict_ ); scalarField cellWeights; if (decompositionDict_.found("weightField")) { word weightName = decompositionDict_.lookup("weightField"); volScalarField weights ( IOobject ( weightName, time().timeName(), *this, IOobject::MUST_READ, IOobject::NO_WRITE ), *this ); cellWeights = weights.primitiveField(); } cellToProc_ = decomposePtr().decompose(*this, cellWeights); Info<< "\nFinished decomposition in " << decompositionTime.elapsedCpuTime() << " s" << endl; }
// Forward-pass test for WeightMatrix: a 3-input / 2-output matrix is filled
// with known weights and biases, the input layer is fed (-2, 1, 3), and the
// values produced by fire() and seen by the output layer are asserted to be
// 2.3 and -4.6.
void TestWeightMatrix_Forward() {
  int n_outputs = 2;
  InputLayer input_layer(3);
  OutputLayer output_layer(n_outputs);
  WeightMatrix weights(input_layer, output_layer);

  // connection[out][in]: weight from input `in` to output `out`
  const double connection[2][3] = {
    { 0.5, -2.0,  1.5 },
    { 1.0,  0.7, -1.0 }
  };
  for (int out = 0; out < 2; ++out) {
    for (int in = 0; in < 3; ++in) {
      weights.set(in, out, connection[out][in]);
    }
  }

  const double bias[2] = { 0.8, -0.3 };
  for (int out = 0; out < 2; ++out) {
    weights.setBias(out, bias[out]);
  }

  const double raw_inputs[3] = { -2.0, 1.0, 3.0 };
  std::vector<double> inputs(raw_inputs, raw_inputs + 3);
  input_layer.receiveInput(inputs);

  // The weight matrix output is checked first ...
  std::vector<double> transition = weights.fire(input_layer);
  assert(transition.size() == 2);
  assert(transition[0] == 2.3);
  assert(transition[1] == -4.6);

  // ... then what the output layer reports once it has received that output.
  output_layer.receiveInput(transition);
  assert(output_layer.getInput(0) == 2.3);
  assert(output_layer.getInput(1) == -4.6);
  assert(output_layer.getOutput(0) == 2.3);
  assert(output_layer.getOutput(1) == -4.6);

  printPass("TestWeightMatrix_Forward()");
}
// Evaluates the weighted sum of squared differences between every observation
// and the weighted mean of the label it belongs to in partition x.
//
// Pass 1 accumulates, for each used label, the total observation weight and
// the weighted coordinate sums. Pass 2 adds, for every observation i and
// attribute d, weight(i) * (get(i, d) - sum(l, d) / weight(l))^2, where l is
// the label of i.
Double KMAlgo::ComputeMssc(IPartition const & x, KMInstance const & instance) {
	RectMatrix centers(x.maxNbLabels(), instance.nbAtt());
	centers.assign(0);
	DoubleVector weights(x.maxNbLabels(), 0);

	// Pass 1: weighted sums per label (division by the weight happens later)
	for (auto const & label : x.usedLabels()) {
		for (auto const & obs : x.observations(label)) {
			weights[label] += instance.weight(obs);
			for (size_t att(0); att < instance.nbAtt(); ++att) {
				centers.plus(label, att, instance.get(obs, att) * instance.weight(obs));
			}
		}
	}

	// Pass 2: weighted squared deviation from the label mean
	Double result(0);
	for (size_t obs(0); obs < instance.nbObs(); ++obs) {
		size_t const label(x.label(obs));
		for (size_t att(0); att < instance.nbAtt(); ++att) {
			result += instance.weight(obs)
					* std::pow(instance.get(obs, att) - centers.get(label, att) / weights[label], 2);
		}
	}
	return result;
}
double eval(const std::vector<double> &at, bool &valid){ //Allow for the weights to be part of the optimization //by assuming theyre tacked on to the end of the at vector //NOTE: the last weight is the one implied by the 1-sum(others) //Determine where the weights start int offset = this->P->dimDesign; //Separate the set of "Problem" variables const Homotopy::designVars_t vars(at.begin(), at.begin() + offset); Homotopy::designVars_t weights( at.begin() + offset, at.end() ); //Evaluate the objectives with the designVars Homotopy::objVars_t result( this->e.eval( vars, valid ) ); //Return immediately if result is unavailable if(!valid) return 0.0; else { //Calculate the remaining weight and add it weights.push_back( 1.0 - std::accumulate(weights.begin(), weights.end(), 0.0) ); //Return the weighted sum return std::inner_product( weights.begin(), weights.end(), result.begin(), 0.0 ); } }
std::vector<double> computeInterpolationWeights2d(const GenericPoint& thepoint, const Container& pt_v) { if (pt_v.size() !=3) { std::cout << " Compute interpolation weights.. error.. wrong number of points: " << pt_v.size() << std::endl; return std::vector<double>(); } double c1[2], c2[2]; c1[0] = pt_v[1][0] - pt_v[0][0]; c1[1] = pt_v[1][1] - pt_v[0][1]; c2[0] = pt_v[2][0] - pt_v[0][0]; c2[1] = pt_v[2][1] - pt_v[0][1]; double det(det2x2(c1, c2)); if (det == 0.0) { throw gsse::numerical_calculation_error(":: interpolation :: 2D :: coefficients.. determinant to small.. "); } double rhs[2]; std::vector<double> weights(3); rhs[0] = thepoint[0] - pt_v[0][0]; rhs[1] = thepoint[1] - pt_v[0][1]; weights[1] = det2x2(rhs, c2) / det; weights[2] = det2x2(c1, rhs) / det; weights[0] = 1.0 - weights[1] - weights[2]; return weights; // here .. copy constructor // think about it.. [RH] }
// Returns a single 1 x totalWeights() matrix holding a copy of every matrix
// in [begin(), end()) followed by every matrix in [begin_bias(), end_bias()),
// each copied as size() consecutive floats.
Layer::Matrix Layer::getFlattenedWeights() const
{
	Matrix flattened(1, totalWeights());

	// Write cursor into `flattened`, counted in elements
	size_t offset = 0;

	auto weightIt = begin();
	while(weightIt != end())
	{
		std::memcpy(&flattened.data()[offset], &weightIt->data()[0],
			weightIt->size() * sizeof(float));
		offset += weightIt->size();
		++weightIt;
	}

	// Bias matrices follow directly after the weight matrices
	auto biasIt = begin_bias();
	while(biasIt != end_bias())
	{
		std::memcpy(&flattened.data()[offset], &biasIt->data()[0],
			biasIt->size() * sizeof(float));
		offset += biasIt->size();
		++biasIt;
	}

	return flattened;
}
// Prints the labeled scores of scc to os by delegating to
// Debug_PrintLabeledWeightedScores with a weight of 1.0 for every entry of
// scc.m_scores.
void ScoreIndexManager::Debug_PrintLabeledScores(std::ostream& os, const ScoreComponentCollection& scc) const
{
	const size_t scoreCount = scc.m_scores.size();
	std::vector<float> unitWeights(scoreCount, 1.0f);
	Debug_PrintLabeledWeightedScores(os, scc, unitWeights);
}
// Runs the convolution in four phases inside one OpenMP parallel region.
//
// Parameters:
//   inp_ptr, out_ptr - input/output data, viewed as 5-D arrays whose
//                      height/width come from jcp.ih/iw or jcp.oh/ow
//                      depending on is_fwd.
//   wei_ptr          - weights, viewed as a 6-D array; when jcp.prop_kind is
//                      forward_inference this pointer is also used directly
//                      as the U buffer and the weight transform is skipped.
//   bias_ptr         - bias, viewed as a 2-D array.
//   scratchpad       - supplies the key_wino_M / key_wino_V / key_wino_U
//                      buffers; M and V swap their scratchpad keys when
//                      is_fwd is false.
//
// Before the parallel region, if a bias is present and jcp.oc differs from
// jcp.oc_without_padding, the bias values of the last block are copied into
// the zero-initialised local array last_slice_bias, which is then used in
// place of bias_ptr for that block.
//
// Phases (separated by OpenMP barriers where shown in the code):
//   1. input_transform_data fills V for every (img, K block).
//   2. weight_transform_data fills U (skipped for forward_inference).
//   -- barrier --
//   3. kernel_->gemm_loop_ker accumulates M from U and V over all K blocks.
//   -- barrier --
//   4. output_transform_data writes the output from M, applying p_ops and
//      the bias pointer selected above.
void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_S_G_D( float *inp_ptr, float *out_ptr, float *wei_ptr, float *bias_ptr, const memory_tracking::grantor_t &scratchpad) const { const auto &jcp = kernel_->jcp; const auto &p_ops = attr_->post_ops_; const int inph = is_fwd ? jcp.ih : jcp.oh; const int inpw = is_fwd ? jcp.iw : jcp.ow; const int outh = is_fwd ? jcp.oh : jcp.ih; const int outw = is_fwd ? jcp.ow : jcp.iw; /* Notation: FWD: dimM:oc, dimN:ntiles, dimK:ic, BWD: dimM:ic, dimN:ntiles, dimK:oc, FWD/BWD: V: src/diff_dst transform, U:weight transform, M:dst/diff_src transform */ array_offset_calculator<float, 5> input(inp_ptr, jcp.mb, jcp.dimK/jcp.dimK_reg_block, inph, inpw, jcp.dimK_reg_block); array_offset_calculator<float, 5> output(out_ptr, jcp.mb, jcp.dimM/jcp.dimM_simd_block, outh, outw, jcp.dimM_simd_block); array_offset_calculator<float, 6> weights(wei_ptr, jcp.oc/jcp.oc_simd_block, jcp.ic/jcp.ic_simd_block, jcp.kh, jcp.kw, jcp.ic_simd_block, jcp.oc_simd_block); array_offset_calculator<float, 2> bias(bias_ptr, jcp.dimM/jcp.dimM_simd_block, jcp.dimM_simd_block); array_offset_calculator<float, 8> M(is_fwd ? scratchpad.template get<float>(key_wino_M) : scratchpad.template get<float>(key_wino_V), jcp.dimN_nb_block, jcp.dimM_nb_block, alpha, alpha, jcp.dimN_block, jcp.dimM_block * jcp.dimM_reg_block, jcp.dimN_reg_block, jcp.dimM_simd_block); auto wino_wei = (jcp.prop_kind == prop_kind::forward_inference) ? wei_ptr : scratchpad.template get<float>(key_wino_U); array_offset_calculator<float, 8> U(wino_wei, jcp.dimM_nb_block, alpha, alpha, jcp.dimK_nb_block, jcp.dimM_block * jcp.dimM_reg_block, jcp.dimK_block, jcp.dimK_reg_block, jcp.dimM_simd_block); array_offset_calculator<float, 8> V(is_fwd ? 
scratchpad.template get<float>(key_wino_V) : scratchpad.template get<float>(key_wino_M), jcp.dimN_nb_block, alpha, alpha, jcp.dimN_block, jcp.dimK_nb_block, jcp.dimK_block, jcp.dimN_reg_block, jcp.dimK_reg_block); const bool wants_padded_bias = jcp.with_bias && jcp.oc_without_padding != jcp.oc; float last_slice_bias[simd_w] = {0}; if (wants_padded_bias) { for (int oc = 0; oc < jcp.oc_without_padding % jcp.oc_simd_block; ++oc) last_slice_bias[oc] = bias(jcp.dimM / jcp.dimM_simd_block - 1, oc); } PRAGMA_OMP(parallel) { parallel_nd_in_omp(jcp.mb, jcp.dimK_nb_block, jcp.dimK_block, [&](int img, int K_blk1, int K_blk2) { input_transform_data(img, jcp, &(input(img, K_blk1 * jcp.dimK_block + K_blk2, 0, 0, 0)), &(V(0, 0, 0, 0, K_blk1, K_blk2, 0, 0))); }); if (jcp.prop_kind != prop_kind::forward_inference) { parallel_nd_in_omp(jcp.nb_oc, jcp.nb_ic, (jcp.oc_block * jcp.oc_reg_block), (jcp.ic_block * jcp.ic_reg_block), [&](int ofm1, int ifm1, int ofm2, int ifm2) { float *U_base_ptr = is_fwd ? &(U(ofm1, 0, 0, ifm1, ofm2, ifm2, 0, 0)) : &(U(ifm1, 0, 0, ofm1, ifm2, ofm2, 0, 0)); weight_transform_data(jcp, &(weights( ofm1 * jcp.oc_block * jcp.oc_reg_block + ofm2, ifm1 * jcp.ic_block * jcp.ic_reg_block + ifm2, 0, 0, 0, 0)), U_base_ptr); }); } PRAGMA_OMP(barrier) parallel_nd_in_omp(jcp.dimN_nb_block, alpha, alpha, jcp.dimM_nb_block, [&](int N_blk1, int oj, int oi, int M_blk1) { for (int K_blk1 = 0; K_blk1 < jcp.dimK_nb_block; K_blk1++) for (int N_blk2 = 0; N_blk2 < jcp.dimN_block; N_blk2++) kernel_->gemm_loop_ker( (float *)&(M(N_blk1, M_blk1, oj, oi, N_blk2, 0, 0, 0)), (const float *)&(U(M_blk1, oj, oi, K_blk1, 0, 0, 0, 0)), (const float *)&(V(N_blk1, oj, oi, N_blk2, K_blk1, 0, 0, 0)), K_blk1); }); PRAGMA_OMP(barrier) parallel_nd_in_omp(jcp.mb, jcp.dimM_nb_block, (jcp.dimM_block * jcp.dimM_reg_block), [&](int img, int M_blk1, int M_blk2) { const int M_blk = M_blk1 * jcp.dimM_block * jcp.dimM_reg_block + M_blk2; float *bias_ptr = wants_padded_bias && M_blk == jcp.dimM / 
jcp.dimM_simd_block - 1 ? last_slice_bias : &bias(M_blk, 0); output_transform_data(img, jcp, p_ops, &(M(0, M_blk1, 0, 0, 0, M_blk2, 0, 0)), &(output(img, M_blk, 0, 0, 0)), bias_ptr); }); } }
// GET INTERPOLATOR //------------------------------------------------------------------------- GridInterface::Interpolator RectilinearGrid::getInterpolator(Index elem, const Vector &point, DataBase::Mapping mapping, GridInterface::InterpolationMode mode) const { vassert(inside(elem, point)); #ifdef INTERPOL_DEBUG if (!inside(elem, point)) { return Interpolator(); } #endif if (mapping == DataBase::Element) { std::vector<Scalar> weights(1, 1.); std::vector<Index> indices(1, elem); return Interpolator(weights, indices); } std::array<Index,3> n = cellCoordinates(elem, m_numDivisions); std::array<Index,8> cl = cellVertices(elem, m_numDivisions); Vector corner0(m_coords[0][n[0]], m_coords[1][n[1]], m_coords[2][n[2]]); Vector corner1(m_coords[0][n[0]+1], m_coords[1][n[1]+1], m_coords[2][n[2]+1]); const Vector diff = point-corner0; const Vector size = corner1-corner0; const Index nvert = 8; std::vector<Index> indices((mode==Linear || mode==Mean) ? nvert : 1); std::vector<Scalar> weights((mode==Linear || mode==Mean) ? nvert : 1); if (mode == Mean) { const Scalar w = Scalar(1)/nvert; for (Index i=0; i<nvert; ++i) { indices[i] = cl[i]; weights[i] = w; } } else if (mode == Linear) { vassert(nvert == 8); for (Index i=0; i<nvert; ++i) { indices[i] = cl[i]; } Vector ss = diff; for (int c=0; c<3; ++c) { ss[c] /= size[c]; } weights[0] = (1-ss[0])*(1-ss[1])*(1-ss[2]); weights[1] = ss[0]*(1-ss[1])*(1-ss[2]); weights[2] = ss[0]*ss[1]*(1-ss[2]); weights[3] = (1-ss[0])*ss[1]*(1-ss[2]); weights[4] = (1-ss[0])*(1-ss[1])*ss[2]; weights[5] = ss[0]*(1-ss[1])*ss[2]; weights[6] = ss[0]*ss[1]*ss[2]; weights[7] = (1-ss[0])*ss[1]*ss[2]; } else { weights[0] = 1; if (mode == First) { indices[0] = cl[0]; } else if(mode == Nearest) { Vector ss = diff; int nearest=0; for (int c=0; c<3; ++c) { nearest <<= 1; ss[c] /= size[c]; if (ss[c] < 0.5) nearest |= 1; } indices[0] = cl[nearest]; } } return Interpolator(weights, indices); }
// Performs one branching step over all walkers of all processes and
// rebalances them so that every process again holds nconfig walkers.
//
// Outline, as visible in the code below:
//   1. Every process collects the weights of all walkers (MPI_Allgather when
//      USE_MPI is defined, a plain copy otherwise).
//   2. The root process (node 0) calls match_walkers() to fill `branch`, the
//      number of copies of each walker; entries still at -1 afterwards are
//      set to 1. It does this alone so that only one random-number stream is
//      involved. It then counts the resulting walkers per node (nwalkers),
//      broadcasts nwalkers and sends each node its slice of `branch` and
//      `weights`.
//   3. Every process stores the received weights in pts and builds the same
//      send_queue of (from_node, to_node) transfers that levels nwalkers to
//      nconfig on every node; nwalkers is flat (== nconfig) afterwards.
//   4. Locally kept walkers are copied from a saved copy of pts according to
//      my_branch; surplus copies are sent with mpiSend and missing ones are
//      received with mpiReceive, following send_queue.
//
// Timing of the individual stages is reported through single_write.
//
// Returns the number of local walkers whose branch count was 0.
int Dmc_method::calcBranch() { int totwalkers=mpi_info.nprocs*nconfig; Array1 <doublevar> weights(totwalkers); Array1 <doublevar> my_weights(nconfig); for(int walker=0; walker < nconfig; walker++) my_weights(walker)=pts(walker).weight; #ifdef USE_MPI MPI_Allgather(my_weights.v,nconfig, MPI_DOUBLE, weights.v,nconfig,MPI_DOUBLE, MPI_Comm_grp); #else weights=my_weights; #endif Array1 <int> my_branch(nconfig); Array1 <int> nwalkers(mpi_info.nprocs); nwalkers=0; int root=0; if(mpi_info.node==root) { //this if/else clause may be refactored out Array1 <int> branch(totwalkers); //----Find which walkers branch/die //we do it on one node since otherwise we'll have different random numbers! //we'll assign the weight for each copy that will be produced //this is the core of the branching algorithm.. //my homegrown algo, based on Umrigar, Nightingale, and Runge branch=-1; long int time_a=clock(); match_walkers(weights,branch); long int time_b=clock(); single_write(cout,"matching: ",double(time_b-time_a)/CLOCKS_PER_SEC,"\n"); for(int w=0; w< totwalkers; w++) { if(branch(w)==-1) branch(w)=1; } //----end homegrown algo //count how many walkers each node will have //without balancing int walk=0; for(int n=0; n< mpi_info.nprocs; n++) { for(int i=0; i< nconfig; i++) { nwalkers(n)+=branch(walk); walk++; } //cout << "nwalkers " << n << " " << nwalkers(n) << endl; } //now send nwalkers and which to branch to all processors for(int i=0; i< nconfig; i++) { my_branch(i)=branch(i); my_weights(i)=weights(i); } time_a=clock(); #ifdef USE_MPI MPI_Bcast(nwalkers.v, mpi_info.nprocs, MPI_INT, mpi_info.node, MPI_Comm_grp); for(int i=1; i< mpi_info.nprocs; i++) { MPI_Send(branch.v+i*nconfig,nconfig,MPI_INT,i,0,MPI_Comm_grp); MPI_Send(weights.v+i*nconfig, nconfig, MPI_DOUBLE, i,0,MPI_Comm_grp); } #endif time_b=clock(); single_write(cout,"sending branch: ",double(time_b-time_a)/CLOCKS_PER_SEC,"\n"); } else { #ifdef USE_MPI MPI_Bcast(nwalkers.v, mpi_info.nprocs, MPI_INT, root, MPI_Comm_grp); 
MPI_Status status; MPI_Recv(my_branch.v,nconfig, MPI_INT,root,0,MPI_Comm_grp, &status); MPI_Recv(my_weights.v,nconfig, MPI_DOUBLE,root,0,MPI_Comm_grp, &status); #endif } //--end if/else clause long int time_a=clock(); for(int i=0; i< nconfig; i++) { pts(i).weight=my_weights(i); } //Now we all have my_branch and nwalkers..we need to figure out who //needs to send walkers to whom--after this, nwalkers should be a flat array equal to //nconfig(so don't try to use it for anything useful afterwards) vector <Queue_element> send_queue; int curr_needs_walker=0; int nnwalkers=nwalkers(mpi_info.node); //remember how many total we should have for(int i=0; i< mpi_info.nprocs; i++) { while(nwalkers(i) > nconfig) { if(nwalkers(curr_needs_walker) < nconfig) { nwalkers(curr_needs_walker)++; nwalkers(i)--; send_queue.push_back(Queue_element(i,curr_needs_walker)); //cout << mpi_info.node << ":nwalkers " << nwalkers(i) << " " << nwalkers(curr_needs_walker) << endl; //cout << mpi_info.node << ":send " << i << " " << curr_needs_walker << endl; } else { curr_needs_walker++; } } } for(int i=0; i< mpi_info.nprocs; i++) assert(nwalkers(i)==nconfig); int killsize=0; for(int i=0; i< nconfig; i++) { //cout << mpi_info.node << ":branch " << i << " " << my_branch(i) << " weight " << pts(i).weight << endl; if(my_branch(i)==0) killsize++; } //cout << mpi_info.node << ": send queue= " << send_queue.size() << endl; //now do branching for the walkers that we get to keep Array1 <Dmc_point> savepts=pts; int curr=0; //what walker we're currently copying from int curr_copy=0; //what walker we're currently copying to while(curr_copy < min(nnwalkers,nconfig)) { if(my_branch(curr)>0) { //cout << mpi_info.node << ": copying " << curr << " to " << curr_copy << " branch " << my_branch(curr) << endl; my_branch(curr)--; pts(curr_copy)=savepts(curr); //pts(curr_copy).weight=1; curr_copy++; } else curr++; } long int time_b=clock(); single_write(cout,"Finding out where to send: 
",double(time_b-time_a)/CLOCKS_PER_SEC,"\n"); time_a=clock(); //Finally, send or receive spillover walkers if(nnwalkers > nconfig) { vector<Queue_element>::iterator queue_pos=send_queue.begin(); while(curr < nconfig) { if(my_branch(curr) > 0) { my_branch(curr)--; while(queue_pos->from_node != mpi_info.node) { queue_pos++; } //cout << mpi_info.node << ":curr " << curr << " my_branch " << my_branch(curr) << endl; //cout << mpi_info.node << ":sending " << queue_pos->from_node << " to " << queue_pos->to_node << endl; savepts(curr).mpiSend(queue_pos->to_node); queue_pos++; } else curr++; } } else { //if nnwalkers == nconfig, then this will just get skipped immediately vector <Queue_element>::iterator queue_pos=send_queue.begin(); while(curr_copy < nconfig) { while(queue_pos->to_node != mpi_info.node) queue_pos++; //cout << mpi_info.node <<":receiving from " << queue_pos->from_node << " to " << curr_copy << endl; pts(curr_copy).mpiReceive(queue_pos->from_node); //pts(curr_copy).weight=1; curr_copy++; queue_pos++; } } time_b=clock(); single_write(cout,"sending walkers:",double(time_b-time_a)/CLOCKS_PER_SEC,"\n"); return killsize; //exit(0); }
int DispBeamColumn2d::getResponse(int responseID, Information &eleInfo) { double V; double L = crdTransf->getInitialLength(); if (responseID == 1) return eleInfo.setVector(this->getResistingForce()); else if (responseID == 12) { P.Zero(); P.addVector(1.0, this->getRayleighDampingForces(), 1.0); return eleInfo.setVector(P); } else if (responseID == 2) { P(3) = q(0); P(0) = -q(0)+p0[0]; P(2) = q(1); P(5) = q(2); V = (q(1)+q(2))/L; P(1) = V+p0[1]; P(4) = -V+p0[2]; return eleInfo.setVector(P); } else if (responseID == 9) { return eleInfo.setVector(q); } else if (responseID == 19) { static Matrix kb(3,3); this->getBasicStiff(kb); return eleInfo.setMatrix(kb); } // Chord rotation else if (responseID == 3) { return eleInfo.setVector(crdTransf->getBasicTrialDisp()); } // Plastic rotation else if (responseID == 4) { static Vector vp(3); static Vector ve(3); const Matrix &kb = this->getInitialBasicStiff(); kb.Solve(q, ve); vp = crdTransf->getBasicTrialDisp(); vp -= ve; return eleInfo.setVector(vp); } // Curvature sensitivity else if (responseID == 5) { /* Vector curv(numSections); const Vector &v = crdTransf->getBasicDispGradient(1); double L = crdTransf->getInitialLength(); double oneOverL = 1.0/L; //const Matrix &pts = quadRule.getIntegrPointCoords(numSections); double pts[2]; pts[0] = 0.0; pts[1] = 1.0; // Loop over the integration points for (int i = 0; i < numSections; i++) { int order = theSections[i]->getOrder(); const ID &code = theSections[i]->getType(); //double xi6 = 6.0*pts(i,0); double xi6 = 6.0*pts[i]; curv(i) = oneOverL*((xi6-4.0)*v(1) + (xi6-2.0)*v(2)); } return eleInfo.setVector(curv); */ Vector curv(numSections); /* // Loop over the integration points for (int i = 0; i < numSections; i++) { int order = theSections[i]->getOrder(); const ID &code = theSections[i]->getType(); const Vector &dedh = theSections[i]->getdedh(); for (int j = 0; j < order; j++) { if (code(j) == SECTION_RESPONSE_MZ) curv(i) = dedh(j); } } */ return eleInfo.setVector(curv); } // Basic 
deformation sensitivity else if (responseID == 6) { const Vector &dvdh = crdTransf->getBasicDisplSensitivity(1); return eleInfo.setVector(dvdh); } else if (responseID == 7) { //const Matrix &pts = quadRule.getIntegrPointCoords(numSections); double xi[maxNumSections]; beamInt->getSectionLocations(numSections, L, xi); Vector locs(numSections); for (int i = 0; i < numSections; i++) locs(i) = xi[i]*L; return eleInfo.setVector(locs); } else if (responseID == 8) { //const Vector &wts = quadRule.getIntegrPointWeights(numSections); double wt[maxNumSections]; beamInt->getSectionWeights(numSections, L, wt); Vector weights(numSections); for (int i = 0; i < numSections; i++) weights(i) = wt[i]*L; return eleInfo.setVector(weights); } else return Element::getResponse(responseID, eleInfo); }
// Computes args.estimate for the square matrix args.matrix by extending a set
// of weighted "particles" one step at a time, keeping at most args.n of them
// by sampling without replacement.
//
// Inputs read from args: matrix, n, seed, alpha.
// Output written to args: estimate (the sum of the final particle weights).
// Throws std::runtime_error if the matrix is not square or if n < 1.
//
// State is stored in flat vectors indexed by particle:
//   currentColumnSums - `dimension` entries per particle
//   usedRows          - `dimension` flags per particle
//   availableColumns / availableRows
//                     - (dimension - permutationCounter) entries per particle
//                       at the start of a step, one fewer after it
//   previousEntry     - one entry per particle; -1 when the column just
//                       chosen was already marked in usedRows
//   weights           - one mpfr_class weight per particle
//
// Each of the `dimension` steps:
//   - if there are at most n candidate possibilities, all of them are kept
//     and their candidate weights become the new particle weights;
//     otherwise sampfordFromParetoNaive selects n of them and each selected
//     weight is divided by its rescaledWeights entry;
//   - per-particle state is copied from the parent and updated for the chosen
//     (previousEntry, newEntry) pair;
//   - unless this is the last step, the candidate possibilities and their
//     weights for the next step are rebuilt from the surviving particles;
//     candidates with j == k (or j == previousEntry[i]) or a used row are
//     scaled by alpha and divided by the remaining count where applicable.
void withoutReplacementImpl(withoutReplacementArgs& args) { boost::numeric::ublas::matrix<double>& matrix = args.matrix; int n = args.n; int seed = args.seed; double alpha = args.alpha; if(matrix.size1() != matrix.size2()) { throw std::runtime_error("Matrix must be square"); } if(n < 1) { throw std::runtime_error("Input n must be at least 1"); } int dimension = matrix.size1(); boost::mt19937 randomSource; randomSource.seed(seed); std::vector<double> columnSums(dimension,0); for(int row = 0; row < dimension; row++) { for(int column = 0; column < dimension; column++) { columnSums[column] += std::fabs(matrix(row, column)); } } std::vector<double> currentColumnSums(dimension), newCurrentColumnSums; std::vector<int> availableColumns(dimension), newAvailableColumns; std::vector<int> availableRows(dimension), newAvailableRows; std::vector<int> previousEntry(1), newPreviousEntry; std::vector<bool> usedRows(dimension), newUsedRows; std::vector<mpfr_class> weights(1, 1), newWeights; //Get out the choices at the start. 
std::vector<possibility> possibilities; for(int i = 0; i < dimension; i++) { for(int j = 0; j < dimension; j++) { possibility nextPos; nextPos.parent = 0; nextPos.previousEntry = i; nextPos.newEntry = j; possibilities.push_back(nextPos); } } //These choices are all derived from a single particle at the start std::copy(columnSums.begin(), columnSums.end(), currentColumnSums.begin()); for(int i = 0; i < dimension; i++) { availableColumns[i] = i; availableRows[i] = i; } std::fill(usedRows.begin(), usedRows.end(), false); sampling::sampfordFromParetoNaiveArgs samplingArgs; samplingArgs.n = n; samplingArgs.weights.resize(dimension*dimension); for(int i = 0; i < (int)possibilities.size(); i++) { possibility& pos = possibilities[i]; if(pos.previousEntry == pos.newEntry) { samplingArgs.weights[i] = alpha * std::fabs(matrix(pos.previousEntry, pos.newEntry)) / (dimension*(dimension - 1)); } else { samplingArgs.weights[i] = std::fabs(matrix(pos.previousEntry, pos.newEntry)) / dimension; } } int currentSamples = 1; for(int permutationCounter = 0; permutationCounter < dimension; permutationCounter++) { //Draw sample if((int)possibilities.size() <= n) { newCurrentColumnSums.resize(possibilities.size()*dimension); newAvailableColumns.resize(possibilities.size()*(dimension - permutationCounter-1)); newAvailableRows.resize(possibilities.size()*(dimension - permutationCounter-1)); newUsedRows.resize(possibilities.size()*dimension); newPreviousEntry.resize(possibilities.size()); int newAvailableColumnsIndex = 0; int newAvailableRowsIndex = 0; for(int i = 0; i < (int)possibilities.size(); i++) { possibility& pos = possibilities[i]; std::copy(currentColumnSums.begin() + pos.parent * dimension, currentColumnSums.begin() + (pos.parent + 1) * dimension, newCurrentColumnSums.begin() + dimension * i); newCurrentColumnSums[dimension*i + pos.newEntry] -= std::fabs(matrix(pos.previousEntry, pos.newEntry)); std::copy(usedRows.begin() + pos.parent*dimension, usedRows.begin() + (pos.parent + 1) * 
dimension, newUsedRows.begin() + dimension*i); newUsedRows[dimension*i + pos.previousEntry] = true; for(int j = 0; j < dimension - permutationCounter; j++) { if(availableColumns[j + pos.parent*(dimension - permutationCounter)] != pos.newEntry) { newAvailableColumns.at(newAvailableColumnsIndex) = availableColumns.at(j + pos.parent*(dimension - permutationCounter)); newAvailableColumnsIndex++; } if(availableRows[j + pos.parent*(dimension - permutationCounter)] != pos.previousEntry) { newAvailableRows.at(newAvailableRowsIndex) = availableRows.at(j + pos.parent*(dimension - permutationCounter)); newAvailableRowsIndex++; } } if(newUsedRows[i*dimension + pos.newEntry]) { newPreviousEntry[i] = -1; } else { newPreviousEntry[i] = pos.newEntry; } } newWeights.swap(samplingArgs.weights); currentSamples = possibilities.size(); } else { sampfordFromParetoNaive(samplingArgs, randomSource); newCurrentColumnSums.resize(n*dimension); newAvailableColumns.resize(n*(dimension - permutationCounter-1)); newAvailableRows.resize(n*(dimension - permutationCounter-1)); newUsedRows.resize(n*dimension); newWeights.resize(n); newPreviousEntry.resize(n); int newAvailableColumnsIndex = 0; int newAvailableRowsIndex = 0; for(int i = 0; i < n; i++) { possibility& pos = possibilities[samplingArgs.indices[i]]; std::copy(currentColumnSums.begin() + pos.parent * dimension, currentColumnSums.begin() + (pos.parent + 1) * dimension, newCurrentColumnSums.begin() + dimension * i); newCurrentColumnSums[dimension*i + pos.newEntry] -= std::fabs(matrix(pos.previousEntry, pos.newEntry)); std::copy(usedRows.begin() + pos.parent*dimension, usedRows.begin() + (pos.parent + 1) * dimension, newUsedRows.begin() + dimension*i); newUsedRows[dimension*i + pos.previousEntry] = true; for(int j = 0; j < dimension - permutationCounter; j++) { if(availableColumns[j + pos.parent*(dimension - permutationCounter)] != pos.newEntry) { newAvailableColumns.at(newAvailableColumnsIndex) = availableColumns.at(j + pos.parent*(dimension 
- permutationCounter)); newAvailableColumnsIndex++; } if(availableRows[j + pos.parent*(dimension - permutationCounter)] != pos.previousEntry) { newAvailableRows.at(newAvailableRowsIndex) = availableRows.at(j + pos.parent*(dimension - permutationCounter)); newAvailableRowsIndex++; } } if(newUsedRows[i*dimension + pos.newEntry]) { newPreviousEntry[i] = -1; } else { newPreviousEntry[i] = pos.newEntry; } newWeights[i] = samplingArgs.weights[samplingArgs.indices[i]] / samplingArgs.rescaledWeights[samplingArgs.indices[i]]; } currentSamples = n; } previousEntry.swap(newPreviousEntry); currentColumnSums.swap(newCurrentColumnSums); availableColumns.swap(newAvailableColumns); availableRows.swap(newAvailableRows); usedRows.swap(newUsedRows); weights.swap(newWeights); //If we're not at the end, get out the list of possibilities and also weights if(permutationCounter != dimension - 1) { possibilities.clear(); samplingArgs.weights.clear(); for(int i = 0; i < currentSamples; i++) { if(previousEntry[i] == -1) { for(int j = 0; j < dimension - permutationCounter - 1; j++) { for(int k = 0; k < dimension - permutationCounter - 1; k++) { possibility pos; pos.parent = i; pos.previousEntry = availableRows[(dimension - permutationCounter - 1) * i + j]; pos.newEntry = availableColumns[(dimension - permutationCounter - 1) * i + k]; possibilities.push_back(pos); if(j == k || usedRows[pos.newEntry + dimension * i]) { if(dimension - 2 != permutationCounter) { samplingArgs.weights.push_back(weights[i] * alpha * std::fabs(matrix(pos.previousEntry, pos.newEntry))/ (dimension - permutationCounter - 2)); } else { samplingArgs.weights.push_back(weights[i] * alpha * std::fabs(matrix(pos.previousEntry, pos.newEntry))); } } else { samplingArgs.weights.push_back(weights[i] * std::fabs(matrix(pos.previousEntry, pos.newEntry))); } } } } else { for(int j = 0; j < dimension - permutationCounter - 1; j++) { possibility pos; pos.parent = i; pos.previousEntry = previousEntry[i]; pos.newEntry = 
availableColumns[(dimension - permutationCounter - 1) * i + j]; possibilities.push_back(pos); if(j == previousEntry[i] || usedRows[pos.newEntry + dimension * i]) { if(dimension - 2 != permutationCounter) { samplingArgs.weights.push_back(weights[i] * alpha * std::fabs(matrix(pos.previousEntry, pos.newEntry)) / (dimension - permutationCounter - 2)); } else { samplingArgs.weights.push_back(weights[i] * alpha * std::fabs(matrix(pos.previousEntry, pos.newEntry))); } } else { samplingArgs.weights.push_back(weights[i] * std::fabs(matrix(pos.previousEntry, pos.newEntry))); } } } } } } args.estimate = 0; for(int i = 0; i < (int)weights.size(); i++) { args.estimate += weights[i]; } }
int main(int argc, char** argv) { try { util::ProgramOptions::init(argc, argv); logger::LogManager::init(); Hdf5CragStore cragStore(optionProjectFile.as<std::string>()); Hdf5VolumeStore volumeStore(optionProjectFile.as<std::string>()); Crag crag; cragStore.retrieveCrag(crag); NodeFeatures nodeFeatures(crag); EdgeFeatures edgeFeatures(crag); LOG_USER(logger::out) << "reading features" << std::endl; cragStore.retrieveNodeFeatures(crag, nodeFeatures); cragStore.retrieveEdgeFeatures(crag, edgeFeatures); BundleOptimizer::Parameters parameters; parameters.lambda = optionRegularizerWeight; parameters.epsStrategy = BundleOptimizer::EpsFromGap; BundleOptimizer optimizer(parameters); BestEffort* bestEffort = 0; OverlapLoss* overlapLoss = 0; if (optionBestEffortFromProjectFile) { LOG_USER(logger::out) << "reading best-effort" << std::endl; bestEffort = new BestEffort(crag); vigra::HDF5File project( optionProjectFile.as<std::string>(), vigra::HDF5File::OpenMode::ReadWrite); project.cd("best_effort"); vigra::ArrayVector<int> beNodes; vigra::MultiArray<2, int> beEdges; project.readAndResize("nodes", beNodes); project.readAndResize("edges", beEdges); std::set<int> nodes; for (int n : beNodes) nodes.insert(n); std::set<std::pair<int, int>> edges; for (int i = 0; i < beEdges.shape(1); i++) edges.insert( std::make_pair( std::min(beEdges(i, 0), beEdges(i, 1)), std::max(beEdges(i, 0), beEdges(i, 1)))); for (Crag::NodeIt n(crag); n != lemon::INVALID; ++n) bestEffort->node[n] = nodes.count(crag.id(n)); for (Crag::EdgeIt e(crag); e != lemon::INVALID; ++e) bestEffort->edge[e] = edges.count( std::make_pair( std::min(crag.id(crag.u(e)), crag.id(crag.v(e))), std::max(crag.id(crag.u(e)), crag.id(crag.v(e))))); } else { LOG_USER(logger::out) << "reading ground-truth" << std::endl; ExplicitVolume<int> groundTruth; volumeStore.retrieveGroundTruth(groundTruth); LOG_USER(logger::out) << "finding best-effort solution" << std::endl; overlapLoss = new OverlapLoss(crag, groundTruth); bestEffort = new 
BestEffort(crag, *overlapLoss); } Loss* loss = 0; bool destructLoss = false; if (optionLoss.as<std::string>() == "hamming") { LOG_USER(logger::out) << "using Hamming loss" << std::endl; loss = new HammingLoss(crag, *bestEffort); destructLoss = true; } else if (optionLoss.as<std::string>() == "overlap") { LOG_USER(logger::out) << "using overlap loss" << std::endl; if (!overlapLoss) { LOG_USER(logger::out) << "reading ground-truth" << std::endl; ExplicitVolume<int> groundTruth; volumeStore.retrieveGroundTruth(groundTruth); LOG_USER(logger::out) << "finding best-effort solution" << std::endl; overlapLoss = new OverlapLoss(crag, groundTruth); } loss = overlapLoss; } else { LOG_ERROR(logger::out) << "unknown loss: " << optionLoss.as<std::string>() << std::endl; return 1; } if (optionNormalizeLoss) { LOG_USER(logger::out) << "normalizing loss..." << std::endl; loss->normalize(crag, MultiCut::Parameters()); } Oracle oracle( crag, nodeFeatures, edgeFeatures, *loss, *bestEffort); std::vector<double> weights(nodeFeatures.dims() + edgeFeatures.dims(), 0); optimizer.optimize(oracle, weights); storeVector(weights, optionFeatureWeights); if (destructLoss && loss != 0) delete loss; if (overlapLoss) delete overlapLoss; if (bestEffort) delete bestEffort; } catch (boost::exception& e) { handleException(e, std::cerr); } }
// Accumulates this workset's contribution to the integral of a product of
// (possibly complex) fields over the region described by opRegion.
//
// The integrand is the real or imaginary part (chosen by bReturnImagPart) of
// the product of all entries of `fields`. It is expanded into products of
// real and imaginary parts: every way of choosing "Re" or "Im" for each field
// is encoded as a bit mask (bit n set = use the imaginary part of field n),
// and only the masks that contribute to the requested part are summed.
//
// Which masks contribute, and with which sign, depends only on
// fieldIsComplex, conjugateFieldFlag and bReturnImagPart. It is therefore
// determined once per call instead of once per (cell, quadrature point) as
// before. Bit tests use std::size_t shifts rather than `0x1 << n`, which
// shifted a plain int. The field count must stay below the bit width of
// std::size_t.
void QCAD::ResponseFieldIntegral<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Zero out local response
  for (typename PHX::MDField<ScalarT>::size_type i=0;
       i<this->local_response.size(); i++)
    this->local_response[i] = 0.0;

  if(opRegion->elementBlockIsInRegion(workset.EBName))
  {
    const std::size_t nBits = fields.size();
    const std::size_t one = 1;
    const std::size_t nCombos = one << nBits;  // 2^nBits Re/Im choices

    // Contributing Re/Im combinations and their signs (+1 or -1).
    std::vector<std::size_t> termMasks;
    std::vector<double> termSigns;

    for(std::size_t mask = 0; mask < nCombos; mask++)
    {
      std::size_t nOneBits = 0;       // number of imaginary parts selected
      std::size_t nExtraMinuses = 0;  // selected imaginary parts that are conjugated
      bool usable = true;

      for(std::size_t n = 0; n < nBits; n++)
      {
        if( !((one << n) & mask) ) continue;  // real part chosen for field n

        // A field without an imaginary part cannot supply one, so this
        // product does not exist.
        if(!fieldIsComplex[n]) { usable = false; break; }

        if(conjugateFieldFlag[n]) nExtraMinuses++;
        nOneBits++;
      }
      if(!usable) continue;

      // i^k is imaginary for odd k and real for even k; keep only the
      // combinations that land in the part being returned.
      if( !((bReturnImagPart && nOneBits % 2) ||
            (!bReturnImagPart && nOneBits % 2 == 0)) )
        continue;

      // i^2 = -1 and i^3 = -i: minus sign when nOneBits % 4 is 2 or 3.
      double sign = (nOneBits % 4 >= 2) ? -1.0 : 1.0;
      // Each conjugated imaginary part contributes one more minus sign.
      if(nExtraMinuses % 2) sign *= -1.0;

      termMasks.push_back(mask);
      termSigns.push_back(sign);
    }

    ScalarT term, val;

    for (std::size_t cell=0; cell < workset.numCells; ++cell)
    {
      if(!opRegion->cellIsInRegion(cell)) continue;

      for (std::size_t qp=0; qp < numQPs; ++qp)
      {
        val = 0.0;

        for(std::size_t k = 0; k < termMasks.size(); k++)
        {
          const std::size_t mask = termMasks[k];
          term = termSigns[k];

          // Multiply the selected Re/Im parts of all fields together.
          for(std::size_t m = 0; m < nBits; m++)
          {
            if( (one << m) & mask )
              term *= fields_Imag[m](cell,qp);
            else
              term *= fields[m](cell,qp);
          }

          val += term;  // add term to overall integrand
        }

        val *= weights(cell,qp) * scaling;  // multiply integrand by volume

        this->local_response(cell) += val;
        this->global_response(0) += val;
      }
    }
  }

  // Do any local-scattering necessary
  PHAL::SeparableScatterScalarResponse<EvalT,Traits>::evaluateFields(workset);
}
// Runs the convolution data path tile block by tile block: for each tile
// block the input is transformed into V, multiplied with the transformed
// weights U into M by the generated GEMM kernel, and M is transformed back
// into the output tensor.
// (Judging by the class name this is a Winograd 4x3 convolution; the
// transforms themselves live in the helpers called below -- TODO confirm.)
//
// inp_ptr / out_ptr : source and destination tensors. When is_fwd is false
//                     the roles of the "i*" and "o*" dimensions in jcp are
//                     swapped (see inph/inpw/outh/outw).
// wei_ptr           : weights. For prop_kind::forward_inference it is used
//                     directly as U, i.e. it is expected to be transformed
//                     already; otherwise it is transformed into scratchpad
//                     memory on every call.
// bias_ptr          : bias, addressed as [oc / oc_simd_block][oc_simd_block].
// scratchpad        : provides the U, V and M work buffers.
void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_SGD(
        float *inp_ptr, float *out_ptr, float *wei_ptr, float *bias_ptr,
        const memory_tracking::grantor_t &scratchpad) const {
    const auto &jcp = kernel_->jcp;
    const auto &p_ops = attr_->post_ops_;

    // Spatial sizes of the tensor being read and the tensor being written.
    const int inph = is_fwd ? jcp.ih : jcp.oh;
    const int inpw = is_fwd ? jcp.iw : jcp.ow;
    const int outh = is_fwd ? jcp.oh : jcp.ih;
    const int outw = is_fwd ? jcp.ow : jcp.iw;

    // Multi-dimensional views over the flat buffers; the trailing dimension
    // of input/output is the register/SIMD block.
    array_offset_calculator<float, 5> input(inp_ptr,
        jcp.mb, jcp.dimK/jcp.dimK_reg_block, inph, inpw, jcp.dimK_reg_block);
    array_offset_calculator<float, 5> output(out_ptr,
        jcp.mb, jcp.dimM/jcp.dimM_simd_block, outh, outw, jcp.dimM_simd_block);
    array_offset_calculator<float, 6> weights(wei_ptr,
        jcp.oc/jcp.oc_simd_block, jcp.ic/jcp.ic_simd_block, jcp.kh, jcp.kw,
        jcp.ic_simd_block, jcp.oc_simd_block);
    array_offset_calculator<float, 2> bias(bias_ptr,
        jcp.oc/jcp.oc_simd_block, jcp.oc_simd_block);

    // Transformed weights: taken as-is for inference, otherwise written to
    // the key_wino_U scratchpad buffer by the transform further down.
    auto wino_wei = (jcp.prop_kind == prop_kind::forward_inference)
            ? wei_ptr
            : scratchpad.template get<float>(key_wino_U);

    array_offset_calculator<float, 8> U(wino_wei,
            jcp.dimM_nb_block,
            alpha, alpha,
            jcp.dimK_nb_block,
            jcp.dimM_block * jcp.dimM_reg_block, jcp.dimK_block,
            jcp.dimK_reg_block, jcp.dimM_simd_block);

    // M and V exchange scratchpad buffers between the forward and backward
    // instantiations. Their leading dimension is declared as 0 and is
    // indexed with the thread id below, so each thread works on its own
    // slice (presumably the scratchpad is sized for the thread count --
    // TODO confirm against the scratchpad booking code).
    array_offset_calculator<float, 8> M(is_fwd
            ? scratchpad.template get<float>(key_wino_M)
            : scratchpad.template get<float>(key_wino_V),
            0, jcp.dimM_nb_block, alpha, alpha,
            jcp.dimN_block, jcp.dimM_block * jcp.dimM_reg_block,
            jcp.dimN_reg_block, jcp.dimM_simd_block);

    array_offset_calculator<float, 8> V(is_fwd
            ? scratchpad.template get<float>(key_wino_V)
            : scratchpad.template get<float>(key_wino_M),
            0, alpha, alpha, jcp.dimN_block,
            jcp.dimK_nb_block, jcp.dimK_block,
            jcp.dimN_reg_block, jcp.dimK_reg_block);

    // When oc was padded up to a multiple of the SIMD block, the last bias
    // slice is copied into a zero-initialised local buffer so that only the
    // first (oc_without_padding % oc_simd_block) entries come from the
    // caller's bias array.
    const bool wants_padded_bias = jcp.with_bias
        && jcp.oc_without_padding != jcp.oc;
    float last_slice_bias[simd_w] = {0};
    if (wants_padded_bias) {
        for (int oc = 0; oc < jcp.oc_without_padding % jcp.oc_simd_block; ++oc)
            last_slice_bias[oc] = bias(jcp.dimM / jcp.dimM_simd_block - 1, oc);
    }

    // Weight transform (skipped for inference, where wei_ptr already is U).
    // The U index order differs between forward and backward: the roles of
    // the output-feature and input-feature block indices are exchanged.
    if (jcp.prop_kind != prop_kind::forward_inference) {
        parallel_nd(jcp.nb_oc, jcp.nb_ic, (jcp.oc_block * jcp.oc_reg_block),
            (jcp.ic_block * jcp.ic_reg_block),
            [&](int ofm1, int ifm1, int ofm2, int ifm2) {
            float *U_base_ptr = is_fwd
                ? &(U(ofm1, 0, 0, ifm1, ofm2, ifm2, 0, 0))
                : &(U(ifm1, 0, 0, ofm1, ifm2, ofm2, 0, 0));
            weight_transform_data(jcp,
                &(weights(ofm1 * jcp.oc_block * jcp.oc_reg_block + ofm2,
                    ifm1 * jcp.ic_block * jcp.ic_reg_block + ifm2,
                    0, 0, 0, 0)),
                U_base_ptr);
        });
    }

PRAGMA_OMP(parallel)
    {
    // Thread id selects this thread's slice of M and V.
    int ithr = mkldnn_get_thread_num();

PRAGMA_OMP(for schedule(static))
    for (int tile_block = 0; tile_block < jcp.tile_block; tile_block++) {
        // 1) Input transform for this tile block into V. The input view is
        //    always addressed from image 0; the helper receives tile_block
        //    and presumably locates the tiles itself -- TODO confirm.
        for (int K_blk1 = 0; K_blk1 < jcp.dimK_nb_block; K_blk1++) {
            for (int K_blk2 = 0; K_blk2 < jcp.dimK_block; K_blk2++) {
                input_transform_tileblock_data(
                        tile_block, jcp,
                        &(input(0, K_blk1 * jcp.dimK_block + K_blk2, 0, 0, 0)),
                        &(V(ithr, 0, 0, 0, K_blk1, K_blk2, 0, 0)));
            }
        }

        // 2) One GEMM per (oj, oi) position of the alpha x alpha tile,
        //    M block, K block and N block. K_blk1 is also passed to the
        //    kernel as its last argument (presumably to tell the first K
        //    block, which initialises M, from later ones, which accumulate
        //    -- TODO confirm in gemm_loop_ker).
        for (int oj = 0; oj < alpha; oj++) {
            for (int oi = 0; oi < alpha; oi++) {
                for (int M_blk1 = 0; M_blk1 < jcp.dimM_nb_block; M_blk1++)
                for (int K_blk1 = 0; K_blk1 < jcp.dimK_nb_block; K_blk1++)
                for (int N_blk = 0; N_blk < jcp.dimN_block; N_blk++)
                    kernel_->gemm_loop_ker(
                            (float *)&(M(ithr, M_blk1, oj, oi,
                                    N_blk, 0, 0, 0)),
                            (const float *)&(U(M_blk1, oj, oi, K_blk1,
                                    0, 0, 0, 0)),
                            (const float *)&(V(ithr, oj, oi,
                                    N_blk, K_blk1, 0, 0, 0)), K_blk1);
            }
        }

        // 3) Output transform of M into the destination tensor, applying
        //    bias and the post-ops.
        for (int M_blk1 = 0; M_blk1 < jcp.dimM_nb_block; M_blk1++) {
            for (int M_blk2 = 0; M_blk2 < jcp.dimM_block * jcp.dimM_reg_block;
                  M_blk2++) {
                const int M_blk =
                    M_blk1 * jcp.dimM_block * jcp.dimM_reg_block + M_blk2;

                // NOTE(review): this local deliberately or accidentally
                // shadows the bias_ptr parameter. The last M block uses the
                // padded copy when padding is in effect.
                float *bias_ptr = wants_padded_bias
                    && M_blk == jcp.dimM / jcp.dimM_simd_block - 1
                    ? last_slice_bias : &bias(M_blk, 0);

                output_transform_tileblock_data(tile_block, jcp, p_ops,
                        &(M(ithr, M_blk1, 0, 0, 0, M_blk2, 0, 0)),
                        &(output(0, M_blk, 0, 0, 0)), bias_ptr);
            }
        }
    }
    }
}
void vtkMitkThickSlicesFilterExecute(vtkMitkThickSlicesFilter *self, vtkImageData *inData, T *inPtr, vtkImageData *outData, T *outPtr, int outExt[6], int /*id*/) { int idxX, idxY; int maxX, maxY; vtkIdType inIncX, inIncY, inIncZ; vtkIdType outIncX, outIncY, outIncZ; //int axesNum; int *inExt = inData->GetExtent(); int *wholeExtent; vtkIdType *inIncs; //int useYMin, useYMax, useXMin, useXMax; // find the region to loop over maxX = outExt[1] - outExt[0]; maxY = outExt[3] - outExt[2]; // maxZ = outExt[5] - outExt[4]; // Get the dimensionality of the gradient. //axesNum = self->GetDimensionality(); // Get increments to march through data inData->GetContinuousIncrements(outExt, inIncX, inIncY, inIncZ); outData->GetContinuousIncrements(outExt, outIncX, outIncY, outIncZ); /* // The data spacing is important for computing the gradient. // central differences (2 * ratio). // Negative because below we have (min - max) for dx ... inData->GetSpacing(r); r[0] = -0.5 / r[0]; r[1] = -0.5 / r[1]; r[2] = -0.5 / r[2]; */ // get some other info we need inIncs = inData->GetIncrements(); wholeExtent = inData->GetExtent(); // Move the pointer to the correct starting position. inPtr += (outExt[0]-inExt[0])*inIncs[0] + (outExt[2]-inExt[2])*inIncs[1] + (outExt[4]-inExt[4])*inIncs[2]; // Loop through ouput pixels int _minZ = /*-5 + outExt[4]; if( _minZ < wholeExtent[4]) _minZ=*/wholeExtent[4]; int _maxZ = /* 5 + outExt[4]; if( _maxZ > wholeExtent[5]) _maxZ=*/wholeExtent[5]; if(_maxZ<_minZ) return; double invNum = 1.0 / (_maxZ-_minZ+1) ; switch(self->GetThickSliceMode()) { default: case vtkMitkThickSlicesFilter::MIP: { //MIP for (idxY = 0; idxY <= maxY; idxY++) { //useYMin = ((idxY + outExt[2]) <= wholeExtent[2]) ? 0 : -inIncs[1]; //useYMax = ((idxY + outExt[2]) >= wholeExtent[3]) ? 0 : inIncs[1]; for (idxX = 0; idxX <= maxX; idxX++) { //useXMin = ((idxX + outExt[0]) <= wholeExtent[0]) ? 0 : -inIncs[0]; //useXMax = ((idxX + outExt[0]) >= wholeExtent[1]) ? 
0 : inIncs[0]; T mip = inPtr[_minZ*inIncs[2]]; for(int z = _minZ+1; z<= _maxZ;z++) { T value = inPtr[z*inIncs[2]]; if(value > mip) mip=value; } // do X axis *outPtr = mip; outPtr++; inPtr++; } outPtr += outIncY; inPtr += inIncY; } } break; case vtkMitkThickSlicesFilter::SUM: { //MIP for (idxY = 0; idxY <= maxY; idxY++) { //useYMin = ((idxY + outExt[2]) <= wholeExtent[2]) ? 0 : -inIncs[1]; //useYMax = ((idxY + outExt[2]) >= wholeExtent[3]) ? 0 : inIncs[1]; for (idxX = 0; idxX <= maxX; idxX++) { //useXMin = ((idxX + outExt[0]) <= wholeExtent[0]) ? 0 : -inIncs[0]; //useXMax = ((idxX + outExt[0]) >= wholeExtent[1]) ? 0 : inIncs[0]; double sum = 0; for(int z = _minZ; z<= _maxZ;z++) { T value = inPtr[z*inIncs[2]]; sum += value; } // do X axis *outPtr = static_cast<T>(invNum*sum); outPtr++; inPtr++; } outPtr += outIncY; inPtr += inIncY; } } break; case vtkMitkThickSlicesFilter::WEIGHTED: { const int size = _maxZ-_minZ; std::vector<double> weights(size); double mean = 0.5 * double(_minZ + _maxZ); double sigma_sq = double(size) / 6.0; sigma_sq *= sigma_sq; double sum = 0; int i=0; for(int z = _minZ+1; z<= _maxZ;z++) { double val = exp(-(((double)z-mean)/sigma_sq)); weights[i++] = val; sum += val; } for(i=0; i<size; i++) { weights[i] /= sum; } for (idxY = 0; idxY <= maxY; idxY++) { //useYMin = ((idxY + outExt[2]) <= wholeExtent[2]) ? 0 : -inIncs[1]; //useYMax = ((idxY + outExt[2]) >= wholeExtent[3]) ? 0 : inIncs[1]; for (idxX = 0; idxX <= maxX; idxX++) { //useXMin = ((idxX + outExt[0]) <= wholeExtent[0]) ? 0 : -inIncs[0]; //useXMax = ((idxX + outExt[0]) >= wholeExtent[1]) ? 
0 : inIncs[0]; T mip = inPtr[_minZ*inIncs[2]]; i=0; double mymip = 0; for(int z = _minZ+1; z<= _maxZ;z++) { double value = inPtr[z*inIncs[2]]; mymip+=value*weights[i++]; } mip = static_cast<T>(mymip); // do X axis *outPtr = mip; outPtr++; inPtr++; } outPtr += outIncY; inPtr += inIncY; } } break; case vtkMitkThickSlicesFilter::MINIP: { for (idxY = 0; idxY <= maxY; idxY++) { for (idxX = 0; idxX <= maxX; idxX++) { T mip = inPtr[_minZ*inIncs[2]]; for(int z = _minZ+1; z<= _maxZ;z++) { T value = inPtr[z*inIncs[2]]; if(value < mip) mip=value; } // do X axis *outPtr = mip; outPtr++; inPtr++; } outPtr += outIncY; inPtr += inIncY; } } break; case vtkMitkThickSlicesFilter::MEAN: { const int size = _maxZ-_minZ; //MEAN for (idxY = 0; idxY <= maxY; idxY++) { for (idxX = 0; idxX <= maxX; idxX++) { T sum = 0; for(int z = _minZ; z <= _maxZ;z++) { T value = inPtr[z*inIncs[2]]; sum += value; } T mip = sum/size; // do X axis *outPtr = mip; outPtr++; inPtr++; } outPtr += outIncY; inPtr += inIncY; } } break; } }
// The main program
//
// Driver for a QoI error estimate built from a previously computed
// ("LF", low-fidelity) solution:
//   1. read run parameters from fem_system_params.in and file locations from
//      convdiff_mprime.in,
//   2. read the mesh and the complete equation-systems state of the LF solve,
//   3. configure a steady Newton solver,
//   4. (vestigial) adaptive loop -- the message printed inside says it is
//      expected to be skipped, i.e. max_adaptivesteps is expected to be 0,
//   5. solve the adjoint ("super-adjoint") problem, write it to
//      super_adjoint.exo, re-assemble the residual and combine
//      -0.5 * residual . adjoint with the M_HF / M_LF terms supplied by the
//      system into the QoI error estimate,
//   6. print diagnostics and write an element-wise breakdown of the estimate.
//
// NOTE(review): `transient`, `deltat` and `coarserefinements` are read from
// the input file but not used anywhere in this function.
int main(int argc, char** argv)
{
  // Initialize libMesh
  LibMeshInit init(argc, argv);

  // Parameters
  GetPot infile("fem_system_params.in");
  const Real global_tolerance = infile("global_tolerance", 0.);
  const unsigned int nelem_target = infile("n_elements", 400);
  const bool transient = infile("transient", true);
  const Real deltat = infile("deltat", 0.005);
  unsigned int n_timesteps = infile("n_timesteps", 1);
  //const unsigned int coarsegridsize = infile("coarsegridsize", 1);
  const unsigned int coarserefinements = infile("coarserefinements", 0);
  const unsigned int max_adaptivesteps = infile("max_adaptivesteps", 10);
  //const unsigned int dim = 2;

#ifdef LIBMESH_HAVE_EXODUS_API
  const unsigned int write_interval = infile("write_interval", 5);
#endif

  // Create a mesh, with dimension to be overridden later, distributed
  // across the default MPI communicator.
  Mesh mesh(init.comm());
  GetPot infileForMesh("convdiff_mprime.in");
  std::string find_mesh_here = infileForMesh("mesh","psiLF_mesh.xda");
  mesh.read(find_mesh_here);

  std::cout << "Read in mesh from: " << find_mesh_here << "\n\n";

  // And an object to refine it
  MeshRefinement mesh_refinement(mesh);
  mesh_refinement.coarsen_by_parents() = true;
  mesh_refinement.absolute_global_tolerance() = global_tolerance;
  mesh_refinement.nelem_target() = nelem_target;
  mesh_refinement.refine_fraction() = 0.3;
  mesh_refinement.coarsen_fraction() = 0.3;
  mesh_refinement.coarsen_threshold() = 0.1;

  //mesh_refinement.uniformly_refine(coarserefinements);

  // Print information about the mesh to the screen.
  mesh.print_info();

  // Create an equation systems object.
  EquationSystems equation_systems (mesh);

  // Name system
  ConvDiff_MprimeSys & system =
    equation_systems.add_system<ConvDiff_MprimeSys>("Diff_ConvDiff_MprimeSys");

  // Steady-state problem
  system.time_solver =
    AutoPtr<TimeSolver>(new SteadySolver(system));

  // Sanity check that we are indeed solving a steady problem
  libmesh_assert_equal_to (n_timesteps, 1);

  // Read in all the equation systems data from the LF solve (system, solutions, rhs, etc)
  std::string find_psiLF_here = infileForMesh("psiLF_file","psiLF.xda");
  std::cout << "Looking for psiLF at: " << find_psiLF_here << "\n\n";

  equation_systems.read(find_psiLF_here, READ,
                        EquationSystems::READ_HEADER |
                        EquationSystems::READ_DATA |
                        EquationSystems::READ_ADDITIONAL_DATA);

  // Check that the norm of the solution read in is what we expect it to be
  Real readin_L2 = system.calculate_norm(*system.solution, 0, L2);
  std::cout << "Read in solution norm: "<< readin_L2 << std::endl << std::endl;

  //DEBUG
  //equation_systems.write("right_back_out.xda", WRITE, EquationSystems::WRITE_DATA |
  //                       EquationSystems::WRITE_ADDITIONAL_DATA);
#ifdef LIBMESH_HAVE_GMV
  //GMVIO(equation_systems.get_mesh()).write_equation_systems(std::string("right_back_out.gmv"), equation_systems);
#endif

  // Initialize the system
  //equation_systems.init ();  //already initialized by read-in

  // And the nonlinear solver options.
  // The raw pointer is handed to the AutoPtr on the next line; `solver` is
  // kept only as a typed handle for setting NewtonSolver options.
  NewtonSolver *solver = new NewtonSolver(system);
  system.time_solver->diff_solver() = AutoPtr<DiffSolver>(solver);
  solver->quiet = infile("solver_quiet", true);
  solver->verbose = !solver->quiet;
  solver->max_nonlinear_iterations =
    infile("max_nonlinear_iterations", 15);
  solver->relative_step_tolerance =
    infile("relative_step_tolerance", 1.e-3);
  solver->relative_residual_tolerance =
    infile("relative_residual_tolerance", 0.0);
  solver->absolute_residual_tolerance =
    infile("absolute_residual_tolerance", 0.0);

  // And the linear solver options
  solver->max_linear_iterations =
    infile("max_linear_iterations", 50000);
  solver->initial_linear_tolerance =
    infile("initial_linear_tolerance", 1.e-3);

  // Print information about the system to the screen.
  equation_systems.print_info();

  // Now we begin the timestep loop to compute the time-accurate
  // solution of the equations...not that this is transient, but eh, why not...
  for (unsigned int t_step=0; t_step != n_timesteps; ++t_step)
    {
      // A pretty update message
      std::cout << "\n\nSolving time step " << t_step << ", time = "
                << system.time << std::endl;

      // Adaptively solve the timestep
      unsigned int a_step = 0;
      for (; a_step != max_adaptivesteps; ++a_step)
        {
          // VESTIGIAL for now ('vestigial' eh ? ;) )
          // This body only runs when max_adaptivesteps > 0; the message
          // below flags that as unexpected.
          std::cout << "\n\n I should be skipped what are you doing here lalalalalalala *!**!*!*!*!*!* \n\n";

          system.solve();
          system.postprocess();

          ErrorVector error;
          AutoPtr<ErrorEstimator> error_estimator;

          // To solve to a tolerance in this problem we
          // need a better estimator than Kelly
          if (global_tolerance != 0.)
            {
              // We can't adapt to both a tolerance and a mesh
              // size at once
              libmesh_assert_equal_to (nelem_target, 0);

              UniformRefinementEstimator *u =
                new UniformRefinementEstimator;

              // The lid-driven cavity problem isn't in H1, so
              // lets estimate L2 error
              u->error_norm = L2;

              error_estimator.reset(u);
            }
          else
            {
              // If we aren't adapting to a tolerance we need a
              // target mesh size
              libmesh_assert_greater (nelem_target, 0);

              // Kelly is a lousy estimator to use for a problem
              // not in H1 - if we were doing more than a few
              // timesteps we'd need to turn off or limit the
              // maximum level of our adaptivity eventually
              error_estimator.reset(new KellyErrorEstimator);
            }

          // Calculate error
          std::vector<Real> weights(9,1.0);  // based on u, v, p, c, their adjoints, and source parameter

          // Keep the same default norm type.
          std::vector<FEMNormType>
            norms(1, error_estimator->error_norm.type(0));
          error_estimator->error_norm = SystemNorm(norms, weights);

          error_estimator->estimate_error(system, error);

          // Print out status at each adaptive step.
          Real global_error = error.l2_norm();
          std::cout << "Adaptive step " << a_step << ": " << std::endl;
          if (global_tolerance != 0.)
            std::cout << "Global_error = " << global_error
                      << std::endl;
          if (global_tolerance != 0.)
            std::cout << "Worst element error = " << error.maximum()
                      << ", mean = " << error.mean() << std::endl;

          if (global_tolerance != 0.)
            {
              // If we've reached our desired tolerance, we
              // don't need any more adaptive steps
              if (global_error < global_tolerance)
                break;
              mesh_refinement.flag_elements_by_error_tolerance(error);
            }
          else
            {
              // If flag_elements_by_nelem_target returns true, this
              // should be our last adaptive step.
              if (mesh_refinement.flag_elements_by_nelem_target(error))
                {
                  mesh_refinement.refine_and_coarsen_elements();
                  equation_systems.reinit();
                  // Force the "one last solve" branch below.
                  a_step = max_adaptivesteps;
                  break;
                }
            }

          // Carry out the adaptive mesh refinement/coarsening
          mesh_refinement.refine_and_coarsen_elements();
          equation_systems.reinit();

          std::cout << "Refined mesh to "
                    << mesh.n_active_elem()
                    << " active elements and "
                    << equation_systems.n_active_dofs()
                    << " active dofs." << std::endl;
        } // End loop over adaptive steps

      // Do one last solve if necessary.
      // With max_adaptivesteps == 0 the loop above never runs and this
      // branch is taken immediately; it holds all of the actual work.
      if (a_step == max_adaptivesteps)
        {
          QoISet qois;
          std::vector<unsigned int> qoi_indices;

          qoi_indices.push_back(0);
          qois.add_indices(qoi_indices);

          qois.set_weight(0, 1.0);

          // NOTE(review): the flag is set to true although the trailing
          // comment says the QoI has no side terms -- confirm which is meant.
          system.assemble_qoi_sides = true; //QoI doesn't involve sides

          std::cout << "\n~*~*~*~*~*~*~*~*~ adjoint solve start ~*~*~*~*~*~*~*~*~\n" << std::endl;
          std::pair<unsigned int, Real> adjsolve = system.adjoint_solve();
          std::cout << "number of iterations to solve adjoint: " << adjsolve.first << std::endl;
          std::cout << "final residual of adjoint solve: " << adjsolve.second << std::endl;
          std::cout << "\n~*~*~*~*~*~*~*~*~ adjoint solve end ~*~*~*~*~*~*~*~*~" << std::endl;

          NumericVector<Number> &dual_solution = system.get_adjoint_solution(0);
          NumericVector<Number> &primal_solution = *system.solution;

          // Temporarily swap the adjoint into the primal slot so that the
          // writer outputs the adjoint; swapped back right after.
          // NOTE(review): this ExodusII_IO call is not inside the
          // LIBMESH_HAVE_EXODUS_API guard used elsewhere in this function.
          primal_solution.swap(dual_solution);
          ExodusII_IO(mesh).write_timestep("super_adjoint.exo",
                                          equation_systems,
                                          1, /* This number indicates how many time steps
                                                are being written to the file */
                                          system.time);
          primal_solution.swap(dual_solution);

          system.assemble(); //overwrite residual read in from psiLF solve

          // The total error estimate
          system.postprocess(); //to compute M_HF(psiLF) and M_LF(psiLF) terms
          Real QoI_error_estimate = (-0.5*(system.rhs)->dot(dual_solution)) + system.get_MHF_psiLF() - system.get_MLF_psiLF();
          std::cout << "\n\n 0.5*M'_HF(psiLF)(superadj): " << std::setprecision(17) << 0.5*(system.rhs)->dot(dual_solution) << "\n";
          std::cout << " M_HF(psiLF): " << std::setprecision(17) << system.get_MHF_psiLF() << "\n";
          std::cout << " M_LF(psiLF): " << std::setprecision(17) << system.get_MLF_psiLF() << "\n";
          std::cout << "\n\n Residual L2 norm: " << system.calculate_norm(*system.rhs, L2) << "\n";
          std::cout << " Residual discrete L2 norm: " << system.calculate_norm(*system.rhs, DISCRETE_L2) << "\n";
          std::cout << " Super-adjoint L2 norm: " << system.calculate_norm(dual_solution, L2) << "\n";
          std::cout << " Super-adjoint discrete L2 norm: " << system.calculate_norm(dual_solution, DISCRETE_L2) << "\n";
          std::cout << "\n\n QoI error estimate: " << std::setprecision(17) << QoI_error_estimate << "\n\n";

          //DEBUG
          std::cout << "\n------------ herp derp ------------" << std::endl;
          //libMesh::out.precision(16);
          //dual_solution.print();
          //system.get_adjoint_rhs().print();

          // Residual of the adjoint system: (transposed matrix) * adjoint - adjoint rhs.
          // NOTE(review): the first vector_mult result is overwritten by the
          // second one below. Also, adjmat is a reference to system.matrix
          // itself, so get_transpose is asked to write into its own source;
          // the matrix is not transposed back afterwards -- confirm that
          // nothing later relies on the untransposed matrix.
          AutoPtr<NumericVector<Number> > adjresid = system.solution->clone();
          (system.matrix)->vector_mult(*adjresid,system.get_adjoint_solution(0));
          SparseMatrix<Number>& adjmat = *system.matrix;
          (system.matrix)->get_transpose(adjmat);
          adjmat.vector_mult(*adjresid,system.get_adjoint_solution(0));
          //std::cout << "******************** matrix-superadj product (libmesh) ************************" << std::endl;
          //adjresid->print();
          adjresid->add(-1.0, system.get_adjoint_rhs(0));
          //std::cout << "******************** superadjoint system residual (libmesh) ***********************" << std::endl;
          //adjresid->print();
          std::cout << "\n\nadjoint system residual (discrete L2): " << system.calculate_norm(*adjresid,DISCRETE_L2) << std::endl;
          std::cout << "adjoint system residual (L2, all): " << system.calculate_norm(*adjresid,L2) << std::endl;
          std::cout << "adjoint system residual (L2, 0): " << system.calculate_norm(*adjresid,0,L2) << std::endl;
          std::cout << "adjoint system residual (L2, 1): " << system.calculate_norm(*adjresid,1,L2) << std::endl;
          std::cout << "adjoint system residual (L2, 2): " << system.calculate_norm(*adjresid,2,L2) << std::endl;
          std::cout << "adjoint system residual (L2, 3): " << system.calculate_norm(*adjresid,3,L2) << std::endl;
          std::cout << "adjoint system residual (L2, 4): " << system.calculate_norm(*adjresid,4,L2) << std::endl;
          std::cout << "adjoint system residual (L2, 5): " << system.calculate_norm(*adjresid,5,L2) << std::endl;
          /*
          AutoPtr<NumericVector<Number> > sadj_matlab = system.solution->clone();
          AutoPtr<NumericVector<Number> > adjresid_matlab = system.solution->clone();
          if(FILE *fp=fopen("superadj_matlab.txt","r")){
            Real value;
            int counter = 0;
            int flag = 1;
            while(flag != -1){
              flag = fscanf(fp,"%lf",&value);
              if(flag != -1){
                sadj_matlab->set(counter, value);
                counter += 1;
              }
            }
            fclose(fp);
          }
          (system.matrix)->vector_mult(*adjresid_matlab,*sadj_matlab);
          //std::cout << "******************** matrix-superadj product (matlab) ***********************" << std::endl;
          //adjresid_matlab->print();
          adjresid_matlab->add(-1.0, system.get_adjoint_rhs(0));
          //std::cout << "******************** superadjoint system residual (matlab) ***********************" << std::endl;
          //adjresid_matlab->print();
          std::cout << "\n\nmatlab import adjoint system residual (discrete L2): " << system.calculate_norm(*adjresid_matlab,DISCRETE_L2) << "\n" << std::endl;
          */
          /*
          AutoPtr<NumericVector<Number> > sadj_fwd_hack = system.solution->clone();
          AutoPtr<NumericVector<Number> > adjresid_fwd_hack = system.solution->clone();
          if(FILE *fp=fopen("superadj_forward_hack.txt","r")){
            Real value;
            int counter = 0;
            int flag = 1;
            while(flag != -1){
              flag = fscanf(fp,"%lf",&value);
              if(flag != -1){
                sadj_fwd_hack->set(counter, value);
                counter += 1;
              }
            }
            fclose(fp);
          }
          (system.matrix)->vector_mult(*adjresid_fwd_hack,*sadj_fwd_hack);
          //std::cout << "******************** matrix-superadj product (fwd_hack) ***********************" << std::endl;
          //adjresid_fwd_hack->print();
          adjresid_fwd_hack->add(-1.0, system.get_adjoint_rhs(0));
          //std::cout << "******************** superadjoint system residual (fwd_hack) ***********************" << std::endl;
          //adjresid_fwd_hack->print();
          std::cout << "\n\nfwd_hack import adjoint system residual (discrete L2): " << system.calculate_norm(*adjresid_fwd_hack,DISCRETE_L2) << "\n" << std::endl;
          std::cout << "fwd_hack adjoint system residual (L2, 0): " << system.calculate_norm(*adjresid_fwd_hack,0,L2) << std::endl;
          std::cout << "fwd_hack adjoint system residual (L2, 1): " << system.calculate_norm(*adjresid_fwd_hack,1,L2) << std::endl;
          std::cout << "fwd_hack adjoint system residual (L2, 2): " << system.calculate_norm(*adjresid_fwd_hack,2,L2) << std::endl;
          std::cout << "fwd_hack adjoint system residual (L2, 3): " << system.calculate_norm(*adjresid_fwd_hack,3,L2) << std::endl;
          std::cout << "fwd_hack adjoint system residual (L2, 4): " << system.calculate_norm(*adjresid_fwd_hack,4,L2) << std::endl;
          std::cout << "fwd_hack adjoint system residual (L2, 5): " << system.calculate_norm(*adjresid_fwd_hack,5,L2) << std::endl;
          */
          //std::cout << "************************ system.matrix ***********************" << std::endl;
          //system.matrix->print();

          std::cout << "\n------------ herp derp ------------" << std::endl;

          // The cell wise breakdown.
          // The vector has one entry per residual entry; the M_HF / M_LF
          // terms are only added for indices below the element count.
          ErrorVector cell_wise_error;
          cell_wise_error.resize((system.rhs)->size());
          for(unsigned int i = 0; i < (system.rhs)->size() ; i++)
            {
              if(i < system.get_mesh().n_elem())
                cell_wise_error[i] = fabs(-0.5*((system.rhs)->el(i) * dual_solution(i))
                                          + system.get_MHF_psiLF(i) - system.get_MLF_psiLF(i));
              else
                cell_wise_error[i] = fabs(-0.5*((system.rhs)->el(i) * dual_solution(i)));

              /*csv from 'save data' from gmv output gives a few values at each node point (value
                for every element that shares that node), yet paraview display only seems to show one
                of them -> the value in an element is given at each of the nodes that it has, hence the
                repetition; what is displayed in paraview is each element's value; even though MHF_psiLF
                and MLF_psiLF are stored by element this seems to give elemental contributions that
                agree with if we had taken the superadj-residual dot product by integrating over elements*/

              /*at higher mesh resolutions and lower k, weird-looking artifacts start to appear and
                it no longer agrees with output from manual integration of superadj-residual...*/
            }
          // Plot it
          std::ostringstream error_gmv;
          error_gmv << "error.gmv";
          cell_wise_error.plot_error(error_gmv.str(), equation_systems.get_mesh());

          //alternate element-wise breakdown, outputed as values matched to element centroids; for matlab plotz
          // postprocess(1) runs with the adjoint swapped into the primal
          // slot and postprocess(2) with the vectors back in place.
          // NOTE(review): the third swap below leaves the adjoint in
          // system.solution for the remainder of the program -- confirm
          // this is intended.
          primal_solution.swap(dual_solution);
          system.postprocess(1);
          primal_solution.swap(dual_solution);
          system.postprocess(2);
          std::cout << "\n\n -0.5*M'_HF(psiLF)(superadj): " << std::setprecision(17) << system.get_half_adj_weighted_resid() << "\n";
          primal_solution.swap(dual_solution);

          std::string write_error_here = infileForMesh("error_est_output_file", "error_est_breakdown.dat");
          std::ofstream output(write_error_here);
          // One line per element: centroid x, centroid y, |element estimate|.
          // NOTE(review): if the file cannot be opened, nothing is written
          // and no message is printed.
          for(unsigned int i = 0 ; i < system.get_mesh().n_elem(); i++)
            {
              Point elem_cent = system.get_mesh().elem(i)->centroid();
              if(output.is_open())
                {
                  output << elem_cent(0) << " " << elem_cent(1) << " "
                         << fabs(system.get_half_adj_weighted_resid(i) + system.get_MHF_psiLF(i) - system.get_MLF_psiLF(i)) << "\n";
                }
            }
          output.close();

        } // End if at max adaptive steps

#ifdef LIBMESH_HAVE_EXODUS_API
      // Write out this timestep if we're requested to
      // (the actual write is commented out; only the stream is declared)
      if ((t_step+1)%write_interval == 0)
        {
          std::ostringstream file_name;
          /*
          // We write the file in the ExodusII format.
          file_name << "out_"
                    << std::setw(3)
                    << std::setfill('0')
                    << std::right
                    << t_step+1
                    << ".e";

          //this should write out the primal which should be the same as what's read in...
          ExodusII_IO(mesh).write_timestep(file_name.str(),
                                          equation_systems,
                                          1, //number of time steps written to file
                                          system.time);
          */
        }
#endif // #ifdef LIBMESH_HAVE_EXODUS_API
    }

  // All done.
  return 0;

} //end main
/**
 * Evaluate the potential and its gradient for the given parameter vector.
 *
 * The potential is assembled from two contributions:
 *   1. a quadratic penalty per layer,
 *        -0.5 * (hyperparameters[0] * |W|^2 + hyperparameters[1] * |b|^2),
 *      with gradient blocks -hyperparameters[0] * W and -hyperparameters[1] * b;
 *   2. a per-sample term outputs()[class_idx], where class_idx is the index
 *      returned by get_nonzero_idx() for the sample's target.
 *
 * NOTE(review): only hyperparameters[0] and hyperparameters[1] are read; the
 * per-layer indexing (layer_idx * 2, layer_idx * 2 + 1) was disabled in the
 * original code while debugging.
 *
 * @param parameters       Flat parameter vector (weights and biases of all layers).
 * @param hyperparameters  Scale factors for the weight / bias penalties.
 * @param view             Data set; first(i) is passed on as the inputs and
 *                         second(i) as the target of sample i.
 * @param potential        [out] Overwritten with the resulting potential.
 * @param gradient         [out] Weight/bias blocks are overwritten, then the
 *                         per-sample gradients are accumulated on top.
 */
void potential_and_gradient(const Eigen::VectorXd& parameters, const Eigen::VectorXd& hyperparameters, View& view, double& potential, Eigen::VectorXd& gradient)
{
    potential = 0;

    // Penalty part: depends only on the parameters, not on the data.
    for (size_t layer = 0; layer < count_weights_layers(); ++layer)
    {
        const double weight_scale = hyperparameters[0];
        const double bias_scale = hyperparameters[1];

        const double weight_term = weight_scale * weights(parameters, layer).squaredNorm();
        const double bias_term = bias_scale * biases(parameters, layer).squaredNorm();
        potential -= 0.5 * (weight_term + bias_term);

        weights(gradient, layer) = (weights(parameters, layer).array() * -weight_scale).matrix();
        biases(gradient, layer) = (biases(parameters, layer).array() * -bias_scale).matrix();
    }

    // Data part: one contribution per observation in the view.
    for (size_t sample = 0; sample < view.size(); ++sample)
    {
        // Class label of this observation.
        size_t class_idx = get_nonzero_idx(view.second(sample));

        // Presumably fills outputs() and temp_gradient_ for the labelled class
        // (the commented-out log() in the original suggests outputs() already
        // holds log-values) -- TODO confirm against log_gradient_and_output.
        log_gradient_and_output(class_idx, view.first(sample), parameters, outputs(), temp_gradient_);

        gradient += temp_gradient_;

        // NOTE (from original): does it matter here when -E[theta] = -INF?
        potential += outputs()[class_idx];
    }
}
/**
 * Fit the diphoton mass spectrum of `tree` with a set of background models and
 * an AIC-weighted combination of them, storing everything in a new workspace.
 *
 * For every background model in the list this function
 *   - fits the background-only pdf ("bonly_<tag>_ext") to the mass sidebands,
 *   - stores the fit result and a blinded plot (121 < mgg < 130 hidden),
 *   - freezes the fitted background parameters,
 *   - builds a Gaussian-signal + background model, fits it to the full data and
 *     stores the fit result, the fit plot and profile-likelihood plots in the
 *     signal yield and in the signal mean.
 * Afterwards the corrected AIC of every background-only fit is computed, turned
 * into normalised AIC weights, and the same signal+background procedure is
 * repeated with the AIC-weighted sum of the background pdfs.
 *
 * @param tree         Input tree providing mgg, MR, t1Rsq, hem1_M, hem2_M, ptgg.
 * @param forceSigma   If != -1 the signal width is fixed to this value.
 * @param constrainMu  If true, a Gaussian constraint (125.1 +- 0.24) is applied
 *                     to the signal mean as an external constraint.
 * @param forceMu      If != -1 the signal mean is fixed to this value.
 * @return Newly allocated workspace holding all pdfs, fit results and frames.
 *         It is created with `new` here and handed to the caller.
 */
RooWorkspace* makeInvertedANFit(TTree* tree, float forceSigma=-1, bool constrainMu=false, float forceMu=-1) {
  RooWorkspace *ws = new RooWorkspace("ws","");

  std::vector< TString (*)(TString, RooRealVar&, RooWorkspace&) > bkgPdfList;
  bkgPdfList.push_back(makeSingleExp);
  bkgPdfList.push_back(makeDoubleExp);
#if DEBUG==0
  //bkgPdfList.push_back(makeTripleExp);
  bkgPdfList.push_back(makeModExp);
  bkgPdfList.push_back(makeSinglePow);
  bkgPdfList.push_back(makeDoublePow);
  bkgPdfList.push_back(makePoly2);
  bkgPdfList.push_back(makePoly3);
#endif

  RooRealVar mgg("mgg","m_{#gamma#gamma}",103,160,"GeV");
  mgg.setBins(38);
  mgg.setRange("sideband_low", 103,120);
  mgg.setRange("sideband_high",131,160);
  mgg.setRange("signal",120,131);

  RooRealVar MR("MR","",0,3000,"GeV");
  MR.setBins(60);

  RooRealVar Rsq("t1Rsq","",0,1,"GeV");
  Rsq.setBins(20);

  RooRealVar hem1_M("hem1_M","",-1,2000,"GeV");
  hem1_M.setBins(40);

  RooRealVar hem2_M("hem2_M","",-1,2000,"GeV");
  hem2_M.setBins(40);

  RooRealVar ptgg("ptgg","p_{T}^{#gamma#gamma}",0,500,"GeV");
  ptgg.setBins(50);

  RooDataSet data("data","",tree,RooArgSet(mgg,MR,Rsq,hem1_M,hem2_M,ptgg));

  // reduce() hands back a new dataset owned by us; it is released before returning.
  RooDataSet* blind_data = static_cast<RooDataSet*>(data.reduce("mgg<121 || mgg>130"));

  std::vector<TString> tags;

  //fit many different background models
  for(auto func = bkgPdfList.begin(); func != bkgPdfList.end(); func++) {
    TString tag = (*func)("bonly",mgg,*ws);
    tags.push_back(tag);

    RooAbsPdf* bonlyExt = ws->pdf("bonly_"+tag+"_ext");

    // Quick pre-fit first, then the careful fit whose result is saved.
    bonlyExt->fitTo(data,RooFit::Strategy(0),RooFit::Extended(kTRUE),RooFit::Range("sideband_low,sideband_high"));
    RooFitResult* bres = bonlyExt->fitTo(data,RooFit::Strategy(2),RooFit::Save(kTRUE),RooFit::Extended(kTRUE),RooFit::Range("sideband_low,sideband_high"));
    bres->SetName(tag+"_bonly_fitres");
    ws->import(*bres);
    delete bres; // the workspace keeps its own clone

    //make blinded fit
    RooPlot *fmgg_b = mgg.frame();
    blind_data->plotOn(fmgg_b,RooFit::Range("sideband_low,sideband_high"));
    // RooPlot::addObject takes ownership of the object, so the box must live on
    // the heap; it is destroyed together with the frame.
    TBox* blindBox = new TBox(121,fmgg_b->GetMinimum()-(fmgg_b->GetMaximum()-fmgg_b->GetMinimum())*0.015,130,fmgg_b->GetMaximum());
    blindBox->SetFillColor(kGray);
    fmgg_b->addObject(blindBox);
    bonlyExt->plotOn(fmgg_b,RooFit::LineColor(kRed),RooFit::Range("Full"),RooFit::NormRange("sideband_low,sideband_high"));

    fmgg_b->SetName(tag+"_blinded_frame");
    ws->import(*fmgg_b);
    delete fmgg_b;

    //set all the parameters constant
    RooArgSet* vars = ws->pdf("bonly_"+tag)->getVariables();
    RooFIter iter = vars->fwdIterator();
    RooAbsArg* a;
    while( (a = iter.next()) ){
      if(std::string(a->GetName()).compare("mgg")==0) continue;
      static_cast<RooRealVar*>(a)->setConstant(kTRUE);
    }
    delete vars; // getVariables() returns a caller-owned set

    //make the background portion of the s+b fit
    (*func)("b",mgg,*ws);

    RooRealVar sigma(tag+"_s_sigma","",5,0,100);
    if(forceSigma!=-1) {
      sigma.setVal(forceSigma);
      sigma.setConstant(true);
    }
    RooRealVar mu(tag+"_s_mu","",126,120,132);
    if(forceMu!=-1) {
      mu.setVal(forceMu);
      mu.setConstant(true);
    }
    RooGaussian sig(tag+"_sig_model","",mgg,mu,sigma);
    RooRealVar Nsig(tag+"_sb_Ns","",5,0,100);
    RooRealVar Nbkg(tag+"_sb_Nb","",100,0,100000);

    RooRealVar HiggsMass("HiggsMass","",125.1);
    RooRealVar HiggsMassError("HiggsMassError","",0.24);
    RooGaussian HiggsMassConstraint("HiggsMassConstraint","",mu,HiggsMass,HiggsMassError);

    RooAddPdf fitModel(tag+"_sb_model","",RooArgList( *ws->pdf("b_"+tag), sig ),RooArgList(Nbkg,Nsig));

    RooFitResult* sbres;
    RooAbsReal* nll;
    if(constrainMu) {
      fitModel.fitTo(data,RooFit::Strategy(0),RooFit::Extended(kTRUE),RooFit::ExternalConstraints(RooArgSet(HiggsMassConstraint)));
      sbres = fitModel.fitTo(data,RooFit::Strategy(2),RooFit::Save(kTRUE),RooFit::Extended(kTRUE),RooFit::ExternalConstraints(RooArgSet(HiggsMassConstraint)));
      nll = fitModel.createNLL(data,RooFit::NumCPU(4),RooFit::Extended(kTRUE),RooFit::ExternalConstraints(RooArgSet(HiggsMassConstraint)));
    } else {
      fitModel.fitTo(data,RooFit::Strategy(0),RooFit::Extended(kTRUE));
      sbres = fitModel.fitTo(data,RooFit::Strategy(2),RooFit::Save(kTRUE),RooFit::Extended(kTRUE));
      nll = fitModel.createNLL(data,RooFit::NumCPU(4),RooFit::Extended(kTRUE));
    }
    sbres->SetName(tag+"_sb_fitres");
    ws->import(*sbres);
    ws->import(fitModel);

    RooPlot *fmgg = mgg.frame();
    data.plotOn(fmgg);
    fitModel.plotOn(fmgg);
    ws->pdf("b_"+tag+"_ext")->plotOn(fmgg,RooFit::LineColor(kRed),RooFit::Range("Full"),RooFit::NormRange("Full"));
    fmgg->SetName(tag+"_frame");
    ws->import(*fmgg);
    delete fmgg;

    RooMinuit(*nll).migrad();

    RooPlot *fNs = Nsig.frame(0,25);
    fNs->SetName(tag+"_Nsig_pll");
    RooAbsReal *pll = nll->createProfile(Nsig);
    //nll->plotOn(fNs,RooFit::ShiftToZero(),RooFit::LineColor(kRed));
    pll->plotOn(fNs);
    ws->import(*fNs);
    delete fNs;

    RooPlot *fmu = mu.frame(125,132);
    fmu->SetName(tag+"_mu_pll");
    RooAbsReal *pll_mu = nll->createProfile(mu);
    pll_mu->plotOn(fmu);
    ws->import(*fmu);
    delete fmu;

    // Release the caller-owned helpers while the model they refer to is still
    // alive: profiles first (they depend on the NLL), then the NLL itself.
    delete pll_mu;
    delete pll;
    delete nll;
    delete sbres;
  }

  RooArgSet weights("weights");
  RooArgSet pdfs_bonly("pdfs_bonly");
  RooArgSet pdfs_b("pdfs_b");

  RooRealVar minAIC("minAIC","",1E10);

  //compute AIC stuff
  for(auto t = tags.begin(); t!=tags.end(); t++) {
    RooAbsPdf *p_bonly = ws->pdf("bonly_"+*t);
    RooAbsPdf *p_b = ws->pdf("b_"+*t);
    // Result of the background-only sideband fit stored in the first loop.
    RooFitResult *bonlyRes = static_cast<RooFitResult*>(ws->obj(*t+"_bonly_fitres"));

    // Number of model parameters; the returned set is owned by us.
    RooArgSet* bonlyParams = p_bonly->getParameters(RooArgSet(mgg));
    RooRealVar k(*t+"_b_k","",bonlyParams->getSize());
    delete bonlyParams;

    RooRealVar nll(*t+"_b_minNll","",bonlyRes->minNll());
    RooRealVar Npts(*t+"_b_N","",blind_data->sumEntries());
    // Small-sample corrected AIC: 2*nll + 2k + 2k(k+1)/(N-k-1)
    RooFormulaVar AIC(*t+"_b_AIC","2*@0+2*@1+2*@1*(@1+1)/(@2-@1-1)",RooArgSet(nll,k,Npts));
    ws->import(AIC);
    if(AIC.getVal() < minAIC.getVal()) {
      minAIC.setVal(AIC.getVal());
    }
    //aicExpSum+=TMath::Exp(-0.5*AIC.getVal()); //we will need this precomputed for the next step
    pdfs_bonly.add(*p_bonly);
    pdfs_b.add(*p_b);
  }
  ws->import(minAIC);

  //compute the AIC weight
  float aicExpSum=0;
  for(auto t = tags.begin(); t!=tags.end(); t++) {
    RooFormulaVar *AIC = static_cast<RooFormulaVar*>(ws->obj(*t+"_b_AIC"));
    aicExpSum+=TMath::Exp(-0.5*(AIC->getVal()-minAIC.getVal())); //we will need this precomputed for the next step
  }
  std::cout << "aicExpSum: " << aicExpSum << std::endl;

  for(auto t = tags.begin(); t!=tags.end(); t++) {
    RooFormulaVar *AIC = static_cast<RooFormulaVar*>(ws->obj(*t+"_b_AIC"));
    // NOTE: these weights are referenced (not copied) by the RooAddPdfs built
    // below, so they have to outlive those objects and are intentionally not
    // deleted inside this function.
    RooRealVar *AICw = new RooRealVar(*t+"_b_AICWeight","",TMath::Exp(-0.5*(AIC->getVal()-minAIC.getVal()))/aicExpSum);
    if( TMath::IsNaN(AICw->getVal()) ) {AICw->setVal(0);}
    ws->import(*AICw);
    std::cout << *t << ": " << AIC->getVal()-minAIC.getVal() << " " << AICw->getVal() << std::endl;
    weights.add(*AICw);
  }
  RooAddPdf bonly_AIC("bonly_AIC","",pdfs_bonly,weights);
  RooAddPdf b_AIC("b_AIC","",pdfs_b,weights);

  //b_AIC.fitTo(data,RooFit::Strategy(0),RooFit::Extended(kTRUE),RooFit::Range("sideband_low,sideband_high"));
  //RooFitResult* bres = b_AIC.fitTo(data,RooFit::Strategy(2),RooFit::Save(kTRUE),RooFit::Extended(kTRUE),RooFit::Range("sideband_low,sideband_high"));
  //bres->SetName("AIC_b_fitres");
  //ws->import(*bres);

  //make blinded fit
  RooPlot *fmgg_b = mgg.frame(RooFit::Range("sideband_low,sideband_high"));
  blind_data->plotOn(fmgg_b,RooFit::Range("sideband_low,sideband_high"));
  // Heap-allocated because the frame takes ownership (see RooPlot::addObject).
  TBox* blindBox = new TBox(121,fmgg_b->GetMinimum()-(fmgg_b->GetMaximum()-fmgg_b->GetMinimum())*0.015,130,fmgg_b->GetMaximum());
  blindBox->SetFillColor(kGray);
  fmgg_b->addObject(blindBox);
  bonly_AIC.plotOn(fmgg_b,RooFit::LineColor(kRed),RooFit::Range("Full"),RooFit::NormRange("sideband_low,sideband_high"));
  fmgg_b->SetName("AIC_blinded_frame");
  ws->import(*fmgg_b);
  delete fmgg_b;

#if 1
  RooRealVar sigma("AIC_s_sigma","",5,0,100);
  if(forceSigma!=-1) {
    sigma.setVal(forceSigma);
    sigma.setConstant(true);
  }
  RooRealVar mu("AIC_s_mu","",126,120,132);
  if(forceMu!=-1) {
    mu.setVal(forceMu);
    mu.setConstant(true);
  }
  RooGaussian sig("AIC_sig_model","",mgg,mu,sigma);
  RooRealVar Nsig("AIC_sb_Ns","",5,0,100);
  RooRealVar Nbkg("AIC_sb_Nb","",100,0,100000);

  RooRealVar HiggsMass("HiggsMass","",125.1);
  RooRealVar HiggsMassError("HiggsMassError","",0.24);
  RooGaussian HiggsMassConstraint("HiggsMassConstraint","",mu,HiggsMass,HiggsMassError);

  RooAddPdf fitModel("AIC_sb_model","",RooArgList( b_AIC, sig ),RooArgList(Nbkg,Nsig));

  RooFitResult* sbres;
  RooAbsReal *nll;
  if(constrainMu) {
    fitModel.fitTo(data,RooFit::Strategy(0),RooFit::Extended(kTRUE),RooFit::ExternalConstraints(RooArgSet(HiggsMassConstraint)));
    sbres = fitModel.fitTo(data,RooFit::Strategy(2),RooFit::Save(kTRUE),RooFit::Extended(kTRUE),RooFit::ExternalConstraints(RooArgSet(HiggsMassConstraint)));
    nll = fitModel.createNLL(data,RooFit::NumCPU(4),RooFit::Extended(kTRUE),RooFit::ExternalConstraints(RooArgSet(HiggsMassConstraint)));
  } else {
    fitModel.fitTo(data,RooFit::Strategy(0),RooFit::Extended(kTRUE));
    sbres = fitModel.fitTo(data,RooFit::Strategy(2),RooFit::Save(kTRUE),RooFit::Extended(kTRUE));
    nll = fitModel.createNLL(data,RooFit::NumCPU(4),RooFit::Extended(kTRUE));
  }
  assert(nll!=0);

  sbres->SetName("AIC_sb_fitres");
  ws->import(*sbres);
  ws->import(fitModel);

  RooPlot *fmgg = mgg.frame();
  data.plotOn(fmgg);
  fitModel.plotOn(fmgg);
  ws->pdf("b_AIC")->plotOn(fmgg,RooFit::LineColor(kRed),RooFit::Range("Full"),RooFit::NormRange("Full"));
  fmgg->SetName("AIC_frame");
  ws->import(*fmgg);
  delete fmgg;

  RooMinuit(*nll).migrad();

  RooPlot *fNs = Nsig.frame(0,25);
  fNs->SetName("AIC_Nsig_pll");
  RooAbsReal *pll = nll->createProfile(Nsig);
  //nll->plotOn(fNs,RooFit::ShiftToZero(),RooFit::LineColor(kRed));
  pll->plotOn(fNs);
  ws->import(*fNs);
  delete fNs;

  RooPlot *fmu = mu.frame(125,132);
  fmu->SetName("AIC_mu_pll");
  RooAbsReal *pll_mu = nll->createProfile(mu);
  pll_mu->plotOn(fmu);
  ws->import(*fmu);
  delete fmu;

  // Same clean-up order as in the per-model loop.
  delete pll_mu;
  delete pll;
  delete nll;
  delete sbres;

  std::cout << "min AIC: " << minAIC.getVal() << std::endl;
  for(auto t = tags.begin(); t!=tags.end(); t++) {
    RooFormulaVar *AIC = static_cast<RooFormulaVar*>(ws->obj(*t+"_b_AIC"));
    RooRealVar *AICw = ws->var(*t+"_b_AICWeight");
    RooRealVar* k = ws->var(*t+"_b_k");
    // LaTeX table row: model & #parameters & delta-AIC & AIC weight
    printf("%s & %0.0f & %0.2f & %0.2f \\\\\n",t->Data(),k->getVal(),AIC->getVal()-minAIC.getVal(),AICw->getVal());
    //std::cout << k->getVal() << " " << AIC->getVal()-minAIC.getVal() << " " << AICw->getVal() << std::endl;
  }
#endif

  delete blind_data;
  return ws;
}
bool CollisionModel::sampleForR(int seed, const std::string &link_name, const Eigen::Vector2d &r, Eigen::Vector3d &p, Eigen::Vector4d &q) const { std::map<std::string, LinkCollisionModel >::const_iterator it = link_models_map_.find( link_name ); if (it == link_models_map_.end()) { return false; } const std::vector<Feature > &features = it->second.features_; std::mt19937 gen_(seed); const int n_points = features.size(); std::vector<double > weights(n_points, 0.0); double result_x, result_y, result_z; Eigen::Vector4d result_q; // sample the x coordinate double sum = 0.0; for (int pidx = 0; pidx < n_points; pidx++) { // if (std::fabs(r(0) - features[pidx].pc1) > r_dist_max_ || std::fabs(r(1) - features[pidx].pc2) > r_dist_max_ || features[pidx].weight < 0.0000001) { // weights[pidx] = 0.0; // } // else { weights[pidx] = features[pidx].weight * biVariateIsotropicGaussianKernel(r, Eigen::Vector2d(features[pidx].pc1, features[pidx].pc2), sigma_r_); // } sum += weights[pidx]; } Feature random_kernel; double rr = randomUniform(0.0, sum); for (int pidx = 0; pidx < n_points; pidx++) { rr -= weights[pidx]; if (rr <= 0.0) { random_kernel = features[pidx]; break; } } Eigen::Vector4d mean_q; result_x = random_kernel.T_C_F.p.x(); result_y = random_kernel.T_C_F.p.y(); result_z = random_kernel.T_C_F.p.z(); random_kernel.T_C_F.M.GetQuaternion(mean_q(0), mean_q(1), mean_q(2), mean_q(3)); std::normal_distribution<> d = std::normal_distribution<>(result_x, sigma_p_); result_x = d(gen_); d = std::normal_distribution<>(result_y, sigma_p_); result_y = d(gen_); d = std::normal_distribution<>(result_z, sigma_p_); result_z = d(gen_); int iterations = orientationNormalSample(mean_q, sigma_q_, result_q); if (iterations < 0) { std::cout << "ERROR: orientationNormalSample" << std::endl; } p(0) = result_x; p(1) = result_y; p(2) = result_z; q = result_q; return true; }
void AdaBoost::train(const vector<vector<Mat> > &features) { vector <vector<Mat> > cells = features; int neg = 0; for (int i = 0; i < triplessize; i++) if (triples[i].label == 0) neg++; // vector<double> weights(labels.size(), 1/(double)(labels.size())); vector<double> weights(triplessize, 1/2.0/(triplessize-neg)); for (int i = 0; i < triplessize; i++) if (triples[i].label == 0) weights[i] = 1/2.0/neg; int preferred = 0; double preverror = 0; size = 50; vector<int> used; for (int i = 0; i < size; i++) { double norm = 0; for (int t = 0; t < weights.size(); t++) norm += weights[t]; for (int t = 0; t < weights.size(); t++) weights[t] = weights[t] / norm; double error = DBL_MAX; vector<int> correctness(triplessize); bool flag = false; #pragma omp parallel for for (int k = 0; k < weak.size(); k++) { double currerror = 0; for (int j = 0; j < triplessize; j++) { double corr = weak[k].classify(cells[j][k]); currerror += weights[j] * std::fabs(corr - triples[j].label); } if (currerror < error) { error = currerror; preferred = k; } } for (int j = 0; j < triplessize; j++) { double corr = weak[preferred].classify(cells[j][preferred]); if (corr > 1/2.0 && triples[j].label == 1) correctness[j] = 1; else if (corr < 1/2.0 && triples[j].label == 0) correctness[j] = 1; else correctness[j] = 0; } for (int g = 0; g < used.size(); g++) if (used[g] == preferred) flag = true; if (!flag) std::cout << "round " << i << " error: " << error << " preferred: " << preferred << " " << std::endl; // if (flag) std::cout << "not used" << std::endl; if (!flag) { prefclass.push_back(weak[preferred]); betha.push_back(error / (1 - error)); used.push_back(preferred); } for (int j = 0; j < weights.size(); j++) if (correctness[j]) weights[j] = weights[j] * (error / (1 - error)); } alphas = 0; for (int i = 0; i < betha.size(); i++) { alpha.push_back(-std::log(betha[i])); alphas += alpha[i]; } params.erase(params.begin(), params.end()); for (int i = 0; i < prefclass.size(); i++) { 
params.push_back(prefclass[i].getParams()); // if (i < 10) // prefclass[i].drawLut(); } size = prefclass.size(); }
int test_Matrix::serialtest3() { testData* testdata = new testData(localProc_, numProcs_); std::vector<int>& fieldIDs = testdata->fieldIDs; std::vector<int>& fieldSizes = testdata->fieldSizes; std::vector<int>& idTypes = testdata->idTypes; std::vector<int>& ids = testdata->ids; fei::SharedPtr<fei::VectorSpace> vspc(new fei::VectorSpace(comm_, "sU_Mat3")); vspc->defineFields(fieldIDs.size(), &fieldIDs[0], &fieldSizes[0]); vspc->defineIDTypes(idTypes.size(), &idTypes[0]); fei::SharedPtr<fei::MatrixGraph> matgraph(new fei::MatrixGraph_Impl2(vspc, vspc, "sU_Mat3")); int numIDs = ids.size(); int fieldID = fieldIDs[0]; int idType = idTypes[0]; int patternID = matgraph->definePattern(numIDs, idType, fieldID); CHK_ERR( matgraph->initConnectivityBlock(0, 1, patternID) ); CHK_ERR( matgraph->initConnectivity(0, 0, &ids[0]) ); //set up a slave constraint that defines id 2, field 0 to be equal to //id 1, field 0. int offsetOfSlave = 1; int offsetIntoSlaveField = 0; std::vector<double> weights(2); weights[0] = 1.0; weights[1] = -1.0; double rhsValue = 0.0; std::vector<int> cr_idtypes(2, idTypes[0]); std::vector<int> cr_fieldIDs(2, fieldIDs[0]); CHK_ERR( matgraph->initSlaveConstraint(2, //numIDs &cr_idtypes[0], &ids[1], &cr_fieldIDs[0], offsetOfSlave, offsetIntoSlaveField, &weights[0], rhsValue) ); CHK_ERR( matgraph->initComplete() ); fei::SharedPtr<fei::FillableMat> ssmat(new fei::FillableMat); int localsize = matgraph->getRowSpace()->getNumIndices_Owned(); localsize -= 1;//subtract the slave fei::Matrix* matrix = new fei::Matrix_Impl<fei::FillableMat>(ssmat, matgraph, localsize); if (matrix == NULL) { ERReturn(-1); } std::vector<int> indices(numIDs); CHK_ERR( matgraph->getConnectivityIndices(0, 0, numIDs, &indices[0], numIDs) ); std::vector<double> data1(numIDs*numIDs); std::vector<double*> data2d(numIDs); int i; for(i=0; i<numIDs; ++i) { data2d[i] = &(data1[i*numIDs]); } for(i=0; i<numIDs*numIDs; ++i) { data1[i] = 1.0*i; } CHK_ERR( matrix->sumIn(numIDs, &indices[0], numIDs, 
&indices[0], &data2d[0], 0) ); CHK_ERR( matrix->sumIn(0, 0, &data2d[0], 0) ); delete matrix; delete testdata; return(0); }
// Scan the cells of this workset for a new extremum of the operation field.
//
// For every cell inside the operation region the weighted average of the
// operation field is computed (for a vector field: the sum of the squared
// components selected by opX/opY/opZ). If that average beats the value
// currently held in global_response[1] -- larger for "Maximize", smaller for
// "Minimize" -- the response is refreshed:
//   global_response[0]   cell average of the return field (or of the operation
//                        field when bReturnOpField is set); vector fields
//                        contribute the sum of all squared components
//   global_response[1]   cell average of the operation field
//   global_response[2+i] mean of the i-th quadrature-point coordinate
// and max_nodeID is set to the cell's entry of workset.wsElNodeEqID.
void QCAD::ResponseFieldValue<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  ScalarT cellAvg, pointVal, volume;

  if (!opRegion->elementBlockIsInRegion(workset.EBName))
    return;

  // The requested operation does not change while looping over the cells.
  const bool seekMax = (operation == "Maximize");
  const bool seekMin = (operation == "Minimize");

  for (std::size_t c = 0; c < workset.numCells; ++c)
  {
    if (!opRegion->cellIsInRegion(c))
      continue;

    // Cell volume = sum of the quadrature weights; used for averaging.
    volume = 0.0;
    for (std::size_t q = 0; q < numQPs; ++q)
      volume += weights(c, q);

    // Weighted average of the (scalar) quantity derived from the operation field.
    cellAvg = 0.0;
    for (std::size_t q = 0; q < numQPs; ++q)
    {
      pointVal = 0.0;
      if (!bOpFieldIsVector)
      {
        pointVal = opField(c, q);
      }
      else
      {
        if (opX) pointVal += opField(c, q, 0) * opField(c, q, 0);
        if (opY) pointVal += opField(c, q, 1) * opField(c, q, 1);
        if (opZ) pointVal += opField(c, q, 2) * opField(c, q, 2);
      }
      cellAvg += pointVal * weights(c, q);
    }
    cellAvg /= volume;

    // Nothing to do unless this cell improves on the stored extremum.
    const bool improves = (seekMax && cellAvg > this->global_response[1]) ||
                          (seekMin && cellAvg < this->global_response[1]);
    if (!improves)
      continue;

    max_nodeID = workset.wsElNodeEqID[c];

    // g[0] = cell average of the field that is reported back
    this->global_response[0] = 0.0;
    for (std::size_t q = 0; q < numQPs; ++q)
    {
      pointVal = 0.0;
      if (bReturnOpField)
      {
        if (bOpFieldIsVector)
        {
          for (std::size_t d = 0; d < numDims; d++)
            pointVal += opField(c, q, d) * opField(c, q, d);
        }
        else
        {
          pointVal = opField(c, q);
        }
      }
      else
      {
        if (bRetFieldIsVector)
        {
          for (std::size_t d = 0; d < numDims; d++)
            pointVal += retField(c, q, d) * retField(c, q, d);
        }
        else
        {
          pointVal = retField(c, q);
        }
      }
      this->global_response[0] += pointVal * weights(c, q);
    }
    this->global_response[0] /= volume;

    // g[1] = cell average of the field operated on
    this->global_response[1] = cellAvg;

    // g[2+] = unweighted mean of the quadrature-point coordinates of the cell
    for (std::size_t d = 0; d < numDims; d++)
    {
      this->global_response[d + 2] = 0.0;
      for (std::size_t q = 0; q < numQPs; ++q)
        this->global_response[d + 2] += coordVec(c, q, d);
      this->global_response[d + 2] /= numQPs;
    }
  } // end of loop over cells

  // No local scattering
}