// Robustly estimate a solution to iProblem by repeated random sampling with
// an adaptively updated iteration count.
//
// Each iteration draws iProblem.getSampleSize() distinct data point indices,
// estimates a candidate solution from them, and counts the data points whose
// squared error does not exceed the inlier threshold. The candidate with the
// most inliers is kept, and the number of iterations still required is
// re-derived from its inlier ratio and mGoodSolutionProbability.
//
// Threshold: if mMaximumError >= 0 its square is used; otherwise the
// threshold is derived per candidate from the median of the squared errors.
//
// Returns a Result with:
//   mSuccess        true if at least one valid candidate was scored
//   mSolution       best candidate (re-estimated from all inliers when
//                   mRefineUsingInliers is set)
//   mInliers        data point indices counted as inliers of the best candidate
//   mNumIterations  number of counted iterations (skipped samples excluded)
// An unsuccessful, empty Result is returned when there are no data points or
// fewer data points than the sample size.
Result solve(const Problem& iProblem) const {
  // set up initial (empty) result
  Result result;
  result.mSuccess = false;
  result.mNumIterations = 0;

  // ensure that there are enough data points to proceed; an empty data set
  // is rejected too, because the sampling (rand() % n) and the inlier ratio
  // below both divide by n
  const int sampleSize = iProblem.getSampleSize();
  const int n = iProblem.getNumDataPoints();
  if ((n <= 0) || (n < sampleSize)) {
    return result;
  }

  const double epsilon = 1e-10;

  // best results are currently invalid
  double bestScore = -1;
  bool success = false;

  // start number of iterations as infinite, then reduce as we go
  double numIterationsNeeded = 1e10;
  int iterationCount = 0;
  int skippedSampleCount = 0;

  // for random sample index generation
  std::vector<int> allIndices(n);

  // iterate until adaptive number of iterations are exceeded
  while (iterationCount < numIterationsNeeded) {

    // determine random sample indices: reset to the identity permutation,
    // then swap a random element into each of the first sampleSize slots so
    // that the chosen indices are guaranteed to be distinct
    for (int i = 0; i < n; ++i) {
      allIndices[i] = i;
    }
    for (int i = 0; i < sampleSize; ++i) {
      int randIndex = std::rand() % n;
      std::swap(allIndices[i], allIndices[randIndex]);
    }
    std::vector<int> sampleIndices(allIndices.begin(),
                                   allIndices.begin() + sampleSize);

    // compute solution on minimal set
    typename Problem::Solution solution = iProblem.estimate(sampleIndices);

    // compute errors over all data points
    // NOTE(review): the code below assumes one squared error per data point
    // (errors2.size() == n) - confirm against Problem implementations
    std::vector<double> errors2 = iProblem.computeSquaredErrors(solution);

    // check whether this is a valid sample; an empty error vector marks an
    // invalid one
    // TODO: this should be done via a method in Problem class, but would
    // require changing all existing usages to include that method
    if (errors2.size() == 0) {
      // give up after too many consecutive invalid samples; these do not
      // count towards iterationCount
      ++skippedSampleCount;
      if (skippedSampleCount >= mMaximumIterations) break;
      continue;
    }
    skippedSampleCount = 0;

    // compute error threshold to be applied to each term
    double thresh = mMaximumError;
    if (thresh < 0) {
      // sort a COPY: errors2 must stay aligned with the data point indices
      // because the inlier loop below looks errors up by index
      std::vector<double> sortedErrors(errors2);
      std::sort(sortedErrors.begin(), sortedErrors.end());
      const int numErrors = static_cast<int>(sortedErrors.size());
      const int mid = numErrors / 2;
      // for an even count the median is the mean of the two middle values,
      // which sit at mid-1 and mid (numErrors >= 1 is guaranteed above)
      const double median = (numErrors % 2 == 0) ?
        (0.5*(sortedErrors[mid-1] + sortedErrors[mid])) : sortedErrors[mid];
      // presumably 1.4826 converts a median absolute error into a Gaussian
      // sigma estimate and 4.6851 is a robust cutoff in sigma units -
      // TODO confirm the origin of these constants
      thresh = 1.4826*std::sqrt(median)*4.6851;
    }
    // errors are squared, so compare against the squared threshold
    thresh *= thresh;

    // determine inliers
    std::vector<int> inliers;
    inliers.reserve(n);
    for (int i = 0; i < n; ++i) {
      if (errors2[i] <= thresh) {
        inliers.push_back(i);
      }
    }

    // if this is the best score, update solution and convergence criteria
    double score = inliers.size();
    if (score > bestScore) {
      bestScore = score;
      result.mInliers = inliers;
      result.mSolution = solution;
      success = true;

      // probability that a random minimal sample contains an outlier, clamped
      // away from 0 and 1 so that the logarithm below stays finite and nonzero
      double inlierProbability = double(inliers.size()) / n;
      double anyOutlierProbability = 1 - pow(inlierProbability,sampleSize);
      anyOutlierProbability = std::min(anyOutlierProbability, 1-epsilon);
      anyOutlierProbability = std::max(anyOutlierProbability, epsilon);
      numIterationsNeeded =
        log(1-mGoodSolutionProbability) / log(anyOutlierProbability);
    }

    // bump up iteration count and terminate if it exceeds hard max
    // NOTE(review): with '>' up to mMaximumIterations + 1 iterations can run;
    // left unchanged to preserve the reported iteration count
    ++iterationCount;
    if (iterationCount > mMaximumIterations) {
      break;
    }
  }

  // finish off result params
  result.mSuccess = success;
  result.mNumIterations = iterationCount;

  // refine result using all inliers if specified
  if (result.mSuccess && mRefineUsingInliers) {
    result.mSolution = iProblem.estimate(result.mInliers);
  }

  // done
  return result;
}
// EnsembleIn_Single::SetupEnsembleRead() int EnsembleIn_Single::SetupEnsembleRead(FileName const& tnameIn, ArgList& argIn, Topology *tparmIn) { if (eio_ != 0) delete eio_; // Set file name and topology pointer. if (SetTraj().SetNameAndParm(tnameIn, tparmIn)) return 1; // Detect file format TrajectoryFile::TrajFormatType tformat; if ( (eio_ = TrajectoryFile::DetectFormat( Traj().Filename(), tformat )) == 0 ) { mprinterr("Error: Could not determine trajectory %s format.\n", Traj().Filename().full()); return 1; } eio_->SetDebug( debug_ ); mprintf("\tReading '%s' as %s\n", Traj().Filename().full(), TrajectoryFile::FormatString(tformat)); // Process ensemble args // TODO: Should be common to Ensemble? bool nosort = argIn.hasKey("nosort"); // Process format-specific read args if (eio_->processReadArgs( argIn )) return 1; // Set up the format for reading and get the number of frames. int nframes = eio_->setupTrajin(Traj().Filename(), Traj().Parm()); if (nframes == TrajectoryIO::TRAJIN_ERR) { mprinterr("Error: Could not set up %s for reading.\n", Traj().Filename().full()); return 1; } if (debug_ > 0) { if (nframes != TrajectoryIO::TRAJIN_UNK) mprintf("\t'%s' contains %i frames.\n", Traj().Filename().base(), nframes); else mprintf("\t'%s' contains an unknown number of frames.\n",Traj().Filename().base()); } // Set the start, stop, and offset args based on user input. Do some bounds // checking. if (SetTraj().Counter().CheckFrameArgs( nframes, argIn )) return 1; // Set trajectory coordinate info. cInfo_ = eio_->CoordInfo(); // NOTE: ensembleSize_ is saved here as a shortcut. Should always equal whats in cInfo_ // Determine if this trajectory actually contains an ensemble. // FIXME: Should check for < 2? 
ensembleSize_ = cInfo_.EnsembleSize(); if (ensembleSize_ < 1) { mprinterr("Error: Cannot process single file ensemble with '%s'\n", TrajectoryFile::FormatString(tformat)); return 1; } # ifdef MPI // Set up communicators if (Parallel::SetupComms( ensembleSize_ )) return 1; // Set ensemble member number. SetEnsembleMemberNum( EnsembleComm().Rank() ); # endif // If dimensions are present, assume search by indices, otherwise by temp. targetType_ = ReplicaInfo::NONE; if (cInfo_.ReplicaDimensions().Ndims() > 0) targetType_ = ReplicaInfo::INDICES; else if (cInfo_.HasTemp()) targetType_ = ReplicaInfo::TEMP; else if (!nosort) { mprinterr("Error: Ensemble trajectory does not have indices or temperature.\n"); return 1; } if (debug_ > 0) cInfo_.PrintCoordInfo( Traj().Filename().base(), Traj().Parm()->c_str() ); # ifdef MPI // This array will let each thread know who has what frame. frameidx_.resize( ensembleSize_ ); // TODO: Get rid of, should do all in TrajIO class. # endif // Get a list of all temperatures/indices. 
TemperatureMap_.ClearMap(); IndicesMap_.ClearMap(); if (targetType_ == ReplicaInfo::TEMP || targetType_ == ReplicaInfo::INDICES ) { # ifdef MPI FrameArray f_ensemble(1); # else FrameArray f_ensemble( ensembleSize_ ); # endif f_ensemble.SetupFrames( Traj().Parm()->Atoms(), cInfo_ ); if ( eio_->openTrajin() ) return 1; if ( eio_->readArray( Traj().Counter().Start(), f_ensemble ) ) return 1; eio_->closeTraj(); if (targetType_ == ReplicaInfo::TEMP) { std::vector<double> allTemps( ensembleSize_, -1.0 ); # ifdef MPI // Consolidate temperatures if (GatherTemperatures(f_ensemble[0].tAddress(), allTemps, EnsembleComm())) return 1; # else for (int en = 0; en != ensembleSize_; ++en) allTemps[en] = f_ensemble[en].Temperature(); # endif if (SetTemperatureMap( allTemps )) return 1; } else if (targetType_ == ReplicaInfo::INDICES) { std::vector<RemdIdxType> allIndices( ensembleSize_ ); # ifdef MPI // Consolidate replica indices if (GatherIndices(f_ensemble[0].iAddress(), allIndices, cInfo_.ReplicaDimensions().Ndims(), EnsembleComm())) return 1; # else for (int en = 0; en != ensembleSize_; ++en) allIndices[en] = f_ensemble[en].RemdIndices(); # endif if (SetIndicesMap( allIndices)) return 1; } } return 0; }