Пример #1
0
  /**
   * Runs a random-sample consensus loop over the given problem.
   *
   * Each iteration draws sampleSize distinct data-point indices at random,
   * asks the problem to estimate a solution from them, computes the squared
   * error of every data point under that solution, and counts the points
   * whose squared error is within the (squared) threshold. The solution with
   * the largest inlier count is kept. The number of iterations still required
   * is re-estimated every time a better solution is found, and the loop is
   * additionally capped by mMaximumIterations.
   *
   * The threshold is mMaximumError when that is non-negative; otherwise it is
   * derived from the median of the squared errors of the current solution.
   *
   * @param iProblem  Problem instance providing getSampleSize(),
   *                  getNumDataPoints(), estimate() and
   *                  computeSquaredErrors(). An empty error vector returned
   *                  by computeSquaredErrors() marks the sample as invalid.
   * @return Result whose mSuccess is true if at least one valid sample was
   *         scored; mInliers/mSolution describe the best solution found and
   *         mNumIterations the number of scored iterations. If there are
   *         fewer data points than the sample size, an unsuccessful result
   *         with zero iterations is returned.
   */
  Result solve(const Problem& iProblem) const {
    // set up initial (empty) result
    Result result;
    result.mSuccess = false;
    result.mNumIterations = 0;

    // ensure that there are enough data points to proceed
    const int sampleSize = iProblem.getSampleSize();
    const int n = iProblem.getNumDataPoints();
    if (n < sampleSize) {
      return result;
    }

    // used to keep the outlier probability strictly inside (0,1) so that the
    // logarithm below is finite and non-zero
    const double epsilon = 1e-10;

    // best results are currently invalid
    double bestScore = -1;
    bool success = false;

    // start number of iterations as infinite, then reduce as we go
    double numIterationsNeeded = 1e10;
    int iterationCount = 0;
    int skippedSampleCount = 0;

    // for random sample index generation
    std::vector<int> allIndices(n);

    // iterate until adaptive number of iterations are exceeded
    while (iterationCount < numIterationsNeeded) {

      // determine random sample indices via a partial Fisher-Yates shuffle:
      // slot i is swapped with a slot chosen from [i, n), which keeps the
      // first sampleSize entries distinct and avoids the bias of drawing the
      // partner from the whole range every time
      for (int i = 0; i < n; ++i) {
        allIndices[i] = i;
      }
      for (int i = 0; i < sampleSize; ++i) {
        // n - i > 0 here because i < sampleSize <= n
        const int randIndex = i + std::rand() % (n - i);
        std::swap(allIndices[i], allIndices[randIndex]);
      }
      std::vector<int> sampleIndices(allIndices.begin(),
                                     allIndices.begin() + sampleSize);

      // compute solution on minimal set
      typename Problem::Solution solution = iProblem.estimate(sampleIndices);

      // compute errors over all data points
      // NOTE(review): the inlier loop below assumes one entry per data point
      // (size n) whenever the vector is non-empty -- confirm against Problem
      std::vector<double> errors2 = iProblem.computeSquaredErrors(solution);

      // check whether this is a valid sample
      // TODO: this should be done via a method in Problem class, but would
      // require changing all existing usages to include that method
      if (errors2.size() == 0) {
        // give up if too many consecutive samples are invalid
        ++skippedSampleCount;
        if (skippedSampleCount >= mMaximumIterations) break;
        continue;
      }
      skippedSampleCount = 0;

      // compute error threshold to be applied to each term
      double thresh = mMaximumError;
      if (thresh < 0) {
        // sort a copy: errors2 must stay in data-point order because the
        // inlier test below indexes it by data-point index
        std::vector<double> sortedErrors2(errors2);
        std::sort(sortedErrors2.begin(), sortedErrors2.end());
        const std::vector<double>::size_type numErrors = sortedErrors2.size();
        const std::vector<double>::size_type mid = numErrors / 2;
        // for an even count the median is the mean of the two middle values
        // (mid >= 1 here since numErrors is even and non-zero)
        const double median = (numErrors % 2 == 0) ?
          (0.5*(sortedErrors2[mid-1]+sortedErrors2[mid])) : sortedErrors2[mid];
        // NOTE(review): constants look like a median-based robust scale
        // (1.4826) times a robust-estimator cutoff (4.6851) -- confirm
        thresh = 1.4826*std::sqrt(median)*4.6851;
      }
      // errors are squared, so compare against the squared threshold
      thresh *= thresh;

      // determine inliers
      std::vector<int> inliers;
      inliers.reserve(n);
      for (int i = 0; i < n; ++i) {
        if (errors2[i] <= thresh) {
          inliers.push_back(i);
        }
      }

      // if this is the best score, update solution and convergence criteria
      double score = inliers.size();
      if (score > bestScore) {
        bestScore = score;
        result.mInliers = inliers;
        result.mSolution = solution;
        success = true;
        // probability that a random minimal sample contains an outlier,
        // given the current inlier fraction
        double inlierProbability = double(inliers.size()) / n;
        double anyOutlierProbability =
          1 - std::pow(inlierProbability, sampleSize);
        anyOutlierProbability = std::min(anyOutlierProbability, 1-epsilon);
        anyOutlierProbability = std::max(anyOutlierProbability, epsilon);
        // iterations needed to draw an all-inlier sample with probability
        // mGoodSolutionProbability
        numIterationsNeeded =
          std::log(1-mGoodSolutionProbability) /
          std::log(anyOutlierProbability);
      }

      // bump up iteration count and terminate if it exceeds hard max
      ++iterationCount;
      if (iterationCount > mMaximumIterations) {
        break;
      }
    }

    // finish off result params
    result.mSuccess = success;
    result.mNumIterations = iterationCount;

    // refine result using all inliers if specified
    if (result.mSuccess && mRefineUsingInliers) {
      result.mSolution = iProblem.estimate(result.mInliers);
    }

    // done
    return result;
  }
Пример #2
0
// EnsembleIn_Single::SetupEnsembleRead()
/** Set up a single trajectory file containing an ensemble for reading.
  * Detects the file format, processes read arguments, determines the number
  * of frames and the ensemble size, and builds the temperature or replica
  * index map from the first frame to be read.
  * \param tnameIn File name of the ensemble trajectory.
  * \param argIn Argument list; the "nosort" keyword is read here, the
  *              remaining arguments are handed to the format-specific reader
  *              and to the frame counter (start/stop/offset).
  * \param tparmIn Topology to associate with the trajectory.
  * \return 0 on success, 1 on error.
  */
int EnsembleIn_Single::SetupEnsembleRead(FileName const& tnameIn, ArgList& argIn,
                                       Topology *tparmIn)
{
  // Release any previous IO object. Reset the pointer so that an early
  // return below does not leave eio_ dangling (it would otherwise be
  // deleted a second time on the next call).
  if (eio_ != 0) {
    delete eio_;
    eio_ = 0;
  }
  // Set file name and topology pointer.
  if (SetTraj().SetNameAndParm(tnameIn, tparmIn)) return 1;
  // Detect file format
  TrajectoryFile::TrajFormatType tformat;
  if ( (eio_ = TrajectoryFile::DetectFormat( Traj().Filename(), tformat )) == 0 ) {
    mprinterr("Error: Could not determine trajectory %s format.\n", Traj().Filename().full());
    return 1;
  }
  eio_->SetDebug( debug_ );
  mprintf("\tReading '%s' as %s\n", Traj().Filename().full(), TrajectoryFile::FormatString(tformat));
  // Process ensemble args // TODO: Should be common to Ensemble?
  bool nosort = argIn.hasKey("nosort");
  // Process format-specific read args
  if (eio_->processReadArgs( argIn )) return 1;
  // Set up the format for reading and get the number of frames.
  int nframes = eio_->setupTrajin(Traj().Filename(), Traj().Parm());
  if (nframes == TrajectoryIO::TRAJIN_ERR) {
    mprinterr("Error: Could not set up %s for reading.\n", Traj().Filename().full());
    return 1;
  }
  if (debug_ > 0) {
    if (nframes != TrajectoryIO::TRAJIN_UNK)
      mprintf("\t'%s' contains %i frames.\n", Traj().Filename().base(), nframes);
    else
      mprintf("\t'%s' contains an unknown number of frames.\n",Traj().Filename().base());
  }
  // Set the start, stop, and offset args based on user input. Do some bounds
  // checking.
  if (SetTraj().Counter().CheckFrameArgs( nframes, argIn )) return 1;
  // Set trajectory coordinate info.
  cInfo_ = eio_->CoordInfo();
  // NOTE: ensembleSize_ is saved here as a shortcut. Should always equal whats in cInfo_
  // Determine if this trajectory actually contains an ensemble.
  // FIXME: Should check for < 2?
  ensembleSize_ = cInfo_.EnsembleSize();
  if (ensembleSize_ < 1) {
    mprinterr("Error: Cannot process single file ensemble with '%s'\n", 
              TrajectoryFile::FormatString(tformat));
    return 1;
  }
# ifdef MPI
  // Set up communicators
  if (Parallel::SetupComms( ensembleSize_ )) return 1;
  // Set ensemble member number.
  SetEnsembleMemberNum( EnsembleComm().Rank() );
# endif
  // If dimensions are present, assume search by indices, otherwise by temp.
  // With neither available, setup only proceeds when 'nosort' was given.
  targetType_ = ReplicaInfo::NONE;
  if (cInfo_.ReplicaDimensions().Ndims() > 0)
    targetType_ = ReplicaInfo::INDICES;
  else if (cInfo_.HasTemp())
    targetType_ = ReplicaInfo::TEMP;
  else if (!nosort) {
    mprinterr("Error: Ensemble trajectory does not have indices or temperature.\n");
    return 1;
  }
  if (debug_ > 0)
    cInfo_.PrintCoordInfo( Traj().Filename().base(), Traj().Parm()->c_str() );
# ifdef MPI
  // This array will let each thread know who has what frame.
  frameidx_.resize( ensembleSize_ ); // TODO: Get rid of, should do all in TrajIO class.
# endif
  // Get a list of all temperatures/indices.
  TemperatureMap_.ClearMap();
  IndicesMap_.ClearMap();
  if (targetType_ == ReplicaInfo::TEMP || targetType_ == ReplicaInfo::INDICES )
  {
    // With MPI a single frame is set up here and the values are gathered
    // over EnsembleComm() below; otherwise one frame per ensemble member.
#   ifdef MPI
    FrameArray f_ensemble(1);
#   else
    FrameArray f_ensemble( ensembleSize_ );
#   endif
    f_ensemble.SetupFrames( Traj().Parm()->Atoms(), cInfo_ );
    // Read the first requested frame to obtain temperatures/indices.
    if ( eio_->openTrajin() ) return 1;
    if ( eio_->readArray( Traj().Counter().Start(), f_ensemble ) ) {
      // Do not leave the trajectory open when the read fails.
      eio_->closeTraj();
      return 1;
    }
    eio_->closeTraj();
    if (targetType_ == ReplicaInfo::TEMP) {
      std::vector<double> allTemps( ensembleSize_, -1.0 );
#     ifdef MPI
      // Consolidate temperatures
      if (GatherTemperatures(f_ensemble[0].tAddress(), allTemps, EnsembleComm())) return 1;
#     else
      for (int en = 0; en != ensembleSize_; ++en)
        allTemps[en] = f_ensemble[en].Temperature();
#     endif
      if (SetTemperatureMap( allTemps )) return 1;
    } else if (targetType_ == ReplicaInfo::INDICES) {
      std::vector<RemdIdxType> allIndices( ensembleSize_ );
#     ifdef MPI
      // Consolidate replica indices
      if (GatherIndices(f_ensemble[0].iAddress(), allIndices, cInfo_.ReplicaDimensions().Ndims(),
                        EnsembleComm()))
        return 1;
#     else
      for (int en = 0; en != ensembleSize_; ++en)
        allIndices[en] = f_ensemble[en].RemdIndices();
#     endif
      if (SetIndicesMap( allIndices)) return 1;
    }
  }  

  return 0;
}