//----------------------------------------------------------------------------------------------
/// Execute the algorithm.
void EstimateFitParameters::execConcrete() {
  auto costFunction = getCostFunctionInitialized();
  auto func = costFunction->getFittingFunction();

  // Use additional constraints on parameters tied in some way
  // to the varied parameters to exculde unwanted results.
  std::vector<std::unique_ptr<IConstraint>> constraints;
  std::string constraintStr = getProperty("Constraints");
  if (!constraintStr.empty()) {
    Expression expr;
    expr.parse(constraintStr);
    expr.toList();
    for (auto &term : expr.terms()) {
      IConstraint *c =
          ConstraintFactory::Instance().createInitialized(func.get(), term);
      constraints.push_back(std::unique_ptr<IConstraint>(c));
    }
  }

  // Ranges to use with random number generators: one for each free parameter.
  std::vector<std::pair<double, double>> ranges;
  ranges.reserve(costFunction->nParams());
  for (size_t i = 0; i < func->nParams(); ++i) {
    if (!func->isActive(i)) {
      continue;
    }
    auto constraint = func->getConstraint(i);
    if (constraint == nullptr) {
      func->fix(i);
      continue;
    }
    auto boundary = dynamic_cast<Constraints::BoundaryConstraint *>(constraint);
    if (boundary == nullptr) {
      throw std::runtime_error("Parameter " + func->parameterName(i) +
                               " must have a boundary constraint. ");
    }
    if (!boundary->hasLower()) {
      throw std::runtime_error("Constraint of " + func->parameterName(i) +
                               " must have a lower bound.");
    }
    if (!boundary->hasUpper()) {
      throw std::runtime_error("Constraint of " + func->parameterName(i) +
                               " must have an upper bound.");
    }
    // Use the lower and upper bounds of the constraint to set the range
    // of a generator with uniform distribution.
    ranges.push_back(std::make_pair(boundary->lower(), boundary->upper()));
  }
  // Number of parameters could have changed
  costFunction->reset();
  if (costFunction->nParams() == 0) {
    throw std::runtime_error("No parameters are given for which to estimate "
                             "initial values. Set boundary constraints to "
                             "parameters that need to be estimated.");
  }

  size_t nSamples = static_cast<int>(getProperty("NSamples"));
  unsigned int seed = static_cast<int>(getProperty("Seed"));

  if (getPropertyValue("Type") == "Monte Carlo") {
    int nOutput = getProperty("NOutputs");
    auto outputWorkspaceProp = getPointerToProperty("OutputWorkspace");
    if (outputWorkspaceProp->isDefault() || nOutput <= 0) {
      nOutput = 1;
    }
    auto output = runMonteCarlo(*costFunction, ranges, constraints, nSamples,
                                static_cast<size_t>(nOutput), seed);

    if (!outputWorkspaceProp->isDefault()) {
      auto table = API::WorkspaceFactory::Instance().createTable();
      auto column = table->addColumn("str", "Name");
      column->setPlotType(6);
      for (size_t i = 0; i < output.size(); ++i) {
        column = table->addColumn("double", std::to_string(i + 1));
        column->setPlotType(2);
      }

      for (size_t i = 0, ia = 0; i < m_function->nParams(); ++i) {
        if (m_function->isActive(i)) {
          TableRow row = table->appendRow();
          row << m_function->parameterName(i);
          for (auto &j : output) {
            row << j[ia];
          }
          ++ia;
        }
      }
      setProperty("OutputWorkspace", table);
    }
  } else {
    size_t nSelection = static_cast<int>(getProperty("Selection"));
    size_t nIterations = static_cast<int>(getProperty("NIterations"));
    runCrossEntropy(*costFunction, ranges, constraints, nSamples, nSelection,
                    nIterations, seed);
  }
  bool fixBad = getProperty("FixBadParameters");
  if (fixBad) {
    fixBadParameters(*costFunction, ranges);
  }
}
Beispiel #2
0
/**
*   Executes the algorithm.
*/
void LoadTBL::exec() {
  std::string filename = getProperty("Filename");
  std::ifstream file(filename.c_str());
  if (!file) {
    throw Exception::FileError("Unable to open file: ", filename);
  }
  std::string line;

  ITableWorkspace_sptr ws = WorkspaceFactory::Instance().createTable();

  std::vector<std::string> columnHeadings;

  Kernel::Strings::extractToEOL(file, line);
  // We want to check if the first line contains an empty string or series of
  // ",,,,,"
  // to see if we are loading a TBL file that actually contains data or not.
  boost::split(columnHeadings, line, boost::is_any_of(","),
               boost::token_compress_off);
  for (auto entry = columnHeadings.begin(); entry != columnHeadings.end();) {
    if (entry->empty()) {
      // erase the empty values
      entry = columnHeadings.erase(entry);
    } else {
      // keep any non-empty values
      ++entry;
    }
  }
  if (columnHeadings.empty()) {
    // we have an empty string or series of ",,,,,"
    throw std::runtime_error("The file you are trying to load is Empty. \n "
                             "Please load a non-empty TBL file");
  } else {
    // set columns back to empty ready to populated with columnHeadings.
    columnHeadings.clear();
  }
  // this will tell us if we need to just fill in the cell values
  // or whether we will have to create the column headings as well.
  bool isOld = getColumnHeadings(line, columnHeadings);

  std::vector<std::string> rowVec;
  if (isOld) {
    /**THIS IS ESSENTIALLY THE OLD LoadReflTBL CODE**/
    // create the column headings
    auto colStitch = ws->addColumn("str", "StitchGroup");
    auto colRuns = ws->addColumn("str", "Run(s)");
    auto colTheta = ws->addColumn("str", "ThetaIn");
    auto colTrans = ws->addColumn("str", "TransRun(s)");
    auto colQmin = ws->addColumn("str", "Qmin");
    auto colQmax = ws->addColumn("str", "Qmax");
    auto colDqq = ws->addColumn("str", "dq/q");
    auto colScale = ws->addColumn("str", "Scale");
    auto colOptions = ws->addColumn("str", "Options");
    auto colHiddenOptions = ws->addColumn("str", "HiddenOptions");

    for (size_t i = 0; i < ws->columnCount(); i++) {
      auto col = ws->getColumn(i);
      col->setPlotType(0);
    }

    // we are using the old ReflTBL format
    // where all of the entries are on one line
    // so we must reset the stream to reread the first line.
    std::ifstream file(filename.c_str());
    if (!file) {
      throw Exception::FileError("Unable to open file: ", filename);
    }
    std::string line;
    int stitchID = 1;
    while (Kernel::Strings::extractToEOL(file, line)) {
      if (line.empty() || line == ",,,,,,,,,,,,,,,,") {
        continue;
      }
      getCells(line, rowVec, 16, isOld);
      const std::string scaleStr = rowVec.at(16);
      const std::string stitchStr = boost::lexical_cast<std::string>(stitchID);

      // check if the first run in the row has any data associated with it
      // 0 = runs, 1 = theta, 2 = trans, 3 = qmin, 4 = qmax
      if (!rowVec[0].empty() || !rowVec[1].empty() || !rowVec[2].empty() ||
          !rowVec[3].empty() || !rowVec[4].empty()) {
        TableRow row = ws->appendRow();
        row << stitchStr;
        for (int i = 0; i < 5; ++i) {
          row << rowVec.at(i);
        }
        row << rowVec.at(15);
        row << scaleStr;
      }

      // check if the second run in the row has any data associated with it
      // 5 = runs, 6 = theta, 7 = trans, 8 = qmin, 9 = qmax
      if (!rowVec[5].empty() || !rowVec[6].empty() || !rowVec[7].empty() ||
          !rowVec[8].empty() || !rowVec[9].empty()) {
        TableRow row = ws->appendRow();
        row << stitchStr;
        for (int i = 5; i < 10; ++i) {
          row << rowVec.at(i);
        }
        row << rowVec.at(15);
        row << scaleStr;
      }

      // check if the third run in the row has any data associated with it
      // 10 = runs, 11 = theta, 12 = trans, 13 = qmin, 14 = qmax
      if (!rowVec[10].empty() || !rowVec[11].empty() || !rowVec[12].empty() ||
          !rowVec[13].empty() || !rowVec[14].empty()) {
        TableRow row = ws->appendRow();
        row << stitchStr;
        for (int i = 10; i < 17; ++i) {
          if (i == 16)
            row << scaleStr;
          else
            row << rowVec.at(i);
        }
      }
      ++stitchID;
      setProperty("OutputWorkspace", ws);
    }

  } else {
    // we have a TBL format that contains column headings
    // on the first row. These are now entries in the columns vector
    if (!columnHeadings.empty()) {
      // now we need to add the custom column headings from
      // the columns vector to the TableWorkspace
      for (auto heading = columnHeadings.begin();
           heading != columnHeadings.end();) {
        if (heading->empty()) {
          // there is no need to have empty column headings.
          heading = columnHeadings.erase(heading);
        } else {
          Mantid::API::Column_sptr col;
          col = ws->addColumn("str", *heading);
          col->setPlotType(0);
          heading++;
        }
      }
    }
    size_t expectedCommas = columnHeadings.size() - 1;
    while (Kernel::Strings::extractToEOL(file, line)) {
      if (line.empty() || line == ",,,,,,,,,,,,,,,,") {
        // skip over any empty lines
        continue;
      }
      getCells(line, rowVec, columnHeadings.size() - 1, isOld);
      // populate the columns with their values for this row.
      TableRow row = ws->appendRow();
      for (size_t i = 0; i < expectedCommas + 1; ++i) {
        row << rowVec.at(i);
      }
    }
    setProperty("OutputWorkspace", ws);
  }
}
//----------------------------------------------------------------------------------------------
/// Examine the chi squared as a function of fitting parameters and estimate
/// errors for each parameter.
void CalculateChiSquared::estimateErrors() {
  // Number of fiting parameters
  auto nParams = m_function->nParams();
  // Create an output table for displaying slices of the chi squared and
  // the probabilitydensity function
  auto pdfTable = API::WorkspaceFactory::Instance().createTable();

  std::string baseName = getProperty("Output");
  if (baseName.empty()) {
    baseName = "CalculateChiSquared";
  }
  declareProperty(new API::WorkspaceProperty<API::ITableWorkspace>(
                      "PDFs", "", Kernel::Direction::Output),
                  "The name of the TableWorkspace in which to store the "
                  "pdfs of fit parameters");
  setPropertyValue("PDFs", baseName + "_pdf");
  setProperty("PDFs", pdfTable);

  // Create an output table for displaying the parameter errors.
  auto errorsTable = API::WorkspaceFactory::Instance().createTable();
  auto nameColumn = errorsTable->addColumn("str", "Parameter");
  auto valueColumn = errorsTable->addColumn("double", "Value");
  auto minValueColumn = errorsTable->addColumn("double", "Value at Min");
  auto leftErrColumn = errorsTable->addColumn("double", "Left Error");
  auto rightErrColumn = errorsTable->addColumn("double", "Right Error");
  auto quadraticErrColumn = errorsTable->addColumn("double", "Quadratic Error");
  auto chiMinColumn = errorsTable->addColumn("double", "Chi2 Min");
  errorsTable->setRowCount(nParams);
  declareProperty(new API::WorkspaceProperty<API::ITableWorkspace>(
                      "Errors", "", Kernel::Direction::Output),
                  "The name of the TableWorkspace in which to store the "
                  "values and errors of fit parameters");
  setPropertyValue("Errors", baseName + "_errors");
  setProperty("Errors", errorsTable);

  // Calculate initial values
  double chiSquared = 0.0;
  double chiSquaredWeighted = 0.0;
  double dof = 0;
  API::FunctionDomain_sptr domain;
  API::FunctionValues_sptr values;
  m_domainCreator->createDomain(domain, values);
  calcChiSquared(*m_function, nParams, *domain, *values, chiSquared,
                 chiSquaredWeighted, dof);
  // Value of chi squared for current parameters in m_function
  double chi0 = chiSquared;
  // Fit data variance
  double sigma2 = chiSquared / dof;
  bool useWeighted = getProperty("Weighted");

  if (useWeighted) {
    chi0 = chiSquaredWeighted;
    sigma2 = 0.0;
  }

  if (g_log.is(Kernel::Logger::Priority::PRIO_DEBUG)) {
    g_log.debug() << "chi0=" << chi0 << std::endl;
    g_log.debug() << "sigma2=" << sigma2 << std::endl;
    g_log.debug() << "dof=" << dof << std::endl;
  }

  // Parameter bounds that define a volume in the parameter
  // space within which the chi squared is being examined.
  GSLVector lBounds(nParams);
  GSLVector rBounds(nParams);

  // Number of points in lines for plotting
  size_t n = 100;
  pdfTable->setRowCount(n);
  const double fac = 1e-4;

  // Loop over each parameter
  for (size_t ip = 0; ip < nParams; ++ip) {

    // Add columns for the parameter to the pdf table.
    auto parName = m_function->parameterName(ip);
    nameColumn->read(ip, parName);
    // Parameter values
    auto col1 = pdfTable->addColumn("double", parName);
    col1->setPlotType(1);
    // Chi squared values
    auto col2 = pdfTable->addColumn("double", parName + "_chi2");
    col2->setPlotType(2);
    // PDF values
    auto col3 = pdfTable->addColumn("double", parName + "_pdf");
    col3->setPlotType(2);

    double par0 = m_function->getParameter(ip);
    double shift = fabs(par0 * fac);
    if (shift == 0.0) {
      shift = fac;
    }

    // Make a slice along this parameter
    GSLVector dir(nParams);
    dir.zero();
    dir[ip] = 1.0;
    ChiSlice slice(*m_function, dir, *domain, *values, chi0, sigma2);

    // Find the bounds withn which the PDF is significantly above zero.
    // The bounds are defined relative to par0:
    //   par0 + lBound is the lowest value of the parameter (lBound <= 0)
    //   par0 + rBound is the highest value of the parameter (rBound >= 0)
    double lBound = slice.findBound(-shift);
    double rBound = slice.findBound(shift);
    lBounds[ip] = lBound;
    rBounds[ip] = rBound;

    // Approximate the slice with a polynomial.
    // P is a vector of values of the polynomial at special points.
    // A is a vector of Chebyshev expansion coefficients.
    // The polynomial is defined on interval [lBound, rBound]
    // The value of the polynomial at 0 == chi squared at par0
    std::vector<double> P, A;
    bool ok = true;
    auto base = slice.makeApprox(lBound, rBound, P, A, ok);
    if (!ok) {
      g_log.warning() << "Approximation failed for parameter " << ip << std::endl;
    }
    if (g_log.is(Kernel::Logger::Priority::PRIO_DEBUG)) {
      g_log.debug() << "Parameter " << ip << std::endl;
      g_log.debug() << "Slice approximated by polynomial of order "
                    << base->size() - 1;
      g_log.debug() << " between " << lBound << " and " << rBound << std::endl;
    }

    // Write n slice points into the output table.
    double dp = (rBound - lBound) / static_cast<double>(n);
    for (size_t i = 0; i < n; ++i) {
      double par = lBound + dp * static_cast<double>(i);
      double chi = base->eval(par, P);
      col1->fromDouble(i, par0 + par);
      col2->fromDouble(i, chi);
    }

    // Check if par0 is a minimum point of the chi squared
    std::vector<double> AD;
    // Calculate the derivative polynomial.
    // AD are the Chebyshev expansion of the derivative.
    base->derivative(A, AD);
    // Find the roots of the derivative polynomial
    std::vector<double> minima = base->roots(AD);
    if (minima.empty()) {
      minima.push_back(par0);
    }

    if (g_log.is(Kernel::Logger::Priority::PRIO_DEBUG)) {
      g_log.debug() << "Minima: ";
    }

    // If only 1 extremum is found assume (without checking) that it's a
    // minimum.
    // If there are more than 1, find the one with the smallest chi^2.
    double chiMin = std::numeric_limits<double>::max();
    double parMin = par0;
    for (size_t i = 0; i < minima.size(); ++i) {
      double value = base->eval(minima[i], P);
      if (g_log.is(Kernel::Logger::Priority::PRIO_DEBUG)) {
        g_log.debug() << minima[i] << " (" << value << ") ";
      }
      if (value < chiMin) {
        chiMin = value;
        parMin = minima[i];
      }
    }
    if (g_log.is(Kernel::Logger::Priority::PRIO_DEBUG)) {
      g_log.debug() << std::endl;
      g_log.debug() << "Smallest minimum at " << parMin << " is " << chiMin
                    << std::endl;
    }

    // Points of intersections with line chi^2 = 1/2 give an estimate of
    // the standard deviation of this parameter if it's uncorrelated with the
    // others.
    A[0] -= 0.5; // Now A are the coefficients of the original polynomial
                 // shifted down by 1/2.
    std::vector<double> roots = base->roots(A);
    std::sort(roots.begin(), roots.end());

    if (roots.empty()) {
      // Something went wrong; use the whole interval.
      roots.resize(2);
      roots[0] = lBound;
      roots[1] = rBound;
    } else if (roots.size() == 1) {
      // Only one root found; use a bound for the other root.
      if (roots.front() < 0) {
        roots.push_back(rBound);
      } else {
        roots.insert(roots.begin(), lBound);
      }
    } else if (roots.size() > 2) {
      // More than 2 roots; use the smallest and the biggest
      auto smallest = roots.front();
      auto biggest = roots.back();
      roots.resize(2);
      roots[0] = smallest;
      roots[1] = biggest;
    }

    if (g_log.is(Kernel::Logger::Priority::PRIO_DEBUG)) {
      g_log.debug() << "Roots: ";
      for (size_t i = 0; i < roots.size(); ++i) {
        g_log.debug() << roots[i] << ' ';
      }
      g_log.debug() << std::endl;
    }

    // Output parameter info to the table.
    valueColumn->fromDouble(ip, par0);
    minValueColumn->fromDouble(ip, par0 + parMin);
    leftErrColumn->fromDouble(ip, roots[0] - parMin);
    rightErrColumn->fromDouble(ip, roots[1] - parMin);
    chiMinColumn->fromDouble(ip, chiMin);

    // Output the PDF
    for (size_t i = 0; i < n; ++i) {
      double chi = col2->toDouble(i);
      col3->fromDouble(i, exp(-chi + chiMin));
    }

    // make sure function parameters don't change.
    m_function->setParameter(ip, par0);
  }

  // Improve estimates for standard deviations.
  // If parameters are correlated the found deviations
  // most likely underestimate the true values.
  unfixParameters();
  GSLJacobian J(m_function, values->size());
  m_function->functionDeriv(*domain, J);
  refixParameters();
  // Calculate the hessian at the current point.
  GSLMatrix H;
  if (useWeighted) {
    H.resize(nParams, nParams);
    for (size_t i = 0; i < nParams; ++i) {
      for (size_t j = i; j < nParams; ++j) {
        double h = 0.0;
        for (size_t k = 0; k < values->size(); ++k) {
          double w = values->getFitWeight(k);
          h += J.get(k, i) * J.get(k, j) * w * w;
        }
        H.set(i, j, h);
        if (i != j) {
          H.set(j, i, h);
        }
      }
    }
  } else {
    H = Tr(J.matrix()) * J.matrix();
  }
  // Square roots of the diagonals of the covariance matrix give
  // the standard deviations in the quadratic approximation of the chi^2.
  GSLMatrix V(H);
  if (!useWeighted) {
    V *= 1. / sigma2;
  }
  V.invert();
  // In a non-quadratic asymmetric case the following procedure can give a
  // better result:
  // Find the direction in which the chi^2 changes slowest and the positive and
  // negative deviations in that direction. The change in a parameter at those
  // points can be a better estimate for the standard deviation.
  GSLVector v(nParams);
  GSLMatrix Q(nParams, nParams);
  // One of the eigenvectors of the hessian is the direction of the slowest
  // change.
  H.eigenSystem(v, Q);

  // Loop over the eigenvectors
  for (size_t i = 0; i < nParams; ++i) {
    auto dir = Q.copyColumn(i);
    if (g_log.is(Kernel::Logger::Priority::PRIO_DEBUG)) {
      g_log.debug() << "Direction " << i << std::endl;
      g_log.debug() << dir << std::endl;
    }
    // Make a slice in that direction
    ChiSlice slice(*m_function, dir, *domain, *values, chi0, sigma2);
    double rBound0 = dir.dot(rBounds);
    double lBound0 = dir.dot(lBounds);
    if (g_log.is(Kernel::Logger::Priority::PRIO_DEBUG)) {
      g_log.debug() << "lBound " << lBound0 << std::endl;
      g_log.debug() << "rBound " << rBound0 << std::endl;
    }
    double lBound = slice.findBound(lBound0);
    double rBound = slice.findBound(rBound0);
    std::vector<double> P, A;
    // Use a polynomial approximation
    bool ok = true;
    auto base = slice.makeApprox(lBound, rBound, P, A, ok);
    if (!ok) {
      g_log.warning() << "Approximation failed in direction " << i << std::endl;
    }
    // Find the deviation points where the chi^2 = 1/2
    A[0] -= 0.5;
    std::vector<double> roots = base->roots(A);
    std::sort(roots.begin(), roots.end());
    // Sort out the roots
    auto nRoots = roots.size();
    if (nRoots == 0) {
      roots.resize(2, 0.0);
    } else if (nRoots == 1) {
      if (roots.front() > 0.0) {
        roots.insert(roots.begin(), 0.0);
      } else {
        roots.push_back(0.0);
      }
    } else if (nRoots > 2) {
      roots[1] = roots.back();
      roots.resize(2);
    }
    if (g_log.is(Kernel::Logger::Priority::PRIO_DEBUG)) {
      g_log.debug() << "Roots " << roots[0] << " (" << slice(roots[0]) << ") " << roots[1] << " (" << slice(roots[1]) << ") " << std::endl;
    }
    // Loop over the parameters and see if there deviations along
    // this direction is greater than any previous value.
    for (size_t ip = 0; ip < nParams; ++ip) {
      auto lError = roots.front() * dir[ip];
      auto rError = roots.back() * dir[ip];
      if (lError > rError) {
        std::swap(lError, rError);
      }
      if (lError < leftErrColumn->toDouble(ip)) {
        if (g_log.is(Kernel::Logger::Priority::PRIO_DEBUG)) {
          g_log.debug() << "  left for  " << ip << ' ' << lError << ' ' << leftErrColumn->toDouble(ip) << std::endl;
        }
        leftErrColumn->fromDouble(ip, lError);
      }
      if (rError > rightErrColumn->toDouble(ip)) {
        if (g_log.is(Kernel::Logger::Priority::PRIO_DEBUG)) {
          g_log.debug() << "  right for " << ip << ' ' << rError << ' ' << rightErrColumn->toDouble(ip) << std::endl;
        }
        rightErrColumn->fromDouble(ip, rError);
      }
    }
    // Output the quadratic estimate for comparrison.
    quadraticErrColumn->fromDouble(i, sqrt(V.get(i, i)));
  }
}