/** Create a histogram workspace filled with the evaluated function
 *
 * @param numPixels :: number of pixels (spectra) to create
 * @param numBins :: number of bins per spectrum
 * @param x0 :: first X bin boundary
 * @param binDelta :: bin width
 * @param start_at_pixelID :: detector ID assigned to the first spectrum
 * @param inst :: instrument to attach to the workspace
 * @param functionString :: function used to generate the Y values
 * @param isRandom :: whether the generated Y values are randomized
 * @return the populated workspace
 */
MatrixWorkspace_sptr CreateSampleWorkspace::createHistogramWorkspace(
    int numPixels, int numBins, double x0, double binDelta,
    int start_at_pixelID, Geometry::Instrument_sptr inst,
    const std::string &functionString, bool isRandom) {
  MantidVecPtr x, y, e;
  x.access().resize(numBins + 1);
  e.access().resize(numBins);
  for (int i = 0; i < numBins + 1; ++i) {
    x.access()[i] = x0 + i * binDelta;
  }

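  // Evaluate the function at the lower edge of each bin (every boundary
  // except the last) to generate the shared Y values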
  std::vector<double> xValues(x.access().begin(), x.access().end() - 1);
  y.access() = evalFunction(functionString, xValues, isRandom ? 1 : 0);

  // Calculate the errors as sqrt(y)
  std::transform(y.access().begin(), y.access().end(), e.access().begin(),
                 [](double val) { return std::sqrt(val); });

  MatrixWorkspace_sptr retVal(new DataObjects::Workspace2D);
  retVal->initialize(numPixels, numBins + 1, numBins);
  retVal->setInstrument(inst);

  for (size_t wi = 0; wi < static_cast<size_t>(numPixels); wi++) {
    retVal->setX(wi, x);
    retVal->setData(wi, y, e);
    retVal->getSpectrum(wi)->setDetectorID(detid_t(start_at_pixelID + wi));
    retVal->getSpectrum(wi)->setSpectrumNo(specid_t(wi + 1));
  }

  return retVal;
}
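
// Illustrative call of the helper above (a sketch, not taken from the real
// exec(): the sizes and the function string are hypothetical, and 'inst'
// stands in for an instrument built elsewhere):
//
//   MatrixWorkspace_sptr ws = createHistogramWorkspace(
//       200 /*numPixels*/, 100 /*numBins*/, 0.0 /*x0*/, 200.0 /*binDelta*/,
//       0 /*start_at_pixelID*/, inst, "name=LinearBackground, A0=0.3",
//       false /*isRandom*/);
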
/** Load a single bank into the workspace
 *
 * @param nexusfilename :: file to open
 * @param entry_name :: NXentry name
 * @param bankName :: NXdata bank name
 * @param WS :: workspace to modify
 * @param id_to_wi :: det ID to workspace index mapping
 */
void LoadTOFRawNexus::loadBank(const std::string &nexusfilename,
                               const std::string &entry_name,
                               const std::string &bankName,
                               API::MatrixWorkspace_sptr WS,
                               const detid2index_map &id_to_wi) {
  g_log.debug() << "Loading bank " << bankName << std::endl;
  // Lock the file mutex: concurrent NeXus file access causes segfaults on
  // RHEL5/6 and Fedora
  m_fileMutex.lock();

  // Navigate to the point in the file
  auto file = new ::NeXus::File(nexusfilename);
  file->openGroup(entry_name, "NXentry");
  file->openGroup("instrument", "NXinstrument");
  file->openGroup(bankName, "NXdetector");

  size_t m_numPixels = 0;
  std::vector<uint32_t> pixel_id;

  if (!m_assumeOldFile) {
    // Load the pixel IDs
    file->readData("pixel_id", pixel_id);
    m_numPixels = pixel_id.size();
    if (m_numPixels == 0) {
      file->close();
      m_fileMutex.unlock();
      g_log.warning() << "Invalid pixel_id data in " << bankName << std::endl;
      return;
    }
  } else {
    // Load the x and y pixel offsets
    std::vector<float> xoffsets;
    std::vector<float> yoffsets;
    file->readData("x_pixel_offset", xoffsets);
    file->readData("y_pixel_offset", yoffsets);

    m_numPixels = xoffsets.size() * yoffsets.size();
    if (0 == m_numPixels) {
      file->close();
      m_fileMutex.unlock();
      g_log.warning() << "Invalid (x,y) offsets in " << bankName << std::endl;
      return;
    }

    size_t bankNum = 0;
    if (bankName.size() > 4) {
      if (bankName.substr(0, 4) == "bank") {
        bankNum = boost::lexical_cast<size_t>(bankName.substr(4));
        bankNum--;
      } else {
        file->close();
        m_fileMutex.unlock();
        g_log.warning() << "Invalid bank number for " << bankName << std::endl;
        return;
      }
    }

    // All good, so construct the pixel ID listing
    size_t numX = xoffsets.size();
    size_t numY = yoffsets.size();

    for (size_t i = 0; i < numX; i++) {
      for (size_t j = 0; j < numY; j++) {
        pixel_id.push_back(
            static_cast<uint32_t>(j + numY * (i + numX * bankNum)));
      }
    }
  }

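  // If a spectrum range was requested, drop the pixel IDs whose workspace
  // index falls outside that range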
  size_t iPart = 0;
  if (m_spec_max != Mantid::EMPTY_INT()) {
    uint32_t ifirst = pixel_id[0];
    range_check out_range(m_spec_min, m_spec_max, id_to_wi);
    auto newEnd = std::remove_if(pixel_id.begin(), pixel_id.end(), out_range);
    pixel_id.erase(newEnd, pixel_id.end());
    if (pixel_id.empty()) {
      file->close();
      m_fileMutex.unlock();
      g_log.warning() << "No pixels from " << bankName << std::endl;
      return;
    }
    // If pixels were removed from the front of the array, offset where we
    // start reading the data
    if (ifirst != pixel_id[0])
      iPart = m_numPixels - pixel_id.size();
    m_numPixels = pixel_id.size();
  }
  // Load the TOF vector
  std::vector<float> tof;
  file->readData(m_axisField, tof);
  if (tof.size() <= 1) {
    file->close();
    m_fileMutex.unlock();
    g_log.warning() << "Invalid " << m_axisField << " data in " << bankName
                    << std::endl;
    return;
  }
  size_t m_numBins = tof.size() - 1;

  // Make a shared pointer for the X axis and copy the TOF boundaries into it
  MantidVecPtr Xptr;
  MantidVec &X = Xptr.access();
  X.assign(tof.begin(), tof.end());

  // Load the data. Coerce ints into double.
  std::string errorsField = "";
  std::vector<double> data;
  file->openData(m_dataField);
  file->getDataCoerce(data);
  if (file->hasAttr("errors"))
    file->getAttr("errors", errorsField);
  file->closeData();

  // Load the errors
  bool hasErrors = !errorsField.empty();
  std::vector<double> errors;
  if (hasErrors) {
    try {
      file->openData(errorsField);
      file->getDataCoerce(errors);
      file->closeData();
    } catch (...) {
      g_log.information() << "Error loading the errors field, '" << errorsField
                          << "' for bank " << bankName
                          << ". Will use sqrt(counts). " << std::endl;
      hasErrors = false;
    }
  }

  /* Sanity checks on the data sizes (disabled):
  if (data.size() != m_numBins * m_numPixels) {
    file->close(); m_fileMutex.unlock();
    g_log.warning() << "Invalid size of '" << m_dataField << "' data in "
                    << bankName << std::endl;
    return;
  }
  if (hasErrors && (errors.size() != m_numBins * m_numPixels)) {
    file->close(); m_fileMutex.unlock();
    g_log.warning() << "Invalid size of '" << errorsField << "' errors in "
                    << bankName << std::endl;
    return;
  }
  */
  // Have all the data we need; close the file before releasing the mutex
  file->close();
  m_fileMutex.unlock();

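  // Copy the loaded bank data into the target workspace, one spectrum per
  // pixel ID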
  for (size_t i = iPart; i < iPart + m_numPixels; i++) {
    // Find the workspace index for this detector
    detid_t pixelID = pixel_id[i - iPart];
    auto mapEntry = id_to_wi.find(pixelID);
    if (mapEntry == id_to_wi.end())
      continue; // this detector ID is not in the workspace; skip it
    size_t wi = mapEntry->second;

    // Set the basic info of that spectrum
    ISpectrum *spec = WS->getSpectrum(wi);
    spec->setSpectrumNo(specid_t(wi + 1));
    spec->setDetectorID(pixelID);
    // Set the shared X vector so that all spectra share the same X data
    spec->setX(Xptr);

    // Extract the Y
    MantidVec &Y = spec->dataY();
    Y.assign(data.begin() + i * m_numBins, data.begin() + (i + 1) * m_numBins);

    MantidVec &E = spec->dataE();

    if (hasErrors) {
      // Copy the errors from the loaded document
      E.assign(errors.begin() + i * m_numBins,
               errors.begin() + (i + 1) * m_numBins);
    } else {
      // No errors in the file, so take sqrt(Y) to give E
      E.resize(Y.size());
      std::transform(Y.begin(), Y.end(), E.begin(),
                     [](double count) { return std::sqrt(count); });
    }
  }

  // Done!
}
/** Process the event file properly.
 * @param workspace :: EventWorkspace to write to.
 */
void
LoadEventPreNexus::procEvents(DataObjects::EventWorkspace_sptr &workspace) {
  this->num_error_events = 0;
  this->num_good_events = 0;
  this->num_ignored_events = 0;

  // Default values in the case of no parallel processing
  size_t loadBlockSize = Mantid::Kernel::DEFAULT_BLOCK_SIZE * 2;

  shortest_tof = static_cast<double>(MAX_TOF_UINT32) * TOF_CONVERSION;
  longest_tof = 0.;

  // Initialize progress reporting.
  size_t numBlocks = (max_events + loadBlockSize - 1) / loadBlockSize;

  // We want to pad out empty pixels.
  detid2det_map detector_map;
  workspace->getInstrument()->getDetectors(detector_map);

  // -------------- Determine processing mode
  std::string procMode = getProperty("UseParallelProcessing");
  if (procMode == "Serial")
    parallelProcessing = false;
  else if (procMode == "Parallel")
    parallelProcessing = true;
  else {
    // Automatic determination. Serial loading runs at roughly 3 million
    // events per second; parallel processing speeds that up by about x3 (say
    // 10 million per second, i.e. about 7 million extra events per second).
    // Weigh the time saved against a setup/merging overhead of roughly
    // 10 seconds per million detectors.
    double setUpTime = double(detector_map.size()) * 10e-6;
    parallelProcessing = ((double(max_events) / 7e6) > setUpTime);
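    // e.g. with ~1e6 detectors setUpTime is ~10 s, so parallel processing is
    // chosen only when more than ~70 million events are to be loaded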
    g_log.debug() << (parallelProcessing ? "Using" : "Not using")
                  << " parallel processing." << std::endl;
  }

  // determine maximum pixel id
  detid2det_map::iterator it;
  detid_max = 0; // seems like a safe lower bound
  for (it = detector_map.begin(); it != detector_map.end(); it++)
    if (it->first > detid_max)
      detid_max = it->first;

  // Pad all the pixels
  prog->report("Padding Pixels");
  this->pixel_to_wkspindex.reserve(
      detid_max + 1); // starting at zero up to and including detid_max
  // Set to zero
  this->pixel_to_wkspindex.assign(detid_max + 1, 0);
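  // Note: pixel_to_wkspindex is indexed directly by detector ID; entries for
  // monitors keep the default value of 0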
  size_t workspaceIndex = 0;
  for (it = detector_map.begin(); it != detector_map.end(); it++) {
    if (!it->second->isMonitor()) {
      this->pixel_to_wkspindex[it->first] = workspaceIndex;
      EventList &spec = workspace->getOrAddEventList(workspaceIndex);
      spec.addDetectorID(it->first);
      // Start the spectrum number at 1
      spec.setSpectrumNo(specid_t(workspaceIndex + 1));
      workspaceIndex += 1;
    }
  }

  // For a slight speed up
  loadOnlySomeSpectra = !this->spectra_list.empty();

  // Turn the spectra list into a map, for speed of access
  for (auto spectrumNumber : spectra_list)
    spectraLoadMap[spectrumNumber] = true;

  CPUTimer tim;

  // --------------- Create the partial workspaces ---------------------------
  // Vector of partial workspaces, for parallel processing.
  std::vector<EventWorkspace_sptr> partWorkspaces;
  std::vector<DasEvent *> buffers;

  /// Pointer to the vector of events
  typedef std::vector<TofEvent> *EventVector_pt;
  /// Bare array of arrays of pointers to the EventVectors
  EventVector_pt **eventVectors;

  /// How many threads will we use?
  size_t numThreads = 1;
  if (parallelProcessing)
    numThreads = size_t(PARALLEL_GET_MAX_THREADS);

  partWorkspaces.resize(numThreads);
  buffers.resize(numThreads);
  eventVectors = new EventVector_pt *[numThreads];
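  // Note: each per-thread lookup table is a flat array of (detid_max + 1)
  // pointers, so finding the event vector for a detector ID in the hot loop
  // is a single index operation rather than a map lookup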

  // cppcheck-suppress syntaxError
  PRAGMA_OMP( parallel for if (parallelProcessing) )
  for (int i = 0; i < int(numThreads); i++) {
    // This is the partial workspace we are about to create (if in parallel)
    EventWorkspace_sptr partWS;
    if (parallelProcessing) {
      prog->report("Creating Partial Workspace");
      // Create a partial workspace
      partWS = EventWorkspace_sptr(new EventWorkspace());
      // Make sure to initialize.
      partWS->initialize(1, 1, 1);
      // Copy all the spectra numbers and stuff (no actual events to copy
      // though).
      partWS->copyDataFrom(*workspace);
      // Push it in the array
      partWorkspaces[i] = partWS;
    } else
      partWS = workspace;

    // Allocate the buffers
    buffers[i] = new DasEvent[loadBlockSize];

    // For each partial workspace, make an array where index = detector ID and
    // value = pointer to the events vector
    eventVectors[i] = new EventVector_pt[detid_max + 1];
    EventVector_pt *theseEventVectors = eventVectors[i];
    for (detid_t j = 0; j < detid_max + 1; j++) {
      size_t wi = pixel_to_wkspindex[j];
      // Save a POINTER to the vector<tofEvent>
      theseEventVectors[j] = &partWS->getEventList(wi).getEvents();
    }
  }

  g_log.debug() << tim << " to create " << partWorkspaces.size()
                << " workspaces for parallel loading." << std::endl;

  prog->resetNumSteps(numBlocks, 0.1, 0.8);

  // ---------------------------------- LOAD THE DATA --------------------------
  PRAGMA_OMP( parallel for schedule(dynamic, 1) if (parallelProcessing) )
  for (int blockNum = 0; blockNum < int(numBlocks); blockNum++) {
    PARALLEL_START_INTERUPT_REGION

    // Find the workspace for this particular thread
    EventWorkspace_sptr ws;
    size_t threadNum = 0;
    if (parallelProcessing) {
      threadNum = PARALLEL_THREAD_NUMBER;
      ws = partWorkspaces[threadNum];
    } else
      ws = workspace;

    // Get the buffer (for this thread)
    DasEvent *event_buffer = buffers[threadNum];

    // Get the speeding-up array of vector<tofEvent> where index = detid.
    EventVector_pt *theseEventVectors = eventVectors[threadNum];

    // Where to start in the file?
    size_t fileOffset = first_event + (loadBlockSize * blockNum);
    // May need to reduce size of last (or only) block
    size_t current_event_buffer_size =
        (blockNum == int(numBlocks - 1))
            ? (max_events - (numBlocks - 1) * loadBlockSize)
            : loadBlockSize;

    // Load this chunk of event data (critical block)
    PARALLEL_CRITICAL(LoadEventPreNexus_fileAccess) {
      current_event_buffer_size = eventfile->loadBlockAt(
          event_buffer, fileOffset, current_event_buffer_size);
    }

    // This processes the events. Can be done in parallel!
    procEventsLinear(ws, theseEventVectors, event_buffer,
                     current_event_buffer_size, fileOffset);

    // Report progress
    prog->report("Load Event PreNeXus");

    PARALLEL_END_INTERUPT_REGION
  }
  PARALLEL_CHECK_INTERUPT_REGION
  g_log.debug() << tim << " to load the data." << std::endl;

  // --------------------- MERGE WORKSPACES BACK TOGETHER --------------------
  if (parallelProcessing) {
    PARALLEL_START_INTERUPT_REGION
    prog->resetNumSteps(workspace->getNumberHistograms(), 0.8, 0.95);

    size_t memoryCleared = 0;
    MemoryManager::Instance().releaseFreeMemory();

    // Merge all workspaces, index by index.
    PARALLEL_FOR_NO_WSP_CHECK()
    for (int iwi = 0; iwi < int(workspace->getNumberHistograms()); iwi++) {
      size_t wi = size_t(iwi);

      // The output event list.
      EventList &el = workspace->getEventList(wi);
      el.clear(false);

      // How many events will it have?
      size_t numEvents = 0;
      for (size_t i = 0; i < numThreads; i++)
        numEvents += partWorkspaces[i]->getEventList(wi).getNumberEvents();
      // This will avoid too much copying.
      el.reserve(numEvents);

      // Now merge the event lists
      for (size_t i = 0; i < numThreads; i++) {
        EventList &partEl = partWorkspaces[i]->getEventList(wi);
        el += partEl.getEvents();
        // Free up memory as you go along.
        partEl.clear(false);
      }

      // With TCMalloc, release memory when you accumulate enough to make sense
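      // (each TofEvent stores an 8-byte TOF plus an 8-byte pulse time, so
      // ten million events is roughly 160 MB)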
      PARALLEL_CRITICAL(LoadEventPreNexus_trackMemory) {
        memoryCleared += numEvents;
        if (memoryCleared > 10000000) // ten million events = about 160 MB
        {
          MemoryManager::Instance().releaseFreeMemory();
          memoryCleared = 0;
        }
      }
      prog->report("Merging Workspaces");
    }
    // Final memory release
    MemoryManager::Instance().releaseFreeMemory();
    g_log.debug() << tim << " to merge workspaces together." << std::endl;
    PARALLEL_END_INTERUPT_REGION
  }
  PARALLEL_CHECK_INTERUPT_REGION

  // Delete the buffers for each thread.
  for (size_t i = 0; i < numThreads; i++) {
    delete[] buffers[i];
    delete[] eventVectors[i];
  }
  delete[] eventVectors;
  // delete [] pulsetimes;

  prog->resetNumSteps(3, 0.94, 1.00);

  // finalize loading
  prog->report("Deleting Empty Lists");
  if (loadOnlySomeSpectra)
    workspace->deleteEmptyLists();

  prog->report("Setting proton charge");
  this->setProtonCharge(workspace);
  g_log.debug() << tim << " to set the proton charge log." << std::endl;

  // Make sure the MRU is cleared
  workspace->clearMRU();

  // Now, create a default X-vector for histogramming, with just 2 bins.
  Kernel::cow_ptr<MantidVec> axis;
  MantidVec &xRef = axis.access();
  xRef.resize(2);
  xRef[0] = shortest_tof - 1; // Just to make sure the bins hold it all
  xRef[1] = longest_tof + 1;
  workspace->setAllX(axis);
  this->pixel_to_wkspindex.clear();

  g_log.information() << "Read " << this->num_good_events << " events + "
                      << this->num_error_events << " errors"
                      << ". Shortest TOF: " << shortest_tof
                      << " microsec; longest TOF: " << longest_tof
                      << " microsec." << std::endl;
}