Int_t mp102_readNtuplesFillHistosAndFit()
{

   // No graphics output: run in batch mode
   gROOT->SetBatch();

   // Perform the operation sequentially ---------------------------------------
   TChain inputChain("multiCore");
   inputChain.Add("mp101_multiCore_*.root");
   TH1F outHisto("outHisto", "Random Numbers", 128, -4, 4);
   {
      TimerRAII t("Sequential read and fit");
      inputChain.Draw("r >> outHisto");
      outHisto.Fit("gaus");
   }

   // We now go MP! ------------------------------------------------------------
   // TProcPool offers an interface to process trees and chains directly,
   // without requiring the user to implement the low-level map-reduce
   // machinery themselves.

   // We adapt our parallelisation to the number of input files
   const auto nFiles = inputChain.GetListOfFiles()->GetEntries();


   // This is the function invoked during the processing of the trees.
   auto workItem = [](TTreeReader & reader) {
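      // A TTreeReaderValue gives typed access to the branch "r" of the tree
      // currently loaded in the reader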
      TTreeReaderValue<Float_t> randomRV(reader, "r");
      auto partialHisto = new TH1F("outHistoMP", "Random Numbers", 128, -4, 4);
      while (reader.Next()) {
         partialHisto->Fill(*randomRV);
      }
      return partialHisto;
   };

   // Create the pool of processes
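   // (one worker process per input file: each work item runs in a separate
   // process and its partial result is sent back to the parent for merging)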
   TProcPool workers(nFiles);

   // Process the TChain
   {
      TimerRAII t("Parallel execution");
      auto sumHistogram = workers.ProcTree(inputChain, workItem, "multiCore");
      sumHistogram->Fit("gaus", 0);
   }

   return 0;

}
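
// TimerRAII is assumed to be defined elsewhere (e.g. in an earlier section or
// a shared header of these tutorials): a small RAII helper that starts a
// stopwatch on construction and prints the elapsed time, together with the
// label passed to it, on destruction. A minimal sketch of such a helper,
// shown only as an illustration, could look like this:
//
//    class TimerRAII {
//       TStopwatch fTimer;
//       std::string fMeta;
//    public:
//       TimerRAII(const char *meta) : fMeta(meta) { fTimer.Start(); }
//       ~TimerRAII()
//       {
//          fTimer.Stop();
//          std::cout << fMeta << ": " << fTimer.RealTime() << " s (real time)" << std::endl;
//       }
//    };
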
Int_t mt102_readNtuplesFillHistosAndFit()
{

   // No graphics output: run in batch mode
   gROOT->SetBatch();

   // Perform the operation sequentially ---------------------------------------
   TChain inputChain("multiCore");
   inputChain.Add("mc101_multiCore_*.root");
   TH1F outHisto("outHisto", "Random Numbers", 128, -4, 4);
   {
      TimerRAII t("Sequential read and fit");
      inputChain.Draw("r >> outHisto");
      outHisto.Fit("gaus");
   }

   // We now go MT! ------------------------------------------------------------

   // The first, fundamental operation to be performed in order to make ROOT
   // thread-aware.
   ROOT::EnableThreadSafety();

   // We adapt our parallelisation to the number of input files
   const auto nFiles = inputChain.GetListOfFiles()->GetEntries();
   std::forward_list<UInt_t> workerIDs(nFiles);
   std::iota(std::begin(workerIDs), std::end(workerIDs), 0);
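   // workerIDs now contains 0, 1, ..., nFiles - 1: one ID per input file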


   // We define the histograms we'll fill
   std::vector<TH1F> histograms;
   histograms.reserve(nFiles);
   for (auto workerID : workerIDs) {
      histograms.emplace_back(Form("outHisto_%u", workerID), "Random Numbers", 128, -4, 4);
   }

   // We define our work item
   auto workItem = [&histograms](UInt_t workerID) {
      TFile f(Form("mc101_multiCore_%u.root", workerID));
      TNtuple *ntuple = nullptr;
      f.GetObject("multiCore", ntuple);
      auto &histo = histograms.at(workerID);
      for (Long64_t index = 0; index < ntuple->GetEntriesFast(); ++index) {
         ntuple->GetEntry(index);
         histo.Fill(ntuple->GetArgs()[0]);
      }
   };

   TH1F sumHistogram("SumHisto", "Random Numbers", 128, -4, 4);
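   // All per-worker histograms will be merged into this one at the end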

   // Create the collection which will hold the threads, our "pool"
   std::vector<std::thread> workers;

   // We measure time here as well
   {
      TimerRAII t("Parallel execution");

      // Spawn the workers: fill the "pool" with one thread per input file
      for (auto workerID : workerIDs) {
         workers.emplace_back(workItem, workerID);
      }

      // Join the threads: the reduction must not start before all histograms are filled
      for (auto&& worker : workers) worker.join();

      // And reduce: merge the per-worker histograms into sumHistogram
      std::for_each(std::begin(histograms), std::end(histograms),
                    [&sumHistogram](const TH1F & h) {
                       sumHistogram.Add(&h);
                    });

      sumHistogram.Fit("gaus",0);
   }

   return 0;

}