Exemplo n.º 1
0
void WWTMVAClassification( TString myMethodList = "", double mH=400., int njets, TString chan="el" ) 
{
    // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
    // if you use your private .rootrc, or run from a different directory, please copy the 
    // corresponding lines from .rootrc
    
    // methods to be processed can be given as an argument; use format:
    //
    // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
    //
    // if you like to use a method via the plugin mechanism, we recommend using
    // 
    // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
    // (an example is given for using the BDT as plugin (see below),
    // but of course the real application is when you write your own
    // method based)
    
    // this loads the library
    TMVA::Tools::Instance();
    
    //---------------------------------------------------------------
    // default MVA methods to be trained + tested
    std::map<std::string,int> Use;
    
    Use["Cuts"]            = 0;
    Use["CutsD"]           = 0;
    Use["CutsPCA"]         = 0;
    Use["CutsGA"]          = 0;
    Use["CutsSA"]          = 0;
    // ---
    Use["Likelihood"]      = 1;
    Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
    Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
    Use["LikelihoodKDE"]   = 0;
    Use["LikelihoodMIX"]   = 0;
    // ---
    Use["PDERS"]           = 0;
    Use["PDERSD"]          = 0;
    Use["PDERSPCA"]        = 0;
    Use["PDERSkNN"]        = 0; // depreciated until further notice
    Use["PDEFoam"]         = 0;
    // --
    Use["KNN"]             = 0;
    // ---
    Use["HMatrix"]         = 0;
    Use["Fisher"]          = 0;
    Use["FisherG"]         = 0;
    Use["BoostedFisher"]   = 0;
    Use["LD"]              = 1;
    // ---
    Use["FDA_GA"]          = 0;
    Use["FDA_SA"]          = 0;
    Use["FDA_MC"]          = 0;
    Use["FDA_MT"]          = 0;
    Use["FDA_GAMT"]        = 0;
    Use["FDA_MCMT"]        = 0;
    // ---
    Use["MLP"]             = 0; // this is the recommended ANN
    Use["MLPBFGS"]         = 0; // recommended ANN with optional training method
    Use["CFMlpANN"]        = 0; // *** missing
    Use["TMlpANN"]         = 0; 
    // ---
    Use["SVM"]             = 0;
    // ---
    Use["BDT"]             = 1;
    Use["BDTD"]            = 0;
    Use["BDTG"]            = 0;
    Use["BDTB"]            = 0;
    // ---
    Use["RuleFit"]         = 0;
    // ---
    Use["Plugin"]          = 0;
    // ---------------------------------------------------------------
    
    std::cout << std::endl;
    std::cout << "==> Start TMVAClassification" << std::endl;
    
    if (myMethodList != "") {
        for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
        
        std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
        for (UInt_t i=0; i<mlist.size(); i++) {
            std::string regMethod(mlist[i]);
            
            if (Use.find(regMethod) == Use.end()) {
                std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
                for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
                std::cout << std::endl;
                return;
            }
            Use[regMethod] = 1;
        }
    }
    
    // Create a new root output file.
    ///////TString outfileName( "TMVA.root" );
    char outfileName[192];
    sprintf(outfileName,"TMVA_%3.0f_nJ%i_%s.root",mH,njets,chan.Data());
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
    
    // Create the factory object. Later you can choose the methods
    // whose performance you'd like to investigate. The factory will
    // then run the performance analysis for you.
    //
    // The first argument is the base of the name of all the
    // weightfiles in the directory weight/ 
    //
    // The second argument is the output file for the training results
    // All TMVA output can be suppressed by removing the "!" (not) in 
    // front of the "Silent" argument in the option string
    char classifierName[192];
    sprintf(classifierName,"TMVAClassification_%3.0f_nJ%i_%s",mH,njets,chan.Data());
    TMVA::Factory *factory = new TMVA::Factory( classifierName, outputFile, 
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );
    //TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, 
    //                                           "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );
    
    // If you wish to modify default settings 
    // (please check "src/Config.h" to see all available global options)
    //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
    //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
    
    // Define the input variables that shall be used for the MVA training
    
    // leptonic W
    factory->AddVariable("WWpt := ptlvjj", 'F');
    factory->AddVariable("WWy := ylvjj", 'F');
    //factory->AddVariable("Wpt := W_pt", 'F');
    //factory->AddVariable("MET := event_met_pfmet", 'F');
    if (chan = "mu"){
        factory->AddVariable("LepCharge := W_muon_charge", 'F');
    }
    else if (chan = "el"){
        factory->AddVariable("LepCharge := W_electron_charge", 'F');
    }
    else{
        std::cout << "Invalid channel!" << std::endl;
        return;
    }
    // factory->AddVariable("J1QGL := JetPFCor_QGLikelihood[0]", 'F');
    // factory->AddVariable("J2QGL := JetPFCor_QGLikelihood[1]", 'F');
    
    factory->AddVariable("costheta1 := ang_ha", 'F');
    factory->AddVariable("costheta2 := ang_hb", 'F');
    factory->AddVariable("costhetaS := ang_hs", 'F');
    factory->AddVariable("Phi := ang_phi", 'F');
    factory->AddVariable("Phi2 := ang_phib", 'F');
    
    // You can add so-called "Spectator variables", which are not used in the MVA training, 
    // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
    // input variables, the response values of all trained MVAs, and the spectator variables
    factory->AddSpectator("run := event_runNo", "I");
    factory->AddSpectator("lumi := event_lumi", "I");
    factory->AddSpectator("event := event_evtNo", "I");
    factory->AddSpectator("mjj := Mass2j_PFCor", "F");
    factory->AddSpectator("mlvjj := MassV2j_PFCor", "F");
    factory->AddSpectator("masslvjj := masslvjj", "F");
    //factory->AddSpectator("ggdevt := ggdevt", "F");
    //factory->AddSpectator("fit_mlvjj := fit_mlvjj", "F");
    
    
    
    // read training and test data
    char signalOutputName[192];
    sprintf(signalOutputName,"/uscms_data/d2/kalanand/WjjTrees/Full2011DataFall11MC/ReducedTree/RD_%s_HWWMH%3.0f_CMSSW428.root",chan.Data(),mH);
    TFile *input1 = TFile::Open( signalOutputName );
    //TFile *input1 = TFile::Open( "/uscms_data/d2/kalanand/WjjTrees/Full2011DataFall11MC/ReducedTree/RD_mu_HWWMH400_CMSSW428.root");
    char backgroundOutputName[192];
    sprintf(backgroundOutputName,"/uscms_data/d2/kalanand/WjjTrees/Full2011DataFall11MC/ReducedTree/RD_%s_WpJ_CMSSW428.root",chan.Data());
    TFile *input2 = TFile::Open( backgroundOutputName );
    
    std::cout << "--- TMVAClassification : Using input file: " << input1->GetName() << std::endl;
    
    TTree *signal     = (TTree*)input1->Get("WJet");
    TTree *background = (TTree*)input2->Get("WJet");
    
    
    // global event weights per tree (see below for setting event-wise weights)
    Double_t signalWeight     = 1.0;
    Double_t backgroundWeight = 1.0;
    
    // ====== register trees ====================================================
    //
    // the following method is the prefered one:
    // you can add an arbitrary number of signal or background trees
    factory->AddSignalTree    ( signal,     signalWeight     );
    factory->AddBackgroundTree( background, backgroundWeight );
    
    // To give different trees for training and testing, do as follows:
    //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
    //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );
    
    // Use the following code instead of the above two or four lines to add signal and background 
    // training and test events "by hand"
    // NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
    //      variable definition, but simply compute the expression before adding the event
    // 
    //    // --- begin ----------------------------------------------------------
    //    std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
    //    Float_t  treevars[4];
    //    for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
    //    for (Int_t i=0; i<signal->GetEntries(); i++) {
    //       signal->GetEntry(i);
    //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
    //       // add training and test events; here: first half is training, second is testing
    //       // note that the weight can also be event-wise	
    //       if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight ); 
    //       else                            factory->AddSignalTestEvent    ( vars, signalWeight ); 
    //    }
    //
    //    for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
    //    for (Int_t i=0; i<background->GetEntries(); i++) {
    //       background->GetEntry(i); 
    //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
    //       // add training and test events; here: first half is training, second is testing
    //       // note that the weight can also be event-wise	
    //       if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight ); 
    //       else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight ); 
    //    }
    //    // --- end ------------------------------------------------------------
    //
    // ====== end of register trees ==============================================
    
    
    // This would set individual event weights (the variables defined in the 
    // expression need to exist in the original TTree)
    //    for signal    : factory->SetSignalWeightExpression("weight1*weight2");
    //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
    // factory->SetBackgroundWeightExpression("weight");
    
    // Apply additional cuts on the signal and background samples (can be different)
    //   TCut mycuts = "abs(eta)>1.5"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
    //    TCut mycutb = "abs(eta)>1.5"; // for example: TCut mycutb = "abs(var1)<0.5";
    

    char * mass4bodycut = "";
    if(njets==2) {
      if(chan.Contains("mu")) {
	if(mH==170.) mass4bodycut = "(fit_mlvjj>176 && fit_mlvjj<262)"; // 2j170mu =====
	if(mH==180.) mass4bodycut = "(fit_mlvjj>179 && fit_mlvjj<256)"; // 2j180mu
	if(mH==190.) mass4bodycut = "(fit_mlvjj>186 && fit_mlvjj<214)"; // 2j190mu
	if(mH==200.) mass4bodycut = "(fit_mlvjj>191 && fit_mlvjj<226)"; // 2j200mu
	if(mH==250.) mass4bodycut = "(fit_mlvjj>226 && fit_mlvjj<287)"; // 2j250mu
	if(mH==300.) mass4bodycut = "(fit_mlvjj>265 && fit_mlvjj<347)"; // 2j300mu
	if(mH==350.) mass4bodycut = "(fit_mlvjj>308 && fit_mlvjj<401)"; // 2j350mu
	if(mH==400.) mass4bodycut = "(fit_mlvjj>346 && fit_mlvjj<457)"; // 2j400mu
	if(mH==450.) mass4bodycut = "(fit_mlvjj>381 && fit_mlvjj<512)"; // 2j450mu
	if(mH==500.) mass4bodycut = "(fit_mlvjj>415 && fit_mlvjj<568)"; // 2j500mu
	if(mH==550.) mass4bodycut = "(fit_mlvjj>440 && fit_mlvjj<617)"; // 2j550mu
	if(mH==600.) mass4bodycut = "(fit_mlvjj>462 && fit_mlvjj<663)"; // 2j600mu
      }
      if(chan.Contains("el")) {
	if(mH==170.) mass4bodycut = "(fit_mlvjj>176 && fit_mlvjj<262)"; // 2j170el =====
	if(mH==180.) mass4bodycut = "(fit_mlvjj>179 && fit_mlvjj<256)"; // 2j180el
	if(mH==190.) mass4bodycut = "(fit_mlvjj>186 && fit_mlvjj<214)"; // 2j190el
	if(mH==200.) mass4bodycut = "(fit_mlvjj>191 && fit_mlvjj<226)"; // 2j200el
	if(mH==250.) mass4bodycut = "(fit_mlvjj>226 && fit_mlvjj<287)"; // 2j250el
	if(mH==300.) mass4bodycut = "(fit_mlvjj>265 && fit_mlvjj<347)"; // 2j300el
	if(mH==350.) mass4bodycut = "(fit_mlvjj>308 && fit_mlvjj<401)"; // 2j350el
	if(mH==400.) mass4bodycut = "(fit_mlvjj>346 && fit_mlvjj<457)"; // 2j400el
	if(mH==450.) mass4bodycut = "(fit_mlvjj>381 && fit_mlvjj<512)"; // 2j450el
	if(mH==500.) mass4bodycut = "(fit_mlvjj>415 && fit_mlvjj<568)"; // 2j500el
	if(mH==550.) mass4bodycut = "(fit_mlvjj>440 && fit_mlvjj<617)"; // 2j550el
	if(mH==600.) mass4bodycut = "(fit_mlvjj>462 && fit_mlvjj<663)"; // 2j600el
      }
    }

    if(njets==3) {
      if(chan.Contains("mu")) {
	if(mH==170.) mass4bodycut = "(fit_mlvjj>150 && fit_mlvjj<271)"; // 3j170mu =====
	if(mH==180.) mass4bodycut = "(fit_mlvjj>175 && fit_mlvjj<284)"; // 3j180mu
	if(mH==190.) mass4bodycut = "(fit_mlvjj>185 && fit_mlvjj<290)"; // 3j190mu
	if(mH==200.) mass4bodycut = "(fit_mlvjj>188 && fit_mlvjj<293)"; // 3j200mu
	if(mH==250.) mass4bodycut = "(fit_mlvjj>216 && fit_mlvjj<300)"; // 3j250mu
	if(mH==300.) mass4bodycut = "(fit_mlvjj>241 && fit_mlvjj<355)"; // 3j300mu
	if(mH==350.) mass4bodycut = "(fit_mlvjj>269 && fit_mlvjj<407)"; // 3j350mu
	if(mH==400.) mass4bodycut = "(fit_mlvjj>300 && fit_mlvjj<465)"; // 3j400mu
	if(mH==450.) mass4bodycut = "(fit_mlvjj>332 && fit_mlvjj<518)"; // 3j450mu
	if(mH==500.) mass4bodycut = "(fit_mlvjj>362 && fit_mlvjj<569)"; // 3j500mu
	if(mH==550.) mass4bodycut = "(fit_mlvjj>398 && fit_mlvjj<616)"; // 3j550mu
	if(mH==600.) mass4bodycut = "(fit_mlvjj>419 && fit_mlvjj<660)"; // 3j600mu
      }
      if(chan.Contains("el")) {
	if(mH==170.) mass4bodycut = "(fit_mlvjj>150 && fit_mlvjj<271)"; // 3j170el =====
	if(mH==180.) mass4bodycut = "(fit_mlvjj>175 && fit_mlvjj<284)"; // 3j180el
	if(mH==190.) mass4bodycut = "(fit_mlvjj>185 && fit_mlvjj<290)"; // 3j190el
	if(mH==200.) mass4bodycut = "(fit_mlvjj>188 && fit_mlvjj<293)"; // 3j200el
	if(mH==250.) mass4bodycut = "(fit_mlvjj>216 && fit_mlvjj<300)"; // 3j250el
	if(mH==300.) mass4bodycut = "(fit_mlvjj>241 && fit_mlvjj<355)"; // 3j300el
	if(mH==350.) mass4bodycut = "(fit_mlvjj>269 && fit_mlvjj<407)"; // 3j350el
	if(mH==400.) mass4bodycut = "(fit_mlvjj>300 && fit_mlvjj<465)"; // 3j400el
	if(mH==450.) mass4bodycut = "(fit_mlvjj>332 && fit_mlvjj<518)"; // 3j450el
	if(mH==500.) mass4bodycut = "(fit_mlvjj>362 && fit_mlvjj<569)"; // 3j500el
	if(mH==550.) mass4bodycut = "(fit_mlvjj>398 && fit_mlvjj<616)"; // 3j550el
	if(mH==600.) mass4bodycut = "(fit_mlvjj>419 && fit_mlvjj<660)";  // 3j600el
      }
    }

    char mycutschar[1000];
    sprintf(mycutschar,"ggdevt == %i &&(Mass2j_PFCor>65 && Mass2j_PFCor<95) && %s", njets, mass4bodycut);
    TCut mycuts (mycutschar);
    


    // tell the factory to use all remaining events in the trees after training for testing:
    factory->PrepareTrainingAndTestTree( mycuts, mycuts,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
    
    // If no numbers of events are given, half of the events in the tree are used for training, and 
    // the other half for testing:
    //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  
    // To also specify the number of testing events, use:
    //    factory->PrepareTrainingAndTestTree( mycut, 
    //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );  
    
    // ---- Book MVA methods
    //
    // please lookup the various method configuration options in the corresponding cxx files, eg:
    // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    // it is possible to preset ranges in the option string in which the cut optimisation should be done:
    // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable
    
    // Cut optimisation
    if (Use["Cuts"])
        factory->BookMethod( TMVA::Types::kCuts, "Cuts", 
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );
    
    if (Use["CutsD"])
        factory->BookMethod( TMVA::Types::kCuts, "CutsD", 
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );
    
    if (Use["CutsPCA"])
        factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", 
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );
    
    if (Use["CutsGA"])
        factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
    
    if (Use["CutsSA"])
        factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
    
    // Likelihood
    if (Use["Likelihood"])
        factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", 
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); 
    
    // test the decorrelated likelihood
    if (Use["LikelihoodD"])
        factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", 
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); 
    
    if (Use["LikelihoodPCA"])
        factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", 
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 
    
    // test the new kernel density estimator
    if (Use["LikelihoodKDE"])
        factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", 
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 
    
    // test the mixed splines and kernel density estimator (depending on which variable)
    if (Use["LikelihoodMIX"])
        factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", 
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 
    
    // test the multi-dimensional probability density estimator
    // here are the options strings for the MinMax and RMS methods, respectively:
    //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
    //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   
    if (Use["PDERS"])
        factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
                            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );
    
    if (Use["PDERSkNN"])
        factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", 
                            "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );
    
    if (Use["PDERSD"])
        factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", 
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );
    
    if (Use["PDERSPCA"])
        factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", 
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );
    
    // Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if (Use["PDEFoam"])
        factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", 
                            "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" );
    
    // K-Nearest Neighbour classifier (KNN)
    if (Use["KNN"])
        factory->BookMethod( TMVA::Types::kKNN, "KNN", 
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
    // H-Matrix (chi2-squared) method
    if (Use["HMatrix"])
        factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); 
    
    // Fisher discriminant   
    if (Use["Fisher"])
        factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" );
    
    // Fisher with Gauss-transformed input variables
    if (Use["FisherG"])
        factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );
    
    // Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if (Use["BoostedFisher"])
        factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2");
    
    // Linear discriminant (same as Fisher)
    if (Use["LD"])
        factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" );
    
    // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if (Use["FDA_MC"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
    
    if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
    
    if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
    
    if (Use["FDA_MT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
    
    if (Use["FDA_GAMT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
    
    if (Use["FDA_MCMT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );
    
    // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if (Use["MLP"])
        factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:EpochMonitoring" );
    
    if (Use["MLPBFGS"])
        factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:TrainingMethod=BFGS:!EpochMonitoring" );
    
    
    // CF(Clermont-Ferrand)ANN
    if (Use["CFMlpANN"])
        factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  
    
    // Tmlp(Root)ANN
    if (Use["TMlpANN"])
        factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...
    
    // Support Vector Machine
    if (Use["SVM"])
        factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
    
    // Boosted Decision Trees
    if (Use["BDTG"]) // Gradient Boost
        factory->BookMethod( TMVA::Types::kBDT, "BDTG", 
                            "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" );
    
    if (Use["BDT"])  // Adaptive Boost
        factory->BookMethod( TMVA::Types::kBDT, "BDT", 
                            "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
    
    if (Use["BDTB"]) // Bagging
        factory->BookMethod( TMVA::Types::kBDT, "BDTB", 
                            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
    
    if (Use["BDTD"]) // Decorrelation + Adaptive Boost
        factory->BookMethod( TMVA::Types::kBDT, "BDTD", 
                            "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );
    
    // RuleFit -- TMVA implementation of Friedman's method
    if (Use["RuleFit"])
        factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );
    
    // For an example of the category classifier, see: TMVAClassificationCategory
    
    // --------------------------------------------------------------------------------------------------
    
    // As an example how to use the ROOT plugin mechanism, book BDT via
    // plugin mechanism
    if (Use["Plugin"]) {
        //
        // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc:
        //
        // # plugin handler          plugin name(regexp) class to be instanciated library        constructor format
        // Plugin.TMVA@@MethodBase:  ^BDT                TMVA::MethodBDT          TMVA.1         "MethodBDT(TString,TString,DataSet&,TString)"
        // 
        // or by telling the global plugin manager directly
        gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)");
        factory->BookMethod( TMVA::Types::kPlugins, "BDT",
                            "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" );
    }
    
    // --------------------------------------------------------------------------------------------------
    
    // ---- Now you can tell the factory to train, test, and evaluate the MVAs
    
    // Train MVAs using the set of training events
    factory->TrainAllMethods();
    
    // ---- Evaluate all MVAs using the set of test events
    factory->TestAllMethods();
    
    // ----- Evaluate and compare performance of all configured MVAs
    factory->EvaluateAllMethods();    
    
    // --------------------------------------------------------------
    
    // Save the output
    outputFile->Close();
    
    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVAClassification is done!" << std::endl;      
    
    delete factory;
    
    // Launch the GUI for the root macros
    if (!gROOT->IsBatch()) TMVAGui( outfileName );
    
}
void TMVAClassificationCategory()
{
    //---------------------------------------------------------------
    // Example for usage of different event categories with classifiers

    std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl;

    // This loads the library
    TMVA::Tools::Instance();

    // to get access to the GUI and all tmva macros
    TString tmva_dir(TString(gRootDir) + "/tmva");
    if(gSystem->Getenv("TMVASYS"))
        tmva_dir = TString(gSystem->Getenv("TMVASYS"));
    gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() );
    gROOT->ProcessLine(".L TMVAGui.C");


    bool batchMode = false;

    // Create a new root output file.
    TString outfileName( "TMVA.root" );
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    // Create the factory object (see TMVAClassification.C for more information)

    std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" );
    if (batchMode) factoryOptions += ":!Color:!DrawProgressBar";

    TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions );

    // Define the input variables used for the MVA training
    factory->AddVariable( "var1", 'F' );
    factory->AddVariable( "var2", 'F' );
    factory->AddVariable( "var3", 'F' );
    factory->AddVariable( "var4", 'F' );

    // You can add so-called "Spectator variables", which are not used in the MVA training,
    // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    // input variables, the response values of all trained MVAs, and the spectator variables
    factory->AddSpectator( "eta" );

    // Load the signal and background event samples from ROOT trees
    TFile *input(0);
    TString fname( "" );
    if (UseOffsetMethod) fname = "data/toy_sigbkg_categ_offset.root";
    else                 fname = "data/toy_sigbkg_categ_varoff.root";
    if (!gSystem->AccessPathName( fname )) {
        // first we try to find tmva_example.root in the local directory
        std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl;
        input = TFile::Open( fname );
    }

    if (!input) {
        std::cout << "ERROR: could not open data file: " << fname << std::endl;
        exit(1);
    }

    TTree *signal     = (TTree*)input->Get("TreeS");
    TTree *background = (TTree*)input->Get("TreeB");

    /// Global event weights per tree (see below for setting event-wise weights)
    Double_t signalWeight     = 1.0;
    Double_t backgroundWeight = 1.0;

    /// You can add an arbitrary number of signal or background trees
    factory->AddSignalTree    ( signal,     signalWeight     );
    factory->AddBackgroundTree( background, backgroundWeight );

    // Apply additional cuts on the signal and background samples (can be different)
    TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
    TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

    // Tell the factory how to use the training and testing events
    factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                         "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

    // ---- Book MVA methods

    // Fisher discriminant
    factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" );

    // Likelihood
    factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                         "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

    // --- Categorised classifier
    TMVA::MethodCategory* mcat = 0;

    // The variable sets
    TString theCat1Vars = "var1:var2:var3:var4";
    TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3");

    // Fisher with categories
    TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" );
    mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat);
    mcat->AddMethod( "abs(eta)<=1.3", theCat1Vars, TMVA::Types::kFisher, "Category_Fisher_1","!H:!V:Fisher" );
    mcat->AddMethod( "abs(eta)>1.3",  theCat2Vars, TMVA::Types::kFisher, "Category_Fisher_2","!H:!V:Fisher" );

    // Likelihood with categories
    TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" );
    mcat = dynamic_cast<TMVA::MethodCategory*>(liCat);
    mcat->AddMethod( "abs(eta)<=1.3",theCat1Vars, TMVA::Types::kLikelihood,
                     "Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
    mcat->AddMethod( "abs(eta)>1.3", theCat2Vars, TMVA::Types::kLikelihood,
                     "Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

    // ---- Now you can tell the factory to train, test, and evaluate the MVAs

    // Train MVAs using the set of training events
    factory->TrainAllMethods();

    // ---- Evaluate all MVAs using the set of test events
    factory->TestAllMethods();

    // ----- Evaluate and compare performance of all configured MVAs
    factory->EvaluateAllMethods();

    // --------------------------------------------------------------

    // Save the output
    outputFile->Close();

    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVAClassificationCategory is done!" << std::endl;

    // Clean up
    delete factory;

    // Launch the GUI for the root macros
    if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
Exemplo n.º 3
0
void TMVAClassification( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   // this loads the library
   TMVA::Tools::Instance();

   //---------------------------------------------------------------
   // default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // ---
   Use["Likelihood"]      = 1;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   // ---
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDERSkNN"]        = 0; // depreciated until further notice
   Use["PDEFoam"]         = 0;
   // --
   Use["KNN"]             = 0;
   // ---
   Use["HMatrix"]         = 0;
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0;
   Use["LD"]              = 0;
   // ---
   Use["FDA_GA"]          = 0;
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   // ---
   Use["MLP"]             = 0; // this is the recommended ANN
   Use["MLPBFGS"]         = 0; // recommended ANN with optional training method
   Use["MLPBNN"]          = 0;  // recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // *** missing
   Use["TMlpANN"]         = 0;
   // ---
   Use["SVM"]             = 0;
   // ---
   Use["BDT"]             = 1;
   Use["BDTD"]            = 0;
   Use["BDTG"]            = 0;
   Use["BDTB"]            = 0;
   // ---
   Use["RuleFit"]         = 0;
   // ---
   Use["Plugin"]          = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 0;
      }
   }

   // Create a new root output file.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
//   factory->AddVariable( "myvar1 := var1+var2", 'F' );
//   factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
//   factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
//   factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
//   factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
//   factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );

   // read training and test data

factory->AddVariable("CScostheta",'F');

factory->AddVariable("ZRapidity",'F');

factory->AddVariable("REDmet",'F');
   if (ReadDataFromAsciiIFormat) {
      // load the signal and background event samples from ascii files
      // format in file must be:
      // var1/F:var2/F:var3/F:var4/F
      // 0.04551   0.59923   0.32400   -0.19170
      // ...

      TString datFileS = "tmva_example_sig.dat";
      TString datFileB = "tmva_example_bkg.dat";

      factory->SetInputTrees( datFileS, datFileB );
   }
   else {
      // load the signal and background event samples from ROOT trees
      TString fname = "./tmva_class_example.root";

      TString fname_Data7TeV_DoubleElectron2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011B_0.root";

      TString fname_Data7TeV_MuEG2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011B_0.root";

      TString fname_ZH125 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH125.root";

      TString fname_SingleT_tW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_tW.root";

      TString fname_SingleT_s = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_s.root";

      TString fname_Data7TeV_DoubleMu2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011B_0.root";

      TString fname_Data7TeV_DoubleElectron2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011A_0.root";

      TString fname_ZH135 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH135.root";

      TString fname_DYJetsToLL = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//DYJetsToLL.root";

      TString fname_ZH115 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH115.root";

      TString fname_SingleTbar_t = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_t.root";

      TString fname_Data7TeV_DoubleElectron2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011B_1.root";

      TString fname_Data7TeV_DoubleMu2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011A_1.root";

      TString fname_Data7TeV_MuEG2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011A_1.root";

      TString fname_TTJets = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//TTJets.root";

      TString fname_SingleTbar_s = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_s.root";

      TString fname_WJetsToLNu = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WJetsToLNu.root";

      TString fname_Data7TeV_DoubleElectron2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011A_1.root";

      TString fname_ZZ = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZZ.root";

      TString fname_ZH150 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH150.root";

      TString fname_Data7TeV_MuEG2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011B_1.root";

      TString fname_WW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WW.root";

      TString fname_ZH105 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH105.root";

      TString fname_Data7TeV_DoubleMu2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011A_0.root";

      TString fname_SingleTbar_tW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_tW.root";

      TString fname_WZ = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WZ.root";

      TString fname_Data7TeV_MuEG2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011A_0.root";

      TString fname_Data7TeV_DoubleMu2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011B_1.root";

      TString fname_ZH145 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH145.root";

      TString fname_SingleT_t = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_t.root";


      if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
         gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");

      TFile *input_Data7TeV_DoubleElectron2011B_0 = TFile::Open( fname_Data7TeV_DoubleElectron2011B_0 );

      TFile *input_Data7TeV_MuEG2011B_0 = TFile::Open( fname_Data7TeV_MuEG2011B_0 );

      TFile *input_ZH125 = TFile::Open( fname_ZH125 );

      TFile *input_SingleT_tW = TFile::Open( fname_SingleT_tW );

      TFile *input_SingleT_s = TFile::Open( fname_SingleT_s );

      TFile *input_Data7TeV_DoubleMu2011B_0 = TFile::Open( fname_Data7TeV_DoubleMu2011B_0 );

      TFile *input_Data7TeV_DoubleElectron2011A_0 = TFile::Open( fname_Data7TeV_DoubleElectron2011A_0 );

      TFile *input_ZH135 = TFile::Open( fname_ZH135 );

      TFile *input_DYJetsToLL = TFile::Open( fname_DYJetsToLL );

      TFile *input_ZH115 = TFile::Open( fname_ZH115 );

      TFile *input_SingleTbar_t = TFile::Open( fname_SingleTbar_t );

      TFile *input_Data7TeV_DoubleElectron2011B_1 = TFile::Open( fname_Data7TeV_DoubleElectron2011B_1 );

      TFile *input_Data7TeV_DoubleMu2011A_1 = TFile::Open( fname_Data7TeV_DoubleMu2011A_1 );

      TFile *input_Data7TeV_MuEG2011A_1 = TFile::Open( fname_Data7TeV_MuEG2011A_1 );

      TFile *input_TTJets = TFile::Open( fname_TTJets );

      TFile *input_SingleTbar_s = TFile::Open( fname_SingleTbar_s );

      TFile *input_WJetsToLNu = TFile::Open( fname_WJetsToLNu );

      TFile *input_Data7TeV_DoubleElectron2011A_1 = TFile::Open( fname_Data7TeV_DoubleElectron2011A_1 );

      TFile *input_ZZ = TFile::Open( fname_ZZ );

      TFile *input_ZH150 = TFile::Open( fname_ZH150 );

      TFile *input_Data7TeV_MuEG2011B_1 = TFile::Open( fname_Data7TeV_MuEG2011B_1 );

      TFile *input_WW = TFile::Open( fname_WW );

      TFile *input_ZH105 = TFile::Open( fname_ZH105 );

      TFile *input_Data7TeV_DoubleMu2011A_0 = TFile::Open( fname_Data7TeV_DoubleMu2011A_0 );

      TFile *input_SingleTbar_tW = TFile::Open( fname_SingleTbar_tW );

      TFile *input_WZ = TFile::Open( fname_WZ );

      TFile *input_Data7TeV_MuEG2011A_0 = TFile::Open( fname_Data7TeV_MuEG2011A_0 );

      TFile *input_Data7TeV_DoubleMu2011B_1 = TFile::Open( fname_Data7TeV_DoubleMu2011B_1 );

      TFile *input_ZH145 = TFile::Open( fname_ZH145 );

      TFile *input_SingleT_t = TFile::Open( fname_SingleT_t );


      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleElectron2011B_0 file: " << input_Data7TeV_DoubleElectron2011B_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_MuEG2011B_0 file: " << input_Data7TeV_MuEG2011B_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH125 file: " << input_ZH125->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleT_tW file: " << input_SingleT_tW->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleT_s file: " << input_SingleT_s->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleMu2011B_0 file: " << input_Data7TeV_DoubleMu2011B_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleElectron2011A_0 file: " << input_Data7TeV_DoubleElectron2011A_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH135 file: " << input_ZH135->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_DYJetsToLL file: " << input_DYJetsToLL->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH115 file: " << input_ZH115->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleTbar_t file: " << input_SingleTbar_t->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleElectron2011B_1 file: " << input_Data7TeV_DoubleElectron2011B_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleMu2011A_1 file: " << input_Data7TeV_DoubleMu2011A_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_MuEG2011A_1 file: " << input_Data7TeV_MuEG2011A_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_TTJets file: " << input_TTJets->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleTbar_s file: " << input_SingleTbar_s->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_WJetsToLNu file: " << input_WJetsToLNu->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleElectron2011A_1 file: " << input_Data7TeV_DoubleElectron2011A_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZZ file: " << input_ZZ->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH150 file: " << input_ZH150->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_MuEG2011B_1 file: " << input_Data7TeV_MuEG2011B_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_WW file: " << input_WW->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH105 file: " << input_ZH105->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleMu2011A_0 file: " << input_Data7TeV_DoubleMu2011A_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleTbar_tW file: " << input_SingleTbar_tW->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_WZ file: " << input_WZ->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_MuEG2011A_0 file: " << input_Data7TeV_MuEG2011A_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleMu2011B_1 file: " << input_Data7TeV_DoubleMu2011B_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH145 file: " << input_ZH145->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleT_t file: " << input_SingleT_t->GetName() << std::endl;


      TTree *signal_ZH145     = (TTree*)input_ZH145->Get("tmvatree");

      TTree *background_ZZ = (TTree*)input_ZZ->Get("tmvatree");


      // global event weights per tree (see below for setting event-wise weights)
      Double_t signalWeight     = 1.0;
      Double_t backgroundWeight = 1.0;

      // ====== register trees ====================================================
      //
      // the following method is the prefered one:
      // you can add an arbitrary number of signal or background trees
      factory->AddSignalTree    ( signal_ZH145,     1.0     );

      factory->AddBackgroundTree( background_ZZ, 1.0 );


      // To give different trees for training and testing, do as follows:
      //    factory->AddSignalTree( signal_ZH145TrainingTree, signal_ZH145TrainWeight, "Training" );

      //    factory->AddSignalTree( signal_ZH145TestTree,     signal_ZH145TestWeight,  "Test" );


      // Use the following code instead of the above two or four lines to add signal and background
      // training and test events "by hand"
      // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
      //      variable definition, but simply compute the expression before adding the event
      //
      //    // --- begin ----------------------------------------------------------
      //    std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
      //    Float_t  treevars[4];
      //    for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
      //    for (Int_t i=0; i<signal->GetEntries(); i++) {
      //       signal->GetEntry(i);
      //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
      //       // add training and test events; here: first half is training, second is testing
      //       // note that the weight can also be event-wise
      //       if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight );
      //       else                            factory->AddSignalTestEvent    ( vars, signalWeight );
      //    }
      //
      //    for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
      //    for (Int_t i=0; i<background->GetEntries(); i++) {
      //       background->GetEntry(i);
      //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
      //       // add training and test events; here: first half is training, second is testing
      //       // note that the weight can also be event-wise
      //       if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight );
      //       else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight );
      //    }
      //    // --- end ------------------------------------------------------------
      //
      // ====== end of register trees ==============================================
   }

   // This would set individual event weights (the variables defined in the
   // expression need to exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   factory->SetBackgroundWeightExpression("Eweight*XS*BR*LUM*(1/NGE)*(B2/B3)*CUT");
   factory->SetSignalWeightExpression("Eweight*XS*BR*LUM*(1/NGE)*(B2/B3)*CUT");

   // Apply additional cuts on the signal and background samples (can be different)
TCut mycuts = "(CUT>2)";
TCut mycutb = "(CUT>2)";

   // tell the factory to use all remaining events in the trees after training for testing:

 factory->PrepareTrainingAndTestTree( mycuts, "SplitMode=random:!V" );
//                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // If no numbers of events are given, half of the events in the tree are used for training, and
   // the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:Seed=0:EffSel:Steps=50:Cycles=3:PopSize=1000:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // test the decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // test the new kernel density estimator
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // test the mixed splines and kernel density estimator (depending on which variable)
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSkNN"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN",
                           "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" );

   // Fisher discriminant
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2");

   // Linear discriminant (same as Fisher)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=1000:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // As an example how to use the ROOT plugin mechanism, book BDT via
   // plugin mechanism
   if (Use["Plugin"]) {
         //
         // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc:
         //
         // # plugin handler          plugin name(regexp) class to be instanciated library        constructor format
         // Plugin.TMVA@@MethodBase:  ^BDT                TMVA::MethodBDT          TMVA.1         "MethodBDT(TString,TString,DataSet&,TString)"
         //
         // or by telling the global plugin manager directly
      gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)");
      factory->BookMethod( TMVA::Types::kPlugins, "BDT",
                           "!H:!V:NTrees=1000:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" );
   }

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros

gROOT->ProcessLine(".q;");
}
Exemplo n.º 4
0
void trainMVACat()
{
  char name[1000];
  float XSEC[6] = {3.67e+5,2.94e+4,6.524e+03,1.064e+03,121.5,2.542e+01};
  float NORM[6];
  TCut preselectionCut = "ht>400 && jetPt[5]>40 && (triggerBit[0] || triggerBit[2]) && nBJets>1 && nLeptons==0";
  TFile *bkgSrc[6];
  bkgSrc[0] = TFile::Open("flatTree_QCD_HT300to500.root");
  bkgSrc[1] = TFile::Open("flatTree_QCD_HT500to700.root");
  bkgSrc[2] = TFile::Open("flatTree_QCD_HT700to1000.root");
  bkgSrc[3] = TFile::Open("flatTree_QCD_HT1000to1500.root");
  bkgSrc[4] = TFile::Open("flatTree_QCD_HT1500to2000.root");
  bkgSrc[5] = TFile::Open("flatTree_QCD_HT2000toInf.root");

  TFile *sigSrc = TFile::Open("flatTree_ttHJetTobb_M125.root");
  //TFile *sigSrc = TFile::Open("flatTree_TT.root");
  TTree *sigTree = (TTree*)sigSrc->Get("hadtop/events"); 
  TTree *bkgTree[6];
  
  
  TFile *outf = new TFile("mva_Cat_QCD.root","RECREATE");
  TMVA::Factory* factory = new TMVA::Factory("factory_mva_Cat_QCD_",outf,"!V:!Silent:Color:DrawProgressBar:Transformations=I;G:AnalysisType=Classification");
  factory->AddSignalTree(sigTree);

  for(int k=0;k<6;k++) {
    NORM[k] = ((TH1F*)bkgSrc[k]->Get("hadtop/pileup"))->GetEntries();
    bkgTree[k] = (TTree*)bkgSrc[k]->Get("hadtop/events");
    factory->AddBackgroundTree(bkgTree[k],XSEC[k]/NORM[k]);
  }
  
  //int N_SIG(sigTree->GetEntries(preselectionCut));
  
  //int N_BKG0(bkgTree[0]->GetEntries(preselectionCut));
  //int N_BKG1(bkgTree[1]->GetEntries(preselectionCut));
  //int N_BKG2(bkgTree[2]->GetEntries(preselectionCut));
  //int N_BKG3(bkgTree[3]->GetEntries(preselectionCut));

  //float N_BKG_EFF = N_BKG0*XSEC[0]/NORM[0]+N_BKG1*XSEC[1]/NORM[1]+N_BKG2*XSEC[2]/NORM[2]+N_BKG3*XSEC[3]/NORM[3];
  
  //int N = TMath::Min((float)N_SIG,N_BKG_EFF);

  //cout<<N_SIG<<" "<<N_BKG_EFF<<endl;
  
  const int NVAR = 21;
  TString VAR[NVAR] = {
    "nJets",
    //"nBJets",
    "ht",
    "jetPt[0]","jetPt[1]","jetPt[2]","jetPt[3]","jetPt[4]","jetPt[5]",
    "mbbMin","dRbbMin",
    //"dRbbAve","mbbAve",
    //"btagAve","btagMax","btagMin",
    //"qglAve","qglMin","qglMedian",
    "sphericity","aplanarity","foxWolfram[0]","foxWolfram[1]","foxWolfram[2]","foxWolfram[3]",
    "mTop[0]","ptTTbar","mTTbar","dRbbTop","chi2"
  };
  char TYPE[NVAR] = {
    'I',
    //'I',
    'F',
    'F','F','F','F','F','F', 
    'F','F',
    //'F','F',
    //'F','F','F',
    //'F','F','F',
    'F','F','F','F','F','F', 
    'F','F','F','F','F'
  };

  for(int i=0;i<NVAR;i++) {
    factory->AddVariable(VAR[i],TYPE[i]);
  }

  factory->AddSpectator("status",'I');
  factory->AddSpectator("nBJets",'I');

  sprintf(name,"nTrain_Signal=%d:nTrain_Background=%d:nTest_Signal=%d:nTest_Background=%d",-1,-1,-1,-1);
  factory->PrepareTrainingAndTestTree(preselectionCut,name);

  TMVA::IMethod* BDT_Category = factory->BookMethod( TMVA::Types::kCategory,"BDT_Category");
  TMVA::MethodCategory* mcategory_BDT = dynamic_cast<TMVA::MethodCategory*>(BDT_Category); 

  mcategory_BDT->AddMethod("status == 0 && nBJets == 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:mTop[0]:ptTTbar:mTTbar:dRbbTop:chi2:",
                      TMVA::Types::kBDT,
                      "BDT_Cat1",
                      "NTrees=2000:BoostType=Grad:Shrinkage=0.1");

  mcategory_BDT->AddMethod("status == 0 && nBJets > 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:mTop[0]:ptTTbar:mTTbar:dRbbTop:chi2:",
                      TMVA::Types::kBDT,
                      "BDT_Cat2",
                      "NTrees=2000:BoostType=Grad:Shrinkage=0.1");

  mcategory_BDT->AddMethod("status < 0 && nBJets == 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:",
                      TMVA::Types::kBDT,
                      "BDT_Cat3",
                      "NTrees=2000:BoostType=Grad:Shrinkage=0.1");

  mcategory_BDT->AddMethod("status < 0 && nBJets > 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:",
                      TMVA::Types::kBDT,
                      "BDT_Cat4",
                      "NTrees=2000:BoostType=Grad:Shrinkage=0.1");

  TMVA::IMethod* Fisher_Category = factory->BookMethod( TMVA::Types::kCategory,"Fisher_Category");
  TMVA::MethodCategory* mcategory_Fisher = dynamic_cast<TMVA::MethodCategory*>(Fisher_Category);
  
  mcategory_Fisher->AddMethod("status == 0 && nBJets == 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:mTop[0]:ptTTbar:mTTbar:dRbbTop:chi2:",
                      TMVA::Types::kFisher,
                      "Fisher_Cat1","H:!V:Fisher");

  mcategory_Fisher->AddMethod("status == 0 && nBJets > 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:mTop[0]:ptTTbar:mTTbar:dRbbTop:chi2:",
                      TMVA::Types::kFisher,
                      "Fisher_Cat2","H:!V:Fisher");

  mcategory_Fisher->AddMethod("status < 0 && nBJets == 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:",
                      TMVA::Types::kFisher,
                      "Fisher_Cat3","H:!V:Fisher");

  mcategory_Fisher->AddMethod("status < 0 && nBJets > 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:",
                      TMVA::Types::kFisher,
                      "Fisher_Cat4","H:!V:Fisher");

  // specify the training methods
  //factory->BookMethod(TMVA::Types::kFisher,"Fisher");
  //factory->BookMethod(TMVA::Types::kBDT,"BDT_GRAD_2000","NTrees=2000:BoostType=Grad:Shrinkage=0.1");
  
  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods(); 
  outf->Close();
}
Exemplo n.º 5
0
void TMVARegression( TString myMethodList = "" ) 
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the 
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDEFoam"]         = 1; 
   Use["KNN"]             = 1;
   // 
   // --- Linear Discriminant Analysis
   Use["LD"]		        = 1;
   // 
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 1;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   // 
   // --- Neural Network
   Use["MLP"]             = 1; 
   // 
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 0;
   Use["BDTG"]            = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVARegression" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a new root output file
   TString outfileName( "TMVAReg.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, 
                                               "!V:!Silent:Color:DrawProgressBar" );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   factory->AddVariable( "var1", "Variable 1", "units", 'F' );
   factory->AddVariable( "var2", "Variable 2", "units", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables
   factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
   factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );

   // Add the variable carrying the regression target
   factory->AddTarget( "fvalue" ); 

   // It is also possible to declare additional targets for multi-dimensional regression, ie:
   // -- factory->AddTarget( "fvalue2" );
   // BUT: this is currently ONLY implemented for MLP

   // Read training and test data (see TMVAClassification for reading ASCII files)
   // load the signal and background event samples from ROOT trees
   TFile *input(0);
   TString fname = "./tmva_reg_example.root";
   if (!gSystem->AccessPathName( fname )) 
      input = TFile::Open( fname ); // check if file in local directory exists
   else 
      input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server
   
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   std::cout << "--- TMVARegression           : Using input file: " << input->GetName() << std::endl;

   // --- Register the regression tree

   TTree *regTree = (TTree*)input->Get("TreeR");

   // global event weights per tree (see below for setting event-wise weights)
   Double_t regWeight  = 1.0;   

   // You can add an arbitrary number of regression trees
   factory->AddRegressionTree( regTree, regWeight );

   // This would set individual event weights (the variables defined in the 
   // expression need to exist in the original TTree)
   factory->SetWeightExpression( "var1", "Regression" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";

   // tell the factory to use all remaining events in the trees after training for testing:
   factory->PrepareTrainingAndTestTree( mycut, 
                                        "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // PDE - RS method
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

   if (Use["PDEFoam"])
       factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", 
			    "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN", 
                           "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // Linear discriminant
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", 
                           "!H:!V:VarTransform=None" );

	// Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                          "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
   
   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

   if (Use["FDA_MT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDT"])
     factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=100:MinNodeSize=1.0%:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );

   if (Use["BDTG"])
     factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000::BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=3:MaxDepth=4" );
   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;      

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
Exemplo n.º 6
0
void TMVAClassification_qgl( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   Use["Fisher"]          = 0;
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   // --- Boosted Decision Trees
   Use["BDTB"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTG"]            = 0; 
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 1;
   Use["LikelihoodMIX"]   = 0;
   // 
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;
   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
 //  TString outfileName( "TMVA_bjet_new_powheg.root" );
 
	int set_type=1;
	TString type[2]  = {"_double","_single"};

   TString outfileName( "TMVA_qgl"+type[set_type]+"_2jet_1.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
  // factory->AddVariable( "myvar1 := var1+var2", 'F' );
  // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
   factory->AddVariable( "Jet_pt", "Jet p_{T}", "GeV", 'F' );
   factory->AddVariable( "Jet_eta", "Jet #eta", "", 'F' );
   factory->AddVariable( "Jet_ptd", "Jet ptd", "", 'F' );
   factory->AddVariable( "Jet_axis2", "Jet axis2", "", 'F' );
   factory->AddVariable( "Jet_mult", "Jet multiplicity", "", 'I' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
  // factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
  // factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   //
	TString fname_signal ="/afs/cern.ch/work/n/nchernya/Hbb/qgl_mva/qgl_tmva_tree_VBFHToBB_M-125_13TeV_powheg_v14"+type[set_type]+"_2jets.root";
	TString fname_bg ="/afs/cern.ch/work/n/nchernya/Hbb/qgl_mva/qgl_tmva_tree_BTagCSV_v14"+type[set_type]+"_2jets.root";

   if (gSystem->AccessPathName( fname_signal )) { // file does not exist in local directory
		cout<<"input file "<< fname_signal<<" doesn't exist!"<<endl;
		//break;
	}
   if (gSystem->AccessPathName( fname_bg)) { // file does not exist in local directory
		cout<<"input file "<< fname_bg<<" doesn't exist!"<<endl;
		//break;
	}
    //  gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
   
   TFile *input_signal = TFile::Open( fname_signal );
   TFile *input_bg = TFile::Open( fname_bg );
   
   std::cout << "--- TMVAClassification       : Using input signal file: " << input_signal->GetName() << std::endl;
   std::cout << "--- TMVAClassification       : Using input bg file: " << input_bg->GetName() << std::endl;
   
   // --- Register the training and test trees

   TTree *signal     = (TTree*)input_signal->Get("QGL_1");
   TTree *bg = (TTree*)input_bg->Get("QGL_1");
   
   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t bgWeight = 1.0;
   
   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight  );
   factory->AddBackgroundTree( bg, bgWeight );

   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators


   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTG"]) //
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
		   "!H:!V:NTrees=120:MinNodeSize=6%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=3:NegWeightTreatment=IgnoreNegWeightsInTraining" );
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 
   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's ! 
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
void TMVAClassification_cc1pcoh_bdt_verFF( TString myMethodList = "" )
{
    //---------------------------------------------------------------
    // This loads the library
    TMVA::Tools::Instance();

    // to get access to the GUI and all tmva macros
    TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName());
    gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath());
    gROOT->ProcessLine(".L TMVAGui.C");

    // Default MVA methods to be trained + tested
    std::map<std::string,int> Use;

    // --- Cut optimisation
    Use["Cuts"]            = 1;
    Use["CutsD"]           = 1;
    Use["CutsPCA"]         = 0;
    Use["CutsGA"]          = 0;
    Use["CutsSA"]          = 0;
    //
    // --- 1-dimensional likelihood ("naive Bayes estimator")
    Use["Likelihood"]      = 1;
    Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
    Use["LikelihoodPCA"]   = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
    Use["LikelihoodKDE"]   = 0;
    Use["LikelihoodMIX"]   = 0;
    //
    // --- Mutidimensional likelihood and Nearest-Neighbour methods
    Use["PDERS"]           = 1;
    Use["PDERSD"]          = 0;
    Use["PDERSPCA"]        = 0;
    Use["PDEFoam"]         = 1;
    Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
    Use["KNN"]             = 1; // k-nearest neighbour method
    //
    // --- Linear Discriminant Analysis
    Use["LD"]              = 1; // Linear Discriminant identical to Fisher
    Use["Fisher"]          = 0;
    Use["FisherG"]         = 0;
    Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
    Use["HMatrix"]         = 0;
    //
    // --- Function Discriminant analysis
    Use["FDA_GA"]          = 1; // minimisation of user-defined function using Genetics Algorithm
    Use["FDA_SA"]          = 0;
    Use["FDA_MC"]          = 0;
    Use["FDA_MT"]          = 0;
    Use["FDA_GAMT"]        = 0;
    Use["FDA_MCMT"]        = 0;
    //
    // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
    Use["MLP"]             = 0; // Recommended ANN
    Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
    Use["MLPBNN"]          = 1; // Recommended ANN with BFGS training method and bayesian regulator
    Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
    Use["TMlpANN"]         = 0; // ROOT's own ANN
    //
    // --- Support Vector Machine
    Use["SVM"]             = 1;
    //
    // --- Boosted Decision Trees
    Use["BDT"]             = 1; // uses Adaptive Boost
    Use["BDTG"]            = 0; // uses Gradient Boost
    Use["BDTB"]            = 0; // uses Bagging
    Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
    Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting
    //
    // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
    Use["RuleFit"]         = 1;
    
    // ---------------------------------------------------------------
    // Choose method
    std::cout << std::endl;
    std::cout << "==> Start TMVAClassification" << std::endl;

    // Select methods (don't look at this code - not of interest)
    if (myMethodList != "") {
        for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

        std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
        for (UInt_t i=0; i<mlist.size(); i++) {
            std::string regMethod(mlist[i]);

            if (Use.find(regMethod) == Use.end()) {
                std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
                for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
                std::cout << std::endl;
                return;
            }
            Use[regMethod] = 1;
        }
    }

    // ---------------------------------------------------------------
    // --- Here the preparation phase begins

    // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
    TString outfileName( "TMVA_cc1pcoh_bdt_verFF.root" );//newchange
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    // Create the factory object.
    TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification_verFF", outputFile,//newchange
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

    
    // Add variable
    //sprintf(select,  "Ntrack==2&&mumucl>0.6&&pmucl>0.25&&pang<90&&muang_t<15 && veract*7.66339869e-2<34");
    factory->AddVariable( "mumucl", 'F' );
    factory->AddVariable( "pmucl", 'F' );
    factory->AddVariable( "pang_t", 'F' );//this is fixed with backward
    factory->AddVariable( "muang_t", 'F' );
    factory->AddVariable( "ppe", 'F');
    factory->AddVariable( "mupe", 'F');
    factory->AddVariable( "rangetot", 'F');//total range in PM and INGRID
    factory->AddVariable( "prangetot", 'F');//total range in PM and INGRID
    factory->AddVariable( "coplanarity", 'F');
    factory->AddVariable( "opening", 'F');//newadd

    // Add spectator
    factory->AddSpectator( "fileIndex", 'I' );
    factory->AddSpectator( "nuE", 'F' );
    factory->AddSpectator( "inttype", 'I' );
    factory->AddSpectator( "norm", 'F' );
    factory->AddSpectator( "totcrsne", 'F' );
    factory->AddSpectator( "veract", 'F' );

    // ---------------------------------------------------------------
    // --- Get weight
    TString fratioStr="/home/kikawa/macros/nd34_tuned_11bv3.1_250ka.root";
    
    
    
    // ---------------------------------------------------------------
    // --- Add sample
    TString fsignalStr="/home/cvson/cc1picoh/dataProcess/fix20150420/pm_merged_ccqe_tot.root";
    TString fbarStr="/home/cvson/cc1picoh/dataProcess/fix20150420/pmbar_merged_ccqe.root";
    TString fbkgStr="/home/cvson/cc1picoh/dataProcess/fix20150420/wall_merged_ccqe_tot.root";
    TString fbkg2Str="/home/cvson/cc1picoh/dataProcess/fix20150420/ingrid_merged_nd3_ccqe_tot.root";
    

    
    TFile *pfileSignal = new TFile(fsignalStr);
    TFile *pfileBar = new TFile(fbarStr);
    TFile *pfileBkg = new TFile(fbkgStr);
    TFile *pfileBkg2 = new TFile(fbkg2Str);
    TFile *pfileRatio = new TFile(fratioStr);
    
    TTree *ptree_sig  = (TTree*)pfileSignal->Get("tree");
    TTree *ptree_bar  = (TTree*)pfileBar->Get("tree");
    TTree *ptree_bkg   = (TTree*)pfileBkg->Get("tree");
    TTree *ptree_bkg2  = (TTree*)pfileBkg2->Get("tree");
    
    // POT normalization
    const int   nmcFile  = 3950;
    const int   nbarFile  = 986;
    const int   nbkgFile  = 55546;//(31085+24461);
    const int   nbkg2File  = 7882;//(3941+3941);
    

   
    // global event weights per tree (see below for setting event-wise weights)
    // adding for signal sample
    // using this as standard and add other later
    Double_t signalWeight_sig     = 1.0;
    Double_t backgroundWeight_sig = 1.0;
   
    factory->AddSignalTree    ( ptree_sig,     signalWeight_sig );
    factory->AddBackgroundTree( ptree_sig, backgroundWeight_sig );
    
    // Add Numubar sample
    //Double_t signalWeight_bar     = nmcFile/float(nbarFile);
    Double_t backgroundWeight_bar = nmcFile/float(nbarFile);
    
    //factory->AddSignalTree    ( ptree_bar,     signalWeight_bar );
    factory->AddBackgroundTree( ptree_bar, backgroundWeight_bar );
    
    // Add wall background
    //Double_t signalWeight_bkg     = nmcFile/float(nbkgFile);
    Double_t backgroundWeight_bkg = nmcFile/float(nbkgFile);
    
    //factory->AddSignalTree    ( ptree_bkg,     signalWeight_bkg );
    factory->AddBackgroundTree( ptree_bkg, backgroundWeight_bkg );
    
    // Add INGRID background
    //Double_t signalWeight_bkg2     = nmcFile/float(nbkg2File);
    Double_t backgroundWeight_bkg2 = nmcFile/float(nbkg2File);
    
    //factory->AddSignalTree    ( ptree_bkg2,     signalWeight_bkg2 );
    factory->AddBackgroundTree( ptree_bkg2, backgroundWeight_bkg2 );
    
   
   
    //factory->SetSignalWeightExpression    ("norm*totcrsne*2.8647e-13");
    //factory->SetBackgroundWeightExpression( "norm*totcrsne*2.8647e-13" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "Ntrack==2 && abs(inttype)==16 && fileIndex==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "Ntrack==2 && (abs(inttype)!=16 || fileIndex>1)"; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );

   /*if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );*/
    if (Use["BDT"])  // Adaptive Boost
        factory->BookMethod( TMVA::Types::kBDT, "BDT",
                            "!H:!V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's ! 
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

    
   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   //if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
Exemplo n.º 8
0
void TMVAClassification( TString myMethodList = "" ) 
{
  // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
  // if you use your private .rootrc, or run from a different directory, please copy the 
  // corresponding lines from .rootrc

  // methods to be processed can be given as an argument; use format:
  //
  // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
  //
  // if you like to use a method via the plugin mechanism, we recommend using
  // 
  // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
  // (an example is given for using the BDT as plugin (see below),
  // but of course the real application is when you write your own
  // method based)

  // this loads the library
  TMVA::Tools::Instance();

  //---------------------------------------------------------------
  // default MVA methods to be trained + tested
  std::map<std::string,int> Use;

  Use["Cuts"]            = 1;
  Use["CutsD"]           = 1;
  Use["CutsPCA"]         = 1;
  Use["CutsGA"]          = 1;
  Use["CutsSA"]          = 1;
  // ---
  Use["Likelihood"]      = 1;
  Use["LikelihoodD"]     = 1; // the "D" extension indicates decorrelated input variables (see option strings)
  Use["LikelihoodPCA"]   = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
  Use["LikelihoodKDE"]   = 1;
  Use["LikelihoodMIX"]   = 1;
  // ---
  Use["PDERS"]           = 1;
  Use["PDERSD"]          = 1;
  Use["PDERSPCA"]        = 1;
  Use["PDERSkNN"]        = 1; // depreciated until further notice
  Use["PDEFoam"]         = 1;
  // --
  Use["KNN"]             = 1;
  // ---
  Use["HMatrix"]         = 1;
  Use["Fisher"]          = 1;
  Use["FisherG"]         = 1;
  Use["BoostedFisher"]   = 1;
  Use["LD"]              = 1;
  // ---
  Use["FDA_GA"]          = 1;
  Use["FDA_SA"]          = 1;
  Use["FDA_MC"]          = 1;
  Use["FDA_MT"]          = 1;
  Use["FDA_GAMT"]        = 1;
  Use["FDA_MCMT"]        = 1;
  // ---
  Use["MLP"]             = 1; // this is the recommended ANN
  Use["MLPBFGS"]         = 1; // recommended ANN with optional training method
  Use["CFMlpANN"]        = 1; // *** missing
  Use["TMlpANN"]         = 1; 
  // ---
  Use["SVM"]             = 1;
  // ---
  Use["BDT"]             = 1;
  Use["BDTD"]            = 0;
  Use["BDTG"]            = 1;
  Use["BDTB"]            = 0;
  // ---
  Use["RuleFit"]         = 1;
  // ---
  Use["Plugin"]          = 0;
  // ---------------------------------------------------------------

  std::cout << std::endl;
  std::cout << "==> Start TMVAClassification" << std::endl;

  if (myMethodList != "") {
    for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

    std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
    for (UInt_t i=0; i<mlist.size(); i++) {
      std::string regMethod(mlist[i]);

      if (Use.find(regMethod) == Use.end()) {
        std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
        for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
        std::cout << std::endl;
        return;
      }
      Use[regMethod] = 1;
    }
  }

  // Create a new root output file.
  TString outfileName = "TMVA";
  int toAppNum = 1;
  char name[1024];

  TSystemDirectory dir("",".");
  TList *files = dir.GetListOfFiles();
  vector<string> vfname;
  if (files) {
     TIter next(files);
     TSystemFile *file;
     TString fname;
     
     while ((file=(TSystemFile*)next())) {
        fname = file->GetName();
        if (!file->IsDirectory() && fname.EndsWith(".root") && fname.BeginsWith("TMVA")) {
           vfname.push_back(string(fname));
        }
      }
     delete files;
     if (vfname.size()>0) {
        std::sort(vfname.begin(),vfname.end());
        TString num = TString(vfname[vfname.size()-1]);
        num.ReplaceAll(".root","");
        num.ReplaceAll("TMVA","");
        toAppNum = num.Atoi()+1;
     }
  }
  sprintf(name,"%d",toAppNum);
  outfileName = outfileName + name + ".root";
  //TString outfileName( "TMVA.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  // Create the factory object. Later you can choose the methods
  // whose performance you'd like to investigate. The factory will
  // then run the performance analysis for you.
  //
  // The first argument is the base of the name of all the
  // weightfiles in the directory weight/ 
  //
  // The second argument is the output file for the training results
  // All TMVA output can be suppressed by removing the "!" (not) in 
  // front of the "Silent" argument in the option string
  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, 
                                              "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );

  // If you wish to modify default settings 
  // (please check "src/Config.h" to see all available global options)
  //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
  //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";


  // Define the input variables that shall be used for the MVA training
  factory->AddVariable("tEventProbSig := tEventProb[0]+tEventProb[1]+tEventProb[2]","F",0,0);
  factory->AddVariable("tEventProbBkg := tEventProb","F",0,0);
  
  // You can add so-called "Spectator variables", which are not used in the MVA training, 
  // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
  // input variables, the response values of all trained MVAs, and the spectator variables
  factory->AddSpectator("run := m_run", "I");
  factory->AddSpectator("event := m_event", "I");


  // read training and test data
  TFile *input1 = TFile::Open( "/uscms/home/ilyao/nobackup/Spring12ME7TeV/MEResults/microNtuples/microWW_EPDv01.root" );
  TFile *input2 = TFile::Open( "/uscms/home/ilyao/nobackup/Spring12ME7TeV/MEResults/microNtuples/microWZ_EPDv01.root" );
  TFile *input3 = TFile::Open( "/uscms/home/ilyao/nobackup/Spring12ME7TeV/MEResults/microNtuples/microWJets_EPDv01.root" );
  TFile *input4 = TFile::Open( "/uscms/home/ilyao/nobackup/Spring12ME7TeV/MEResults/microNtuples/microZJets_EPDv01.root" );
  TFile *input5 = TFile::Open( "/uscms/home/ilyao/nobackup/Spring12ME7TeV/MEResults/microNtuples/microTTbar_EPDv01.root" );
  TFile *input6 = TFile::Open( "/uscms/home/ilyao/nobackup/Spring12ME7TeV/MEResults/microNtuples/microHWWMH150_EPDv01.root" );

 
  std::cout << "--- TMVAClassification : Using input file: " << input1->GetName() << std::endl;
  std::cout << "--- TMVAClassification : Using input file: " << input2->GetName() << std::endl;
  std::cout << "--- TMVAClassification : Using input file: " << input3->GetName() << std::endl;
  std::cout << "--- TMVAClassification : Using input file: " << input4->GetName() << std::endl;
  std::cout << "--- TMVAClassification : Using input file: " << input5->GetName() << std::endl;
  std::cout << "--- TMVAClassification : Using input file: " << input6->GetName() << std::endl;
   
  //TTree *signal     = (TTree*)input1->Get("WJet");
  //TTree *background = (TTree*)input3->Get("WJet");
  TChain *signal = new TChain("METree");
  signal->Add("/uscms/home/ilyao/nobackup/Spring12ME7TeV/MEResults/microNtuples/microHWWMH150_EPDv01.root");
  TChain *background = new TChain("METree"); 
  background->Add("/uscms/home/ilyao/nobackup/Spring12ME7TeV/MEResults/microNtuples/microWW_EPDv01.root");
  background->Add("/uscms/home/ilyao/nobackup/Spring12ME7TeV/MEResults/microNtuples/microWZ_EPDv01.root");
  background->Add("/uscms/home/ilyao/nobackup/Spring12ME7TeV/MEResults/microNtuples/microWJets_EPDv01.root");
  background->Add("/uscms/home/ilyao/nobackup/Spring12ME7TeV/MEResults/microNtuples/microZJets_EPDv01.root");
  background->Add("/uscms/home/ilyao/nobackup/Spring12ME7TeV/MEResults/microNtuples/microTTbar_EPDv01.root");

  // global event weights per tree (see below for setting event-wise weights)
  Double_t signalWeight     = 1.0;
  Double_t backgroundWeight = 1.0;

  // ====== register trees ====================================================
  //
  // the following method is the prefered one:
  // you can add an arbitrary number of signal or background trees
  factory->AddSignalTree    ( signal,     signalWeight     );
  factory->AddBackgroundTree( background, backgroundWeight );

  // To give different trees for training and testing, do as follows:
  //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
  //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );

  // Use the following code instead of the above two or four lines to add signal and background 
  // training and test events "by hand"
  // NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
  //      variable definition, but simply compute the expression before adding the event
  // 
  //    // --- begin ----------------------------------------------------------
  //    std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
  //    Float_t  treevars[4];
  //    for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
  //    for (Int_t i=0; i<signal->GetEntries(); i++) {
  //       signal->GetEntry(i);
  //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
  //       // add training and test events; here: first half is training, second is testing
  //       // note that the weight can also be event-wise	
  //       if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight ); 
  //       else                            factory->AddSignalTestEvent    ( vars, signalWeight ); 
  //    }
  //
  //    for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
  //    for (Int_t i=0; i<background->GetEntries(); i++) {
  //       background->GetEntry(i); 
  //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
  //       // add training and test events; here: first half is training, second is testing
  //       // note that the weight can also be event-wise	
  //       if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight ); 
  //       else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight ); 
  //    }
  //    // --- end ------------------------------------------------------------
  //
  // ====== end of register trees ==============================================

   
  // This would set individual event weights (the variables defined in the 
  // expression need to exist in the original TTree)
  //    for signal    : factory->SetSignalWeightExpression("weight1*weight2");
  //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
  // factory->SetBackgroundWeightExpression("weight");

  // Apply additional cuts on the signal and background samples (can be different)
  //   TCut mycuts = "abs(eta)>1.5"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
  //   TCut mycutb = "abs(eta)>1.5"; // for example: TCut mycutb = "abs(var1)<0.5";

  //Acceptance/Base cuts
  //TCut goodW("W_electron_et>30. && TMath::Abs(W_electron_eta)<2.5 && event_met_pfmet>25.");
  TCut goodW("W_muon_pt>25. && TMath::Abs(W_muon_eta)<2.5 && event_met_pfmet>25.");
  TCut twojets("numPFCorJets==2");
  TCut jetPt("JetPFCor_Pt[0]>30. && JetPFCor_Pt[1]>30.");
  TCut jetEta("TMath::Abs(JetPFCor_Eta[0])<2.5 && TMath::Abs(JetPFCor_Eta[1])<2.5");
  TCut deltaR1("TMath::Sqrt(TMath::Power(TMath::Abs(TMath::Abs(TMath::Abs(JetPFCor_Phi[0]-W_muon_phi)-TMath::Pi())-TMath::Pi()),2)+TMath::Power(JetPFCor_Eta[0]-W_muon_eta,2))>0.5");
  TCut deltaR2("TMath::Sqrt(TMath::Power(TMath::Abs(TMath::Abs(TMath::Abs(JetPFCor_Phi[1]-W_muon_phi)-TMath::Pi())-TMath::Pi()),2)+TMath::Power(JetPFCor_Eta[1]-W_muon_eta,2))>0.5");
  TCut noBJets("numPFCorJetBTags==0");
  TCut null("");

  //TCut mycuts (goodW && twojets && jetPt && jetEta && deltaR1 && deltaR2 && noBJets);
  TCut mycuts (null);

  // tell the factory to use all remaining events in the trees after training for testing:
  factory->PrepareTrainingAndTestTree( mycuts, mycuts,
                                       "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

  // If no numbers of events are given, half of the events in the tree are used for training, and 
  // the other half for testing:
  //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  
  // To also specify the number of testing events, use:
  //    factory->PrepareTrainingAndTestTree( mycut, 
  //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );  

  // ---- Book MVA methods
  //
  // please lookup the various method configuration options in the corresponding cxx files, eg:
  // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
  // it is possible to preset ranges in the option string in which the cut optimisation should be done:
  // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

  // Cut optimisation
  if (Use["Cuts"])
    factory->BookMethod( TMVA::Types::kCuts, "Cuts", 
                         "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

  if (Use["CutsD"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsD", 
                         "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

  if (Use["CutsPCA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", 
                         "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

  if (Use["CutsGA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                         "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
   
  if (Use["CutsSA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                         "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
   
  // Likelihood
  if (Use["Likelihood"])
     factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", 
                          "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); 

  // test the decorrelated likelihood
  if (Use["LikelihoodD"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", 
                         "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); 

  if (Use["LikelihoodPCA"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", 
                         "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 
 
  // test the new kernel density estimator
  if (Use["LikelihoodKDE"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", 
                         "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

  // test the mixed splines and kernel density estimator (depending on which variable)
  if (Use["LikelihoodMIX"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", 
                         "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

  // test the multi-dimensional probability density estimator
  // here are the options strings for the MinMax and RMS methods, respectively:
  //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
  //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   
  if (Use["PDERS"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
                         "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

  if (Use["PDERSkNN"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", 
                         "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

  if (Use["PDERSD"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", 
                         "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

  if (Use["PDERSPCA"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", 
                         "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );
  /*
  // Multi-dimensional likelihood estimator using self-adapting phase-space binning
  if (Use["PDEFoam"])
    factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", 
                         "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" );
  */
  // K-Nearest Neighbour classifier (KNN)
  if (Use["KNN"])
    factory->BookMethod( TMVA::Types::kKNN, "KNN", 
                         "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
  // H-Matrix (chi2-squared) method
  if (Use["HMatrix"])
    factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); 

  // Fisher discriminant   
  if (Use["Fisher"])
    factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" );

  // Fisher with Gauss-transformed input variables
  if (Use["FisherG"])
    factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

  // Composite classifier: ensemble (tree) of boosted Fisher classifiers
  if (Use["BoostedFisher"])
    factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2");
  /*
  // Linear discriminant (same as Fisher)
  if (Use["LD"])
    factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" );
  */
  // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
  if (Use["FDA_MC"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                         "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
   
  if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                         "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

  if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                         "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

  if (Use["FDA_MT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                         "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

  if (Use["FDA_GAMT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                         "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

  if (Use["FDA_MCMT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                         "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

  // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
  if (Use["MLP"])
    factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:EpochMonitoring" );

  if (Use["MLPBFGS"])
    factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:TrainingMethod=BFGS:!EpochMonitoring" );


  // CF(Clermont-Ferrand)ANN
  if (Use["CFMlpANN"])
    factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  
  
  // Tmlp(Root)ANN
  if (Use["TMlpANN"])
    factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...
  
  // Support Vector Machine
  if (Use["SVM"])
    factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
   
  // Boosted Decision Trees
  if (Use["BDTG"]) // Gradient Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDTG", 
                         "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" );

  if (Use["BDT"])  // Adaptive Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDT", 
                         "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
   
  if (Use["BDTB"]) // Bagging
    factory->BookMethod( TMVA::Types::kBDT, "BDTB", 
                         "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

  if (Use["BDTD"]) // Decorrelation + Adaptive Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDTD", 
                         "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );
   
  // RuleFit -- TMVA implementation of Friedman's method
  if (Use["RuleFit"])
    factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                         "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );
   
  // For an example of the category classifier, see: TMVAClassificationCategory

  // --------------------------------------------------------------------------------------------------

  // As an example how to use the ROOT plugin mechanism, book BDT via
  // plugin mechanism
  if (Use["Plugin"]) {
    //
    // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc:
    //
    // # plugin handler          plugin name(regexp) class to be instanciated library        constructor format
    // Plugin.TMVA@@MethodBase:  ^BDT                TMVA::MethodBDT          TMVA.1         "MethodBDT(TString,TString,DataSet&,TString)"
    // 
    // or by telling the global plugin manager directly
    gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)");
    factory->BookMethod( TMVA::Types::kPlugins, "BDT",
                         "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" );
  }

  // --------------------------------------------------------------------------------------------------

  // ---- Now you can tell the factory to train, test, and evaluate the MVAs

  // Train MVAs using the set of training events
  factory->TrainAllMethods();

  // ---- Evaluate all MVAs using the set of test events
  factory->TestAllMethods();

  // ----- Evaluate and compare performance of all configured MVAs
  factory->EvaluateAllMethods();    

  // --------------------------------------------------------------
   
  // Save the output
  outputFile->Close();

  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;      

  delete factory;

  // Launch the GUI for the root macros
  if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
Exemplo n.º 9
0
//------------------------------------------------------------------------------
// MVATrain
//------------------------------------------------------------------------------
void MVATrain(float metPfType1_cut, float mt2ll_cut, TString signal)
{
  TFile* outputfile = TFile::Open(trainingdir + signal + ".root", "recreate");


  // Factory
  //----------------------------------------------------------------------------
  TMVA::Factory* factory = new TMVA::Factory(signal, outputfile,    
					    // "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");
						"!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification");


  // Get the trees
  //----------------------------------------------------------------------------
  _mctree.clear();

  AddProcess("signal"    , signal);//"01_Data_reduced_1outof6"); //signal
  AddProcess("background", "04_TTTo2L2Nu");
  
  /*AddProcess("background", "14_HZ");
  AddProcess("background", "10_HWW");
  AddProcess("background", "06_WW");
  AddProcess("background", "02_WZTo3LNu");
  AddProcess("background", "03_VZ");
  AddProcess("background", "11_Wg");
  AddProcess("background", "07_ZJets");
  AddProcess("background", "09_TTV");
  AddProcess("background", "05_ST");
  AddProcess("background", "00_Fakes_reduced_1outof6");*/


  Double_t weight = 1.0;

  factory->AddSignalTree(_signaltree, weight);

  for (UInt_t i=0; i<_mctree.size(); i++) factory->AddBackgroundTree(_mctree[i], weight);

  factory->SetWeightExpression("eventW");


  // Add variables
  //----------------------------------------------------------------------------
  // Be careful with the order: it must be respected at the reading step
  // factory->AddVariable("<var1>+<var2>", "pretty title", "unit", 'F');

	factory->AddVariable("newdarkpt"       , "", "", 'F');
	//factory->AddVariable("topRecoW"     , "", "", 'F');
	//factory->AddVariable("lep1pt"       , "", "", 'F');
	//factory->AddVariable("lep1eta"      , "", "", 'F');
	//factory->AddVariable("lep1phi"      , "", "", 'F'); 
	//factory->AddVariable("lep1mass"     , "", "", 'F');
	//factory->AddVariable("lep2pt"       , "", "", 'F'); 
	//factory->AddVariable("lep2eta"      , "", "", 'F');
	//factory->AddVariable("lep2phi"      , "", "", 'F'); 
	//factory->AddVariable("lep2mass"     , "", "", 'F');
	//factory->AddVariable("jet1pt "      , "", "", 'F');
	//factory->AddVariable("jet1eta"      , "", "", 'F');
	//factory->AddVariable("jet1phi"      , "", "", 'F');
	//factory->AddVariable("jet1mass"     , "", "", 'F');
	//factory->AddVariable("jet2pt"       , "", "", 'F');
	//factory->AddVariable("jet2eta"      , "", "", 'F');
	//factory->AddVariable("jet2phi"      , "", "", 'F');
	//factory->AddVariable("jet2mass"     , "", "", 'F');
	factory->AddVariable("metPfType1"   , "", "", 'F');
	//factory->AddVariable("metPfType1Phi", "", "", 'F');
	//factory->AddVariable("m2l"          , "", "", 'F');
	factory->AddVariable("mt2ll"        , "", "", 'F');
	//factory->AddVariable("mt2lblb"      , "", "", 'F');
	//factory->AddVariable("mtw1"         , "", "", 'F');
	//factory->AddVariable("mtw2"         , "", "", 'F');
	//factory->AddVariable("ht"           , "", "", 'F');
	//factory->AddVariable("htjets"       , "", "", 'F');
	//factory->AddVariable("htnojets"     , "", "", 'F');
	//factory->AddVariable("njet"         , "", "", 'F');
	//factory->AddVariable("nbjet30csvv2l", "", "", 'F');
	//factory->AddVariable("nbjet30csvv2m", "", "", 'F');
	//factory->AddVariable("nbjet30csvv2t", "", "", 'F');
	//factory->AddVariable("dphijet1met"  , "", "", 'F');
	//factory->AddVariable("dphijet2met"  , "", "", 'F');
	//factory->AddVariable("dphijj"       , "", "", 'F');
	//factory->AddVariable("dphijjmet"    , "", "", 'F');
	//factory->AddVariable("dphill"       , "", "", 'F');
	//factory->AddVariable("dphilep1jet1" , "", "", 'F');
	//factory->AddVariable("dphilep1jet2" , "", "", 'F');
	//factory->AddVariable("dphilep2jet1" , "", "", 'F');
	//factory->AddVariable("dphilep2jet2" , "", "", 'F');
	//factory->AddVariable("dphilmet1"    , "", "", 'F');
	//factory->AddVariable("dphilmet2"    , "", "", 'F');
	factory->AddVariable("dphillmet"    , "", "", 'F');	
	//factory->AddVariable("sphericity"   , "", "", 'F');
	//factory->AddVariable("alignment"    , "", "", 'F');
	//factory->AddVariable("planarity"    , "", "", 'F');



  // Preselection cuts and preparation
  //----------------------------------------------------------------------------
  //factory->PrepareTrainingAndTestTree(Form("metPfType1>%5.2f&&mt2ll>%5.2f&&newdarkpt>0.", metPfType1_cut, mt2ll_cut), "NormMode=EqualNumEvents:nTrain_Signal=80:nTest_Signal=80:nTrain_Background=400:nTest_Background=400:!V");
  factory->PrepareTrainingAndTestTree("mt2ll>100.&&newdarkpt>0.&&metPfType1>80.", "NormMode=EqualNumEvents:nTrain_Signal=0:nTest_Signal=0:nTrain_Background=0:nTest_Background=0:!V");

  // Book MVA
  //----------------------------------------------------------------------------

    factory->BookMethod(TMVA::Types::kMLP, "MLP01",
    	      	      "H:!V:NeuronType=sigmoid:NCycles=500:VarTransform=Norm:HiddenLayers=6,3:TestRate=1:LearningRate=0.005");

  //factory->BookMethod(TMVA::Types::kMLP, "MLP01",
  //	      	      "H:!V:NeuronType=sigmoid:NCycles=500:VarTransform=Norm:HiddenLayers=4,4:TestRate=3:LearningRate=0.005");  

  //factory->BookMethod(TMVA::Types::kMLP, "MLP02",
  //		      "H:!V:NeuronType=sigmoid:NCycles=40:VarTransform=Norm:HiddenLayers=20,10:TestRate=3:LearningRate=0.005"); 
  
  //factory->BookMethod(TMVA::Types::kMLP, "MLP03",
  //		      "H:!V:NeuronType=sigmoid:NCycles=30:VarTransform=Norm:HiddenLayers=20,20:TestRate=3:LearningRate=0.005");  


  //factory->BookMethod(TMVA::Types::kBDT, "BDT04", "NTrees=50:MaxDepth=2" );
  //factory->BookMethod(TMVA::Types::kBDT, "BDT05", "NTrees=50:MaxDepth=3" );



  // Train, test and evaluate MVA
  //----------------------------------------------------------------------------
  factory->TrainAllMethods();     // Train using the set of training events
  factory->TestAllMethods();      // Evaluate using the set of test events
  factory->EvaluateAllMethods();  // Evaluate and compare performance


  // Save the output
  //----------------------------------------------------------------------------
  outputfile->Close();

  delete factory;
}
Exemplo n.º 10
0
int main ()
{
  TFile * outputfile = TFile::Open ("outputTMVA.root","RECREATE");
  TMVA::Factory * TMVAtest = new TMVA::Factory ("TMVAtest", outputfile, "S") ;

  //PG get the signal and deliver it to the TMVA factory
  
  TChain signalTree ("sample") ;
  signalTree.Add ("data/sig_0.root") ;
  std::cout << "READ " << signalTree.GetEntries () << " signal events\n" ;
  TMVAtest->AddSignalTree (&signalTree, 1) ;  

  //PG get the bkg and deliver it to the TMVA factory
  
  TChain bkgTree ("sample") ;
  bkgTree.Add ("data/bkg_0.root") ;
  std::cout << "READ " << bkgTree.GetEntries () << " bkg events\n" ;
  TMVAtest->AddBackgroundTree (&bkgTree, 1) ;  

  //PG get the training and test samples and deliver them to the TMVA factory

  TChain signalTrainTree ("sample") ;
  signalTrainTree.Add ("data/sig_1.root") ;
  std::cout << "READ " << signalTrainTree.GetEntries () << " signal train events\n" ;
  
  TChain bkgTrainTree ("sample") ;
  bkgTrainTree.Add ("data/bkg_1.root") ;
  std::cout << "READ " << bkgTrainTree.GetEntries () << " bkg train events\n" ;
  
  TMVAtest->SetInputTrees (signalTrainTree.GetTree (), bkgTrainTree.GetTree (), 1., 1.) ;  

  //PG variables to be used for the selection
  //PG must be defined in the TTrees
  
  TMVAtest->AddVariable ("vars.x", 'F') ;
  TMVAtest->AddVariable ("vars.y" , 'F') ;

  int signalNumTrain = signalTrainTree.GetEntries () * 4 / 5 ;
  int bkgNumTrain = bkgTrainTree.GetEntries () * 4 / 5 ;
  int signalNumTest = signalTrainTree.GetEntries () - signalNumTrain ;
  int bkgNumTest = bkgTrainTree.GetEntries () - bkgNumTrain ;
  char trainOptions[120] ;
  sprintf (trainOptions,"NSigTrain=%d:NBkgTrain=%d:NSigTest=%d:NBkgTest=%d",
           signalNumTrain, bkgNumTrain,
           signalNumTest, bkgNumTest) ;
  sprintf (trainOptions,"NSigTrain=%d:NBkgTrain=%d:NSigTest=%d:NBkgTest=%d",
           0,0,0,0) ;
  std::cout << "TRAINING CONFIGURATION : " << trainOptions << "\n" ;
  TMVAtest->PrepareTrainingAndTestTree ("",trainOptions) ;
  
  //PG prepare the classifier
  
  //PG cut-based, default params
  TMVAtest->BookMethod (TMVA::Types::kCuts, "Cuts") ;
  
  TMVAtest->TrainAllMethods () ;
  TMVAtest->TestAllMethods () ;
  TMVAtest->EvaluateAllMethods () ;
 
  delete TMVAtest ;
  delete outputfile ;
}
Exemplo n.º 11
0
int main(int argc, char* argv[]){

	// Configurable parameters
	// int max_events;                 // Maximum number of events to process
	//string filelist;                // The file containing a list of files to use as input
	//string input_prefix;            // A prefix that will be added to the path of each input file
	string folder;
	string output_name;             // Name of the ouput ROOT File
	string output_folder;           // Folder to write the output in
	string paramfile;
	string paramfile2;
	string classname;
	bool twotag;
	bool onetag;

	po::options_description config("Configuration");
	po::variables_map vm;
	po::notify(vm);

	config.add_options()    
		("folder",              po::value<string>(&folder)->default_value("output/Paper_2012/"))
		//   ("input_prefix",        po::value<string>(&input_prefix)->default_value(""))
		("output_name",         po::value<string>(&output_name)->default_value("test_tmva.root"))
		("output_folder",       po::value<string>(&output_folder)->default_value(""))
		("paramfile",						po::value<string>(&paramfile)->default_value("./scripts/Paper_params_2012.dat"))
		("paramfile2", 					po::value<string>(&paramfile2)->default_value("./scripts/TMVAinputshad.dat"))
		("classname",						po::value<string>(&classname)->default_value("HhhMVA"))
		("twotag",								po::value<bool>(&twotag)->default_value(true))
		("onetag",              po::value<bool>(&onetag)->default_value(false))
		;
	po::store(po::command_line_parser(argc, argv).
			options(config).allow_unregistered().run(), vm);
	po::notify(vm);


	std::cout << "-------------------------------------" << std::endl;
	std::cout << "Train MVA" << std::endl;
	std::cout << "-------------------------------------" << std::endl;      string param_fmt = "%-25s %-40s\n";
	std::vector<string> bckglist;
	bckglist.push_back("TTJetsFullLept");
	bckglist.push_back("TTJetsSemiLept");
	bckglist.push_back("TTJetsHadronicExt");
//	bckglist.push_back("WWJetsTo2L2Nu");
//	bckglist.push_back("WZJetsTo2L2Q");
//	bckglist.push_back("WZJetsTo3LNu");
//	bckglist.push_back("ZZJetsTo2L2Nu");
//	bckglist.push_back("ZZJetsTo2L2Q");
//	bckglist.push_back("ZZJetsTo4L");
//	bckglist.push_back("DYJetsToTauTauSoup");
//	bckglist.push_back("DYJetsToLLSoup");
//	bckglist.push_back("DYJetsToTauTau");
//	bckglist.push_back("DYJetsToLL");
//	bckglist.push_back("T-tW");
//	bckglist.push_back("Tbar-tW");

	std::vector<string> signallist;
	signallist.push_back("GluGluToHTohhTo2Tau2B_mH-300");

	sample_names_.reserve(bckglist.size()+signallist.size());
	sample_names_.insert(sample_names_.end(),bckglist.begin(),bckglist.end());
	sample_names_.insert(sample_names_.end(),signallist.begin(),signallist.end());



	std::vector<TFile*> BackgroundSamples;
	for(unsigned int iter=0;iter<bckglist.size();++iter){
		BackgroundSamples.push_back(TFile::Open((folder+bckglist.at(iter)+"_mt_2012.root").c_str()));
	}

	std::vector<TFile*> SignalSamples;
	for(unsigned int sigIter=0;sigIter<signallist.size();++sigIter){
		SignalSamples.push_back(TFile::Open((folder+signallist.at(sigIter)+"_mt_2012.root").c_str()));
	}

	std::vector<TTree*> backgroundTrees;
	for(unsigned int iter2=0;iter2<BackgroundSamples.size();++iter2){
		backgroundTrees.push_back(dynamic_cast<TTree*>(BackgroundSamples.at(iter2)->Get("ntuple")));
	}

	std::vector<TTree*> signalTrees;
	for(unsigned int sigIter2=0;sigIter2<SignalSamples.size();++sigIter2){
		signalTrees.push_back(dynamic_cast<TTree*>(SignalSamples.at(sigIter2)->Get("ntuple")));
	}

	TFile *outfile = new TFile((output_folder+output_name).c_str(),"RECREATE");

	TMVA::Factory *factory = new TMVA::Factory(classname,outfile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");


	std::vector<std::string> vars;
	std::ifstream parafile(paramfile2.c_str());
	std::cout<<paramfile2.c_str()<<std::endl;
	string line;
	while(getline(parafile,line)){
		vars.push_back(line);
	}
	parafile.close();

	std::cout<<(vars.at(0)).c_str()<<std::endl;

	std::vector<float> var2;
	for(unsigned int variter=0;variter<vars.size();++variter){
		var2.push_back(::atof((vars.at(variter)).c_str()));
	}


	for(unsigned int variter=0;variter<vars.size();++variter){
		factory->AddVariable((vars.at(variter)).c_str(),(vars.at(variter)).c_str(),"",'F');
	}

	factory->AddSpectator("mt_1","mt_1","",'F');
	factory->AddSpectator("n_prebjets","n_prebjets","",'I');
	factory->AddSpectator("prebjetbcsv_1","prebjetbcsv_1","",'F');
	factory->AddSpectator("prebjetbcsv_2","prebjetbcsv_2","",'F');

	double weightval_=0;

 ParseParamFile(paramfile);	

	for(unsigned int bckgit=0;bckgit<backgroundTrees.size();++bckgit){
		auto it = sample_info_.find(bckglist.at(bckgit).c_str());
		if(it!=sample_info_.end()){
			double evt = it->second.first;
			double xs = it->second.second;
			weightval_=(double) xs/evt;
			std::cout<<weightval_<<std::endl;
		}
		factory->AddBackgroundTree(backgroundTrees.at(bckgit),weightval_);
	}
	for(unsigned int sgit=0;sgit<signalTrees.size();++sgit){
		auto it = sample_info_.find(signallist.at(sgit).c_str());
		if(it!=sample_info_.end()){
			double evt = it->second.first;
			double xs=it->second.second;
			weightval_=(Double_t) xs/evt;
		}
		std::cout<<weightval_<<std::endl;
		factory->AddSignalTree(signalTrees.at(sgit),weightval_);
	}
	factory->SetBackgroundWeightExpression("wt");
	factory->SetSignalWeightExpression("wt");
	TCut mycutb, mycuts;
	if(twotag){
	mycutb="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2>0.679";
	mycuts="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2>0.679";
	}
	else if(onetag){
	mycutb="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2<0.679";
	mycuts="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2<0.679";
	}
	else{
	mycutb="n_prebjets>1&&mt_1<30";
	mycuts="n_prebjets>1&&mt_1<30";
	}
//TCut mycutb="";
//TCut mycuts="";
	factory->PrepareTrainingAndTestTree( mycuts, mycutb,"SplitMode=Random:!V");

	factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

	factory->TrainAllMethods();
	factory->TestAllMethods();
	factory->EvaluateAllMethods();

	outfile->Close();
	delete factory;

	return 0;
}
Exemplo n.º 12
0
void TMVATraining_ch4( )
{

  TFile* outputFile = TFile::Open( "TMVA_ch4.root", "RECREATE" );
  TMVA::Factory *factory = new TMVA::Factory( "MVAnalysis", outputFile,"!V");
  TFile *signal = TFile::Open("../production/BGx0/Prod2_iptubeK0/B0_etapr-eta-3pi2pi_KS-pi+pi-_output_signal_iptubeK0.root");
  TFile *background = TFile::Open("../production/BGx0/Prod2_iptubeK0/B0_etapr-eta-3pi2pi_KS-pi+pi-_output_ccbar_iptubeK0.root");
  factory->AddSignalTree ( (TTree*)signal->Get("B0"), 1.0 );
  factory->AddBackgroundTree ( (TTree*)background->Get("B0"), 1.0 );
  sigCut = TCut("B0__isContinuumEvent==0");
  bgCut = TCut("B0__isContinuumEvent==1");

  factory->AddVariable("B0_ThrustB",'F');
  factory->AddVariable("B0_ThrustO",'F');
  factory->AddVariable("B0_CosTBTO",'F');
  factory->AddVariable("B0_CosTBz",'F');
  factory->AddVariable("B0_R2",'F');
  factory->AddVariable("B0_cc1",'F');
  factory->AddVariable("B0_cc2",'F');
  factory->AddVariable("B0_cc3",'F');
  factory->AddVariable("B0_cc4",'F');
  factory->AddVariable("B0_cc5",'F');
  factory->AddVariable("B0_cc6",'F');
  factory->AddVariable("B0_cc7",'F');
  factory->AddVariable("B0_cc8",'F');
  factory->AddVariable("B0_cc9",'F');
  factory->AddVariable("B0_mm2",'F');
  factory->AddVariable("B0_et",'F');
  factory->AddVariable("B0_hso00",'F');
  // factory->AddVariable("B0_hso01",'F');
  factory->AddVariable("B0_hso02",'F');
  //factory->AddVariable("B0_hso03",'F');
  factory->AddVariable("B0_hso04",'F');
  factory->AddVariable("B0_hso10",'F');
  factory->AddVariable("B0_hso12",'F');
  factory->AddVariable("B0_hso14",'F');
  factory->AddVariable("B0_hso20",'F');
  factory->AddVariable("B0_hso22",'F');
  factory->AddVariable("B0_hso24",'F');
  factory->AddVariable("B0_hoo0",'F');
  factory->AddVariable("B0_hoo1",'F');
  factory->AddVariable("B0_hoo2",'F');
  factory->AddVariable("B0_hoo3",'F');
  factory->AddVariable("B0_hoo4",'F');

  factory->PrepareTrainingAndTestTree(sigCut, bgCut, "!V:nTrain_Signal=10000:nTest_Signal=10000:nTrain_Background=10000:nTest_Background=10000:SplitMode=Random:NormMode=NumEvents" );

  //factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA");
  //factory->BookMethod( TMVA::Types::kMLP, "MLP", "!V:NCycles=200:HiddenLayers=N+1,N:TestRate=5" );
  factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" );
  factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
  factory->BookMethod( TMVA::Types::kSVM, "SVM", "!H:!V:Gamma=0.25:Tol=0.001:VarTransform=Norm" );

  //factory->BookMethod( TMVA::Types::kBDT, "FastBDT", "!H:!V:CreateMVAPdfs:NbinsMVAPdf=40:NTrees=100:Shrinkage=0.10"); //:RandRatio=0.5:NCutLevel=8:NTreeLayers=3");

  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods();
  outputFile->Close();
  delete factory;

  // Launch the GUI for the root macros
  if (!gROOT->IsBatch()) TMVA::TMVAGui( "TMVA_ch4.root" );
}
Exemplo n.º 13
0
void TMVAClassificationCategory() 
{
   //---------------------------------------------------------------

   std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl;

   bool batchMode(false);

   // Create a new root output file.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" );
   if (batchMode) factoryOptions += ":!Color:!DrawProgressBar";

   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   factory->AddVariable( "var1", 'F' );
   factory->AddVariable( "var2", 'F' );
   factory->AddVariable( "var3", 'F' );
   factory->AddVariable( "var4", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables
   factory->AddSpectator( "eta" );

   // load the signal and background event samples from ROOT trees
   TFile *input(0);
   TString fname( "" );
   if (UseOffsetMethod) fname = "../execs/data/toy_sigbkg_categ_offset.root";
   else                 fname = "../execs/data/toy_sigbkg_categ_varoff.root";
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find tmva_example.root in the local directory
      std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   } 

   if (!input) {
      std::cout << "ERROR: could not open data file: " << fname << std::endl;
      exit(1);
   }

   TTree *signal     = (TTree*)input->Get("TreeS");
   TTree *background = (TTree*)input->Get("TreeB");

   /// global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   /// you can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   
   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   // tell the factory to use all remaining events in the trees after training for testing:
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // Fisher discriminant   
   factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" );

   // Likelihood
   factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", 
                        "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); 

   // Categorised classifier
   TMVA::MethodCategory* mcat = 0;
   
   // the variable sets
   TString theCat1Vars = "var1:var2:var3:var4";
   TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3");

   // the Fisher 
   TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" );
   mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat);
   mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kFisher,"Category_Fisher_1","!H:!V:Fisher");
   mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kFisher,"Category_Fisher_2","!H:!V:Fisher");

   // the Likelihood
   TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" );
   mcat = dynamic_cast<TMVA::MethodCategory*>(liCat);
   mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kLikelihood,"Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50");
   mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kLikelihood,"Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50");

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassificationCategory is done!" << std::endl;      

   // Clean up
   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
Exemplo n.º 14
0
int main(int argc, char * argv[])
{
    //Processing input options
    int c;
    std::string outFname;
    outFname = std::string("QualityNaF.root");

    // Open  input files, get the trees
    TChain *mc = InputFileReader("FileListNtuples_ext.txt","parametri_geo");
    // Preparing options for the TMVA::Factory
    std::string options( 
        "!V:" 
        "!Silent:"
        "Color:"
        "DrawProgressBar:"
        "Transformations=I;D;P;G,D:"
        "AnalysisType=Classification"
    );

    //Creating the factory
    TFile *   ldFile = new TFile(outFname.c_str(),"RECREATE");
    TMVA::Factory * factory = new TMVA::Factory("QualityNaF", ldFile, options.c_str());

    //Preparing variables 
    //general
    /*factory->AddVariable("Chisquare", 'F');
    factory->AddVariable("Layernonusati", 'I');
    factory->AddVariable("NTofUsed", 'I');
    factory->AddVariable("diffR", 'F');
    factory->AddVariable("TOF_Up_Down", 'F');*/
    //Tof	
    //factory->AddVariable("TOFchisq_s", 'F');
    //factory->AddVariable("TOFchisq_t", 'F');

    //RICH	
    factory->AddVariable("Richtotused", 'F');	
    factory->AddVariable("RichPhEl", 'F');
    factory->AddVariable("RICHprob", 'F');
    factory->AddVariable("RICHcollovertotal");
    factory->AddVariable("RICHLipBetaConsistency");  
    factory->AddVariable("RICHTOFBetaConsistency");  
    factory->AddVariable("RICHChargeConsistency");
    
    factory->AddVariable("RICHPmts");
    factory->AddVariable("RICHgetExpected");		
    factory->AddVariable("tot_hyp_p_uncorr");
    factory->AddVariable("Bad_ClusteringRICH");
    factory->AddVariable("NSecondariesRICHrich");

    //factory->AddVariable("HitHValldir"); 
    //factory->AddVariable("HitHVallrefl");  	
    
    //factory->AddVariable("HVBranchCheck:= (HitHValldir - HitHVoutdir) - (HitHVallrefl - HitHVoutrefl)");    

    factory->AddVariable("HitHVoutdir"); 
    factory->AddVariable("HitHVoutrefl");

    //Spectator Variables
    factory->AddSpectator("R", 'F');
    factory->AddSpectator("BetaRICH_new", 'F');	

    //Preselection cuts
    std::string PreSelection    = "qL1>0&&(joinCutmask&187)==187&&qL1<1.75&&R>0";
    std::string ChargeCut 	= "qUtof>0.8&&qUtof<1.3&&qLtof>0.8&&qLtof<1.3";
    std::string VelocityCut 	= /*"Beta<0.8";*/"((joinCutmask>>11))==1024&&BetaRICH_new>0&&BetaRICH_new<0.975";
    std::string signalCut 	= /*"(R/Beta)*(1-Beta^2)^0.5>1.65&&GenMass>1&&GenMass<2";*/"(R/BetaRICH_new)*(1-BetaRICH_new^2)^0.5>0.5&&(R/BetaRICH_new)*(1-BetaRICH_new^2)^0.5<1.5";	
    std::string bkgndCut 	= /*"(R/Beta)*(1-Beta^2)^0.5>1.65&&GenMass>0&&GenMass<1";*/"(R/BetaRICH_new)*(1-BetaRICH_new^2)^0.5>3";		 

    factory->AddTree(mc,"Signal"    ,1,(PreSelection +"&&"+ ChargeCut + "&&" + VelocityCut + "&&"+ signalCut).c_str());
    factory->AddTree(mc,"Background",1,(PreSelection +"&&"+ ChargeCut + "&&" + VelocityCut + "&&"+ bkgndCut).c_str());

    // Preparing
    std::string preselection = "";
    std::string inputparams(
        "SplitMode=Random:"
        "NormMode=NumEvents:"
        "!V"
    );
    factory->PrepareTrainingAndTestTree(preselection.c_str(),inputparams.c_str());

    // Training
    std::string trainparams ="!H:!V:MaxDepth=3";
    factory->BookMethod(TMVA::Types::kBDT, "BDT", trainparams.c_str());

    trainparams ="!H:!V";
    factory->BookMethod(TMVA::Types::kLikelihood, "Likelihood", trainparams.c_str());

    trainparams ="!H:!V:VarTransform=Decorrelate";
    //factory->BookMethod(TMVA::Types::kLikelihood, "LikelihoodD", trainparams.c_str());

    trainparams ="!H:!V";
    //factory->BookMethod(TMVA::Types::kCuts, "Cuts", trainparams.c_str());



    factory->TrainAllMethods();
    factory->TestAllMethods();
    factory->EvaluateAllMethods();
}
Exemplo n.º 15
0
void TMVAClassificationElecTau(std::string ordering_ = "Pt", std::string bkg_ = "qqH115vsWZttQCD") {

    TMVA::Tools::Instance();

    TString outfileName( "TMVAElecTau"+ordering_+"Ord_"+bkg_+".root" );
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationElecTau"+ordering_+"Ord_"+bkg_, outputFile,
            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );
    factory->AddVariable( "pt1", "pT-tag1", "GeV/c"         , 'F'  );
    factory->AddVariable( "pt2", "pT-tag2", "GeV/c"         , 'F'  );
    factory->AddVariable( "Deta","|y-tag1 - y-tag2|",""     , 'F'  );
    //factory->AddVariable( "opposite:=abs(eta1*eta2)/eta1/eta2","sign1*sign2",""             , 'F'  );
    //factory->AddVariable( "Dphi", "#Delta#phi" ,""             , 'F'  );
    factory->AddVariable( "Mjj", "M(tag1,tag2)", "GeV/c^{2}"  , 'F'  );

    factory->AddSpectator( "eta1",  "#eta_{tag1}" , 'F' );
    factory->AddSpectator( "eta2",  "#eta_{tag2}" , 'F' );

    factory->SetWeightExpression( "sampleWeight" );

    TString fSignalName              = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleVBFH115-powheg-PUS1_Open_ElecTauStream.root";
    TString fBackgroundNameDYJets    = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleZjets-alpgen-PUS1_Open_ElecTauStream.root";
    TString fBackgroundNameWJets     = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleWJets-madgraph-PUS1_Open_ElecTauStream.root";
    TString fBackgroundNameQCD       = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleQCD_Open_ElecTauStream.root";
    TString fBackgroundNameTTbar     = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleTTJets-madgraph-PUS1_Open_ElecTauStream.root";


    TFile *fSignal(0);
    TFile *fBackgroundDYJets(0);
    TFile *fBackgroundWJets(0);
    TFile *fBackgroundQCD(0);
    TFile *fBackgroundTTbar(0);

    fSignal           = TFile::Open( fSignalName );
    fBackgroundDYJets = TFile::Open( fBackgroundNameDYJets );
    fBackgroundWJets  = TFile::Open( fBackgroundNameWJets );
    fBackgroundQCD    = TFile::Open( fBackgroundNameQCD );
    fBackgroundTTbar  = TFile::Open( fBackgroundNameTTbar );

    if(!fSignal || !fBackgroundDYJets || !fBackgroundWJets || !fBackgroundQCD || !fBackgroundTTbar) {
        std::cout << "ERROR: could not open files" << std::endl;
        exit(1);
    }

    TString tree = "outTree"+ordering_+"Ord";

    TCut mycuts = "";
    TCut mycutb = "";

    TCut cutA  = "pt1>0 && tightestHPSWP>0";
    TCut cutB  = "pt1>0 && combRelIsoLeg1<0.1";
    TCut cutBl = "pt1>0 && combRelIsoLeg1<0.3";
    TCut cutC  = "pt1>0 && diTauCharge==0";
    TCut cutD  = "pt1>0 && MtLeg1<40";

    // select events for training
    TFile* dummy = new TFile("dummy.root","RECREATE");
    TH1F* allEvents = new TH1F("allEvents","",1,-10,10);
    float totalEvents, cutEvents;

    // signal: all
    TTree *signal           = ((TTree*)(fSignal->Get(tree)))->CopyTree(cutA&&cutB&&cutC&&cutD);
    cout << "Copied signal tree with full selection: " << ((TTree*)(fSignal->Get(tree)))->GetEntries() << " --> "  << signal->GetEntries()  << endl;
    allEvents->Reset();
    signal->Draw("eta1>>allEvents","sampleWeight");
    cutEvents  = allEvents->Integral();
    Double_t signalWeight =   1.0;
    cout << "Signal: expected yield " << cutEvents << " -- weight " << signalWeight << endl;

    // Z+jets: all
    TTree *backgroundDYJets = ((TTree*)(fBackgroundDYJets->Get(tree)))->CopyTree(cutA&&cutB&&cutC&&cutD);
    cout << "Copied DYJets tree with full selection: " << ((TTree*)(fBackgroundDYJets->Get(tree)))->GetEntries() << " --> "  << backgroundDYJets->GetEntries()  << endl;
    allEvents->Reset();
    backgroundDYJets->Draw("eta1>>allEvents","sampleWeight");
    cutEvents  = allEvents->Integral();
    Double_t backgroundDYJetsWeight = 1.0;
    cout << "ZJets: expected yield " << cutEvents << " -- weight " << backgroundDYJetsWeight << endl;

    // W+jets: iso+Mt
    TTree *backgroundWJets  = ((TTree*)(fBackgroundWJets->Get(tree)))->CopyTree(cutB&&cutD);
    cout << "Copied WJets tree with iso+Mt selection: " << ((TTree*)(fBackgroundWJets->Get(tree)))->GetEntries() << " --> "  << backgroundWJets->GetEntries()  << endl;
    allEvents->Reset();
    backgroundWJets->Draw("eta1>>allEvents","sampleWeight");
    totalEvents  = allEvents->Integral();
    allEvents->Reset();
    backgroundWJets->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0)");
    cutEvents  = allEvents->Integral();
    Double_t backgroundWJetsWeight  =  cutEvents / totalEvents;
    cout << "WJets: expected yield " << cutEvents  << " -- weight " << backgroundWJetsWeight << endl;

    // QCD: Mt+loose iso
    TTree *backgroundQCD    = ((TTree*)(fBackgroundQCD->Get(tree)))->CopyTree(cutD&&cutBl);
    cout << "Copied QCD tree with Mt selection: " << ((TTree*)(fBackgroundQCD->Get(tree)))->GetEntries() << " --> "  << backgroundQCD->GetEntries()  << endl;
    allEvents->Reset();
    backgroundQCD->Draw("eta1>>allEvents","sampleWeight");
    totalEvents  = allEvents->Integral();
    allEvents->Reset();
    backgroundQCD->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0 && combRelIsoLeg1<0.1)");
    cutEvents  = allEvents->Integral();
    Double_t backgroundQCDWeight  =  cutEvents / totalEvents;
    cout << "QCD: expected yield " << cutEvents  << " -- weight "  << backgroundQCDWeight << endl;


    // TTbar: iso+Mt
    TTree *backgroundTTbar  = ((TTree*)(fBackgroundTTbar->Get(tree)))->CopyTree(cutB&&cutD);
    cout << "Copied TTbar tree with iso+Mt selection: " << ((TTree*)(fBackgroundTTbar->Get(tree)))->GetEntries() << " --> "  << backgroundTTbar->GetEntries()  << endl;
    allEvents->Reset();
    backgroundTTbar->Draw("eta1>>allEvents","sampleWeight");
    totalEvents  = allEvents->Integral();
    allEvents->Reset();
    backgroundTTbar->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0)");
    cutEvents  = allEvents->Integral();
    Double_t backgroundTTbarWeight  =  cutEvents / totalEvents;
    cout << "TTbar: expected yield "  << cutEvents  << " -- weight " << backgroundTTbarWeight << endl;


    delete allEvents;


    factory->AddSignalTree    ( signal,           signalWeight           );
    //factory->AddBackgroundTree( backgroundDYJets, backgroundDYJetsWeight );
    //factory->AddBackgroundTree( backgroundWJets,  backgroundWJetsWeight  );
    factory->AddBackgroundTree( backgroundQCD,    backgroundQCDWeight    );
    //factory->AddBackgroundTree( backgroundTTbar,  backgroundTTbarWeight  );


    factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                         "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=1:nTest_Background=1:SplitMode=Random:NormMode=NumEvents:!V" );

    factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                         "!H:!V:FitMethod=GA:EffSel:CutRangeMin[0]=25.:CutRangeMax[0]=999:CutRangeMin[1]=25.:CutRangeMax[1]=999.:CutRangeMin[2]=1.0:CutRangeMax[2]=9.:CutRangeMin[3]=100:CutRangeMax[3]=7000:VarProp=FSmart" );

    /*
    factory->BookMethod( TMVA::Types::kBDT, "BDT",
    	       "!H:!V:NTrees=200:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
    */

    factory->TrainAllMethods();

    factory->TestAllMethods();

    factory->EvaluateAllMethods();

    outputFile->Close();

    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVAClassification is done!" << std::endl;

    delete factory;

    //if (!gROOT->IsBatch()) TMVAGui( outfileName );

}
Exemplo n.º 16
0
void myTMVA( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 1;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 1;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 1; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 0; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "myTMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:!Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory->AddVariable( "Photon_Pt1", 'F' );
    factory->AddVariable( "Photon_Pt2", 'F' );
    factory->AddVariable( "Lepton_Pt", 'F' );

    factory->AddVariable( "Jet_Pt", 'F' );
    factory->AddVariable( "BJet_Pt", 'F' );
//    factory->AddVariable( "JetInfo_Pt1", 'F' );
//    factory->AddVariable( "JetInfo_Pt2", 'F' );
//    factory->AddVariable( "LepInfo_Pt1", 'F' );
//    factory->AddVariable( "LepInfo_Pt2", 'F' );
//    factory->AddVariable( "LepInfo_Pt3", 'F' );
//    factory->AddVariable( "LepInfo_ID1", 'I' );
//    factory->AddVariable( "LepInfo_ID2", 'I' );
//   factory->AddVariable( "LepInfo_ID3", 'I' );
//    factory->AddVariable( "LepInfo_Type1", 'I' );
//    factory->AddVariable( "LepInfo_Type2", 'I' );
//    factory->AddVariable( "LepInfo_Type3", 'I' );
//    factory->AddVariable( "ZInfo_Pt", 'F' );
   factory->AddVariable( "Higgs_M", 'F' );
//    factory->AddVariable( "ZInfo_Pt", 'F' );
   factory->AddVariable( "MET", 'F' );
//    factory->AddVariable( "WInfo_Pt", 'F' );
//   factory->AddVariable( "Wln_Mt", 'F' );
//    factory->AddVariable( "Top_Pt1", 'F' );
//    factory->AddVariable( "Top_Pt2", 'F' );
    factory->AddVariable( "Top_M1", 'F' );
    factory->AddVariable( "Top_M2", 'F' );
    factory->AddVariable( "Top_dPhi", 'F' );
   // factory->AddVariable( "HT", 'F' );
   // factory->AddVariable( "HT_J", 'F' );
   factory->AddVariable( "HT_S", 'F' );
   // factory->AddVariable( "HT_SL", 'F' );
   factory->AddVariable( "HT_O", 'F' );

//  factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
//  factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
   //factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   // factory->AddSpectator( "TopInfo_M1",  "M(cZ)", "units", 'F' );
   // factory->AddSpectator( "TopInfo_M2",  "M(bW)", "units", 'F' );
//    factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
//    factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   TFile *sigFile = TFile::Open( "hist_top_ch-a.root" );
   TFile *bkgFile1 = TFile::Open( "hist_top_ch-a.root" );
   // TFile *bkgFile1 = TFile::Open( "hist_tth.root" );
   // TFile *bkgFile2 = TFile::Open( "hist_ttgg.root" );
   //TFile *bkgFile3 = TFile::Open( "hist_ttgj.root" );
   // TFile *bkgFile4 = TFile::Open( "hist_ww_1.root" );
   // TFile *bkgFile5 = TFile::Open( "hist_wz_1.root" );
   // TFile *bkgFile6 = TFile::Open( "hist_zz_1.root" );
//    TFile *sig = TFile::Open( "hist_top_cz1.root" );
//    TFile *sig = TFile::Open( "hist_top_cz1.root" );
   
   std::cout << "--- TMVAClassification       : Using input file: "
	     << sigFile->GetName() << std::endl;
   
   // --- Register the training and test trees

   TTree *signal     = (TTree*)sigFile->Get("mvaTree");
   TTree *background1 = (TTree*)bkgFile1->Get("mvaTree");
   // TTree *background1 = (TTree*)bkgFile1->Get("mvaTree");
   // TTree *background2 = (TTree*)bkgFile2->Get("mvaTree");
   //TTree *background3 = (TTree*)bkgFile3->Get("mvaTree");
   // TTree *background4 = (TTree*)bkgFile4->Get("mvaTree");
   // TTree *background5 = (TTree*)bkgFile5->Get("mvaTree");
   // TTree *background6 = (TTree*)bkgFile6->Get("mvaTree");
   //TTree *background = (TTree*)sigFile->Get("mvaTree");
   
   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   // You can add an arbitrary number of signal or background trees
   //   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddSignalTree    ( signal,     234*0.01*0.00228*2*19970/284065  );
   factory->AddBackgroundTree( background1, 234*0.01*0.00228*2*19970/284065 );
   // factory->AddBackgroundTree( background1, 0.1277*19970/100048 );
   // factory->AddBackgroundTree( background2, 0.146*19970/122040 );
   //factory->AddBackgroundTree( background3, 2.166*19970/1719954 );
   // factory->AddBackgroundTree( background4, 43.*1000/4223963 );
   // factory->AddBackgroundTree( background5, 18.*1000/4263113 );
   // factory->AddBackgroundTree( background6, 5.9*1000/2108054 );
   //   factory->AddBackgroundTree( background, backgroundWeight );
   
   //factory->SetBackgroundWeightExpression( "weight" );

   // Apply additional cuts on the signal and background samples (can be different)
   //TCut mycuts = "TopInfo_MCTruth>2"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   //    TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   //TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";
    //    TCut mycutb = "TopInfo_MCTruth<3"; // for example: TCut mycutb = "abs(var1)<0.5";
    TCut mycuts = "Photon_Pt1>20 && Photon_Pt2>20 && Jet_Pt>30 && BJet_Pt>30 && Lepton_Pt>30 && Top_MCTruth==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
    TCut mycutb = "Photon_Pt1>20 && Photon_Pt2>20 && Jet_Pt>30 && BJet_Pt>30 && Lepton_Pt>30 && Top_MCTruth==0"; // for example: TCut mycutb = "abs(var1)<0.5";
    // TCut mycutb = "Photon_Pt1>20 && Photon_Pt2>20 && Jet_Pt>30 && BJet_Pt>30 && Lepton_Pt>30"; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=2000:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );


   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","GA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
Exemplo n.º 17
0
void TMVATrainer(){
   // This loads the library
   TMVA::Tools::Instance();

   // --- Here the preparation phase begins
   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName = "TMVATrainingResults_fat_BBvsGSP.root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVATrainer", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

   factory->AddVariable("TagVarCSV_vertexCategory","TagVarCSV_vertexCategory","units",'F');
   factory->AddVariable("TagVarCSV_jetNTracks","TagVarCSV_jetNTracks","units",'F');
   //factory->AddVariable("TagVarCSV_trackSip2dSig_0","TagVarCSV_trackSip2dSig_0","units",'F');
   //factory->AddVariable("TagVarCSV_trackSip2dSig_1","TagVarCSV_trackSip2dSig_1","units",'F');
   //factory->AddVariable("TagVarCSV_trackSip2dSig_2","TagVarCSV_trackSip2dSig_2","units",'F');
   //factory->AddVariable("TagVarCSV_trackSip2dSig_3","TagVarCSV_trackSip2dSig_3","units",'F');
   factory->AddVariable("TagVarCSV_trackSip3dSig_0","TagVarCSV_trackSip3dSig_0","units",'F');
   factory->AddVariable("TagVarCSV_trackSip3dSig_1","TagVarCSV_trackSip3dSig_1","units",'F');
   factory->AddVariable("TagVarCSV_trackSip3dSig_2","TagVarCSV_trackSip3dSig_2","units",'F');
   factory->AddVariable("TagVarCSV_trackSip3dSig_3","TagVarCSV_trackSip3dSig_3","units",'F');
   //factory->AddVariable("TagVarCSV_trackPtRel_0","TagVarCSV_trackPtRel_0","units",'F');
   //factory->AddVariable("TagVarCSV_trackPtRel_1","TagVarCSV_trackPtRel_1","units",'F');
   //factory->AddVariable("TagVarCSV_trackPtRel_2","TagVarCSV_trackPtRel_2","units",'F');
   //factory->AddVariable("TagVarCSV_trackPtRel_3","TagVarCSV_trackPtRel_3","units",'F');
   factory->AddVariable("TagVarCSV_trackSip2dSigAboveCharm","TagVarCSV_trackSip2dSigAboveCharm","units",'F');
   //factory->AddVariable("TagVarCSV_trackSip3dSigAboveCharm","TagVarCSV_trackSip3dSigAboveCharm","units",'F');
   //factory->AddVariable("TagVarCSV_trackSumJetEtRatio","TagVarCSV_trackSumJetEtRatio","units",'F');
   //factory->AddVariable("TagVarCSV_trackSumJetDeltaR","TagVarCSV_trackSumJetDeltaR","units",'F');
   factory->AddVariable("TagVarCSV_jetNTracksEtaRel","TagVarCSV_jetNTracksEtaRel","units",'F');
   factory->AddVariable("TagVarCSV_trackEtaRel_0","TagVarCSV_trackEtaRel_0","units",'F');
   factory->AddVariable("TagVarCSV_trackEtaRel_1","TagVarCSV_trackEtaRel_1","units",'F');
   factory->AddVariable("TagVarCSV_trackEtaRel_2","TagVarCSV_trackEtaRel_2","units",'F');
   factory->AddVariable("TagVarCSV_jetNSecondaryVertices","TagVarCSV_jetNSecondaryVertices","units",'F');
   factory->AddVariable("TagVarCSV_vertexMass","TagVarCSV_vertexMass","units",'F');
   factory->AddVariable("TagVarCSV_vertexNTracks","TagVarCSV_vertexNTracks","units",'F');
   factory->AddVariable("TagVarCSV_vertexEnergyRatio","TagVarCSV_vertexEnergyRatio","units",'F');
   factory->AddVariable("TagVarCSV_vertexJetDeltaR","TagVarCSV_vertexJetDeltaR","units",'F');
   factory->AddVariable("TagVarCSV_flightDistance2dSig","TagVarCSV_flightDistance2dSig","units",'F');
   //factory->AddVariable("TagVarCSV_flightDistance3dSig","TagVarCSV_flightDistance3dSig","units",'F');
   
   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   factory->AddSpectator("Jet_pt","Jet_pt","units",'F');
   factory->AddSpectator("Jet_eta","Jet_eta","units",'F');
   factory->AddSpectator("Jet_phi","Jet_phi","units",'F');
   factory->AddSpectator("Jet_mass","Jet_mass","units",'F');
   factory->AddSpectator("Jet_massGroomed","Jet_massGroomed","units",'F');
   factory->AddSpectator("Jet_flavour","Jet_flavour","units",'F');
   factory->AddSpectator("Jet_nbHadrons","Jet_nbHadrons","units",'F');
   factory->AddSpectator("Jet_JP","Jet_JP","units",'F');
   factory->AddSpectator("Jet_JBP","Jet_JBP","units",'F');
   factory->AddSpectator("Jet_CSV","Jet_CSV","units",'F');
   factory->AddSpectator("Jet_CSVIVF","Jet_CSVIVF","units",'F');
   factory->AddSpectator("Jet_tau1","Jet_tau1","units",'F');
   factory->AddSpectator("Jet_tau2","Jet_tau2","units",'F');

   factory->AddSpectator("SubJet1_CSVIVF","SubJet1_CSVIVF","units",'F');
   factory->AddSpectator("SubJet2_CSVIVF","SubJet2_CSVIVF","units",'F');

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   TString fnameSig = "RadionToHH_4b_M-800_TuneZ2star_8TeV-Madgraph_pythia6_JetTaggingVariables_training.root";
   TString fnameBkg = "QCD_Pt-300to470_TuneZ2star_8TeV_pythia6_JetTaggingVariables_training.root";
   TFile *inputSig = TFile::Open( fnameSig );
   TFile *inputBkg = TFile::Open( fnameBkg );
   
   std::cout << "--- TMVAClassification       : Using input files: " << inputSig->GetName() << std::endl
                                                                     << inputBkg->GetName() << std::endl;
   
   // --- Register the training and test trees
   TTree *sigTree = (TTree*)inputSig->Get("tagVars/ttree");
   TTree *bkgTree = (TTree*)inputBkg->Get("tagVars/ttree");
   
   // // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // factory->SetInputTrees( tree,signalCut,backgroundCut );
   factory->AddSignalTree    ( sigTree, signalWeight     );
   factory->AddBackgroundTree( bkgTree, backgroundWeight );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut signalCut = "Jet_massGroomed>80 && Jet_massGroomed<150";
   TCut backgroundCut = "abs(Jet_flavour)==5 && Jet_nbHadrons>1 && Jet_massGroomed>80 && Jet_massGroomed<150";

   // Tell the factory how to use the training and testing events
   factory->PrepareTrainingAndTestTree( signalCut, backgroundCut,
                                        "nTrain_Signal=22000:nTest_Signal=20000:nTrain_Background=22000:nTest_Background=2730:SplitMode=Random:!V" );

   // Gradient Boost
   factory->BookMethod( TMVA::Types::kBDT, "BDTG_T1000D3_fat_BBvsGSP",
                          "!H:!V:NTrees=1000:MaxDepth=3:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

   //factory->BookMethod( TMVA::Types::kBDT, "BDTG_T1000D5_fat_BBvsGSP",
   //                       "!H:!V:NTrees=1000:MaxDepth=5:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20" );

//    // Adaptive Boost
//    factory->BookMethod( TMVA::Types::kBDT, "BDT",
//                            "!H:!V:NTrees=1000:MaxDepth=5:MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
//    // Bagging
//    factory->BookMethod( TMVA::Types::kBDT, "BDTB",
//                            "!H:!V:NTrees=1000:MaxDepth=5:MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );
//    // Decorrelation + Adaptive Boost
//    factory->BookMethod( TMVA::Types::kBDT, "BDTD",
//                            "!H:!V:NTrees=1000:MaxDepth=5:MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );


}
Exemplo n.º 18
0
void TMVARegression( int optimIndex, int Cat=0, TString myMethodList = "" ) 
{

   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the 
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDEFoam"]         = 0; 
   Use["KNN"]             = 0;
   // 
   // --- Linear Discriminant Analysis
   Use["LD"]		        = 0;
   // 
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   // 
   // --- Neural Network
   Use["MLP"]             = 0; 
   // 
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 0;
   Use["BDTG"]            = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVARegression" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a new root output file
   TString outfileName( Form("TMVAoutput/TMVAReg_%i.root",optimIndex) );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( Form("TMVARegression_%i_Cat%i",optimIndex,Cat), outputFile, 
                                               "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression" );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

   factory->AddVariable( "jet_eta", "jet_eta", "units", 'F' );
   factory->AddVariable( "jet_emfrac", "jet_emfrac", "units", 'F' );
   factory->AddVariable( "jet_hadfrac", "jet_hadfrac", "units", 'F' );
   factory->AddVariable( "jet_nconstituents", "jet_nconst", "units", 'F' );
   factory->AddVariable( "jet_vtx3dL", "jet_vtx3dL", "units", 'F' );
   factory->AddVariable( "MET", "MET", "units", 'F' );
   factory->AddVariable( "jet_dPhiMETJet", "jet_dPhiMETJet", "units", 'F' );
   
   //factory->AddVariable( "hJet_vtxPt", "hJet_vtxPt", "units", 'F' );
   //factory->AddVariable( "hJet_JECUnc", "hJet_JECUnc", "units", 'F' );
   //factory->AddVariable( "hJet_ptLeadTrack", "hJet_ptLeadTrack", "units", 'F' );
   //factory->AddVariable( "hJet_SoftLeptPtCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptPt) : (-99)", "hJet_SoftLeptPt", "units", 'F' );
   //factory->AddVariable( "hJet_En", "hJet_En", "units", 'F' );
   //factory->AddVariable( "hJet_Et", "hJet_Et", "units", 'F' );
   //factory->AddVariable( "hJet_Mt", "hJet_Mt", "units", 'F' );
   //factory->AddVariable( "hJet_nch", "hJet_nch", "units", 'F' );
   //factory->AddVariable( "hJet_vtx3deL", "hJet_vtx3deL", "units", 'F' );
   //factory->AddVariable( "hJet_vtxMass", "hJet_vtxMass", "units", 'F' );
   //factory->AddVariable( "hJet_ptRaw", "hJet_ptRaw", "units", 'F' );
   //factory->AddVariable( "hJet_EnRaw", "hJet_EnRaw", "units", 'F' );
   //factory->AddVariable( "hJet_SoftLeptptRelCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptptRel) : (-99)", "hJet_SoftLeptptRel", "units", 'F' );
   //factory->AddVariable( "hJet_SoftLeptdRCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptdR) : (-99)", "hJet_SoftLeptdR", "units", 'F' );
   //factory->AddVariable( "rho25", "rho25", "units", 'F' );
   //factory->AddVariable( "dPhiMETJet", "dPhiMETJet", "units", 'F' );


   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables

   // Add the variable carrying the regression target
   //factory->AddTarget( "jet_genPt" ); 
   factory->AddTarget( "jet_genJetPt/jet_pt" ); 

   // It is also possible to declare additional targets for multi-dimensional regression, ie:
   // -- factory->AddTarget( "fvalue2" );
   // BUT: this is currently ONLY implemented for MLP

   // Read training and test data (see TMVAClassification for reading ASCII files)
   // load the signal and background event samples from ROOT trees
/*   TFile *input(0);
   TString fname = "./tmva_reg_example.root";
   if (!gSystem->AccessPathName( fname )) 
      input = TFile::Open( fname ); // check if file in local directory exists
   else 
      input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server
   
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   std::cout << "--- TMVARegression           : Using input file: " << input->GetName() << std::endl;

   // --- Register the regression tree

   TTree *regTree = (TTree*)input->Get("TreeR");
*/

   TChain chainTraining("Events");
   chainTraining.Add("TrainingFiles/training.root");
   TTree *regTreeTraining = (TTree*) chainTraining;

   TChain chainTesting("Events");
   chainTesting.Add("TrainingFiles/testing.root");
   TTree *regTreeTesting = (TTree*) chainTesting;

   // global event weights per tree (see below for setting event-wise weights)
   Double_t regWeight  = 1.0;   

   // You can add an arbitrary number of regression trees
   factory->AddTree( regTreeTraining, "Regression", regWeight, "", "training" );
   factory->AddTree( regTreeTesting, "Regression", regWeight, "", "test" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycut = "hJet_pt[0]>20. && hJet_pt[1]>20. && fabs(hJet_eta[0])<2.5 && fabs(hJet_eta[1])<2.5 && hJet_csv[0]>0. && hJet_csv[1]>0. && hJet_ptLeadTrack[0]<1500. && hJet_ptLeadTrack[1]<1500. && hJet_genJetPt[0]>0. && hJet_genJetPt[1]>0. && hJet_puJetIdL[0]>0.0 && hJet_puJetIdL[1]>0.0";
  TCut testingCut = "hJet_pt[0]>20. && hJet_pt[1]>20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5";
  TCut mjjCut = "sqrt(pow(hJet_pt[0]*cos(hJet_phi[0])+hJet_pt[1]*cos(hJet_phi[1]),2)+pow(hJet_pt[0]*sin(hJet_phi[0])+hJet_pt[1]*sin(hJet_phi[1]),2)) < 110";

  if(Cat==1) mjjCut = "sqrt(pow(hJet_pt[0]*cos(hJet_phi[0])+hJet_pt[1]*cos(hJet_phi[1]),2)+pow(hJet_pt[0]*sin(hJet_phi[0])+hJet_pt[1]*sin(hJet_phi[1]),2)) > 110";
  TCut jetPtCut="jet_pt>90";
  if(Cat==1) jetPtCut="jet_pt>90";
  //TCut trainingCut = "hJet_pt[0]>20. && hJet_pt[1]>20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5 && hJet_genJetPt[0]>0. && hJet_genJetPt[1]>0. && hJet_csv[0]>0.0 && hJet_csv[1]>0.0 && hJet_ptLeadTrack[0]<1500. && hJet_ptLeadTrack[1]<1500.";
  TCut trainingCut = "jet_pt>20. && abs(jet_eta)<2.5 && jet_genJetPt>0. && jet_dRJetGenJet < 0.4 && (jet_partonID)==5";

// for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";
// for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";

   // tell the factory to use all remaining events in the trees after training for testing:
   // factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=600000:nTest_Regression=600000:SplitMode=Random:NormMode=NumEvents:!V");
   //factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=337500:nTest_Regression=337500:SplitMode=Random:NormMode=NumEvents:!V");
   //   factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=158393:nTest_Regression=158393:SplitMode=Random:NormMode=NumEvents:!V");
   //factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=14197:nTest_Regression=14197:SplitMode=Random:NormMode=NumEvents:!V");
   factory->PrepareTrainingAndTestTree(trainingCut+jetPtCut, "!V");

   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // PDE - RS method
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

   if (Use["PDEFoam"])
       factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", 
			    "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN", 
                           "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // Linear discriminant
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", 
                           "!H:!V:VarTransform=None" );

	// Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                          "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
   
   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

   if (Use["FDA_MT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
//100, 5
   // Boosted Decision Tree   //100,5 nCuts=-1
   if (Use["BDT"])
     factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=100:nEventsMin=4:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=-1:PruneMethod=CostComplexity:PruneStrength=30" );

   const bool doScan1=0;

   int ntreesArray[8] = {100,200,300,400,500,600,700,800};
   float shrinkageArray[3] = {0.1,0.2,0.3};
   float gradbaggingfracArray[3] = {0.7,0.8,0.9};
   int maxdepthArray[3] = {2,3,4};
   int nnodesmaxArray[3] = {5,10,15};
   if(!doScan1) shrinkageArray[2]=1.0;
   int nnodesmaxIndex=-1,maxdepthIndex=-1,gradbaggingfracIndex=-1,shrinkageIndex=-1,ntreesIndex=-1;

   if(doScan1){
     nnodesmaxIndex = optimIndex/216;
     maxdepthIndex = (optimIndex-nnodesmaxIndex*216)/72;
     gradbaggingfracIndex = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72)/24;
     shrinkageIndex = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72-gradbaggingfracIndex*24)/8;
     ntreesIndex = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72-gradbaggingfracIndex*24-shrinkageIndex*8);
   }
   else{
     nnodesmaxIndex = optimIndex/72;
     maxdepthIndex = (optimIndex-nnodesmaxIndex*72)/24;
     shrinkageIndex = (optimIndex-nnodesmaxIndex*72-maxdepthIndex*24)/8;
     ntreesIndex = (optimIndex-nnodesmaxIndex*72-maxdepthIndex*24-shrinkageIndex*8);
   }

   if (Use["BDTG"]){
     if(doScan1)
       factory->BookMethod( TMVA::Types::kBDT, "BDTG", 
			    Form("!H:!V:NTrees=%i::BoostType=Grad:Shrinkage=%.1f:UseBaggedGrad:GradBaggingFraction=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],gradbaggingfracArray[gradbaggingfracIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) );
     else
       factory->BookMethod( TMVA::Types::kBDT, "BDTG", 
			    Form("!H:!V:IgnoreNegWeights:NTrees=%i::BoostType=Grad:Shrinkage=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) );
   }

   if(doScan1)
     cout << Form("!H:!V:NTrees=%i::BoostType=Grad:Shrinkage=%.2f:UseBaggedGrad:GradBaggingFraction=%.2f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],gradbaggingfracArray[gradbaggingfracIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex])<<endl;
   else
     cout << Form("!H:!V:IgnoreNegWeights:NTrees=%i::BoostType=Grad:Shrinkage=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) << endl;

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;      

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
Exemplo n.º 19
0
//void TMVAClassification( TString myMethodList = "" )
void tmvaClassifier( TString myMethodList = "", TString inputDir="~/work/ewkzp2j_5311/ll/", bool minimalTrain=false, bool useQG=false)
{   
  gSystem->ExpandPathName(inputDir);
  TString pf("base_weights");
  if(!minimalTrain){
    if(useQG) pf="full_weights";
    else      pf="weights";
  }
  TMVA::gConfig().GetIONames().fWeightFileDir = inputDir + pf;
  
  // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
  // if you use your private .rootrc, or run from a different directory, please copy the
  // corresponding lines from .rootrc
  
  // methods to be processed can be given as an argument; use format:
  //
  // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
  //
  // if you like to use a method via the plugin mechanism, we recommend using
  //
  // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
  // (an example is given for using the BDT as plugin (see below),
  // but of course the real application is when you write your own
  // method based)
  
  //---------------------------------------------------------------
  // This loads the library
  TMVA::Tools::Instance();
  
  // Default MVA methods to be trained + tested
  std::map<std::string,int> Use;
  
  // --- Cut optimisation
  Use["Cuts"]            = 0;
  Use["CutsD"]           = 0;
  Use["CutsPCA"]         = 0;
  Use["CutsGA"]          = 0;
  Use["CutsSA"]          = 0;
  // 
  // --- 1-dimensional likelihood ("naive Bayes estimator")
  Use["Likelihood"]      = 0;
  Use["LikelihoodD"]     = 1; // the "D" extension indicates decorrelated input variables (see option strings)
  Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
  Use["LikelihoodKDE"]   = 0;
  Use["LikelihoodMIX"]   = 0;
  //
  // --- Mutidimensional likelihood and Nearest-Neighbour methods
  Use["PDERS"]           = 0;
  Use["PDERSD"]          = 0;
  Use["PDERSPCA"]        = 0;
  Use["PDEFoam"]         = 0;
  Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
  Use["KNN"]             = 0; // k-nearest neighbour method
  //
  // --- Linear Discriminant Analysis
  Use["LD"]              = 0; // Linear Discriminant identical to Fisher
  Use["Fisher"]          = 1;
  Use["FisherCat"]       = 0;//added by loic
  Use["FisherG"]         = 0;
  Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
  Use["HMatrix"]         = 0;
  //
  // --- Function Discriminant analysis
  Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
  Use["FDA_SA"]          = 0;
  Use["FDA_MC"]          = 0;
  Use["FDA_MT"]          = 0;
  Use["FDA_GAMT"]        = 0;
  Use["FDA_MCMT"]        = 0;
  //
  // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
  Use["MLP"]             = 0; // Recommended ANN
  Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
  Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
  Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
  Use["TMlpANN"]         = 0; // ROOT's own ANN
  //
  // --- Support Vector Machine 
  Use["SVM"]             = 0;
  // 
  // --- Boosted Decision Trees
  Use["BDT"]             = 0; // uses Adaptive Boost
  Use["BDTG"]            = 0; // uses Gradient Boost
  Use["BDTB"]            = 0; // uses Bagging
  Use["BDTD"]            = 1; // decorrelation + Adaptive Boost
  Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
  // 
  // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
  Use["RuleFit"]         = 0;
  // ---------------------------------------------------------------

  std::cout << std::endl;
  std::cout << "==> Start TMVAClassification" << std::endl;

  // Select methods (don't look at this code - not of interest)
  if (myMethodList != "") {
    for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

    std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
    for (UInt_t i=0; i<mlist.size(); i++) {
      std::string regMethod(mlist[i]);

      if (Use.find(regMethod) == Use.end()) {
	std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
	for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
	std::cout << std::endl;
	return;
      }
      Use[regMethod] = 1;
    }
  }

  // --------------------------------------------------------------------------------------------------

  // --- Here the preparation phase begins

  // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
  TString outfileName( "TMVA.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  // Create the factory object. Later you can choose the methods
  // whose performance you'd like to investigate. The factory is 
  // the only TMVA object you have to interact with
  //
  // The first argument is the base of the name of all the
  // weightfiles in the directory weight/
  //
  // The second argument is the output file for the training results
  // All TMVA output can be suppressed by removing the "!" (not) in
  // front of the "Silent" argument in the option string
  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
					      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

  // You can add so-called "Spectator variables", which are not used in the MVA training,
  // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
  // input variables, the response values of all trained MVAs, and the spectator variables
  //   factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
  // factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

  // Read training and test data
  // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
  //   TString fname = "./tmva_class_example.root";
   
  //if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
  //   gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
   
  
  //   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;
   
  // --- Register the training and test trees
  TChain *signal     = new TChain("ewkzp2j");
  TChain *background = new TChain("ewkzp2j");
  TSystemDirectory dir(inputDir,inputDir);
  TList *files = dir.GetListOfFiles();
  if (files) {
    TSystemFile *file;
    TString fname;
    TIter next(files);
    while ((file=(TSystemFile*)next())) {
      fname = file->GetName();
      if(!fname.EndsWith("_summary.root")) continue;
      if(fname.Contains("Data")) continue;
      if(!fname.Contains("DY")) continue;
      bool isSignal(false);
      if(fname.Contains("JJ")) { signal->Add(fname); isSignal=true; }
      else if(fname.Contains("50toInf") && fname.Contains("DY")) background->Add(fname);
      cout << fname << " added as " << (isSignal ? "signal" : "background") << endl;
    }
  }else{
    cout << "[Error] no files found in " << inputDir << endl;
  }
  cout << "Signal has " << signal->GetEntries() << " raw events" << endl
       << "Background has " << background->GetEntries() << " raw events"<< endl;

  // global event weights per tree
  Double_t signalWeight     = 1.0;
  Double_t backgroundWeight = 1.0;
  factory->AddSignalTree    ( signal,     signalWeight     );
  factory->AddBackgroundTree( background, backgroundWeight );
  // event-per-event weights per tree
  factory->SetBackgroundWeightExpression( "weight/cnorm" );
  factory->SetSignalWeightExpression( "weight/cnorm" );

  //define variables for the training
  if(minimalTrain)
    {
      factory->AddVariable( "mjj",     "M_{jj}"              "GeV", 'F' );
      factory->AddVariable( "detajj",  "#Delta#eta_{jj}",     "",    'F' );
      factory->AddVariable( "spt",     "#Delta_{rel}",        "GeV", 'F' );
    }
  else
    {
      factory->AddVariable( "mjj",     "M_{jj}"              "GeV",  'F' );
      factory->AddVariable( "detajj",  "#Delta#eta_{jj}",     "",    'F' );
      factory->AddVariable( "setajj",  "#Sigma#eta_{j}",      "",    'F' );
      factory->AddVariable( "eta1",    "#eta(1)",             "",    'F' );
      factory->AddVariable( "eta2",    "#eta(2)",             "",    'F' );
      factory->AddVariable( "pt1",     "p_{T}(1)",            "GeV", 'F' );
      factory->AddVariable( "pt2",     "p_{T}(2)",            "GeV", 'F' );
      factory->AddVariable( "spt",     "#Delta_{rel}",        "GeV", 'F' );
      if(useQG) factory->AddVariable( "qg1",   "q/g(1)",      "",    'F' );
      if(useQG) factory->AddVariable( "qg2",   "q/g(2)",      "",    'F' );
    }
  

  // Apply additional cuts on the signal and background samples (can be different)
  TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
  TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";
  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
				       "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

  // ---- Book MVA methods
  //
  // Please lookup the various method configuration options in the corresponding cxx files, eg:
  // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
  // it is possible to preset ranges in the option string in which the cut optimisation should be done:
  // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

  // Cut optimisation
  if (Use["Cuts"])
    factory->BookMethod( TMVA::Types::kCuts, "Cuts",
			 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

  if (Use["CutsD"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsD",
			 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

  if (Use["CutsPCA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
			 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

  if (Use["CutsGA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
			 "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

  if (Use["CutsSA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
			 "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

  // Likelihood ("naive Bayes estimator")
  if (Use["Likelihood"])
    factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
			 "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

  // Decorrelated likelihood
  if (Use["LikelihoodD"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
			 "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

  // PCA-transformed likelihood
  if (Use["LikelihoodPCA"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
			 "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

  // Use a kernel density estimator to approximate the PDFs
  if (Use["LikelihoodKDE"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
			 "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

  // Use a variable-dependent mix of splines and kernel density estimator
  if (Use["LikelihoodMIX"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
			 "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

  // Test the multi-dimensional probability density estimator
  // here are the options strings for the MinMax and RMS methods, respectively:
  //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
  //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
  if (Use["PDERS"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
			 "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

  if (Use["PDERSD"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
			 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

  if (Use["PDERSPCA"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
			 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

  // Multi-dimensional likelihood estimator using self-adapting phase-space binning
  if (Use["PDEFoam"])
    factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
			 "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

  if (Use["PDEFoamBoost"])
    factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
			 "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

  // K-Nearest Neighbour classifier (KNN)
  if (Use["KNN"])
    factory->BookMethod( TMVA::Types::kKNN, "KNN",
			 "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

  // H-Matrix (chi2-squared) method
  if (Use["HMatrix"])
    factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

  // Linear discriminant (same as Fisher discriminant)
  if (Use["LD"])
    factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

  // Fisher discriminant (same as LD)
  if (Use["Fisher"])
    factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
   
  if (Use["FisherCat"]){
    TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" );
    TMVA::MethodCategory* mcategory = dynamic_cast<TMVA::MethodCategory*>(fiCat);
    mcategory->AddMethod( "mjj<250", "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat1", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=250&&mjj<350" , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0000", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=350&&mjj<450" , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0350", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=450&&mjj<550" , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0450", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=550&&mjj<750" , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0550", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=750&&mjj<1000", "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0750", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=1000"         , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat1000", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
  }


  // Fisher with Gauss-transformed input variables
  if (Use["FisherG"])
    factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

  // Composite classifier: ensemble (tree) of boosted Fisher classifiers
  if (Use["BoostedFisher"])
    factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
			 "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

  // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
  if (Use["FDA_MC"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

  if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

  if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

  if (Use["FDA_MT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

  if (Use["FDA_GAMT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

  if (Use["FDA_MCMT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

  // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
  if (Use["MLP"])
    factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

  if (Use["MLPBFGS"])
    factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

  if (Use["MLPBNN"])
    factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

  // CF(Clermont-Ferrand)ANN
  if (Use["CFMlpANN"])
    factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

  // Tmlp(Root)ANN
  if (Use["TMlpANN"])
    factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

  // Support Vector Machine
  if (Use["SVM"])
    factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

  // Boosted Decision Trees
  if (Use["BDTG"]) // Gradient Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDTG",
			 "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=2" );

  if (Use["BDT"])  // Adaptive Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDT",
			 "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

  if (Use["BDTB"]) // Bagging
    factory->BookMethod( TMVA::Types::kBDT, "BDTB",
			 "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

  if (Use["BDTD"]) // Decorrelation + Adaptive Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDTD",
			 "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=25:PruneMethod=CostComplexity:PruneStrength=25.0:VarTransform=Decorrelate");
  //"!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

  if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
    factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
			 "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

  // RuleFit -- TMVA implementation of Friedman's method
  if (Use["RuleFit"])
    factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
			 "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

  // For an example of the category classifier usage, see: TMVAClassificationCategory




  // --------------------------------------------------------------------------------------------------

  // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

  // factory->OptimizeAllMethods("SigEffAt001","Scan");
  // factory->OptimizeAllMethods("ROCIntegral","FitGA");

  // --------------------------------------------------------------------------------------------------

  // ---- Now you can tell the factory to train, test, and evaluate the MVAs

  // Train MVAs using the set of training events
  factory->TrainAllMethods();

  // ---- Evaluate all MVAs using the set of test events
  factory->TestAllMethods();

  // ----- Evaluate and compare performance of all configured MVAs
  factory->EvaluateAllMethods();

  // --------------------------------------------------------------

  // Save the output
  outputFile->Close();

  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;
  std::cout << " ==> Weights are stored in " << TMVA::gConfig().GetIONames().fWeightFileDir << std::endl;
  delete factory;



  // Launch the GUI for the root macros
  //   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
Exemplo n.º 20
0
void tmva_test_lite(const int sigreg = true){
  const string outfileName("TMVA.root");
  TFile* outputFile = TFile::Open(outfileName.c_str(),"RECREATE");
  string name;
  if(!sigreg) name = string("MVAnalysisLite");
  else name = string("MVAnalysis_sig_Lite");
  TMVA::Factory *factory = new TMVA::Factory(name.c_str(),outputFile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");

  TFile *inputSig = TFile::Open("/home/vitaly/B0toDh0/Tuples/fil_b2dh_sigmc.root");
  TFile *inputBack = TFile::Open("/home/vitaly/B0toDh0/Tuples/fil_b2dh_cont.root");

//  TFile *tfile = new TFile("","RECREATE");

  TTree* insigtree   = (TTree*)inputSig->Get("TEventTr");
  TTree* inbacktree  = (TTree*)inputBack->Get("TEventTr");

  factory->AddVariable("abs(cos_b0)");
//  factory->AddVariable("p_ks");
  factory->AddVariable("log(chi2_ndf_D0)");
//  factory->AddVariable("log(chi2_ndf_B0)");
//  factory->AddVariable("log(chi2_tag_vtx/ndf_tag_vtx)");
  factory->AddVariable("abs(cos_thr)");
  factory->AddVariable("thr_sig");
  factory->AddVariable("thr_oth");
  factory->AddVariable("log(tag_LH_err)");
//  factory->AddVariable("log(dzerr)");
//  factory->AddVariable("log(pi0_chi2)");
//  factory->AddVariable("log(egamma)");
//  factory->AddVariable("log(ptgamma)");
  
  factory->AddVariable("k1mm2");
  factory->AddVariable("k1et");
  factory->AddVariable("k1hso00");
//  factory->AddVariable("k1hso01");
  factory->AddVariable("k1hso02");
//  factory->AddVariable("k1hso03");
  factory->AddVariable("k1hso04");
  factory->AddVariable("k1hso10");
  factory->AddVariable("k1hso12");
  factory->AddVariable("k1hso14");
  factory->AddVariable("k1hso20");
  factory->AddVariable("k1hso22");
  factory->AddVariable("k1hso24");
  factory->AddVariable("k1hoo0");
  factory->AddVariable("k1hoo1");
  factory->AddVariable("k1hoo2");
  factory->AddVariable("k1hoo3");
  factory->AddVariable("k1hoo4");

//  factory->AddVariable("k0mm2");
//  factory->AddVariable("k0et");
//  factory->AddVariable("k0hso00");
//  factory->AddVariable("k0hso02");
//  factory->AddVariable("k0hso04");
//  factory->AddVariable("k0hso10");
//  factory->AddVariable("k0hso12");
//  factory->AddVariable("k0hso14");
//  factory->AddVariable("k0hso20");
//  factory->AddVariable("k0hso22");
//  factory->AddVariable("k0hso24");
//  factory->AddVariable("k0hoo0");
//  factory->AddVariable("k0hoo1");
//  factory->AddVariable("k0hoo2");
//  factory->AddVariable("k0hoo4");

  factory->AddSignalTree(insigtree,1.0);
  factory->AddBackgroundTree(inbacktree,1.0);

//  string Common_precuts("mbc>5.271 && mbc<5.289 && de<0.08 && de>-0.1 && chi2_ndf_B0<1000 && abs(mks_raw-0.4975)<0.009 && abs(md0_raw-1.865)<0.015 && abs(mpi0_raw-0.135)<0.012");
  string Common_precuts("chi2_ndf_B0<1000 && de<0.1 && de>-0.1 && mbc>5.25 && mbc<5.289");
  if(sigreg) Common_precuts += string(" && mbc>5.271 && mbc<5.289 && de<0.08 && de>-0.1");
  string sig_cuts = Common_precuts;// + string("");
  string back_cuts = Common_precuts;// + string(" && abs(md0_raw-1.865)<0.030 && abs(mpi0_raw-0.135)<0.030");
  TCut cutsig(sig_cuts.c_str());
  TCut cutback(back_cuts.c_str());

  factory->PrepareTrainingAndTestTree(cutsig,cutback,"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
//  factory->BookMethod( TMVA::Types::kCuts, "Cuts","!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

//  factory->BookMethod(TMVA::Types::kBDT,"BDTG","!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggedFraction=0.6:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrenght=50:NNodesMax=5");
  factory->BookMethod(TMVA::Types::kBDT,"BDTG","!H:!V:NTrees=800:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:NNodesMax=5");
  factory->BookMethod(TMVA::Types::kBDT,"BDT","!H:!V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20");

  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods();

  outputFile->Close();

  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;

  delete factory;

  // Launch the GUI for the root macros
  if (!gROOT->IsBatch()) TMVAGui( outfileName.c_str() );
}
Exemplo n.º 21
0
void Boost(){
   TString outfileName = "boost.root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );
   factory->AddVariable( "var0", 'F' );
   factory->AddVariable( "var1", 'F' );
   TFile *input(0);
   TString fname = "./data.root";
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find tmva_example.root in the local directory
      std::cout << "--- BOOST       : Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   }
   else {
      gROOT->LoadMacro( "./createData.C");
      create_circ(20000);
      cout << " created data.root with data and circle arranged in half circles"<<endl;
      input = TFile::Open( fname );
   }
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   TTree *signal     = (TTree*)input->Get("TreeS");
   TTree *background = (TTree*)input->Get("TreeB");
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   gROOT->cd( outfileName+TString(":/") );
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   factory->PrepareTrainingAndTestTree( "", "",
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   TString fisher="H:!V";
   factory->BookMethod( TMVA::Types::kFisher, "Fisher", fisher );
   factory->BookMethod( TMVA::Types::kFisher, "FisherBoost", fisher+":Boost_Num=100:Boost_Type=AdaBoost" );
   factory->BookMethod( TMVA::Types::kFisher, "FisherBoostLog", fisher+":Boost_Num=100:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.0" );
   factory->BookMethod( TMVA::Types::kFisher, "FisherBoostLog2", fisher+":Boost_Num=100:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=2.0" );
   factory->BookMethod( TMVA::Types::kFisher, "FisherBoostStep", fisher+":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.0" );
   factory->BookMethod( TMVA::Types::kFisher, "FisherBoostStep2", fisher+":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.2" );
   factory->BookMethod( TMVA::Types::kFisher, "FisherBoostStep3", fisher+":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.5" );

  // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();
   
   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;
   
   delete factory;
   
   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
   
   
}
Exemplo n.º 22
0
void Boost2(){
   // This loads the library
   TMVA::Tools::Instance();

   // to get access to the GUI and all tmva macros
   TString tmva_dir(TString(gRootDir) + "/tmva");
   if(gSystem->Getenv("TMVASYS"))
      tmva_dir = TString(gSystem->Getenv("TMVASYS"));
   gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() );
   gROOT->ProcessLine(".L TMVAGui.C");

   TString outfileName = "boost.root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );
   factory->AddVariable( "var0", 'F' );
   factory->AddVariable( "var1", 'F' );
   TFile *input(0);
   TString fname = "./circledata.root";
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find data.root in the local directory
      std::cout << "--- BOOST       : Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   }
   else {
      gROOT->LoadMacro( "./createData.C");
      create_fullcirc(20000);
      cout << " created circledata.root with data and circle arranged in circles"<<endl;
      input = TFile::Open( fname );
   }
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   TTree *signal     = (TTree*)input->Get("TreeS");
   TTree *background = (TTree*)input->Get("TreeB");
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   gROOT->cd( outfileName+TString(":/") );
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   factory->PrepareTrainingAndTestTree( "", "",
                                        "nTrain_Signal=10000:nTrain_Background=10000:SplitMode=Random:NormMode=NumEvents:!V" );

   TString fisher="!H:!V";
   factory->BookMethod( TMVA::Types::kFisher, "Fisher", fisher );
   factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:V:NTrees=150:NCuts=101:MaxDepth=1:UseFisherCuts:UseExclusiveVars:MinLinCorrForFisher=0." );
//   factory->BookMethod( TMVA::Types::kFisher, "FisherBS", fisher+":Boost_Num=100:Boost_Type=Bagging:Boost_Transform=step" );
   factory->BookMethod( TMVA::Types::kFisher, "FisherS", fisher+":Boost_Num=150:Boost_Type=AdaBoost:Boost_Transform=step" );


  // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();
   
   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;
   
   delete factory;
   
   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
   
   
}
Exemplo n.º 23
0
void testBDT(){


   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();
/*
  TString outfileName( "TMVA.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  TMVA::Factory *factory = new TMVA::Factory( "testBDT", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );


 
   // global event weights per tree (see below for setting event-wise weights)
   //Double_t signalWeight     = 0.003582;
   //Double_t backgroundWeight = 0.0269;
   
   Double_t signalWeight     = 1;
   Double_t backgroundWeight = 1;
   
   TFile *input_sig = TFile::Open( "signal_exclusif.root" );
   TFile *input_wz = TFile::Open( "bruit_w_z.root" );
   
   TTree *signal     = (TTree*)input_sig->Get("tree");
   TTree *background = (TTree*)input_wz->Get("tree");

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   
   
   factory->AddVariable("PT_z"   , 'F');
   factory->AddVariable("ASYM"    , 'F');
   factory->AddVariable("PHI_lw_b", 'F');
   factory->AddVariable("M_top", 'F');
   */
   
   
   
   TString outfileName( "bdtTMVA_FCNC_tZ.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  TMVA::Factory *factory = new TMVA::Factory( "doBDT_FCNC_tZ", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );


 
   // global event weights per tree (see below for setting event-wise weights)
   //Double_t signalWeight     = 0.003582;
   //Double_t backgroundWeight = 0.0269;
   
   Double_t signalWeight     = 1;
   Double_t backgroundWeight = 1;
   
   TFile *input_sig = TFile::Open( "proof.root" );
   TFile *input_wz = TFile::Open( "proof.root" );
   
   TTree *signal     = (TTree*)input_sig->Get("Ttree_FCNCkut");
   
   
   TTree *background_WZ = (TTree*)input_wz->Get("Ttree_WZ");
   /*TTree *background_ZZ = (TTree*)input_wz->Get("Ttree_ZZ");
   TTree *background_WW = (TTree*)input_wz->Get("Ttree_WW");
   
   TTree *background_TTbar  = (TTree*)input_wz->Get("Ttree_TTbar");
   TTree *background_Zjets  = (TTree*)input_wz->Get("Ttree_Zjets");
   TTree *background_Wjets  = (TTree*)input_wz->Get("Ttree_Wjets");
   TTree *background_TtW    = (TTree*)input_wz->Get("Ttree_TtW");
   TTree *background_TbartW = (TTree*)input_wz->Get("Ttree_TbartW");*/

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,            signalWeight     );
   factory->AddBackgroundTree( background_WZ,     backgroundWeight );
   /*factory->AddBackgroundTree( background_ZZ,     backgroundWeight );
   factory->AddBackgroundTree( background_WW,     backgroundWeight );
   factory->AddBackgroundTree( background_TTbar,  backgroundWeight );
   factory->AddBackgroundTree( background_Zjets,  backgroundWeight );
   factory->AddBackgroundTree( background_Wjets,  backgroundWeight );
   factory->AddBackgroundTree( background_TtW,    backgroundWeight );
   factory->AddBackgroundTree( background_TbartW, backgroundWeight );*/
   
   
   factory->AddVariable("tree_topMass",    'F');
   factory->AddVariable("tree_deltaPhilb", 'F');
   factory->AddVariable("tree_asym",       'F');
   factory->AddVariable("tree_Zpt",        'F');
   
   
   
   
   
   
   
   
   
   // to set weights. The variable must exist in the tree
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   
   
   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   
   
   
   factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );




   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );


}
Exemplo n.º 24
0
void Training_BDT(TString channel, bool isZHvsOther, TString cat){

  TString name;
  if (isZHvsOther) name="ZHvsOther";
  else name="TTvsDY";
  if (channel=="Mu") name=name+"_Mu";
  else name=name+"_El";   
  if (cat=="2j")name=name+"_2j";
  else if(cat=="2j") name=name+"_3j";
  else name=name+"_noCat";
  

  
  TMVA::Tools::Instance();

  TFile* outputFile = TFile::Open( "outputtrainning"+name+".root", "RECREATE" );
  
  
  TMVA::Factory *factory = new TMVA::Factory( "BDT_NN_trainning"+name, outputFile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );;

  //TFile *input   = TFile::Open( "study_histo.root" );

  TFile *inputTT   = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/TTFullLept_Summer12_final_skimed_llbbX_withWeights_V3.root" );
  TFile *inputDYM10   = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/DYjets_M10to50_Summer12_final_skimed_llbbX_withWeights_V3.root" );
  TFile *inputDY   = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/DYjets_Summer12_final_skimed_llbbX_withWeights_V3.root" );
  TFile *inputZZ   = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/ZZ_Summer12_final_skimed_llbbX_withWeights_V3.root" );
  TFile *inputZH   = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/ZH125_Summer12_final_skimed_llbbX_withWeights_V3.root" ); 
  TFile *inputWW   = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/WW_Summer12_final_skimed_llbbX_withWeights_V3.root" );
  TFile *inputWZ   = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/WZ_Summer12_final_skimed_llbbX_withWeights_V3.root" );
  TFile *inputWt   = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/Wt_Summer12_final_skimed_llbbX_withWeights_V3.root" );
  TFile *inputWtbar   = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/Wtbar_Summer12_final_skimed_llbbX_withWeights_V3.root" );
  TFile *inputTTSemi   = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/TTSemiLept_Summer12_final_skimed_llbbX_withWeights_V3.root" );

  
  
  TTree *signal     = (TTree*)inputZH->Get("rds_zbb");
  TTree *backgroundDY = (TTree*)inputDY->Get("rds_zbb");
  TTree *backgroundZZ = (TTree*)inputZZ->Get("rds_zbb");
  TTree *backgroundTT = (TTree*)inputTT->Get("rds_zbb");
  TTree *backgroundDYM10 = (TTree*)inputDYM10->Get("rds_zbb");
  TTree *backgroundWW = (TTree*)inputWW->Get("rds_zbb");
  TTree *backgroundWZ = (TTree*)inputWZ->Get("rds_zbb");
  TTree *backgroundWt = (TTree*)inputWt->Get("rds_zbb");
  TTree *backgroundWtbar = (TTree*)inputWtbar->Get("rds_zbb");
  TTree *backgroundTTSemi = (TTree*)inputTTSemi->Get("rds_zbb");
  
  if (isZHvsOther ){
    factory->AddSignalTree( signal    , 1.2);
    factory->AddBackgroundTree( backgroundDY, 0.54);
    factory->AddBackgroundTree( backgroundZZ, 0.1);
    factory->AddBackgroundTree( backgroundDYM10, 0.02);
    factory->AddBackgroundTree( backgroundTT, 0.3); 
    factory->AddBackgroundTree(backgroundWW , 0.005);
    factory->AddBackgroundTree(backgroundWZ , 0.005);
    factory->AddBackgroundTree(backgroundWt , 0.01);
    factory->AddBackgroundTree(backgroundWtbar ,0.01 );  
    factory->AddBackgroundTree(backgroundTTSemi ,0.01 );
  }
 else{
    factory->AddSignalTree( backgroundDY    , 1.0);
    factory->AddBackgroundTree( backgroundTT, 1.0);
 }
  if (isZHvsOther ){
    factory->AddVariable("MinusLogW_ZH_cor3",   'F');
    factory->AddVariable("MinusLogW_ZH_cor0", 'F');
    factory->AddVariable("MinusLogW_ZZ_cor3",   'F');
    factory->AddVariable("MinusLogW_ZZ_cor0", 'F');  
    factory->AddVariable("MinusLogW_TT",   'F');
    factory->AddVariable("MinusLogW_gg_Zbb",  'F');
    factory->AddVariable("MinusLogW_qq_Zbb",     'F');
  }
  else{
    factory->AddVariable("MinusLogW_TT",   'F');
    factory->AddVariable("MinusLogW_gg_Zbb",  'F');
    factory->AddVariable("MinusLogW_qq_Zbb",     'F');  
  }
  

  TCut mycuts, mycutb;
  
  if(isZHvsOther){
    if(channel=="Mu"){ 
      if(cat=="2j"){ 
        mycuts = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 &&  eventSelectiondilepM_inc > 76 &&  eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj==2 && (eventSelectiondijetM_inc > 80 && eventSelectiondijetM_inc < 150))";
        mycutb = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 &&  eventSelectiondilepM_inc > 76 &&  eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj==2 && (eventSelectiondijetM_inc > 80 && eventSelectiondijetM_inc < 150))";
      }
      else{
        mycuts = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 &&  eventSelectiondilepM_inc > 76 &&  eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj>2 && (eventSelectiondijetM_inc > 50 && eventSelectiondijetM_inc < 150))";
        mycutb = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 &&  eventSelectiondilepM_inc > 76 &&  eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj>2 && (eventSelectiondijetM_inc > 50 && eventSelectiondijetM_inc < 150))";
      }
    }
    else {
      if(cat=="2j"){ 
        mycuts = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 &&  eventSelectiondilepM_inc > 76 &&  eventSelectiondilepM_inc < 106  && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj==2 && (eventSelectiondijetM_inc > 80 && eventSelectiondijetM_inc < 150))";
        mycutb = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 &&  eventSelectiondilepM_inc > 76 &&  eventSelectiondilepM_inc < 106  && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj==2 && (eventSelectiondijetM_inc > 80 && eventSelectiondijetM_inc < 150))";
      }
      else{
        mycuts = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 &&  eventSelectiondilepM_inc > 76 &&  eventSelectiondilepM_inc < 106  && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj>2 && (eventSelectiondijetM_inc > 50 && eventSelectiondijetM_inc < 150))";
        mycutb = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 &&  eventSelectiondilepM_inc > 76 &&  eventSelectiondilepM_inc < 106  && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj>2 && (eventSelectiondijetM_inc > 50 && eventSelectiondijetM_inc < 150))";
      
      }
    }
  }
  
  else{
    if(channel=="Mu"){ 
     
        mycuts = "(MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 &&  eventSelectiondilepM_inc > 60 &&  eventSelectiondilepM_inc < 120 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30)";
        mycutb = "(MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 &&  eventSelectiondilepM_inc > 60 &&  eventSelectiondilepM_inc < 120 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30)";
      
    }
    else {
        mycuts = "(MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 &&  eventSelectiondilepM_inc > 60 &&  eventSelectiondilepM_inc < 120 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30)";
        mycutb = "(MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 &&  eventSelectiondilepM_inc > 60 &&  eventSelectiondilepM_inc < 120 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30)";
      
  
    }
  }
    
  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   
  
  
  if (isZHvsOther)factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=40:nEventsMin=600:MaxDepth=10:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );
  else factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=50:nEventsMin=1000:MaxDepth=20:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );
  
  if (isZHvsOther)factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=1000:HiddenLayers=N+6:TestRate=5:!UseRegulator" );
  else factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+2:TestRate=5:!UseRegulator" );
  // Train MVAs using the set of training events
  factory->TrainAllMethods();

  // ---- Evaluate all MVAs using the set of test events
  factory->TestAllMethods();

  // ----- Evaluate and compare performance of all configured MVAs
  factory->EvaluateAllMethods();
   
  outputFile->Close();

  
  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;

  delete factory;

  // Launch the GUI for the root macros
  if (!gROOT->IsBatch() ) TMVAGui( "outputtrainning"+name+".root" );


  
  


}
Exemplo n.º 25
0
////////////////////////////////////////////////////////////////////////////////
/// Main                                                                     ///
////////////////////////////////////////////////////////////////////////////////
void TrainRegressionFJ(TString myMethodList="")
{
    gROOT->SetBatch(1);
    gROOT->LoadMacro("HelperFunctions.h" );  // make functions visible to TTreeFormula

    if (!TString(gROOT->GetVersion()).Contains("5.34")) {
        std::cout << "INCORRECT ROOT VERSION! Please use 5.34:" << std::endl;
        std::cout << "source /uscmst1/prod/sw/cms/slc5_amd64_gcc462/lcg/root/5.34.02-cms/bin/thisroot.csh" << std::endl;
        std::cout << "Return without doing anything." << std::endl;
        return;
    }
    
    //TString curDynamicPath( gSystem->GetDynamicPath() );
    //gSystem->SetDynamicPath( "../lib:" + curDynamicPath );

    //TString curIncludePath(gSystem->GetIncludePath());
    //gSystem->SetIncludePath( " -I../include " + curIncludePath );

    // Load the library
    TMVA::Tools::Instance();


    //--------------------------------------------------------------------------
    // Default MVA methods to be trained + tested
    std::map<std::string, int> Use;

    // --- Mutidimensional likelihood and Nearest-Neighbour methods
    Use["PDERS"]           = 0;
    Use["PDEFoam"]         = 1;
    Use["KNN"]             = 1;
    //
    // --- Linear Discriminant Analysis
    Use["LD"]              = 1;
    //
    // --- Function Discriminant analysis
    Use["FDA_GA"]          = 1;
    Use["FDA_MC"]          = 0;
    Use["FDA_MT"]          = 0;
    Use["FDA_GAMT"]        = 0;
    //
    // --- Neural Network
    Use["MLP"]             = 1; 
    //
    // --- Support Vector Machine 
    Use["SVM"]             = 0;
    // 
    // --- Boosted Decision Trees
    Use["BDT"]             = 1;
    Use["BDT1"]            = 0;
    Use["BDTG"]            = 0;
    Use["BDTG1"]           = 0;

    //--------------------------------------------------------------------------
    std::cout << std::endl;
    std::cout << "==> Start TMVARegression" << std::endl;

    // Select methods (don't look at this code - not of interest)
    if (myMethodList != "") {
        for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

        std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
        for (UInt_t i=0; i<mlist.size(); i++) {
            std::string regMethod(mlist[i]);

            if (Use.find(regMethod) == Use.end()) {
                std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
                for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
                std::cout << std::endl;
                return;
            }
            Use[regMethod] = 1;
        }
    }

    //--------------------------------------------------------------------------
    // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
    TString outfileName( "TMVARegFJ.root" );
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    // Create the factory object. Later you can choose the methods
    // whose performance you'd like to investigate. The factory will
    // then run the performance analysis for you.
    //
    // The first argument is the base of the name of all the
    // weightfiles in the directory weights/
    //
    // The second argument is the output file for the training results
    // All TMVA output can be suppressed by removing the "!" (not) in 
    // front of the "Silent" argument in the option string
    TMVA::Factory *factory = new TMVA::Factory( "TMVARegressionFJ", outputFile, 
                                                "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression" );

    // If you wish to modify default settings
    // (please check "src/Config.h" to see all available global options)
    //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
    //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

    const std::vector<std::string> & inputExpressions      = GetInputExpressionsFJReg();
    const std::vector<std::string> & inputExpressionLabels = GetInputExpressionLabelsFJReg();
    assert(inputExpressions.size() == inputExpressionLabels.size());

    // Define the input variables that shall be used for the MVA training
    // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    //factory->AddVariable( "var1", "Variable 1", "units", 'F' );
    //factory->AddVariable( "var2", "Variable 2", "units", 'F' );
    
    for (UInt_t iexpr=0; iexpr!=inputExpressions.size(); iexpr++){
        Label label = MakeLabel(inputExpressionLabels.at(iexpr));
        TString expr = inputExpressions.at(iexpr);
        factory->AddVariable(expr, label.xlabel, label.unit, label.type);
    }

    // You can add so-called "Spectator variables", which are not used in the MVA training,
    // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    // input variables, the response values of all trained MVAs, and the spectator variables
    //factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
    //factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

    // Add the variable carrying the regression target
    //factory->AddTarget( "fvalue" );
    factory->AddTarget( "fathFilterJets_genPt" );

    // It is also possible to declare additional targets for multi-dimensional regression, ie:
    // -- factory->AddTarget( "fvalue2" );
    // BUT: this is currently ONLY implemented for MLP

    //--------------------------------------------------------------------------
    // Read training and test data
    TFile *input(0);
    TString dirname = "skim_ZnnH_regression_fj/";
    TString prefix  = "skim_";
    TString suffix  = ".root";
    TTree *regTrainTree(0), *regTestTree(0);
    
    std::vector<std::string> processes;
    processes.push_back("ZnnH110");
    processes.push_back("ZnnH115");
    processes.push_back("ZnnH120");
    processes.push_back("ZnnH125");
    processes.push_back("ZnnH130");
    processes.push_back("ZnnH135");
    processes.push_back("ZnnH140");
    processes.push_back("ZnnH145");
    processes.push_back("ZnnH150");
#ifdef USE_WH
    processes.push_back("WlnH110");
    processes.push_back("WlnH115");
    processes.push_back("WlnH120");
    processes.push_back("WlnH125");
    processes.push_back("WlnH130");
    processes.push_back("WlnH135");
    processes.push_back("WlnH140");
    processes.push_back("WlnH145");
    processes.push_back("WlnH150");
#endif

    std::vector<TFile *> files;
    for (UInt_t i=0; i<processes.size(); i++){
        std::string process = processes.at(i);
        input = (TFile*) TFile::Open(dirname + prefix + process + suffix, "READ");
        if (!input) {
            std::cout << "ERROR: Could not open input file." << std::endl;
            exit(1);
        }
        std::cout << "--- TMVARegression           : Using input file: " << input->GetName() << std::endl;
        files.push_back(input);
        
        // --- Register the regression tree
        regTrainTree = (TTree*) input->Get("tree_train");
        regTestTree  = (TTree*) input->Get("tree_test");

        // Global event weights per tree (see below for setting event-wise weights)
        Double_t regWeight = 1.0;

        // You can add an arbitrary number of regression trees
        factory->AddRegressionTree(regTrainTree, regWeight, TMVA::Types::kTraining);
        factory->AddRegressionTree(regTestTree , regWeight, TMVA::Types::kTesting );
    }

    // Set individual event weights (the variables must exist in the original TTree)
    //factory->SetWeightExpression( "var1", "Regression" );

    // Apply additional cuts on the signal and background samples (can be different)
    TCut mycut = "fathFilterJets_genPt>10 && fathFilterJets_pt>15 && abs(fathFilterJets_eta)<2.5"; // this is to avoid 3rd filter jet without gen match
    //TCut mycut = "hJet_genPt[0] > 0. && hJet_genPt[1] > 0. && hJet_csv[0] > 0. && hJet_csv[1] > 0. && hJet_pt[0] > 20. && hJet_pt[1] > 20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5";

    // Tell the factory to use all remaining events in the trees after training for testing:
    factory->PrepareTrainingAndTestTree( mycut, "V:nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents" );

    // If no numbers of events are given, half of the events in the tree are used 
    // for training, and the other half for testing:
    //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=Random:!V" );

    // --- Book MVA methods
    //
    // Please lookup the various method configuration options in the corresponding cxx files, eg:
    // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    // it is possible to preset ranges in the option string in which the cut optimisation should be done:
    // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    // PDE - RS method
    if (Use["PDERS"])
        factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                             "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
    // And the options strings for the MinMax and RMS methods, respectively:
    //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
    //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

    if (Use["PDEFoam"])
        factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                             "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

    // K-Nearest Neighbour classifier (KNN)
    if (Use["KNN"])
        factory->BookMethod( TMVA::Types::kKNN, "KNN",
                             "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

    // Linear discriminant
    if (Use["LD"])
        factory->BookMethod( TMVA::Types::kLD, "LD", 
                             "!H:!V:VarTransform=None" );

    // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if (Use["FDA_MC"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );

    if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
        factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

    if (Use["FDA_MT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if (Use["FDA_GAMT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    // Neural network (MLP)
    if (Use["MLP"])
        factory->BookMethod( TMVA::Types::kMLP, "MLP", 
                             "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

    // Support Vector Machine
    if (Use["SVM"])
        factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

    // Boosted Decision Trees
    if (Use["BDT"])
        factory->BookMethod( TMVA::Types::kBDT, "BDT",
                             "!H:V:NTrees=100:nEventsMin=30:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );
//"!H:V:NTrees=60:nEventsMin=20:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30:DoBoostMonitor" );

    if (Use["BDT1"])
        factory->BookMethod( TMVA::Types::kBDT, "BDT1",
                             "!H:V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );

    if (Use["BDTG"])
        factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                             "!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.7:nCuts=200:MaxDepth=3:NNodesMax=15" );

    if (Use["BDTG1"])
        factory->BookMethod( TMVA::Types::kBDT, "BDTG1",
                             "!H:V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=3:NNodesMax=15" );

    //--------------------------------------------------------------------------
    // Train MVAs using the set of training events
    factory->TrainAllMethods();

    // --- Evaluate all MVAs using the set of test events
    factory->TestAllMethods();

    // --- Evaluate and compare performance of all configured MVAs
    factory->EvaluateAllMethods();

    //--------------------------------------------------------------------------
    // Save the output
    outputFile->Close();

    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVARegression is done!" << std::endl;

    for (UInt_t i=0; i<files.size(); i++)
        files.at(i)->Close();

    delete outputFile;
    delete factory;

    // Launch the GUI for the root macros
    //gROOT->SetMacroPath( "$ROOTSYS/tmva/macros/" );
    //gROOT->Macro( "$ROOTSYS/tmva/macros/TMVAlogon.C" );
    //gROOT->LoadMacro( "$ROOTSYS/tmva/macros/TMVAGui.C" );
    //if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
Exemplo n.º 26
0
void TMVAClassificationHwwNtuple( TString myMethodList = "" )
{
   // This loads the library
   TMVA::Tools::Instance();

    gROOT->ProcessLine(".L TMVAGui.C");


   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 1;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;
   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
	 cout<<regMethod<<" is on"<<endl;
      }
   }
   // -------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // For one variable
   //TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
   //                                            "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" );
   // For Multiple Variables
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
   //factory->AddVariable( "pt1",                "LeadLepton pt", "", 'F' );
   //factory->AddVariable( "pt2",                "TailLepton pt", "", 'F' );
   factory->AddVariable( "pfmet",                "MissingEt", "", 'F' );
   factory->AddVariable( "mpmet",              "Minimum Proj. Met", "", 'F' );
   factory->AddVariable( "dphill",             "DeltPhiOfLepLep", "", 'F' );
   //factory->AddVariable( "mll",                "DiLepton Mass", "", 'F' );
   factory->AddVariable( "ptll",               "DiLepton pt", "", 'F' );
   //
   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   //factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
   //factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );
   //
   //factory->AddSpectator( "mWW",                "Higgs Mass", "", 'F' );
   factory->AddSpectator( "pt1",                "LeadLepton pt", "", 'F' );
   factory->AddSpectator( "pt2",                "TailLepton pt", "", 'F' );
   factory->AddSpectator( "pfmet",                "MissingEt", "", 'F' );
   factory->AddSpectator( "mpmet",              "Minimum Proj. Met", "", 'F' );
   factory->AddSpectator( "dphill",             "DeltPhiOfLepLep", "", 'F' );
   factory->AddSpectator( "mll",                "DiLepton Mass", "", 'F' );
   factory->AddSpectator( "ptll",               "DiLepton pt", "", 'F' );
   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   //TString fname = "./tmva_class_example.root";
   //TString fname = "/afs/cern.ch/work/s/salee/private/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root";
   //TString fname = "/terranova_0/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root";
   
   //if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
    // exit(-1);
      //gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
   
   //TFile *input = TFile::Open( fname );
   //TFile *SB_OnPeak = TFile::Open("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root");
   //TTree *SB_OnPeak_Tree = (TTree*)SB_OnPeak->Get("latino");
   
   TChain *S_Chain = new TChain("latino");
   TChain *C_Chain = new TChain("latino");
   TChain *SCI_Chain = new TChain("latino");
   TChain *qqWW_Chain = new TChain("latino");

   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigOnPeak_8TeV.root");
   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigShoulder_8TeV.root");
   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigTail_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntShoulder_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntTail_8TeV.root");
   C_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw25_CotHead_8TeV.root");
   C_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw25_CotTail_8TeV.root");

   qqWW_Chain->Add("/afs/cern.ch/user/m/maiko/work/public/Tree/tree_skim_wwmin/nominals/latino_000_WWJets2LMad.root");
   
   // --- Register the training and test trees

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( S_Chain  );
   factory->AddBackgroundTree( qqWW_Chain );
   factory->AddBackgroundTree( C_Chain );
   // Classification training and test data in ROOT tree format with signal and background events being located in the same tree
   //factory->SetInputTrees(SCI_Chain, GenOffCut, GenOnCut);
   
   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );
   
   factory->SetWeightExpression          ("2.1*puW*baseW*effW*triggW*19.468");
   //factory->SetSignalWeightExpression    ("2.1*puW*baseW*effW*triggW*19.468");
   //factory->SetBackgroundWeightExpression("puW*baseW*effW*triggW*19.468");

   //factory->PrepareTrainingAndTestTree( ChanCommOff,
   //                                     "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" );
                                        //"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V";
   factory->PrepareTrainingAndTestTree( ChanCommOff0J,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" );
   // ---- Book MVA methods
   //
   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
                           //"!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // -----------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's ! 
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // -----------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   //if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
Exemplo n.º 27
0
void Classification()
{
   TMVA::Tools::Instance();
   TMVA::PyMethodBase::PyInitialize();

   TString outfileName("TMVA.root");
   TFile *outputFile = TFile::Open(outfileName, "RECREATE");

   TMVA::Factory *factory = new TMVA::Factory("TMVAClassification", outputFile,
         "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");

   factory->AddVariable("myvar1 := var1+var2", 'F');
   factory->AddVariable("myvar2 := var1-var2", "Expression 2", "", 'F');
   factory->AddVariable("var3",                "Variable 3", "units", 'F');
   factory->AddVariable("var4",                "Variable 4", "units", 'F');
   factory->AddSpectator("spec1 := var1*2",  "Spectator 1", "units", 'F');
   factory->AddSpectator("spec2 := var1*3",  "Spectator 2", "units", 'F');

   TFile *input(0);
   TString fname = "./tmva_class_example.root";
   if (!gSystem->AccessPathName( fname )) {
      input = TFile::Open( fname ); // check if file in local directory exists
   }
   else {
      TFile::SetCacheFileDir(".");
      input = TFile::Open("http://root.cern.ch/files/tmva_class_example.root", "CACHEREAD");
   }
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }

   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;

   // --- Register the training and test trees

   TTree *tsignal     = (TTree *)input->Get("TreeS");
   TTree *tbackground = (TTree *)input->Get("TreeB");

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree(tsignal,     signalWeight);
   factory->AddBackgroundTree(tbackground, backgroundWeight);


   // Set individual event weights (the variables must exist in the original TTree)
   factory->SetBackgroundWeightExpression("weight");


   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   factory->PrepareTrainingAndTestTree(mycuts, mycutb,
                                       "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V");


   ///////////////////
   //Booking         //
   ///////////////////
   // Boosted Decision Trees

   //PyMVA methods
   factory->BookMethod(TMVA::Types::kPyRandomForest, "PyRandomForest",
                       "!V:NEstimators=150:Criterion=gini:MaxFeatures=auto:MaxDepth=3:MinSamplesLeaf=1:MinWeightFractionLeaf=0:Bootstrap=kTRUE");
   factory->BookMethod(TMVA::Types::kPyAdaBoost, "PyAdaBoost",
                       "!V:BaseEstimator=None:NEstimators=100:LearningRate=1:Algorithm=SAMME.R:RandomState=None");
   factory->BookMethod(TMVA::Types::kPyGTB, "PyGTB",
                       "!V:NEstimators=150:Loss=deviance:LearningRate=0.1:Subsample=1:MaxDepth=6:MaxFeatures='auto'");


   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();
   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

}
Exemplo n.º 28
0
int TestBDT_forreal_test(TString sig) {

   // This loads the library
   TMVA::Tools::Instance();
   // to get access to the GUI and all tmva macros
   //TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName());//was not commented, but does not work anymore
   //gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath());//was not commented, but cannot not work anymore
   //gROOT->ProcessLine(".L TMVAGui.C");
   TString outfileName( "rootfiles/TMVA/resultTMVA_"+sig+"VsTTbar2l.root" );
   TString weightname( "weightsTMVA_"+sig+"VsTTbar2l" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   TMVA::Factory *factory = new TMVA::Factory( weightname, outputFile,"!V:!Silent:Color:DrawProgressBar");

  vector<TString> histonames; histonames.clear();
  map<string, float> value;
  
  //histonames.push_back("MT2W");             
  //histonames.push_back("MT2_lb_b");         
  //histonames.push_back("MT2_lb_bqq");       
  //histonames.push_back("MT2_lb_b_mass");   
  //histonames.push_back("MT2_lb_bqq_mass"); 
  histonames.push_back("Mlb_lead_bdiscr");              
  //histonames.push_back("Mjjj");              
  //histonames.push_back("topness");          
  //histonames.push_back("topnessMod");          
  histonames.push_back("pfmet");              
  //histonames.push_back("ak4_HT");               
  //histonames.push_back("MET_over_sqrtHT");    
  //histonames.push_back("ak4_htratiom");          
  histonames.push_back("dR_lep_leadb");        
  //histonames.push_back("hadronic_top_chi2");             
  //histonames.push_back("ngoodbtags");           
  histonames.push_back("ngoodjets");            
  //histonames.push_back("mindphi_met_j1_j2");        
  //histonames.push_back("lep1_pt");
  histonames.push_back("ak4pfjets_leadMEDbjet_p4_Pt"); 

  for(unsigned int b = 0; b<histonames.size(); ++b){
    factory->AddVariable(histonames[b], 'F' );
  }

  TString signame = "/nfs-7/userdata/stopRun2/testMVA/"+sig+".root";
  TString bkgname1 = "/nfs-7/userdata/stopRun2/testMVA/TTJets_DiLept_madgraph_25ns_1.root";
  TString bkgname2 = "/nfs-7/userdata/stopRun2/testMVA/TTJets_DiLept_madgraph_25ns_2.root";
  /*
  TString signame = "/hadoop/cms/store/user/haweber/forBDT/"+sig+".root";
  TString bkgname1 = "/hadoop/cms/store/user/haweber/forBDT/TTJets_DiLept_madgraph_25ns_1.root";
  TString bkgname2 = "/hadoop/cms/store/user/haweber/forBDT/TTJets_DiLept_madgraph_25ns_2.root";
  */
  cout << "signame " << signame << endl;
  TFile *inputSig = TFile::Open( signame );
  TFile *inputBkg1 = TFile::Open( bkgname1 );
  TFile *inputBkg2 = TFile::Open( bkgname2 );
  TTree *signal     = (TTree*)inputSig->Get("t");
  TTree *background1 = (TTree*)inputBkg1->Get("t");
  TTree *background2 = (TTree*)inputBkg2->Get("t");

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background1, backgroundWeight );
   factory->AddBackgroundTree( background2, backgroundWeight );

   //factory->SetBackgroundWeightExpression( "weight" );
   // Apply additional cuts on the signal and background samples (can be different)
   //TCut mycuts = "MT2W>200&&mindphi_met_j1_j2>0.8"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   //TCut mycutb = "MT2W>200&&mindphi_met_j1_j2>0.8"; // for example: TCut mycutb = "abs(var1)<0.5";
   //if(sig.Contains("T2tt_425_325")||sig.Contains("T2tt_500_325")){ mycuts = "mindphi_met_j1_j2>0.8"; mycutb = "mindphi_met_j1_j2>0.8"; }
   TCut mycuts = "";
   TCut mycutb = "";
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   factory->BookMethod( TMVA::Types::kBDT, "BDT",
			"!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
   // Train MVAs using the set of training events
   cout << "Train methods" << endl;
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   cout << "Test methods" << endl;
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   cout << "Evaluate methods" << endl;
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   //if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName );

  return 0;
}
void myTMVAClassification( TString myMethodList = "" )
{
    // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
    // if you use your private .rootrc, or run from a different directory, please copy the
    // corresponding lines from .rootrc

    // methods to be processed can be given as an argument; use format:
    //
    // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
    //
    // if you like to use a method via the plugin mechanism, we recommend using
    //
    // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
    // (an example is given for using the BDT as plugin (see below),
    // but of course the real application is when you write your own
    // method based)

    //---------------------------------------------------------------
    // This loads the library
    TMVA::Tools::Instance();

    // to get access to the GUI and all tmva macros
    //TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName());
    //gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath());
    gROOT->ProcessLine(".L TMVAGui.C");

    // Default MVA methods to be trained + tested
    std::map<std::string,int> Use;

    // --- Cut optimisation
    Use["Cuts"]            = 1;
    Use["CutsD"]           = 1;
    Use["CutsPCA"]         = 0;
    Use["CutsGA"]          = 0;
    Use["CutsSA"]          = 0;
    //
    // --- 1-dimensional likelihood ("naive Bayes estimator")
    Use["Likelihood"]      = 1;
    Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
    Use["LikelihoodPCA"]   = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
    Use["LikelihoodKDE"]   = 0;
    Use["LikelihoodMIX"]   = 0;
    //
    // --- Mutidimensional likelihood and Nearest-Neighbour methods
    Use["PDERS"]           = 1;
    Use["PDERSD"]          = 0;
    Use["PDERSPCA"]        = 0;
    Use["PDEFoam"]         = 1;
    Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
    Use["KNN"]             = 1; // k-nearest neighbour method
    //
    // --- Linear Discriminant Analysis
    Use["LD"]              = 1; // Linear Discriminant identical to Fisher
    Use["Fisher"]          = 0;
    Use["FisherG"]         = 0;
    Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
    Use["HMatrix"]         = 0;
    //
    // --- Function Discriminant analysis
    Use["FDA_GA"]          = 1; // minimisation of user-defined function using Genetics Algorithm
    Use["FDA_SA"]          = 0;
    Use["FDA_MC"]          = 0;
    Use["FDA_MT"]          = 0;
    Use["FDA_GAMT"]        = 0;
    Use["FDA_MCMT"]        = 0;
    //
    // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
    Use["MLP"]             = 0; // Recommended ANN
    Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
    Use["MLPBNN"]          = 1; // Recommended ANN with BFGS training method and bayesian regulator
    Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
    Use["TMlpANN"]         = 0; // ROOT's own ANN
    //
    // --- Support Vector Machine
    Use["SVM"]             = 1;
    //
    // --- Boosted Decision Trees
    Use["BDT"]             = 1; // uses Adaptive Boost
    Use["BDTG"]            = 0; // uses Gradient Boost
    Use["BDTB"]            = 0; // uses Bagging
    Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
    Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting
    //
    // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
    Use["RuleFit"]         = 1;
    // ---------------------------------------------------------------

    std::cout << std::endl;
    std::cout << "==> Start TMVAClassification" << std::endl;

    // Select methods (don't look at this code - not of interest)
    if (myMethodList != "") {
        for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

        std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
        for (UInt_t i=0; i<mlist.size(); i++) {
            std::string regMethod(mlist[i]);

            if (Use.find(regMethod) == Use.end()) {
                std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
                for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
                std::cout << std::endl;
                return;
            }
            Use[regMethod] = 1;
        }
    }

    // --------------------------------------------------------------------------------------------------

    // --- Here the preparation phase begins

    // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
    TString outfileName( "TMVA.root" );
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    // Create the factory object. Later you can choose the methods
    // whose performance you'd like to investigate. The factory is
    // the only TMVA object you have to interact with
    //
    // The first argument is the base of the name of all the
    // weightfiles in the directory weight/
    //
    // The second argument is the output file for the training results
    // All TMVA output can be suppressed by removing the "!" (not) in
    // front of the "Silent" argument in the option string
    TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

    // If you wish to modify default settings
    // (please check "src/Config.h" to see all available global options)
    //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
    //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

    // Define the input variables that shall be used for the MVA training
    // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]


    std:: cout << "I will tell you about the variables I want " << std::endl;
    factory->AddVariable( "Lambda_b0_IPCHI2_OWNPV", 'F' );
    factory->AddVariable( "Lambda_b0_FD_OWNPV", 'F' );
    factory->AddVariable( "Lambda_b0_P", 'F' );
    factory->AddVariable( "Lambda_b0_PT", 'F' );
    factory->AddVariable( "Lambda_b0_ENDVERTEX_CHI2", 'F' );
    factory->AddVariable( "Lambda_b0_DIRA_OWNPV", 'F' );
    factory->AddVariable( "J_psi_1S_FD_OWNPV", 'F' );
    factory->AddVariable( "Lambda_1520_0_FD_OWNPV", 'F' );
    factory->AddVariable( "J_psi_1S_IPCHI2_OWNPV", 'F' );
    factory->AddVariable( "Lambda_1520_0_IPCHI2_OWNPV", 'F' );
    factory->AddVariable( "J_psi_1S_PT", 'F' );
    factory->AddVariable( "Lambda_1520_0_PT", 'F' );
    factory->AddVariable( "J_psi_1S_ENDVERTEX_CHI2", 'F' );
    factory->AddVariable( "Lambda_1520_0_ENDVERTEX_CHI2", 'F' );
    factory->AddVariable( "eplus_PT",  'F' );
    factory->AddVariable( "eminus_PT",  'F' );

    factory->AddVariable( "pplus_PT", 'F' );
    factory->AddVariable( "Kminus_PT", 'F' );

    factory->AddVariable( "eplus_IPCHI2_OWNPV", 'F' );
    factory->AddVariable( "eminus_IPCHI2_OWNPV", 'F' );

    factory->AddVariable( "pplus_IPCHI2_OWNPV", 'F' );

    factory->AddVariable( "Kminus_IPCHI2_OWNPV", 'F' );





    // Read training and test data


    TFile *input_signal = new TFile("../files/signal_MC.root");
    TFile *input_background = new TFile("../files/background_MC.root");
    input_signal->Print();
    input_background ->Print();
    //cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;

    // --- Register the training and test trees

    TTree *signal     = (TTree*)input_signal->Get("DecayTree");
    TTree *background = (TTree*)input_background->Get("DecayTree");


    std::cout << "Number of signal     : " <<  signal->GetEntries() << std::endl;
    std::cout << "Number of background : " <<  background->GetEntries() << std::endl;

    // global event weights per tree (see below for setting event-wise weights)
    Double_t signalWeight     = 1.0;
    Double_t backgroundWeight = 1.0;

    // You can add an arbitrary number of signal or background trees
    factory->AddSignalTree    ( signal,     signalWeight     );
    factory->AddBackgroundTree( background, backgroundWeight );

    // To give different trees for training and testing, do as follows:
    //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
    //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );

    // Use the following code instead of the above two or four lines to add signal and background
    // training and test events "by hand"
    // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    //      variable definition, but simply compute the expression before adding the event
    //
    //     // --- begin ----------------------------------------------------------
    //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
    //     Float_t  treevars[4], weight;
    //
    //     // Signal
    //     for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
    //     for (UInt_t i=0; i<signal->GetEntries(); i++) {
    //        signal->GetEntry(i);
    //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
    //        // add training and test events; here: first half is training, second is testing
    //        // note that the weight can also be event-wise
    //        if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
    //        else                              factory->AddSignalTestEvent    ( vars, signalWeight );
    //     }
    //
    //     // Background (has event weights)
    //     background->SetBranchAddress( "weight", &weight );
    //     for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
    //     for (UInt_t i=0; i<background->GetEntries(); i++) {
    //        background->GetEntry(i);
    //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
    //        // add training and test events; here: first half is training, second is testing
    //        // note that the weight can also be event-wise
    //        if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
    //        else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
    //     }
    // --- end ------------------------------------------------------------
    //
    // --- end of tree registration

    // Set individual event weights (the variables must exist in the original TTree)
    //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
    //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
    //factory->SetBackgroundWeightExpression( "weight" );

    // Apply additional cuts on the signal and background samples (can be different)
    TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
    TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

    // Tell the factory how to use the training and testing events
    //
    // If no numbers of events are given, half of the events in the tree are used
    // for training, and the other half for testing:
    //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
    // To also specify the number of testing events, use:
    //    factory->PrepareTrainingAndTestTree( mycut,
    //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
    factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                         "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

    // ---- Book MVA methods
    //
    // Please lookup the various method configuration options in the corresponding cxx files, eg:
    // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    // it is possible to preset ranges in the option string in which the cut optimisation should be done:
    // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    // Cut optimisation
    if (Use["Cuts"])
        factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                             "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

    if (Use["CutsD"])
        factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                             "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

    if (Use["CutsPCA"])
        factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                             "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

    if (Use["CutsGA"])
        factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                             "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

    if (Use["CutsSA"])
        factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                             "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    // Likelihood ("naive Bayes estimator")
    if (Use["Likelihood"])
        factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                             "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

    // Decorrelated likelihood
    if (Use["LikelihoodD"])
        factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                             "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

    // PCA-transformed likelihood
    if (Use["LikelihoodPCA"])
        factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                             "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );

    // Use a kernel density estimator to approximate the PDFs
    if (Use["LikelihoodKDE"])
        factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                             "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );

    // Use a variable-dependent mix of splines and kernel density estimator
    if (Use["LikelihoodMIX"])
        factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                             "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );

    // Test the multi-dimensional probability density estimator
    // here are the options strings for the MinMax and RMS methods, respectively:
    //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if (Use["PDERS"])
        factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                             "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

    if (Use["PDERSD"])
        factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

    if (Use["PDERSPCA"])
        factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

    // Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if (Use["PDEFoam"])
        factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                             "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

    if (Use["PDEFoamBoost"])
        factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                             "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

    // K-Nearest Neighbour classifier (KNN)
    if (Use["KNN"])
        factory->BookMethod( TMVA::Types::kKNN, "KNN",
                             "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

    // H-Matrix (chi2-squared) method
    if (Use["HMatrix"])
        factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

    // Linear discriminant (same as Fisher discriminant)
    if (Use["LD"])
        factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

    // Fisher discriminant (same as LD)
    if (Use["Fisher"])
        factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

    // Fisher with Gauss-transformed input variables
    if (Use["FisherG"])
        factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

    // Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if (Use["BoostedFisher"])
        factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher",
                             "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

    // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if (Use["FDA_MC"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                             "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

    if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                             "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

    if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                             "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    if (Use["FDA_MT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                             "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if (Use["FDA_GAMT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                             "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    if (Use["FDA_MCMT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                             "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if (Use["MLP"])
        factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

    if (Use["MLPBFGS"])
        factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

    if (Use["MLPBNN"])
        factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

    // CF(Clermont-Ferrand)ANN
    if (Use["CFMlpANN"])
        factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...

    // Tmlp(Root)ANN
    if (Use["TMlpANN"])
        factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

    // Support Vector Machine
    if (Use["SVM"])
        factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

    // Boosted Decision Trees
    if (Use["BDTG"]) // Gradient Boost
        factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                             "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );

    if (Use["BDT"])  // Adaptive Boost
        factory->BookMethod( TMVA::Types::kBDT, "BDT",
                             "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

    if (Use["BDTB"]) // Bagging
        factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                             "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

    if (Use["BDTD"]) // Decorrelation + Adaptive Boost
        factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                             "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

    if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
        factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                             "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

    // RuleFit -- TMVA implementation of Friedman's method
    if (Use["RuleFit"])
        factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                             "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

    // For an example of the category classifier usage, see: TMVAClassificationCategory

    // --------------------------------------------------------------------------------------------------

    // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

    // ---- STILL EXPERIMENTAL and only implemented for BDT's !
    // factory->OptimizeAllMethods("SigEffAt001","Scan");
    // factory->OptimizeAllMethods("ROCIntegral","FitGA");

    // --------------------------------------------------------------------------------------------------

    // ---- Now you can tell the factory to train, test, and evaluate the MVAs

    // Train MVAs using the set of training events
    factory->TrainAllMethods();

    // ---- Evaluate all MVAs using the set of test events
    factory->TestAllMethods();

    // ----- Evaluate and compare performance of all configured MVAs
    factory->EvaluateAllMethods();

    // --------------------------------------------------------------

    // Save the output
    outputFile->Close();

    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVAClassification is done!" << std::endl;

    delete factory;

    // Launch the GUI for the root macros
    if (!gROOT->IsBatch())
        gROOT->ProcessLine(TString::Format("TMVAGui(\"%s\")", outfileName.Data()));
}
Exemplo n.º 30
0
/**********************************************************************************
 * Project   : TMVA - a ROOT-integrated toolkit for multivariate data analysis    *
 * Package   : TMVA                                                               *
 * Root Macro: TMVAClassification                                                 *
 *                                                                                *
 * This macro provides examples for the training and testing of the               *
 * TMVA classifiers.                                                              *
 *                                                                                *
 * As input data is used a toy-MC sample consisting of four Gaussian-distributed  *
 * and linearly correlated input variables.                                       *
 *                                                                                *
 * The methods to be used can be switched on and off by means of booleans, or     *
 * via the prompt command, for example:                                           *
 *                                                                                *
 *    root -l ./TMVAClassification.C\(\"Fisher,Likelihood\"\)                     *
 *                                                                                *
 * (note that the backslashes are mandatory)                                      *
 * If no method given, a default set of classifiers is used.                      *
 *                                                                                *
 * The output file "TMVA.root" can be analysed with the use of dedicated          *
 * macros (simply say: root -l <macro.C>), which can be conveniently              *
 * invoked through a GUI that will appear at the end of the run of this macro.    *
 * Launch the GUI via the command:                                                *
 *                                                                                *
 *    root -l ./TMVAGui.C                                                         *
 *                                                                                *
 **********************************************************************************/
void TMVAClassification( TString myMethodList = "")
{
   TTree *signal     = (TTree *)gDirectory->Get("VertexG");

   if (! signal) { std::cout << "No signal TTree" << std::endl; return;}

   TTree *background = (TTree *)gDirectory->Get("VertexB");

   if (! background) { std::cout << "No background TTree" << std::endl; return;}

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string, int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 1;
   Use["CutsD"]           = 1;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   //
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 1;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 1;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 1;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 1; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 1; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 1; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 1; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine
   Use["SVM"]             = 1;
   //
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting
   Use["myBDTD"]          = 1; // mine
   //
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string, int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );

      for (size_t i = 0; i < mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;

            for (std::map<std::string, int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";

            std::cout << std::endl;
            return;
         }

         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile *outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
         "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // load the signal and background event samples from ROOT trees

   std::cout << " starts ... " << std::endl;
   // global event weights per tree (see below for setting event-wise weights)
   //   Float_t w;
   double signalWeight     = 1.0;
   double backgroundWeight = 1.0;

   std::cout << " signalWeight = " << signalWeight << " backWeight = " << backgroundWeight << std::endl;
   factory->AddSignalTree( signal,    signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );

   TString separator(":");
   TString Vnames(vnames);
   TObjArray *array = Vnames.Tokenize(separator);

   std::vector<std::string> inputVars;
   TIter next(array);
   TObjString *objs;

   while ((objs = (TObjString *) next())) {
      //    std::cout << objs->GetString() << std::endl;
      TString name(objs->GetString());

      if (name == "BEMC") continue;

      if (name == "noBEMC") continue;

      factory->AddVariable(name, 'F');
   }

   // This would set individual event weights (the variables defined in the
   // expression need to exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   // commented JB : 04/26 ??
   //factory->dSetBackgroundWeightExpression("weight");

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "";
   TCut mycutb = "";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //factory->PrepareTrainingAndTestTree( mycuts,mycutb,"NSigTrain=9000:NBkgTrain=50000:NSigTest=9000:NBkgTest=50000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=4900:nTrain_Background=49000:nTest_Signal=4900:nTest_Background=49000:SplitMode=Random:!V"); // for KFVertex
   //   factory->PrepareTrainingAndTestTree( mycuts, mycutb,"nTrain_Signal=20000:nTrain_Background=40000:nTest_Signal=20000:nTest_Background=40000:SplitMode=Random:!V"); // for PPV

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher",
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );


   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   if (Use["myBDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTDTEST",
                           "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   //   TMVA::IMethod* category         = factory->BookMethod( TMVA::Types::kCategory,"Category","" );

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events
#if 0
   factory->OptimizeAllMethods("SigEffAt001", "Scan");
   factory->OptimizeAllMethods("ROCIntegral", "GA");
#endif
   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;
}