void test_dae_mydataset()
{
	TMVA::Tools::Instance();
	TFile *inputFile = TFile::Open( "../datasets/mydataset.root"); 
	TFile* outputFile = TFile::Open( "mydataset_output.root", "RECREATE" );
	TMVA::Factory *factory = new TMVA::Factory("TMVARegression", outputFile, 
	                                           "!V:!Silent:Color:DrawProgressBar:AnalysisType=Regression" );
	TMVA::DataLoader *loader=new TMVA::DataLoader("mydataset");
	loader->AddVariable("var0", 'F');
	loader->AddVariable("var1", 'F');
	loader->AddVariable("var2", 'F');
	loader->AddVariable("var3 := var0-var1", 'F');
	loader->AddVariable("var4 := var0*var2", 'F');
	loader->AddVariable("var5 := var1+var2", 'F');
	TTree *tsignal = (TTree*)inputFile->Get("MyMCSig");
	TTree *tbackground = (TTree*)inputFile->Get("MyMCBkg");
	TCut mycuts = "";
	TCut mycutb = "";
	loader->AddSignalTree( tsignal,     1.0 );
	loader->AddBackgroundTree( tbackground, 1.0 );
	loader->PrepareTrainingAndTestTree( mycuts, mycutb,"nTrain_Signal=3000:nTrain_Background=3000:nTest_Signal=1449:nTest_Background=1449:SplitMode=Random:NormMode=NumEvents:!V");

	TString layoutString ("Layout=TANH|3,LINEAR");
	TString training0 ("LearningRate=1e-1,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
	TString training1 ("LearningRate=1e-2,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
	TString trainingStrategyString ("TrainingStrategy=");
	trainingStrategyString += training0 + "|" + training1;
	TString nnOptions ("AE(indexLayer=1;pretraining=false;!H:V:ErrorStrategy=SUMOFSQUARES:VarTransform=G:WeightInitialization=XAVIERUNIFORM");
	nnOptions.Append (":");
	nnOptions.Append (layoutString);
	nnOptions.Append (":");
	nnOptions.Append (trainingStrategyString);
	nnOptions.Append (")");
	cout << nnOptions.Data() << endl;
	TMVA::DataLoader* newloader = loader->VarTransform(nnOptions);
}
Esempio n. 2
0
std::pair<TString,TString> TMVAClassification (
    TString infilename,
    AnalysisType analysisType = AnalysisType::DIRECT,
    TString additionalRootFileName = "")
{
    TMVA::Tools::Instance();

    std::string tmstr (now ());
    TString tmstmp (tmstr.c_str ());
   
  
    std::cout << "==> Start TMVAClassification" << std::endl;
    std::cout << "-------------------- open input file ---------------- " << std::endl;
    TString fname = infilename; //pathToData + infilename + TString (".root");
    if (analysisType != AnalysisType::TRANSFORMED)
        fname = pathToData + infilename + TString (".root");
    std::cout << "open file " << std::endl << fname.Data () << std::endl;


    std::cout << "-------------------- get tree ---------------- " << std::endl;
    TString treeName = "data";
    if (analysisType == AnalysisType::TRANSFORMED)
        treeName = "transformed";

    std::cout << "-------------------- create tchain with treeName ---------------- " << std::endl;
    std::cout << treeName << std::endl;
    TChain* tree = new TChain (treeName);
    std::cout << "add file" << std::endl;
    std::cout << fname << std::endl;
    tree->Add (fname);
    TChain* treeFriend (NULL);
    if (additionalRootFileName.Length () > 0)
    {
        std::cout << "-------------------- add additional input file ---------------- " << std::endl;
        std::cout << additionalRootFileName << std::endl;
        treeFriend = new TChain (treeName);
        treeFriend->Add (additionalRootFileName);
        tree->AddFriend (treeFriend,"p");
    }
//    tree->Draw ("mass:prediction");
//    return std::make_pair(TString("hallo"),TString ("nix"));
    TString outfileName;
    if (analysisType == AnalysisType::BACKGROUND)
    {
        outfileName = TString ("BACK_" + infilename) + tmstmp + TString (".root");
    }
    else
        outfileName += TString ( "TMVA__" ) + tmstmp + TString (".root");

    std::cout << "-------------------- open output file ---------------- " << std::endl;
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    std::cout << "-------------------- prepare factory ---------------- " << std::endl;
    TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
						"AnalysisType=Classification:Transformations=I:!V" );
    std::cout << "-------------------- add variables ---------------- " << std::endl;


    for (auto varname : variableNames)
    {
	factory->AddVariable (varname.c_str (), 'F');
    }

    for (auto varname : spectatorNames)
    {
	factory->AddSpectator (varname.c_str (), 'F');
    }
    
   
    std::cout << "-------------------- add trees ---------------- " << std::endl;
    TCut signalCut ("signal==1");
    TCut backgroundCut ("signal==0");
    if (analysisType == AnalysisType::TRANSFORMED)
    {
        signalCut = "(signal_original==1 && signal_in==0)";
        backgroundCut = "(signal_original==0 && signal_in==0)";
    }
    if (analysisType == AnalysisType::BACKGROUND)
    {
        signalCut     = TString("(signal==0) * (prediction > 0.7)");
        backgroundCut = TString("(signal==0) * (prediction < 0.4)");
    }
    //tree->Draw ("prediction",signalCut);
    //return std::make_pair(TString("hallo"),TString ("nix"));
    factory->AddTree(tree, "Signal", 1.0, baseCut + signalCut, "TrainingTesting");
    factory->AddTree(tree, "Background", 1.0, baseCut + backgroundCut, "TrainingTesting");


    
    TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
    TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

    /* // Set individual event weights (the variables must exist in the original TTree) */
    if (analysisType == AnalysisType::BACKGROUND)
    {
        factory->SetSignalWeightExpression ("prediction");
        factory->SetBackgroundWeightExpression ("1");
    }

   
    std::cout << "-------------------- prepare ---------------- " << std::endl;
    factory->PrepareTrainingAndTestTree( mycuts, mycutb,
					 "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );


    TString methodName ("");
    if (analysisType == AnalysisType::BACKGROUND)
        methodName = TString ("TONBKG_") + tmstmp;

    if (false)
    {
	// gradient boosting training
        methodName += TString("GBDT");
	factory->BookMethod(TMVA::Types::kBDT, methodName,
			    "NTrees=40:BoostType=Grad:Shrinkage=0.01:MaxDepth=7:UseNvars=6:nCuts=20:MinNodeSize=10");
    }
    if (false)
    {
        methodName += TString("Likelihood");
	factory->BookMethod( TMVA::Types::kLikelihood, methodName,
			     "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
    }
    

    
    if (false)
    {
	TString layoutString ("Layout=TANH|100,LINEAR");

	TString training0 ("LearningRate=1e-1,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
	TString training1 ("LearningRate=1e-2,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
	TString training2 ("LearningRate=1e-2,Momentum=0.3,Repetitions=1,ConvergenceSteps=300,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True");
	TString training3 ("LearningRate=1e-3,Momentum=0.1,Repetitions=1,ConvergenceSteps=200,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True");

	TString trainingStrategyString ("TrainingStrategy=");
	trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3;
      
	TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:WeightInitialization=XAVIERUNIFORM");
	nnOptions.Append (":"); nnOptions.Append (layoutString);
	nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

        methodName += TString("NNgauss");
	factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN
    }

    if (false)
    {
	TString layoutString ("Layout=TANH|200,TANH|70,LINEAR");

	TString training0 ("LearningRate=1e-2,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
	TString training1 ("LearningRate=1e-3,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
	TString training2 ("LearningRate=1e-4,Momentum=0.3,Repetitions=1,ConvergenceSteps=300,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True");
	TString training3 ("LearningRate=1e-5,Momentum=0.1,Repetitions=1,ConvergenceSteps=200,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True");

	TString trainingStrategyString ("TrainingStrategy=");
	trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3;
//	trainingStrategyString += training0 + "|" + training2 + "|" + training3;
//	trainingStrategyString += training0 + "|" + training2;

      
	//       TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CROSSENTROPY");
	TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:WeightInitialization=XAVIERUNIFORM");
	//       TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS");
	nnOptions.Append (":"); nnOptions.Append (layoutString);
	nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

        methodName = TString("NNnormalized");
        factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN
    }


    if (true)
    {
	TString layoutString ("Layout=TANH|100,TANH|50,LINEAR");

	TString training0 ("LearningRate=1e-2,Momentum=0.0,Repetitions=1,ConvergenceSteps=100,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
	TString training1 ("LearningRate=1e-3,Momentum=0.0,Repetitions=1,ConvergenceSteps=20,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
	TString training2 ("LearningRate=1e-4,Momentum=0.0,Repetitions=1,ConvergenceSteps=20,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True");
	TString training3 ("LearningRate=1e-5,Momentum=0.0,Repetitions=1,ConvergenceSteps=30,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True");

	TString trainingStrategyString ("TrainingStrategy=");
	trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3;

      
	TString nnOptions ("!H:!V:ErrorStrategy=CROSSENTROPY:VarTransform=P+G:WeightInitialization=XAVIERUNIFORM");
	nnOptions.Append (":"); nnOptions.Append (layoutString);
	nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

        methodName += TString("NNPG");
	factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN
    }
   
   
   
    factory->TrainAllMethods();
//    return std::make_pair(TString("hallo"),TString ("nix"));
    factory->TestAllMethods();
    factory->EvaluateAllMethods();

    //input->Close();
    outputFile->Close();

//    TMVA::TMVAGui (outfileName);
   
    delete factory;
    delete tree;
    switch (analysisType)
    {
    case AnalysisType::BACKGROUND:
        std::cout << "DONE BACKGROUND" << std::endl;
        break;
    case AnalysisType::DIRECT:
        std::cout << "DONE DIRECT" << std::endl;
        break;
    case AnalysisType::TRANSFORMED:
        std::cout << "DONE TRANSFORMED" << std::endl;
        break;
        
    };
    std::cout << "classification, return : " << outfileName << "  ,  " << methodName << std::endl;
    return std::make_pair (outfileName, methodName);
}
Esempio n. 3
0
TString autoencoder (std::string inputFileName) 
{

    std::string tmstr (now ());
    TString tmstmp (tmstr.c_str ());
   
  
    std::cout << "==> Start Autoencoder " << std::endl;
    std::cout << "-------------------- open input file ---------------- " << std::endl;
    TString fname = pathToData + TString (inputFileName.c_str ()) + TString (".root");
    TFile *input = TFile::Open( fname );

    std::cout << "-------------------- get tree ---------------- " << std::endl;
    TTree *tree     = (TTree*)input->Get("data");
   
    TString outfileName( "TMVAAutoEnc__" );
    outfileName += TString (inputFileName.c_str ()) + TString ("__") + tmstmp + TString (".root");

    std::cout << "-------------------- open output file ---------------- " << std::endl;
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    std::cout << "-------------------- prepare factory ---------------- " << std::endl;
    TMVA::Factory *factory = new TMVA::Factory( "TMVAAutoencoder", outputFile,
						"AnalysisType=Regression:Color:DrawProgressBar" );
    std::cout << "-------------------- add variables ---------------- " << std::endl;


    for (auto varname : variableNames+additionalVariableNames)
    {
	factory->AddVariable (varname.c_str (), 'F');
	factory->AddTarget   (varname.c_str (), 'F');
    }



    std::cout << "-------------------- add tree ---------------- " << std::endl;
    // global event weights per tree (see below for setting event-wise weights)
    Double_t regWeight  = 1.0;   
    factory->AddRegressionTree (tree, regWeight);

   
    std::cout << "-------------------- prepare ---------------- " << std::endl;
    TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";
    factory->PrepareTrainingAndTestTree( mycut, 
					 "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );


    /* // This would set individual event weights (the variables defined in the  */
    /* // expression need to exist in the original TTree) */
    /* factory->SetWeightExpression( "var1", "Regression" ); */


    if (true)
    {
	TString layoutString ("Layout=TANH|100,TANH|20,TANH|40,LINEAR");

	TString training0 ("LearningRate=1e-5,Momentum=0.5,Repetitions=1,ConvergenceSteps=500,BatchSize=50,TestRepetitions=7,WeightDecay=0.01,Regularization=NONE,DropConfig=0.5+0.5+0.5+0.5,DropRepetitions=2");
	TString training1 ("LearningRate=1e-5,Momentum=0.9,Repetitions=1,ConvergenceSteps=500,BatchSize=30,TestRepetitions=7,WeightDecay=0.01,Regularization=L2,DropConfig=0.1+0.1+0.1,DropRepetitions=1");
	TString training2 ("LearningRate=1e-4,Momentum=0.3,Repetitions=1,ConvergenceSteps=10,BatchSize=40,TestRepetitions=7,WeightDecay=0.1,Regularization=L2");
	TString training3 ("LearningRate=1e-5,Momentum=0.1,Repetitions=1,ConvergenceSteps=10,BatchSize=10,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE");

	TString trainingStrategyString ("TrainingStrategy=");
	trainingStrategyString += training0 + "|" + training1 + "|" + training2 ; //+ "|" + training3;

       
	//       TString trainingStrategyString ("TrainingStrategy=LearningRate=1e-1,Momentum=0.3,Repetitions=3,ConvergenceSteps=20,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,L1=false,DropFraction=0.0,DropRepetitions=5");

	TString nnOptions ("!H:V:ErrorStrategy=SUMOFSQUARES:VarTransform=N:WeightInitialization=XAVIERUNIFORM");
	//       TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS");
	nnOptions.Append (":"); nnOptions.Append (layoutString);
	nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

	factory->BookMethod( TMVA::Types::kNN, TString("NN_")+tmstmp, nnOptions ); // NN
    }


   
    // --------------------------------------------------------------------------------------------------
    factory->TrainAllMethods();
    factory->TestAllMethods();
    factory->EvaluateAllMethods();

    outputFile->Close();

//    TMVA::TMVARegGui (outfileName);
   
    delete factory;
    return TString("NN_")+tmstmp;
}