void test_dae_mydataset() { TMVA::Tools::Instance(); TFile *inputFile = TFile::Open( "../datasets/mydataset.root"); TFile* outputFile = TFile::Open( "mydataset_output.root", "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory("TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar:AnalysisType=Regression" ); TMVA::DataLoader *loader=new TMVA::DataLoader("mydataset"); loader->AddVariable("var0", 'F'); loader->AddVariable("var1", 'F'); loader->AddVariable("var2", 'F'); loader->AddVariable("var3 := var0-var1", 'F'); loader->AddVariable("var4 := var0*var2", 'F'); loader->AddVariable("var5 := var1+var2", 'F'); TTree *tsignal = (TTree*)inputFile->Get("MyMCSig"); TTree *tbackground = (TTree*)inputFile->Get("MyMCBkg"); TCut mycuts = ""; TCut mycutb = ""; loader->AddSignalTree( tsignal, 1.0 ); loader->AddBackgroundTree( tbackground, 1.0 ); loader->PrepareTrainingAndTestTree( mycuts, mycutb,"nTrain_Signal=3000:nTrain_Background=3000:nTest_Signal=1449:nTest_Background=1449:SplitMode=Random:NormMode=NumEvents:!V"); TString layoutString ("Layout=TANH|3,LINEAR"); TString training0 ("LearningRate=1e-1,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True"); TString training1 ("LearningRate=1e-2,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1"); TString trainingStrategyString ("TrainingStrategy="); trainingStrategyString += training0 + "|" + training1; TString nnOptions ("AE(indexLayer=1;pretraining=false;!H:V:ErrorStrategy=SUMOFSQUARES:VarTransform=G:WeightInitialization=XAVIERUNIFORM"); nnOptions.Append (":"); nnOptions.Append (layoutString); nnOptions.Append (":"); nnOptions.Append (trainingStrategyString); nnOptions.Append (")"); cout << nnOptions.Data() << endl; TMVA::DataLoader* newloader = 
loader->VarTransform(nnOptions); }
std::pair<TString,TString> TMVAClassification ( TString infilename, AnalysisType analysisType = AnalysisType::DIRECT, TString additionalRootFileName = "") { TMVA::Tools::Instance(); std::string tmstr (now ()); TString tmstmp (tmstr.c_str ()); std::cout << "==> Start TMVAClassification" << std::endl; std::cout << "-------------------- open input file ---------------- " << std::endl; TString fname = infilename; //pathToData + infilename + TString (".root"); if (analysisType != AnalysisType::TRANSFORMED) fname = pathToData + infilename + TString (".root"); std::cout << "open file " << std::endl << fname.Data () << std::endl; std::cout << "-------------------- get tree ---------------- " << std::endl; TString treeName = "data"; if (analysisType == AnalysisType::TRANSFORMED) treeName = "transformed"; std::cout << "-------------------- create tchain with treeName ---------------- " << std::endl; std::cout << treeName << std::endl; TChain* tree = new TChain (treeName); std::cout << "add file" << std::endl; std::cout << fname << std::endl; tree->Add (fname); TChain* treeFriend (NULL); if (additionalRootFileName.Length () > 0) { std::cout << "-------------------- add additional input file ---------------- " << std::endl; std::cout << additionalRootFileName << std::endl; treeFriend = new TChain (treeName); treeFriend->Add (additionalRootFileName); tree->AddFriend (treeFriend,"p"); } // tree->Draw ("mass:prediction"); // return std::make_pair(TString("hallo"),TString ("nix")); TString outfileName; if (analysisType == AnalysisType::BACKGROUND) { outfileName = TString ("BACK_" + infilename) + tmstmp + TString (".root"); } else outfileName += TString ( "TMVA__" ) + tmstmp + TString (".root"); std::cout << "-------------------- open output file ---------------- " << std::endl; TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); std::cout << "-------------------- prepare factory ---------------- " << std::endl; TMVA::Factory *factory = new TMVA::Factory( 
"TMVAClassification", outputFile, "AnalysisType=Classification:Transformations=I:!V" ); std::cout << "-------------------- add variables ---------------- " << std::endl; for (auto varname : variableNames) { factory->AddVariable (varname.c_str (), 'F'); } for (auto varname : spectatorNames) { factory->AddSpectator (varname.c_str (), 'F'); } std::cout << "-------------------- add trees ---------------- " << std::endl; TCut signalCut ("signal==1"); TCut backgroundCut ("signal==0"); if (analysisType == AnalysisType::TRANSFORMED) { signalCut = "(signal_original==1 && signal_in==0)"; backgroundCut = "(signal_original==0 && signal_in==0)"; } if (analysisType == AnalysisType::BACKGROUND) { signalCut = TString("(signal==0) * (prediction > 0.7)"); backgroundCut = TString("(signal==0) * (prediction < 0.4)"); } //tree->Draw ("prediction",signalCut); //return std::make_pair(TString("hallo"),TString ("nix")); factory->AddTree(tree, "Signal", 1.0, baseCut + signalCut, "TrainingTesting"); factory->AddTree(tree, "Background", 1.0, baseCut + backgroundCut, "TrainingTesting"); TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; /* // Set individual event weights (the variables must exist in the original TTree) */ if (analysisType == AnalysisType::BACKGROUND) { factory->SetSignalWeightExpression ("prediction"); factory->SetBackgroundWeightExpression ("1"); } std::cout << "-------------------- prepare ---------------- " << std::endl; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); TString methodName (""); if (analysisType == AnalysisType::BACKGROUND) methodName = TString ("TONBKG_") + tmstmp; if (false) { // gradient boosting training methodName += TString("GBDT"); factory->BookMethod(TMVA::Types::kBDT, methodName, 
"NTrees=40:BoostType=Grad:Shrinkage=0.01:MaxDepth=7:UseNvars=6:nCuts=20:MinNodeSize=10"); } if (false) { methodName += TString("Likelihood"); factory->BookMethod( TMVA::Types::kLikelihood, methodName, "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); } if (false) { TString layoutString ("Layout=TANH|100,LINEAR"); TString training0 ("LearningRate=1e-1,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True"); TString training1 ("LearningRate=1e-2,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1"); TString training2 ("LearningRate=1e-2,Momentum=0.3,Repetitions=1,ConvergenceSteps=300,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True"); TString training3 ("LearningRate=1e-3,Momentum=0.1,Repetitions=1,ConvergenceSteps=200,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True"); TString trainingStrategyString ("TrainingStrategy="); trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3; TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:WeightInitialization=XAVIERUNIFORM"); nnOptions.Append (":"); nnOptions.Append (layoutString); nnOptions.Append (":"); nnOptions.Append (trainingStrategyString); methodName += TString("NNgauss"); factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN } if (false) { TString layoutString ("Layout=TANH|200,TANH|70,LINEAR"); TString training0 ("LearningRate=1e-2,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True"); TString training1 
("LearningRate=1e-3,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1"); TString training2 ("LearningRate=1e-4,Momentum=0.3,Repetitions=1,ConvergenceSteps=300,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True"); TString training3 ("LearningRate=1e-5,Momentum=0.1,Repetitions=1,ConvergenceSteps=200,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True"); TString trainingStrategyString ("TrainingStrategy="); trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3; // trainingStrategyString += training0 + "|" + training2 + "|" + training3; // trainingStrategyString += training0 + "|" + training2; // TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CROSSENTROPY"); TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:WeightInitialization=XAVIERUNIFORM"); // TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS"); nnOptions.Append (":"); nnOptions.Append (layoutString); nnOptions.Append (":"); nnOptions.Append (trainingStrategyString); methodName = TString("NNnormalized"); factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN } if (true) { TString layoutString ("Layout=TANH|100,TANH|50,LINEAR"); TString training0 ("LearningRate=1e-2,Momentum=0.0,Repetitions=1,ConvergenceSteps=100,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True"); TString training1 ("LearningRate=1e-3,Momentum=0.0,Repetitions=1,ConvergenceSteps=20,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1"); TString training2 
("LearningRate=1e-4,Momentum=0.0,Repetitions=1,ConvergenceSteps=20,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True"); TString training3 ("LearningRate=1e-5,Momentum=0.0,Repetitions=1,ConvergenceSteps=30,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True"); TString trainingStrategyString ("TrainingStrategy="); trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3; TString nnOptions ("!H:!V:ErrorStrategy=CROSSENTROPY:VarTransform=P+G:WeightInitialization=XAVIERUNIFORM"); nnOptions.Append (":"); nnOptions.Append (layoutString); nnOptions.Append (":"); nnOptions.Append (trainingStrategyString); methodName += TString("NNPG"); factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN } factory->TrainAllMethods(); // return std::make_pair(TString("hallo"),TString ("nix")); factory->TestAllMethods(); factory->EvaluateAllMethods(); //input->Close(); outputFile->Close(); // TMVA::TMVAGui (outfileName); delete factory; delete tree; switch (analysisType) { case AnalysisType::BACKGROUND: std::cout << "DONE BACKGROUND" << std::endl; break; case AnalysisType::DIRECT: std::cout << "DONE DIRECT" << std::endl; break; case AnalysisType::TRANSFORMED: std::cout << "DONE TRANSFORMED" << std::endl; break; }; std::cout << "classification, return : " << outfileName << " , " << methodName << std::endl; return std::make_pair (outfileName, methodName); }
/// Train a TMVA regression network that uses every input variable as both
/// input and target (autoencoder setup).
///
/// @param inputFileName Base name of the input file; the file opened is
///                      pathToData + inputFileName + ".root" and must contain a
///                      tree named "data".
/// @return The booked method name, "NN_<timestamp>". If the input file, the
///         "data" tree or the output file cannot be obtained, an error is written
///         to std::cerr and an empty TString is returned.
///
/// Results go to "TMVAAutoEnc__<inputFileName>__<timestamp>.root". Reads the
/// file-level names pathToData, variableNames, additionalVariableNames and now(),
/// which are defined outside this block.
TString autoencoder (std::string inputFileName)
{
    // Timestamp used to make output file and method names unique.
    const std::string tmstr (now ());
    const TString tmstmp (tmstr.c_str ());

    std::cout << "==> Start Autoencoder " << std::endl;
    std::cout << "-------------------- open input file ---------------- " << std::endl;
    const TString fname = pathToData + TString (inputFileName.c_str ()) + TString (".root");
    TFile* input = TFile::Open (fname);
    if (!input || input->IsZombie ())
    {
        std::cerr << "autoencoder: cannot open input file " << fname << std::endl;
        return TString ("");
    }

    std::cout << "-------------------- get tree ---------------- " << std::endl;
    // dynamic_cast yields nullptr both when the key is missing and when the
    // stored object is not a TTree.
    TTree* tree = dynamic_cast<TTree*> (input->Get ("data"));
    if (!tree)
    {
        std::cerr << "autoencoder: tree \"data\" not found in " << fname << std::endl;
        input->Close ();
        delete input;
        return TString ("");
    }

    TString outfileName ("TMVAAutoEnc__");
    outfileName += TString (inputFileName.c_str ()) + TString ("__") + tmstmp + TString (".root");

    std::cout << "-------------------- open output file ---------------- " << std::endl;
    TFile* outputFile = TFile::Open (outfileName, "RECREATE");
    if (!outputFile || outputFile->IsZombie ())
    {
        std::cerr << "autoencoder: cannot create output file " << outfileName << std::endl;
        input->Close ();
        delete input;
        return TString ("");
    }

    std::cout << "-------------------- prepare factory ---------------- " << std::endl;
    TMVA::Factory* factory = new TMVA::Factory ("TMVAAutoencoder", outputFile,
                                                "AnalysisType=Regression:Color:DrawProgressBar");

    std::cout << "-------------------- add variables ---------------- " << std::endl;
    // Each variable is registered as input and as regression target.
    for (const auto& varname : variableNames+additionalVariableNames)
    {
        factory->AddVariable (varname.c_str (), 'F');
        factory->AddTarget (varname.c_str (), 'F');
    }

    std::cout << "-------------------- add tree ---------------- " << std::endl;
    // Global event weight for the whole tree.
    Double_t regWeight = 1.0;
    factory->AddRegressionTree (tree, regWeight);

    std::cout << "-------------------- prepare ---------------- " << std::endl;
    TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";
    factory->PrepareTrainingAndTestTree (
        mycut, "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V");

    // Event-wise weights could be set here, e.g.
    // factory->SetWeightExpression( "var1", "Regression" );

    const TString methodName = TString ("NN_") + tmstmp;

    TString layoutString ("Layout=TANH|100,TANH|20,TANH|40,LINEAR");

    TString training0 ("LearningRate=1e-5,Momentum=0.5,Repetitions=1,ConvergenceSteps=500,BatchSize=50,TestRepetitions=7,WeightDecay=0.01,Regularization=NONE,DropConfig=0.5+0.5+0.5+0.5,DropRepetitions=2");
    TString training1 ("LearningRate=1e-5,Momentum=0.9,Repetitions=1,ConvergenceSteps=500,BatchSize=30,TestRepetitions=7,WeightDecay=0.01,Regularization=L2,DropConfig=0.1+0.1+0.1,DropRepetitions=1");
    TString training2 ("LearningRate=1e-4,Momentum=0.3,Repetitions=1,ConvergenceSteps=10,BatchSize=40,TestRepetitions=7,WeightDecay=0.1,Regularization=L2");
    // training3 is defined but currently left out of the strategy below.
    TString training3 ("LearningRate=1e-5,Momentum=0.1,Repetitions=1,ConvergenceSteps=10,BatchSize=10,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE");

    TString trainingStrategyString ("TrainingStrategy=");
    trainingStrategyString += training0 + "|" + training1 + "|" + training2; // + "|" + training3;

    TString nnOptions ("!H:V:ErrorStrategy=SUMOFSQUARES:VarTransform=N:WeightInitialization=XAVIERUNIFORM");
    nnOptions.Append (":");
    nnOptions.Append (layoutString);
    nnOptions.Append (":");
    nnOptions.Append (trainingStrategyString);

    factory->BookMethod (TMVA::Types::kNN, methodName, nnOptions); // NN

    factory->TrainAllMethods ();
    factory->TestAllMethods ();
    factory->EvaluateAllMethods ();

    outputFile->Close ();
    // TMVA::TMVARegGui (outfileName);

    delete factory;

    // Release the input file only after the factory is gone; the original code
    // never closed it.
    input->Close ();
    delete input;

    return methodName;
}