コード例 #1
0
ファイル: spr_var_selection.C プロジェクト: piti118/SPR
int spr_var_selection()
{
  // load lib
  gSystem->Load("/raid1/narsky/SPRroot/lib/libSPR.so");
  
  // create main SPR object
  SprRootAdapter spr;
  
  // load training data
  spr.loadDataFromAscii(1,"cleveland.data","train");
  
  // split data into train/test
  spr.split(0.7,true,2627277);
  
  // choose classes
  spr.chooseClasses("0:.");
  
  // show how much data we have
  const int nClasses = spr.nClasses();
  char classes[nClasses][200];
  int events [nClasses];
  double weights [nClasses];
  spr.showDataInClasses(classes,events,weights,"train");
  plotClasses("SPR_Class_1","train",nClasses,classes,events,weights);
  spr.showDataInClasses(classes,events,weights,"test");
  plotClasses("SPR_Class_2","test",nClasses,classes,events,weights);

  // compute correlations between variables
  const unsigned dim = spr.dim();
  double corr [dim*dim];
  char vars[dim][200];
  spr.vars(vars);
  spr.correlation(0,corr,"train");// background
  plotCorrelation("SPR_1B","background",dim,vars,corr);
  spr.correlation(1,corr,"train");// signal
  plotCorrelation("SPR_1S","signal",dim,vars,corr);

  // compute correlation with the class label
  double corr [dim];
  spr.correlationClassLabel("normal",vars,corr,"test");
  plotImportance("SPR_0","Correlation with class label",
		 dim,vars,corr,0,true);

  // select classifiers
  spr.addRandomForest("rf",2,200,0,10);

  // train
  int verbose = 0;// use >0 to increase verbosity level
  spr.train(verbose);
  
  // save classifier
  spr.saveClassifier("rf","rf.spr");

  // test
  spr.test();
  
  // estimate importance of all variables
  const unsigned nVars = spr.nClassifierVars("rf");
  double importance [nVars];
  double impError [nVars];
  spr.variableImportance("rf",10,vars,importance,impError);
  plotImportance("SPR_1","Variable Importance for RF from permutations",
		 nVars,vars,importance,impError,true);

  // estimate interactions
  // Note: the errors are zero because all points are used for
  // integration!
  const char* subset = "";
  unsigned nPoints = 0;// use all points
  double interaction [nVars];
  double intError [nVars];
  spr.variableInteraction("rf",subset,nPoints,
			  vars,interaction,intError); 
  plotImportance("SPR_3","Variable Interaction for RF",
		 nVars,vars,interaction,intError,true);

  // choose a subset of variables
  char useVars[5][200];
  strcpy(useVars[0],"sex");
  strcpy(useVars[1],"cp");
  strcpy(useVars[2],"oldpeak");
  strcpy(useVars[3],"ca");
  strcpy(useVars[4],"thal");
  spr.chooseVars(5,useVars);
  spr.loadDataFromAscii(1,"cleveland.data","train");

  // use identical splitting
  spr.split(0.7,true,2627277);
  spr.chooseClasses("0:.");

  // train RF on the reduced subset
  spr.addRandomForest("rf_5vars",2,200,0,10);
  spr.train(verbose);

  // save
  spr.saveClassifier("rf_5vars","rf_5vars.spr");

  // compute classifier responses
  spr.test();
  
  // recompute variable importance
  const unsigned nVarsReduced = spr.nClassifierVars("rf_5vars");
  char varsReduced [nVarsReduced][200];
  double importanceReduced [nVarsReduced];
  double impErrorReduced [nVarsReduced];
  spr.variableImportance("rf_5vars",10,varsReduced,
			 importanceReduced,impErrorReduced);
  plotImportance("SPR_2","Variable Importance for RF Reduced",
		 nVarsReduced,varsReduced,
		 importanceReduced,impErrorReduced,true);

  // choose a subset of variables
  char useVars2[6][200];
  strcpy(useVars2[0],"age");
  strcpy(useVars2[1],"chol");
  strcpy(useVars2[2],"trestbps");
  strcpy(useVars2[3],"thalach");
  strcpy(useVars2[4],"cp");
  strcpy(useVars2[5],"oldpeak");
  spr.chooseVars(6,useVars2);
  spr.loadDataFromAscii(1,"cleveland.data","train");

  // use identical splitting
  spr.split(0.7,true,2627277);
  spr.chooseClasses("0:.");

  // train RF on the reduced subset
  spr.addRandomForest("rf_6vars",2,200,0,10);
  spr.train(verbose);

  // save
  spr.saveClassifier("rf_6vars","rf_6vars.spr");

  // choose a subset of variables
  char useVars3[6][200];
  strcpy(useVars3[0],"ca");
  strcpy(useVars3[1],"exang");
  strcpy(useVars3[2],"fbs");
  strcpy(useVars3[3],"sex");
  strcpy(useVars3[4],"slope");
  strcpy(useVars3[5],"thal");
  spr.chooseVars(6,useVars3);
  spr.loadDataFromAscii(1,"cleveland.data","train");

  // use identical splitting
  spr.split(0.7,true,2627277);
  spr.chooseClasses("0:.");

  // train RF on the reduced subset
  spr.addRandomForest("rf_add2rem1",2,200,0,10);
  spr.train(verbose);

  // save
  spr.saveClassifier("rf_add2rem1","rf_add2rem1.spr");

  // reload data
  spr.chooseAllVars();
  spr.loadDataFromAscii(1,"cleveland.data","train");
  spr.split(0.7,true,2627277);
  spr.chooseClasses("0:.");

  // reload save classifiers
  spr.loadClassifier("rf","rf.spr");
  spr.loadClassifier("rf_5vars","rf_5vars.spr");
  spr.loadClassifier("rf_6vars","rf_6vars.spr");
  spr.loadClassifier("rf_add2rem1","rf_add2rem1.spr");

  // rerun test
  spr.test();

  // get the signal-vs-bgrnd curve
  const int ntrained = spr.nTrained();
  const int npts = 9;
  double signalEff [npts] = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9 };
  double bgrndEff [ntrained*npts];
  double bgrndErr [ntrained*npts];
  double fom [ntrained*npts];
  char classifiers[ntrained][200];
  spr.allEffCurves(npts,signalEff,classifiers,bgrndEff,bgrndErr,fom);
  plotEffCurveMulti("SPR_RF",ntrained,npts,signalEff,
		    classifiers,bgrndEff,bgrndErr,0);

  // exit
  return 0;
}
コード例 #2
0
ファイル: spr_transform.C プロジェクト: aashaqshah/cmssw-1
/*
  The following shows how to apply a PCA transformation to a subset 
  of input variables.

  You need to prepare tmva_subset_root.pat. Copy tmva_root.pat and
  remove var2 on the Leaves: line. This will force the reader to read
  in only 3 variables: var1, var3, and var4.

  After you plot the results, you can see that only variables 
  var1, var3 and var4 have been decorrelated. Variable var2 shows
  substantial correlation with others.
*/
int spr_transform_2()
{
  // load lib
  gSystem->Load("/afs/cern.ch/user/n/narsky/w0/CMSSW_1_8_X_2007-11-09-0200/lib/slc4_ia32_gcc345/libPhysicsToolsStatPatternRecognition.so");
  
  // create main SPR object
  SprRootAdapter spr;

  // load training data
  spr.loadDataFromRoot("tmva_subset_root.pat","train");

  // split data into train/test
  spr.split(0.5,true);
  
  // choose classes
  spr.chooseClasses("0:1");
  
  // show how much data we have
  const int nClasses = spr.nClasses();
  char classes[nClasses][200];
  int events [nClasses];
  double weights [nClasses];
  spr.showDataInClasses(classes,events,weights,"train");
  plotClasses("SPR_Class_1","train",nClasses,classes,events,weights);
  spr.showDataInClasses(classes,events,weights,"test");
  plotClasses("SPR_Class_2","test",nClasses,classes,events,weights);

  // perform PCA
  int verbose = 0;
  spr.trainVarTransformer("PCA",verbose);
  spr.saveVarTransformer("pca.spr");

  // reload training and test data, now with all variables included
  spr.loadDataFromRoot("tmva_root.pat","train");
  spr.split(0.5,true);
  spr.chooseClasses("0:1");

  // compute correlations between variables before PCA transform
  const unsigned dim = spr.dim();
  double corr [dim*dim];
  char vars[dim][200];
  spr.vars(vars);
  spr.correlation(0,corr,"train");// background
  plotCorrelation("SPR_1B","background",dim,vars,corr);
  spr.correlation(1,corr,"train");// signal
  plotCorrelation("SPR_1S","signal",dim,vars,corr);

  // apply PCA transform
  spr.transform();

  // Plot correlations after PCA transform.
  spr.vars(vars);
  spr.correlation(0,corr,"train");// background
  plotCorrelation("SPR_2B","decorrelated background",dim,vars,corr);
  spr.correlation(1,corr,"train");// signal
  plotCorrelation("SPR_2S","decorrelated signal",dim,vars,corr);

  // exit
  return 0;
}
コード例 #3
0
ファイル: spr_transform.C プロジェクト: aashaqshah/cmssw-1
int spr_transform_1()
{
  // load lib
  gSystem->Load("/afs/cern.ch/user/n/narsky/w0/CMSSW_1_8_X_2007-11-29-1600/lib/slc4_ia32_gcc345/libPhysicsToolsStatPatternRecognition.so");
  
  // create main SPR object
  SprRootAdapter spr;

  // load training data
  spr.loadDataFromRoot("tmva_root.pat","train");

  // split data into train/test
  spr.split(0.5,true);
  
  // choose classes
  spr.chooseClasses("0:1");
  
  // show how much data we have
  const int nClasses = spr.nClasses();
  char classes[nClasses][200];
  int events [nClasses];
  double weights [nClasses];
  spr.showDataInClasses(classes,events,weights,"train");
  plotClasses("SPR_Class_1","train",nClasses,classes,events,weights);
  spr.showDataInClasses(classes,events,weights,"test");
  plotClasses("SPR_Class_2","test",nClasses,classes,events,weights);

  // compute correlations between variables
  const unsigned dim = spr.dim();
  double corr [dim*dim];
  char vars[dim][200];
  spr.vars(vars);
  spr.correlation(0,corr,"train");// background
  plotCorrelation("SPR_1B","background",dim,vars,corr);
  spr.correlation(1,corr,"train");// signal
  plotCorrelation("SPR_1S","signal",dim,vars,corr);

  // save original test data
  spr.saveTestData("test_orig.root");

  // perform PCA
  int verbose = 0;
  spr.trainVarTransformer("PCA",verbose);
  spr.transform();

  // Plot correlations after PCA transform.
  // Note that PCA transform does not change dimensionality.
  // But it changes variable names!
  spr.vars(vars);
  spr.correlation(0,corr,"train");// background
  plotCorrelation("SPR_2B","decorrelated background",dim,vars,corr);
  spr.correlation(1,corr,"train");// signal
  plotCorrelation("SPR_2S","decorrelated signal",dim,vars,corr);

  // save transformer to a file for future reference
  spr.saveVarTransformer("pca.spr");

  // select classifiers
  spr.addBoostedBinarySplits("splits",100,20);

  // train
  spr.train(verbose);
  
  // test
  spr.test();
  
  // get signal-vs-background curve for classifiers
  const int npts = 9;
  double signalEff [npts] = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9 };
  const int ntrained = spr.nTrained();
  double bgrndEff [npts*ntrained];
  double bgrndErr [npts*ntrained];
  double fom [npts*ntrained];
  char classifiers[ntrained][200];
  spr.allEffCurves(npts,signalEff,classifiers,bgrndEff,bgrndErr,fom);
  plotEffCurveMulti("SPR_3",ntrained,npts,signalEff,
		    classifiers,bgrndEff,bgrndErr,0);
  
  // save transformed test data with classifier responses
  spr.saveTestData("test_transformed.root");

  // exit
  return 0;
}
コード例 #4
0
ファイル: spr_mlp.C プロジェクト: aashaqshah/cmssw-1
int spr_mlp()
{
  // load lib
  gSystem->Load("/afs/cern.ch/user/n/narsky/w0/CMSSW_1_8_X_2007-11-29-1600/lib/slc4_ia32_gcc345/libPhysicsToolsStatPatternRecognition.so");
  
  // create main SPR object
  SprRootAdapter spr;
  
  // load training data
  spr.loadDataFromRoot("mlp_root.pat","train");
  
  // split data into train/test
  spr.split(0.5,true);
  
  // choose classes
  spr.chooseClasses("0:1");
  
  // show how much data we have
  const int nClasses = spr.nClasses();
  char classes[nClasses][200];
  int events [nClasses];
  double weights [nClasses];
  spr.showDataInClasses(classes,events,weights,"train");
  plotClasses("SPR_Class_1","train",nClasses,classes,events,weights);
  spr.showDataInClasses(classes,events,weights,"test");
  plotClasses("SPR_Class_2","test",nClasses,classes,events,weights);

  // compute correlations between variables
  const unsigned dim = spr.dim();
  double corr [dim*dim];
  char vars[dim][200];
  spr.vars(vars);
  spr.correlation(0,corr,"train");// background
  plotCorrelation("SPR_1B","background",dim,vars,corr);
  spr.correlation(1,corr,"train");// signal
  plotCorrelation("SPR_1S","signal",dim,vars,corr);

  // select classifiers
  spr.addFisher("fisher",1);
  spr.addStdBackprop("mlp","2:5:3:1",100,0.1,0.5,100,5);
  spr.addRandomForest("rf",100,400,1,20);
  spr.addRandomForest("arcx4",100,400,1,20,true);

  // train
  int verbose = 0;// use >0 to increase verbosity level
  spr.train(verbose);
  
  // test
  spr.test();
  
  // get the signal-vs-bgrnd curve
  const int npts = 9;
  double signalEff [npts] = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9 };
  const int ntrained = spr.nTrained();
  double bgrndEff [npts*ntrained];
  double bgrndErr [npts*ntrained];
  double fom [npts*ntrained];
  char classifiers[ntrained][200];
  spr.allEffCurves(npts,signalEff,classifiers,bgrndEff,bgrndErr,fom);
  plotEffCurveMulti("SPR_3",ntrained,npts,signalEff,
		    classifiers,bgrndEff,bgrndErr,0);

  // histogram output of ArcX4 for signal and background
  const int nbin = 40;
  double xlo = 0.;
  double xhi = 0.7;
  double sig[nbin], sigerr[nbin], bgr[nbin], bgrerr[nbin];
  spr.histogram("arcx4",xlo,xhi,nbin,sig,sigerr,bgr,bgrerr);
  plotHistogram("SPR_6","log","ArcX4 output",
    		xlo,xhi,nbin,sig,sigerr,bgr,bgrerr);

  // save NN into a file
  spr.saveClassifier("mlp","mlp.spr");

  // save test data with classifier response into a root file
  spr.saveTestData("mytest.root");

  // estimate importance of all variables
  // use 3 permutations per variable
  const unsigned nVars = spr.nClassifierVars("arcx4");
  char vars[nVars][200];
  double importance [nVars];
  double impError [nVars];
  spr.variableImportance("arcx4",3,vars,importance,impError);
  plotImportance("SPR_7","Variable Importance for ArcX4",
		 nVars,vars,importance,impError,true);

  // exit
  return 0;
}