예제 #1
0
파일: main.cpp 프로젝트: litaoshao/waffles
// Prepends a single "index" column to a dataset and prints the merged result
// to stdout.
//
// Arguments consumed from args: the dataset filename, then any mix of
//   -start <value>      index assigned to the first row (default 0.0)
//   -increment <value>  amount added per row (default 1.0)
// Any other remaining argument is reported through ThrowError.
void AddIndexAttribute(GArgReader& args)
{
	const char* szFile = args.pop_string();

	// Option defaults
	double first = 0.0;
	double step = 1.0;
	while(args.size() > 0)
	{
		if(args.if_pop("-start"))
		{
			first = args.pop_double();
			continue;
		}
		if(args.if_pop("-increment"))
		{
			step = args.pop_double();
			continue;
		}
		ThrowError("Invalid option: ", args.peek());
	}

	// Load the dataset that will receive the extra column
	GMatrix* pSource = loadData(szFile);
	Holder<GMatrix> hSource(pSource);

	// Build a one-column matrix with the same number of rows as the source
	GArffRelation* pRel = new GArffRelation();
	pRel->addAttribute("index", 0, NULL);
	sp_relation spRel = pRel;
	GMatrix indexColumn(spRel);
	indexColumn.newRows(pSource->rows());
	for(size_t r = 0; r < pSource->rows(); r++)
	{
		indexColumn.row(r)[0] = first + r * step;
	}

	// The index column goes on the left, the original data on the right
	GMatrix* pMerged = GMatrix::mergeHoriz(&indexColumn, pSource);
	Holder<GMatrix> hMerged(pMerged);
	pMerged->print(cout);
}
예제 #2
0
파일: main.cpp 프로젝트: litaoshao/waffles
// Binarizes one continuous column of a dataset and prints the result.
//
// Arguments consumed from args, in order: dataset filename, column index,
// threshold value. Cells less than or equal to the threshold become 0; all
// other cells become 1. Reports an error through ThrowError when the column
// index is out of range or the column is not continuous.
void threshold(GArgReader& args){
  GMatrix* pMatrix = loadData(args.pop_string());
  Holder<GMatrix> hMatrix(pMatrix);

  // Validate the requested column before reading the threshold value
  unsigned col = args.pop_uint();
  if(col >= hMatrix->cols()){
    std::stringstream err;
    if(hMatrix->cols() < 1){
      err << "This data has no columns to threshold.";
    }else{
      err << "The column to threshold is too large.   It should be in the range [0.."
	  << (hMatrix->cols()-1) << "].";
    }
    ThrowError(err.str());
  }
  if(hMatrix->relation()->valueCount(col) != 0){
    ThrowError("Can only use threshold on continuous attributes.");
  }
  double cutoff = args.pop_double();

  // Replace each cell in the column with 0 or 1
  for(size_t r = 0; r < hMatrix->rows(); ++r){
    double& cell = hMatrix->row(r)[col];
    cell = (cell <= cutoff) ? 0.0 : 1.0;
  }

  // Emit the modified data
  hMatrix->print(cout);
}
예제 #3
0
// Runs fuzzy k-means clustering on a dataset and prints the matrix returned
// by the clusterer.
//
// Arguments consumed from args: dataset filename, cluster count, then any of
//   -seed <uint>         random seed (default derived from pid and time)
//   -fuzzifier <double>  passed to setFuzzifier (default 1.3)
//   -reps <uint>         passed to setReps (default 1)
// Throws Ex on an unrecognized option.
void fuzzykmeans(GArgReader& args)
{
	// Required arguments
	GMatrix input;
	loadData(input, args.pop_string());
	int k = args.pop_uint();

	// Optional arguments
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	double fuzz = 1.3;
	size_t repetitions = 1;
	while(args.size() > 0)
	{
		if(args.if_pop("-seed"))
		{
			seed = args.pop_uint();
			continue;
		}
		if(args.if_pop("-fuzzifier"))
		{
			fuzz = args.pop_double();
			continue;
		}
		if(args.if_pop("-reps"))
		{
			repetitions = args.pop_uint();
			continue;
		}
		throw Ex("Invalid option: ", args.peek());
	}

	// Configure and run the clusterer
	GRand prng(seed);
	GFuzzyKMeans clusterer(k, &prng);
	clusterer.setFuzzifier(fuzz);
	clusterer.setReps(repetitions);
	std::unique_ptr<GMatrix> hResult(clusterer.reduce(input));
	hResult->print(cout);
}
예제 #4
0
파일: main.cpp 프로젝트: litaoshao/waffles
// Prints the value returned by GMath::wilcoxonPValue for the two arguments
// consumed from args: an unsigned count followed by a double statistic.
void wilcoxon(GArgReader& args)
{
	const size_t count = args.pop_uint();
	const double stat = args.pop_double();
	const double pValue = GMath::wilcoxonPValue(count, stat);
	cout << pValue << "\n";
}
예제 #5
0
파일: main.cpp 프로젝트: litaoshao/waffles
// Adds noise to the leading columns of a dataset and prints the result.
// Each affected cell is incremented by dev * prng.normal().
//
// Arguments consumed from args: dataset filename, the scale factor dev, then
// any of the flags
//   -seed <uint>         random seed (default derived from pid and time)
//   -excludelast <uint>  number of trailing columns to leave untouched
// Reports an error through ThrowError for an unknown flag, or when
// -excludelast exceeds the number of columns in the data.
void addNoise(GArgReader& args)
{
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	double dev = args.pop_double();

	// Parse the options
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	size_t excludeLast = 0;
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else if(args.if_pop("-excludelast"))
			excludeLast = args.pop_uint();
		else
			ThrowError("Invalid option: ", args.peek());
	}

	// Without this check the subtraction below would wrap around in size_t
	// and the inner loop would write far past the end of every row.
	if(excludeLast > pData->cols())
		ThrowError("The -excludelast value exceeds the number of columns in the data");

	GRand prng(seed);
	size_t cols = pData->cols() - excludeLast;
	for(size_t r = 0; r < pData->rows(); r++)
	{
		double* pRow = pData->row(r);
		for(size_t c = 0; c < cols; c++)
			*(pRow++) += dev * prng.normal();
	}
	pData->print(cout);
}
예제 #6
0
파일: main.cpp 프로젝트: litaoshao/waffles
// Multiplies a matrix by a scalar and prints the result.
//
// Arguments consumed from args: the dataset filename followed by the scalar.
// Any argument left over after those two is reported through ThrowError.
void multiplyScalar(GArgReader& args)
{
	GMatrix* pMatrix = loadData(args.pop_string());
	Holder<GMatrix> hMatrix(pMatrix);
	const double factor = args.pop_double();

	// Nothing else is accepted on the command line
	if(args.size() > 0)
	{
		ThrowError("Superfluous arg: ", args.pop_string());
	}

	pMatrix->multiply(factor);
	pMatrix->print(cout);
}
예제 #7
0
파일: main.cpp 프로젝트: kslazarev/waffles
// Runs GManifoldSculpting on a dataset and prints the transformed data.
//
// Arguments consumed from args: dataset filename, the neighbor-finder
// arguments read by instantiateNeighborFinder, the target dimensionality,
// then any of
//   -seed <uint>          reseeds the random number generator
//   -continue <filename>  previously processed data to hand to the transform
//   -scalerate <double>   passed to setSquishingRate (default 0.999)
// Throws Ex on an unrecognized option, or when the hint data does not have
// targetDims attributes or the same number of rows as the input.
void ManifoldSculpting(GArgReader& args)
{
	// Required arguments
	GMatrix* pInput = loadData(args.pop_string());
	Holder<GMatrix> hInput(pInput);
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	GRand prng(seed);
	GNeighborFinder* pFinder = instantiateNeighborFinder(pInput, &prng, args);
	Holder<GNeighborFinder> hFinder(pFinder);
	size_t targetDims = args.pop_uint();

	// Optional arguments
	const char* szHintFile = NULL;
	double squishRate = 0.999;
	while(args.size() > 0)
	{
		if(args.if_pop("-seed"))
		{
			prng.setSeed(args.pop_uint());
			continue;
		}
		if(args.if_pop("-continue"))
		{
			szHintFile = args.pop_string();
			continue;
		}
		if(args.if_pop("-scalerate"))
		{
			squishRate = args.pop_double();
			continue;
		}
		throw Ex("Invalid option: ", args.peek());
	}

	// Load and sanity-check the hint data, if a file was given
	GMatrix* pHint = NULL;
	Holder<GMatrix> hHint(NULL);
	if(szHintFile)
	{
		pHint = loadData(szHintFile);
		hHint.reset(pHint);
		if(pHint->relation()->size() != targetDims)
			throw Ex("Wrong number of dims in the hint data");
		if(pHint->rows() != pInput->rows())
			throw Ex("Wrong number of patterns in the hint data");
	}

	// Configure the transform
	GManifoldSculpting sculptor(pFinder->neighborCount(), targetDims, &prng);
	sculptor.setSquishingRate(squishRate);
	if(pHint)
	{
		// The holder gives up the hint matrix here
		sculptor.setPreprocessedData(hHint.release());
	}
	sculptor.setNeighborFinder(pFinder);

	// Run it and print the output
	GMatrix* pResult = sculptor.doit(*pInput);
	Holder<GMatrix> hResult(pResult);
	pResult->print(cout);
}
예제 #8
0
파일: main.cpp 프로젝트: litaoshao/waffles
// Adds a constant offset to selected columns of a dataset and prints it.
//
// Arguments consumed from args: dataset filename, an attribute list (read by
// parseAttributeList), and the offset to add.
void shiftColumns(GArgReader& args)
{
	GMatrix* pMatrix = loadData(args.pop_string());
	Holder<GMatrix> hMatrix(pMatrix);

	// Which columns to shift, and by how much
	vector<size_t> selected;
	parseAttributeList(selected, args, pMatrix->cols());
	const double delta = args.pop_double();

	for(size_t r = 0; r < pMatrix->rows(); r++)
	{
		double* pCells = pMatrix->row(r);
		for(size_t k = 0; k < selected.size(); k++)
		{
			pCells[selected[k]] += delta;
		}
	}
	pMatrix->print(cout);
}
예제 #9
0
파일: main.cpp 프로젝트: litaoshao/waffles
// Replaces a randomly chosen portion of the cells in a dataset with the
// "unknown" marker and prints the result.
//
// Arguments consumed from args: dataset filename, the portion of cells to
// drop (0 to 1), then optionally the flag -seed <uint>.
// Columns whose relation reports a value count of 0 receive
// UNKNOWN_REAL_VALUE; all other columns receive UNKNOWN_DISCRETE_VALUE.
// Reports an error through ThrowError for an unknown flag or a portion
// outside [0, 1].
void dropRandomValues(GArgReader& args)
{
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData); // frees the matrix on every exit path
	double portion = args.pop_double();

	// Written as a negated range test so that NaN is rejected as well.
	// A negative portion would otherwise be converted to size_t below.
	if(!(portion >= 0 && portion <= 1))
		ThrowError("The portion must be between 0 and 1");

	// Parse the options
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else
			ThrowError("Invalid option: ", args.peek());
	}

	// Selection sampling: n is the number of cells not yet visited and k is
	// the number of cells still to drop, so each cell is dropped when
	// rand.next(n) < k.
	GRand rand(seed);
	size_t n = pData->rows() * pData->cols();
	size_t k = size_t(portion * n);
	for(size_t i = 0; i < pData->cols(); i++)
	{
		// Pick the marker that matches this column's attribute type
		const bool continuous = (pData->relation()->valueCount(i) == 0);
		const double unknown = continuous ? double(UNKNOWN_REAL_VALUE) : double(UNKNOWN_DISCRETE_VALUE);
		for(size_t j = 0; j < pData->rows(); j++)
		{
			if(rand.next(n) < k)
			{
				pData->row(j)[i] = unknown;
				k--;
			}
			n--;
		}
	}
	pData->print(cout);
}
예제 #10
0
파일: main.cpp 프로젝트: litaoshao/waffles
// Rotates two continuous columns of a dataset about the origin and prints
// the result.
//
// Arguments consumed from args, in order: dataset filename, first column
// index, second column index, rotation angle in degrees. Each column index
// is validated immediately after it is read; a failure is reported through
// ThrowError.
void rotate(GArgReader& args)
{
	GMatrix* pMatrix = loadData(args.pop_string());
	Holder<GMatrix> hMatrix(pMatrix);
	sp_relation rel = pMatrix->relation();

	// First column: must exist and be continuous
	unsigned xCol = args.pop_uint();
	if(xCol >= pMatrix->cols())
	{
		ThrowError("Rotation first column index (", to_str(xCol),
			") should not be greater than the largest index, which is ",
			to_str(pMatrix->cols()-1), ".");
	}
	if(!rel->areContinuous(xCol,1))
	{
		ThrowError("Rotation first column index (", to_str(xCol),
			") should be continuous and it is not.");
	}

	// Second column: same requirements
	unsigned yCol = args.pop_uint();
	if(yCol >= pMatrix->cols())
	{
		ThrowError("Rotation second column index (", to_str(yCol),
			") should not be greater than the largest index, which is ",
			to_str(pMatrix->cols()-1), ".");
	}
	if(!rel->areContinuous(yCol,1))
	{
		ThrowError("Rotation second column index (", to_str(yCol),
			") should be continuous and it is not.");
	}

	// The angle is given in degrees; the math functions need radians
	double theta = args.pop_double();
	theta = theta * M_PI / 180;
	const double c = std::cos(theta);
	const double s = std::sin(theta);

	// Apply the 2-D rotation to every row
	for(std::size_t r = 0; r < pMatrix->rows(); ++r)
	{
		double* cells = (*pMatrix)[r];
		const double oldX = cells[xCol];
		const double oldY = cells[yCol];
		cells[xCol] = oldX*c - oldY*s;
		cells[yCol] = oldX*s + oldY*c;
	}
	pMatrix->print(cout);
}
예제 #11
0
파일: main.cpp 프로젝트: litaoshao/waffles
// Applies a GNormalize transform to a dataset and prints the output.
//
// Arguments consumed from args: dataset filename, then optionally
//   -range <min> <max>   the two values given to GNormalize (default 0 and 1)
// Any other remaining argument is reported through ThrowError.
void normalize(GArgReader& args)
{
	GMatrix* pInput = loadData(args.pop_string());
	Holder<GMatrix> hInput(pInput);

	// Range defaults
	double lo = 0.0;
	double hi = 1.0;
	while(args.size() > 0)
	{
		if(!args.if_pop("-range"))
		{
			ThrowError("Invalid option: ", args.peek());
			continue;
		}
		lo = args.pop_double();
		hi = args.pop_double();
	}

	// Train the transform on the data, then apply it to that same data
	GNormalize normalizer(lo, hi);
	normalizer.train(*pInput);
	GMatrix* pResult = normalizer.transformBatch(*pInput);
	Holder<GMatrix> hResult(pResult);
	pResult->print(cout);
}
예제 #12
0
파일: main.cpp 프로젝트: kslazarev/waffles
// Runs GUnsupervisedBackProp on a dataset, prints the transformed data, and
// optionally saves the model and the progress log.
//
// Arguments consumed from args: dataset filename, target dimensionality,
// then any of
//   -seed <uint>                 reseeds the random number generator
//   -addlayer <uint>             adds a layer to the neural net
//   -params <n> <r1> ... <rn>    appends n parameter ranges
//   -modelin <file>              replaces the model with one loaded from JSON
//   -modelout <file>             saves the model as JSON after the transform
//   -intrinsicin <file>          loads an ARFF matrix for setIntrinsic
//   -jitter <channels> <rot> <trans> <zoom>   installs an image jitterer
//   -noinputbias                 calls setUseInputBias(false)
//   -progress <file>             tracks progress and saves it as ARFF
//   -onepass                     calls onePass()
// Options are applied in the order given. Throws Ex on an unrecognized
// option, on -params after a jitterer has been set, or on -jitter when the
// number of parameter ranges is not 2.
void unsupervisedBackProp(GArgReader& args)
{
	// Load the file and params
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	int targetDims = args.pop_uint();

	// Parse Options
	unsigned int nSeed = getpid() * (unsigned int)time(NULL);
	GRand prng(nSeed);
	GUnsupervisedBackProp* pUBP = new GUnsupervisedBackProp(targetDims, &prng);
	Holder<GUnsupervisedBackProp> hUBP(pUBP);
	vector<size_t> paramRanges;
	string sModelOut;
	string sProgress;
	bool inputBias = true;
	while(args.size() > 0)
	{
		if(args.if_pop("-seed"))
			prng.setSeed(args.pop_uint());
		else if(args.if_pop("-addlayer"))
			pUBP->neuralNet()->addLayer(args.pop_uint());
		else if(args.if_pop("-params"))
		{
			if(pUBP->jitterer())
				throw Ex("You can't change the params after you add an image jitterer");
			size_t paramDims = args.pop_uint();
			for(size_t i = 0; i < paramDims; i++)
				paramRanges.push_back(args.pop_uint());
		}
		else if(args.if_pop("-modelin"))
		{
			// The current instance is discarded here, so anything applied
			// directly to it by earlier options (layers, intrinsic data,
			// jitterer, progress tracking, onepass) is lost.
			GDom doc;
			doc.loadJson(args.pop_string());
			GLearnerLoader ll(prng);
			pUBP = new GUnsupervisedBackProp(doc.root(), ll);
			hUBP.reset(pUBP);
		}
		else if(args.if_pop("-modelout"))
			sModelOut = args.pop_string();
		else if(args.if_pop("-intrinsicin"))
		{
			// Keep the matrix in a holder until it is handed over, so it is
			// not leaked if loading the file throws.
			Holder<GMatrix> hInt(new GMatrix());
			hInt->loadArff(args.pop_string());
			pUBP->setIntrinsic(hInt.release());
		}
		else if(args.if_pop("-jitter"))
		{
			if(paramRanges.size() != 2)
				throw Ex("The params must be set to 2 before a tweaker is set");
			size_t channels = args.pop_uint();
			double rot = args.pop_double();
			double trans = args.pop_double();
			double zoom = args.pop_double();
			GImageJitterer* pJitterer = new GImageJitterer(paramRanges[0], paramRanges[1], channels, rot, trans, zoom);
			pUBP->setJitterer(pJitterer);
		}
		else if(args.if_pop("-noinputbias"))
			inputBias = false;
		else if(args.if_pop("-progress"))
		{
			sProgress = args.pop_string();
			pUBP->trackProgress();
		}
		else if(args.if_pop("-onepass"))
			pUBP->onePass();
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// These two settings are applied after parsing, so they reach whichever
	// instance is current (including one loaded with -modelin).
	pUBP->setParams(paramRanges);
	pUBP->setUseInputBias(inputBias);

	// Transform the data
	GMatrix* pDataAfter = pUBP->doit(*pData);
	Holder<GMatrix> hDataAfter(pDataAfter);
	pDataAfter->print(cout);

	// Save the model (if requested)
	if(sModelOut.length() > 0)
	{
		GDom doc;
		doc.setRoot(pUBP->serialize(&doc));
		doc.saveJson(sModelOut.c_str());
	}

	// Save the progress log (if requested)
	if(sProgress.length() > 0)
		pUBP->progress().saveArff(sProgress.c_str());
}
예제 #13
0
파일: main.cpp 프로젝트: BaskWind/waffles
// Loads a trained neural decomposition model from JSON, extrapolates with
// it, and prints the resulting matrix.
//
// Arguments consumed from args: model filename, then any of the flags
//   -start <double>     default 1.0
//   -length <double>    default 1.0
//   -step <double>      default 0.0002
//   -features <...>     loads a feature matrix via LoadData; when given, the
//                       model extrapolates over those features instead of
//                       the start/length/step range
//   -outputFeatures     sets outputFeatures to true
// Throws Ex when no model is given, when the loaded model is not a
// GNeuralDecomposition, or on an unrecognized flag.
void Extrapolate(GArgReader &args)
{
	// Load the model
	if(args.size() < 1)
	{
		throw Ex("Model not specified.");
	}
	GDom doc;
	doc.loadJson(args.pop_string());
	GLearnerLoader ll(true);
	GSupervisedLearner *pLearner = ll.loadLearner(doc.root());
	std::unique_ptr<GSupervisedLearner> hLearner(pLearner);

	// The file may hold any kind of learner, so check the type before using
	// it as a neural decomposition model.
	GNeuralDecomposition *nd = dynamic_cast<GNeuralDecomposition *>(pLearner);
	if(nd == NULL)
	{
		throw Ex("The loaded model is not a neural decomposition model.");
	}

	// Parse options
	double start = 1.0;
	double length = 1.0;
	double step = 0.0002;
	bool useFeatures = false;
	// NOTE(review): the default is already true and the only flag that
	// touches this also sets it to true, so -outputFeatures currently has no
	// effect. Confirm the intended default before changing it.
	bool outputFeatures = true;
	std::unique_ptr<GMatrix> hFeatures;

	while(args.next_is_flag())
	{
		if(args.if_pop("-start"))
		{
			start = args.pop_double();
		}
		else if(args.if_pop("-length"))
		{
			length = args.pop_double();
		}
		else if(args.if_pop("-step"))
		{
			step = args.pop_double();
		}
		else if(args.if_pop("-features"))
		{
			LoadData(args, hFeatures);
			useFeatures = true;
		}
		else if(args.if_pop("-outputFeatures"))
		{
			outputFeatures = true;
		}
		else
		{
			throw Ex("Invalid option: ", args.peek());
		}
	}

	// Extrapolate
	GMatrix *pOutput;
	if(useFeatures)
		pOutput = nd->extrapolate(*hFeatures.get());
	else
		pOutput = nd->extrapolate(start, length, step, outputFeatures);
	std::unique_ptr<GMatrix> hOutput(pOutput);

	// Output predictions
	pOutput->print(cout);
}
예제 #14
0
파일: main.cpp 프로젝트: BaskWind/waffles
// Trains a GNeuralDecomposition model on a series and writes the trained
// model to stdout as JSON.
//
// Arguments consumed from args: the series (read by LoadData), then any of
// the flags -regularization, -learningRate, -linearUnits, -softplusUnits,
// -sigmoidUnits, -epochs (each followed by one value), -features (followed
// by data read by LoadData), and -filterLogarithm.
//
// A two-column series is split into features (column 0) and labels
// (column 1). If no features are available after option parsing, evenly
// spaced features i / rows are generated. Throws Ex when the series has more
// than two columns or a flag is not recognized.
void Train(GArgReader &args)
{
	// Load series from file
	std::unique_ptr<GMatrix> hSeries, hFeatures;
	LoadData(args, hSeries);
	GMatrix *pSeries = hSeries.get();

	// Split features/labels
	if(pSeries->cols() == 2)
	{
		// Hand each clone to its owner right away so that neither leaks if
		// the other allocation throws. The original series is released only
		// after both clones exist.
		hFeatures.reset(pSeries->cloneSub(0, 0, pSeries->rows(), 1));
		GMatrix *pLabels = pSeries->cloneSub(0, 1, pSeries->rows(), 1);
		hSeries.reset(pLabels);
		pSeries = pLabels;
	}
	else if(pSeries->cols() > 2)
	{
		throw Ex("Too many columns!");
	}

	// Parse options. The model is owned by a smart pointer so it is released
	// on every exit path, including a throw during option parsing or training.
	std::unique_ptr<GNeuralDecomposition> hModel(new GNeuralDecomposition());
	GNeuralDecomposition *nd = hModel.get();
	while(args.next_is_flag())
	{
		if(args.if_pop("-regularization"))
			nd->setRegularization(args.pop_double());
		else if(args.if_pop("-learningRate"))
			nd->setLearningRate(args.pop_double());
		else if(args.if_pop("-linearUnits"))
			nd->setLinearUnits(args.pop_uint());
		else if(args.if_pop("-softplusUnits"))
			nd->setSoftplusUnits(args.pop_uint());
		else if(args.if_pop("-sigmoidUnits"))
			nd->setSigmoidUnits(args.pop_uint());
		else if(args.if_pop("-epochs"))
			nd->setEpochs(args.pop_uint());
		else if(args.if_pop("-features"))
			LoadData(args, hFeatures);
		else if(args.if_pop("-filterLogarithm"))
			nd->setFilterLogarithm(true);
		else
			throw Ex("Invalid option: ", args.peek());
	}

	if(hFeatures.get() == NULL)
	{
		// Generate features: one column holding i / rows for each row i
		hFeatures.reset(new GMatrix(pSeries->rows(), 1));
		for(size_t i = 0; i < pSeries->rows(); i++)
		{
			hFeatures->row(i)[0] = i / (double) pSeries->rows();
		}
	}

	// Train
	GMatrix *pFeatures = hFeatures.get();
	nd->train(*pFeatures, *pSeries);

	// Output the trained model
	GDom doc;
	doc.setRoot(nd->serialize(&doc));
	doc.writeJson(cout);
}
예제 #15
0
파일: main.cpp 프로젝트: kslazarev/waffles
// Trains (or loads) a self-organizing map, transforms a dataset with it, and
// prints the transformed data. Optionally saves the map as JSON.
//
// Arguments consumed from args: dataset filename, one or more unsigned map
// dimensions, then any of the flags
//   -tofile <file>         save the map as JSON after the transform
//   -fromfile <file>       load the map from JSON instead of training one
//   -seed <uint>           seeds GRand::global()
//   -neighborhood <name>   "gaussian" or "uniform"
//   -printMeshEvery <interval> <baseFilename> <xDim> <yDim> [showTrain]
//   -batchTrain <startWidth> <endWidth> <numIter> <numConverge>
//   -stdTrain <startWidth> <endWidth> <startRate> <endRate> <numIter>
// Throws Ex when no dimension is given, on an unknown neighborhood name, or
// on an unrecognized flag.
void selfOrganizingMap(GArgReader& args){
  // Load the file
  GMatrix* pData = loadData(args.pop_string());
  Holder<GMatrix> hData(pData);

  // Parse arguments
  // Every leading unsigned argument is one map dimension; numNodes is the
  // product of all of them.
  std::vector<double> netDims;
  unsigned numNodes = 1;
  while(args.next_is_uint()){
    unsigned dim = args.pop_uint();
    netDims.push_back(dim);
    numNodes *= dim;
  }
  if(netDims.size() < 1){
    throw Ex("No dimensions specified for self organizing map.  ",
	       "A map must be at least 1 dimensional.");
  }

  // Default components. Each holder keeps its object until it is passed on
  // with release() further down.
  Holder<SOM::ReporterChain> reporters(new SOM::ReporterChain);
  // NOTE(review): alg is not referenced again in this function; the
  // algorithm actually used is the separate holder named algo below.
  Holder<SOM::TrainingAlgorithm> alg(NULL);
  Holder<GDistanceMetric> weightDist(new GRowDistance);
  Holder<GDistanceMetric> nodeDist(new GRowDistance);
  Holder<SOM::NodeLocationInitialization> topology(new SOM::GridTopology);
  Holder<SOM::NodeWeightInitialization> weightInit
    (new SOM::NodeWeightInitializationTrainingSetSample(NULL));
  Holder<SOM::NeighborhoodWindowFunction> 
    windowFunc(new SOM::GaussianWindowFunction());

  //Loading and saving
  string loadFrom = "";
  string saveTo = "";

  //Parameters for different training algorithms
  string algoName = "batch";
  double startWidth = -1;//Start width - set later if still negative
  double endWidth   = -1;//End width   - set later if still negative
  double startRate = -1;//Start learning rate
  double endRate   = -1;//End learning rate
  unsigned numIter     = 100;//Total iterations
  unsigned numConverge = 1;//#steps for batch to converge

  // Parse the flags
  while(args.next_is_flag()){
    if(args.if_pop("-tofile")){
      saveTo = args.pop_string();
    }else if(args.if_pop("-fromfile")){
      loadFrom = args.pop_string();
    }else if(args.if_pop("-seed")){
      GRand::global().setSeed(args.pop_uint());
    }else if(args.if_pop("-neighborhood")){
      string name = args.pop_string();
      if(name == "gaussian"){
	windowFunc.reset(new SOM::GaussianWindowFunction());
      }else if(name == "uniform"){
	windowFunc.reset(new SOM::UniformWindowFunction());
      }else{
	throw Ex("Only gaussian and uniform are acceptible ",
		   "neighborhood types");
      }
    }else if(args.if_pop("-printMeshEvery")){
      using namespace SOM;
      unsigned interval = args.pop_uint();
      string baseFilename = args.pop_string();
      unsigned xDim = args.pop_uint();
      unsigned yDim = args.pop_uint();
      // Optional trailing keyword, accepted in either capitalization
      bool showTrain = false;
      if(args.if_pop("showTrain") || args.if_pop("showtrain")){
	showTrain = true;
      }
      // Wrap the weight reporter in an interval reporter and add it to the
      // reporter chain
      smart_ptr<Reporter> weightReporter
	(new SVG2DWeightReporter(baseFilename, xDim, yDim, showTrain));
      Holder<IterationIntervalReporter> intervalReporter
	(new IterationIntervalReporter(weightReporter, interval));
      reporters->add(intervalReporter.release());
    }else if(args.if_pop("-batchTrain")){
      algoName = "batch";
      startWidth = args.pop_double();
      endWidth = args.pop_double();
      numIter = args.pop_uint();
      numConverge = args.pop_uint();
    }else if(args.if_pop("-stdTrain")){
      algoName = "standard";
      startWidth = args.pop_double();
      endWidth = args.pop_double();
      startRate = args.pop_double();
      endRate = args.pop_double();
      numIter = args.pop_uint();
    }else{
      throw Ex("Invalid option: ", args.peek());
    }
  }

  //Create the training algorithm
  Holder<SOM::TrainingAlgorithm> algo;
  if(algoName == "batch"){
    // Widths still negative were not given on the command line: default to
    // twice the largest map dimension for the start and 1 for the end.
    double netRadius = *std::max_element(netDims.begin(), netDims.end());
    if(startWidth < 0){ startWidth = 2*netRadius; }
    if(endWidth < 0){ endWidth = 1; }
    algo.reset( new SOM::BatchTraining
      (startWidth, endWidth, numIter, numConverge,
       weightInit.release(), windowFunc.release(),
       reporters.release()));
  }else if(algoName == "standard"){
    algo.reset( new SOM::TraditionalTraining
      (startWidth, endWidth, startRate, endRate, numIter,
       weightInit.release(), windowFunc.release(),
       reporters.release()));
  }else{
    throw Ex("Unknown type of training algorithm: \"",
	       algoName, "\"");
  }

  //Create the network & transform the data
  Holder<GSelfOrganizingMap> som;
  Holder<GMatrix> out;
  
  if(loadFrom == ""){
    //Create map from arguments given
    som.reset(new GSelfOrganizingMap
      (netDims, numNodes, topology.release(), algo.release(), 
       weightDist.release(), nodeDist.release()));
    //Train the network and transform the data in place
    out.reset(som->doit(*pData));
  }else{
    //Create map from file
    // The components built above are not passed to the map on this path;
    // they stay in their holders.
    GDom source;
    source.loadJson(loadFrom.c_str());
    som.reset(new GSelfOrganizingMap(source.root()));
    //Transform using the loaded network
    out.reset(som->transformBatch(*pData));
  }

  //Save the trained network
  if(saveTo != ""){
    GDom serialized;
    GDomNode* root = som->serialize(&serialized);
    serialized.setRoot(root);
    serialized.saveJson(saveTo.c_str());
  }

  //Print the result
  out->print(cout);
}
예제 #16
0
파일: main.cpp 프로젝트: litaoshao/waffles
// Prints basic statistics and several paired significance tests for two
// columns of a dataset.
//
// Arguments consumed from args: dataset filename, first attribute index,
// second attribute index, then optionally
//   -tol <double>   differences smaller than this count as equal
//                   (default 0.001)
// Output sections: medians, means, standard deviations, less/same/greater
// counts, a paired T-test, a paired T-test with normalized values, and a
// Wilcoxon signed ranks test.
// Reports an error through ThrowError for an unknown option or an attribute
// index that is not a column of the data.
void significance(GArgReader& args)
{
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	int attr1 = args.pop_uint();
	int attr2 = args.pop_uint();

	// Both indexes are used to subscript rows below, so reject any that do
	// not name an existing column.
	if((size_t)attr1 >= pData->cols() || (size_t)attr2 >= pData->cols())
		ThrowError("Attribute index out of range");

	// Parse options
	double tolerance = 0.001;
	while(args.size() > 0)
	{
		if(args.if_pop("-tol"))
			tolerance = args.pop_double();
		else
			ThrowError("Invalid option: ", args.peek());
	}

	// Print some basic stats
	cout.precision(8);
	{
		cout << "### Some basic stats\n";
		cout << "Medians = " << pData->median(attr1) << ", " << pData->median(attr2) << "\n";
		double mean1 = pData->mean(attr1);
		double mean2 = pData->mean(attr2);
		cout << "Means = " << mean1 << ", " << mean2 << "\n";
		double var1 = pData->variance(attr1, mean1);
		double var2 = pData->variance(attr2, mean2);
		cout << "Standard deviations = " << sqrt(var1) << ", " << sqrt(var2) << "\n";

		// Count the rows where attr1 is less than, about equal to, or
		// greater than attr2
		int less = 0;
		int eq = 0;
		int more = 0;
		for(size_t i = 0; i < pData->rows(); i++)
		{
			double* pRow = pData->row(i);
			if(std::abs(pRow[attr1] - pRow[attr2]) < tolerance)
				eq++;
			else if(pRow[attr1] < pRow[attr2])
				less++;
			else
				more++;
		}
		cout << less << " less, " << eq << " same, " << more << " greater\n";
	}

	// Perform the significance tests
	{
		cout << "\n### Paired T-test\n";
		size_t v;
		double t;
		pData->pairedTTest(&v, &t, attr1, attr2, false);
		double p = GMath::tTestAlphaValue(v, t);
		cout << "v=" << v << ", t=" << t << ", p=" << p << "\n";
	}
	{
		cout << "\n### Paired T-test with normalized values\n";
		size_t v;
		double t;
		pData->pairedTTest(&v, &t, attr1, attr2, true);
		double p = GMath::tTestAlphaValue(v, t);
		cout << "v=" << v << ", t=" << t << ", p=" << p << "\n";
	}
	{
		// The header ends with a newline like the other section headers, so
		// the first result line does not run onto it.
		cout << "\n### Wilcoxon Signed Ranks Test\n";
		int num;
		double wMinus, wPlus;
		pData->wilcoxonSignedRanksTest(attr1, attr2, tolerance, &num, &wMinus, &wPlus);
		cout << "Number of signed ranks: " << num << "\n";
		double w_min = std::min(wMinus, wPlus);
		double w_sum = wPlus - wMinus;
		cout << "W- = " << wMinus << ", W+ = " << wPlus << ", W_min = " << w_min << ", W_sum = " << w_sum << "\n";

		double p_min = 0.5 * GMath::wilcoxonPValue(num, w_min);
		if(num < 10)
			cout << "Because the number of signed ranks is small, you should use a lookup table, rather than rely on the normal approximation for the P-value.\n";
		cout << "One-tailed P-value (for directional comparisons) computed with a normal approximation using W_min = " << 0.5 * p_min << "\n";
		cout << "Two-tailed P-value (for non-directional comparisons) computed with a normal approximation using W_min = " << p_min << "\n";
		cout << "To show that something is \"better\" than something else, use the one-tailed P-value.\n";
		cout << "Commonly, a P-value less that 0.05 is considered to be significant.\n";
/*
			double p_sum = GMath::wilcoxonPValue(num, w_sum);
			cout << "Directional (one-tailed) P-value computed with W_sum = " << p_sum << "\n";
*/
	}
}
예제 #17
0
파일: main.cpp 프로젝트: litaoshao/waffles
// Prints a random sample of the lines of a text file to stdout.
//
// Arguments consumed from args: filename, the portion of lines to keep
// (0 to 1), then optionally -seed <uint>.
// Each line is kept when rand.uniform() < portion. For a file whose
// extension is ".arff", every line up to and including the first one that
// begins with "@DATA" (case-insensitive) is always printed, and sampling
// starts after it.
// Reports an error through ThrowError for an invalid portion, an unknown
// option, a file that cannot be opened, or a line that does not fit in the
// MAX_LINE_LENGTH buffer.
void sampleRows(GArgReader& args)
{
	const char* filename = args.pop_string();
	double portion = args.pop_double();
	if(portion < 0 || portion > 1)
		ThrowError("The portion must be between 0 and 1");
	PathData pd;
	GFile::parsePath(filename, &pd);
	bool arff = false;
	if(_stricmp(filename + pd.extStart, ".arff") == 0)
		arff = true;

	// Parse Options
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	while(args.size() > 0)
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else
			ThrowError("Invalid option: ", args.peek());
	}
	GRand rand(seed);

	// Open the file and measure its size. The loop below counts down the
	// remaining bytes rather than testing for end-of-file.
	size_t size = 0;
	std::ifstream s;
	s.exceptions(std::ios::failbit|std::ios::badbit);
	try
	{
		s.open(filename, std::ios::binary);
		s.seekg(0, std::ios::end);
		size = (size_t)s.tellg();
		s.seekg(0, std::ios::beg);
	}
	catch(const std::exception&)
	{
		if(GFile::doesFileExist(filename))
			ThrowError("Error while trying to open the existing file: ", filename);
		else
			ThrowError("File not found: ", filename);
	}
	char* pLine = new char[MAX_LINE_LENGTH];
	ArrayHolder<char> hLine(pLine);
	size_t line = 1;
	while(size > 0)
	{
		try
		{
			s.getline(pLine, std::min(size + 1, size_t(MAX_LINE_LENGTH)));
		}
		catch(const std::exception&)
		{
			// getline sets failbit when the buffer fills up before a line
			// delimiter is found, and the exception mask set above turns
			// that into a throw. Report it with the intended message instead
			// of a raw stream failure. Anything else is rethrown unchanged.
			if(!s.bad() && !s.eof())
				ThrowError("Line ", to_str(line), " is too long");
			throw;
		}
		size_t linelen = std::min(size, size_t(s.gcount()));
		if(linelen >= MAX_LINE_LENGTH - 1)
			ThrowError("Line ", to_str(line), " is too long"); // todo: just resize the buffer here
		if(arff)
		{
			// Still in the ARFF header: always print, and switch to sampling
			// once the @DATA line has been printed
			if(_strnicmp(pLine, "@DATA", 5) == 0)
				arff = false;
			cout << pLine << "\n";
		}
		else if(rand.uniform() < portion)
			cout << pLine << "\n";
		size -= linelen;
		line++;
	}
}