Ejemplo n.º 1
0
/// Compute an even, left-leaning split of Elements entries over Nodes nodes.
///
/// Each node receives (Elements + Grow) / Nodes entries and the first
/// (Elements + Grow) % Nodes nodes receive one more. When Grow is set, the
/// node that ends up holding Position then gives one entry back, so the sizes
/// written to NewSize always add up to Elements.
///
/// \param Nodes     Number of nodes to distribute over.
/// \param Elements  Number of existing elements.
/// \param Capacity  Per-node capacity; only consulted by the assertions.
/// \param CurSize   Not read by this implementation.
/// \param NewSize   Output array of Nodes entries receiving the new sizes.
/// \param Position  Element index to locate in the new layout.
/// \param Grow      Reserve room for one additional element at Position.
/// \returns (node, offset) of Position in the new layout. If no node contains
///          Position the result is (Nodes, 0); a default-constructed IdxPair
///          is returned when Nodes is zero.
IdxPair distribute(unsigned Nodes, unsigned Elements, unsigned Capacity,
                   const unsigned *CurSize, unsigned NewSize[],
                   unsigned Position, bool Grow) {
  assert(Elements + Grow <= Nodes * Capacity && "Not enough room for elements");
  assert(Position <= Elements && "Invalid position");
  if (Nodes == 0)
    return IdxPair();

  // Share out the total evenly; the leftmost nodes absorb the remainder.
  const unsigned Total = Elements + Grow;
  const unsigned Base = Total / Nodes;
  const unsigned Remainder = Total % Nodes;

  IdxPair Result(Nodes, 0);
  bool Located = false;
  unsigned Running = 0;
  for (unsigned Idx = 0; Idx != Nodes; ++Idx) {
    const unsigned Before = Running;
    NewSize[Idx] = Idx < Remainder ? Base + 1 : Base;
    Running += NewSize[Idx];
    // The first node whose cumulative size passes Position contains it.
    if (!Located && Running > Position) {
      Result = IdxPair(Idx, Position - Before);
      Located = true;
    }
  }
  assert(Running == Elements + Grow && "Bad distribution sum");

  // Give back the slot that was reserved for the element about to be added.
  if (Grow) {
    assert(Result.first < Nodes && "Bad algebra");
    assert(NewSize[Result.first] && "Too few elements to need Grow");
    NewSize[Result.first] -= 1;
  }

#ifndef NDEBUG
  // Re-verify the final layout: nothing over capacity, nothing lost.
  Running = 0;
  for (unsigned Idx = 0; Idx != Nodes; ++Idx) {
    assert(NewSize[Idx] <= Capacity && "Overallocated node");
    Running += NewSize[Idx];
  }
  assert(Running == Elements && "Bad distribution sum");
#endif

  return Result;
}
Ejemplo n.º 2
0
// Chooses split operators that can be useful in separating the classes.
// This is done before an initial model is created.
// This function does not create the appropriate features, and should be
// called on a temporary list.
//
// Every feature that has at least two values recorded in the auxiliary search
// data is passed to findOptimalSplit(); when thresholds come back, a
// SplitOperator is appended to splits_ and to executionOrder_, and new indexes
// are taken from operatorFeatureSpaceSize_ for each bin value (and for each
// bin indicator when addBooleanIndicators is set).
//
// verbose = 0, minimal chatter
// verbose = 1, one line summary for each feature
// verbose > 1, more details
//
// Note: the verbose paths leave cout with the format flags they set
// (fixed / setprecision(4)); they are not restored here.
void MlOperatorList::selectInitialSplitOperators(MlTrainingContainer& mtc,
												 const MlFeatureSet& featureSet,
												 bool  addBooleanIndicators,
												 size_t verboseLevel)
{
	MlOperatorSearchData* auxilaryData = mtc.getAuxilarySearchData();
	assert( auxilaryData );

	const double minRelativeInformationGain = mtc.getMinRelativeInformationGain();
	const size_t maxNumSplits				= mtc.getMaxNumSplits();
	const vector<size_t>& numFeatureValues  = auxilaryData->numFeatureValues_;
	const vector<MlFeature>& features       = featureSet.getFeatures();

	assert( auxilaryData->getNumBasicFeatures() == features.size());

	// new operator indexes must start after the basic features
	if (operatorFeatureSpaceSize_ < features.size())
		operatorFeatureSpaceSize_ = features.size();

	const bool detailedOutput = (verboseLevel>1);

	for (size_t i=0; i<features.size(); i++)
	{
		// a feature with fewer than two values cannot be split
		if (numFeatureValues[i] < 2)
			continue;

		if (detailedOutput)
			cout << endl << "FEATURE " << i << " - " << features[i].getName() << endl;
		
		SplitOperator sop;
		// NOTE(review): the meaning of the literals -0.333 and 300 is defined by
		// findOptimalSplit, which is not visible here - confirm before changing.
		double gain = auxilaryData->findOptimalSplit(i, sop.thresholds, -0.333, minRelativeInformationGain, 
									   maxNumSplits, 300, detailedOutput );

		if (verboseLevel == 1)
		{
			cout << "F" << i << "\t" << features[i].getName() << "\tgain:" << fixed << setprecision(4) << gain;
			cout << " [";
			for (size_t t=0; t<sop.thresholds.size(); t++)
				cout << " " << sop.thresholds[t];
			cout << " ]" << endl;
		}		

		// no thresholds found, nothing to add for this feature
		if (sop.thresholds.size() == 0)
			continue;

		// n thresholds delimit n+1 bins
		const size_t numBins = sop.thresholds.size()+1;

		// add for now
		sop.sourceIdx = i;
		sop.indexesForBinValues.resize(numBins,MAX_UINT);
		for (size_t bin=0; bin<numBins; bin++)
			sop.indexesForBinValues[bin]=operatorFeatureSpaceSize_++;
	
		if (addBooleanIndicators)
		{
			sop.indexesForBinIndicators.resize(numBins,MAX_UINT);
			for (size_t bin=0; bin<numBins; bin++)
				sop.indexesForBinIndicators[bin]=operatorFeatureSpaceSize_++;
		}
		
		executionOrder_.push_back(IdxPair(OT_SPLIT,splits_.size()));
		splits_.push_back(sop);
		
		if (detailedOutput)
		{
			cout << endl << "[" << setprecision(4);
			for (size_t t=0; t<sop.thresholds.size(); t++)
				cout << " " << sop.thresholds[t];
			cout << "]" << endl;
		}
	}
}
Ejemplo n.º 3
0
// Reads an operator list from ifs, replacing the current contents of this list.
//
// Layout, as parsed below:
//   - leading lines that start with '#' are skipped;
//   - the next line holds the number of operator lines that follow;
//   - each operator line starts with a type character:
//       D <dropIdx>
//       I <sourceIdx> <targetIdx>
//       F <sourceIdx> <targetIdx> <label>
//       N <sourceIdx> <targetIdx> <mu> <sigma>
//       S <sourceIdx> <k> <k thresholds> <k+1 bin value indexes> <k+1 bin indicator indexes>
//       C <k> <k sourceIdxs> <targetIdx> <indexForBool> <conditionLabel> <resultLabel>
//     Negative indexes in S and C lines are stored as MAX_UINT.
//   - a line with any other type character is skipped, but still counts
//     towards the declared number of lines.
//
// Malformed lines are reported through error(). Returns false if the stream
// runs out before the declared number of lines was read, true otherwise.
bool MlOperatorList::readOperatorList(ifstream& ifs)
{
	char buffer[1024];
	buffer[0] = '\0'; // keeps the sscanf below well defined if no line could be read
	while (ifs.good() && ifs.getline(buffer,1024))
		if (ifs.gcount()>0 && buffer[0] != '#')
			break;

	// Parse into an int: "%d" must be paired with an int*, not a size_t*.
	// A negative count is rejected instead of being turned into a huge size.
	int declaredCount = 0;
	if (sscanf(buffer,"%d",&declaredCount) != 1 || declaredCount < 0)
	{
		error("expected line with number of operaotrs");
		return false; // only reached if error() returns
	}
	const size_t numOperators = static_cast<size_t>(declaredCount);

	executionOrder_.clear();
	executionOrder_.reserve(numOperators);
	drops_.clear();
	indicators_.clear();
	normalizations_.clear();
	functions_.clear();
	splits_.clear();
	conditionals_.clear();

	for (size_t lineIdx=0; lineIdx<numOperators; lineIdx++)
	{
		if (! ifs.good())
			return false;

		ifs.getline(buffer,1024);
		if (ifs.gcount()<=0)
			return false;

		istringstream iss(buffer);

		char type=' ';
		iss >> type;
		switch (type) {
			case 'D':
				{
					size_t dropIdx=0;
					iss >> dropIdx;
					if (iss.fail())
						error("Error reading line:",buffer);

					executionOrder_.push_back(IdxPair(OT_DROP,drops_.size()));
					drops_.push_back(dropIdx);
				}
				break;

			case 'I':
				{
					IndicatorOperator iop;
					iss >> iop.sourceIdx >> iop.targetIdx;
					if (iss.fail())
						error("Error reading line:",buffer);
					executionOrder_.push_back(IdxPair(OT_INDICATOR,indicators_.size()));
					indicators_.push_back(iop);
				}
				break;

			case 'F':
				{
					FunctionOperator fop;
					iss >> fop.sourceIdx >> fop.targetIdx;
					string typeStr;
					iss >> typeStr;
					if (iss.fail())
						error("Error reading line:",buffer);

					// NOTE(review): function types are looked up in conditionalValueLabels,
					// exactly as before - confirm this is the intended label table.
					size_t typeIdx;
					for (typeIdx=0; typeIdx<numConditionalValueLabels; typeIdx++)
						if (! strcmp(typeStr.c_str(),conditionalValueLabels[typeIdx]))
							break;
					if (typeIdx == numConditionalValueLabels)
						error("Error reading line:",buffer);
					fop.type = typeIdx;
					executionOrder_.push_back(IdxPair(OT_FUNCTION,functions_.size()));
					functions_.push_back(fop);
				}
				break; // without this, an 'F' line was also parsed as an 'N' line

			case 'N':
				{
					NormalizationOperator nop;
					iss >> nop.sourceIdx >> nop.targetIdx >> nop.mu >> nop.sigma;
					if (iss.fail())
						error("Error reading line:",buffer);
					executionOrder_.push_back(IdxPair(OT_NORMALIZATION,normalizations_.size()));
					normalizations_.push_back(nop);
				}
				break;

			case 'S':
				{
					SplitOperator sop;
					iss >> sop.sourceIdx;

					size_t numThresholds;
					iss >> numThresholds;
					if (iss.fail())
						error("Error reading line:",buffer);
					sop.thresholds.resize(numThresholds);
					sop.indexesForBinValues.resize(numThresholds+1);
					sop.indexesForBinIndicators.resize(numThresholds+1);
					for (size_t t=0; t<numThresholds; t++)
						iss >> sop.thresholds[t];
					for (size_t bin=0; bin<=numThresholds; bin++)
					{
						// a fix so the model file can use -1 for MAX_UINT (so it won't hurt my eyes...)
						int index=-1;
						iss >> index;
						sop.indexesForBinValues[bin] = (index>=0 ? static_cast<size_t>(index) : MAX_UINT);
					}
					for (size_t bin=0; bin<=numThresholds; bin++)
					{
						// a fix so the model file can use -1 for MAX_UINT (so it won't hurt my eyes...)
						int index=-1;
						iss >> index;
						sop.indexesForBinIndicators[bin] = (index>=0 ? static_cast<size_t>(index) : MAX_UINT);
					}
					if (iss.fail())
						error("Error reading line:",buffer);
					executionOrder_.push_back(IdxPair(OT_SPLIT,splits_.size()));
					splits_.push_back(sop);
				}
				break;

			case 'C':
				{
					ConditionalOperator cod;
					size_t numSources;
					iss >> numSources;
					// check before resizing, as the 'S' case does, so a bad count is
					// reported rather than used as a vector size
					if (iss.fail())
						error("Error reading line:",buffer);
					cod.sourceIdxs.resize(numSources);
					for (size_t s=0; s<numSources; s++)
						iss >> cod.sourceIdxs[s];
					iss >> cod.targetIdx;

					// a fix so the model file can use -1 for MAX_UINT (so it won't hurt my eyes...)
					int indexForBool=-1;
					iss >> indexForBool;
					cod.indexForBool = (indexForBool>=0 ? static_cast<size_t>(indexForBool) : MAX_UINT);
					
					string resultStr, conditionStr;
					iss >> conditionStr >> resultStr;
					if (iss.fail())
						error("Error reading line:",buffer);
					size_t idxType, idxResult;
					for (idxType=0; idxType<numConditionalOperatorLabels; idxType++)
						if (! strcmp(conditionStr.c_str(),conditionalOperatorLabels[idxType]))
							break;
					if (idxType == numConditionalOperatorLabels)
						error("Error reading line:",buffer);
					for (idxResult=0; idxResult<numConditionalValueLabels; idxResult++)
						if (! strcmp(resultStr.c_str(),conditionalValueLabels[idxResult]))
							break;
					if (idxResult == numConditionalValueLabels)
						error("Error reading line:",buffer);
					cod.conditionType = idxType;
					cod.resultType    = idxResult;

					executionOrder_.push_back(IdxPair(OT_CONDITIONAL,conditionals_.size()));
					conditionals_.push_back(cod);
				}
				break;

			default:
				// unrecognized type character: the line is skipped
				break;
		}
	}
	return true;
}