// Command-line handler that runs the GLLE transform.
// Arguments, consumed in order from args:
//   1. filename of the dataset (passed to loadData)
//   2. the neighbor-finder specification (consumed by instantiateNeighborFinder)
//   3. the number of target dimensions
//   options: -seed [n]  reseeds the random number generator
// Throws Ex when an unrecognized option is encountered.
// The transformed matrix is printed to cout.
void lle(GArgReader& args)
{
	// Positional arguments
	GMatrix* pInput = loadData(args.pop_string());
	Holder<GMatrix> hInput(pInput);
	// Default seed mixes the process id with the current time
	unsigned int defaultSeed = getpid() * (unsigned int)time(NULL);
	GRand prng(defaultSeed);
	GNeighborFinder* pFinder = instantiateNeighborFinder(pInput, &prng, args);
	Holder<GNeighborFinder> hFinder(pFinder);
	int targetDims = args.pop_uint();

	// Optional flags: "-seed" is the only one recognized
	while(args.size() > 0)
	{
		if(!args.if_pop("-seed"))
			throw Ex("Invalid option: ", args.peek());
		prng.setSeed(args.pop_uint());
	}

	// Run the transform and emit the result
	GLLE engine(pFinder->neighborCount(), targetDims, &prng);
	engine.setNeighborFinder(pFinder);
	GMatrix* pEmbedded = engine.doit(*pInput);
	Holder<GMatrix> hEmbedded(pEmbedded);
	pEmbedded->print(cout);
}
// Command-line handler that runs the GIsomap transform.
// Arguments, consumed in order from args:
//   1. filename of the dataset (passed to loadData)
//   2. the neighbor-finder specification (consumed by instantiateNeighborFinder)
//   3. the number of target dimensions
//   options: -seed [n]   reseeds the random number generator
//            -tolerant   calls dropDisconnectedPoints() on the transform
// Throws Ex when an unrecognized option is encountered.
// The reduced matrix is printed to cout.
void isomap(GArgReader& args)
{
	// Positional arguments
	GMatrix* pInput = loadData(args.pop_string());
	Holder<GMatrix> hInput(pInput);
	// Default seed mixes the process id with the current time
	unsigned int defaultSeed = getpid() * (unsigned int)time(NULL);
	GRand prng(defaultSeed);
	GNeighborFinder* pFinder = instantiateNeighborFinder(pInput, &prng, args);
	Holder<GNeighborFinder> hFinder(pFinder);
	int targetDims = args.pop_uint();

	// Optional flags
	bool dropDisconnected = false;
	while(args.size() > 0)
	{
		if(args.if_pop("-seed"))
		{
			prng.setSeed(args.pop_uint());
			continue;
		}
		if(args.if_pop("-tolerant"))
		{
			dropDisconnected = true;
			continue;
		}
		throw Ex("Invalid option: ", args.peek());
	}

	// Configure and run the transform, then emit the result
	GIsomap engine(pFinder->neighborCount(), targetDims, &prng);
	engine.setNeighborFinder(pFinder);
	if(dropDisconnected)
		engine.dropDisconnectedPoints();
	GMatrix* pReduced = engine.reduce(*pInput);
	Holder<GMatrix> hReduced(pReduced);
	pReduced->print(cout);
}
// Command-line handler that runs the GBreadthFirstUnfolding transform.
// Arguments, consumed in order from args:
//   1. filename of the dataset (passed to loadData)
//   2. the neighbor-finder specification (consumed by instantiateNeighborFinder)
//   3. the number of target dimensions
//   options: -seed [n]  seed for the random number generators used here
//            -reps [n]  forwarded as the first constructor argument of
//                       GBreadthFirstUnfolding (defaults to 1)
// Throws Ex when an unrecognized option is encountered.
// The reduced matrix is printed to cout.
void breadthFirstUnfolding(GArgReader& args)
{
	// Load the file and params
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	// Default seed mixes the process id with the current time
	size_t nSeed = getpid() * (unsigned int)time(NULL);
	GRand prng(nSeed);
	GNeighborFinder* pNF = instantiateNeighborFinder(pData, &prng, args);
	Holder<GNeighborFinder> hNF(pNF);
	int targetDims = args.pop_uint();

	// Parse Options
	size_t reps = 1;
	while(args.size() > 0)
	{
		if(args.if_pop("-seed"))
		{
			nSeed = args.pop_uint();
			// The neighbor finder was handed &prng, so reseed it as well (as the
			// sibling handlers do). Otherwise a run with an explicit -seed would
			// still depend on the pid/time-derived default seed.
			prng.setSeed(nSeed);
		}
		else if(args.if_pop("-reps"))
			reps = args.pop_uint();
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// Transform the data
	GBreadthFirstUnfolding transform(reps, pNF->neighborCount(), targetDims);
	// The transform owns a separate generator; give it the same seed
	transform.rand().setSeed(nSeed);
	transform.setNeighborFinder(pNF);
	GMatrix* pDataAfter = transform.reduce(*pData);
	Holder<GMatrix> hDataAfter(pDataAfter);
	pDataAfter->print(cout);
}
// Command-line handler that runs the GManifoldSculpting transform.
// Arguments, consumed in order from args:
//   1. filename of the dataset (passed to loadData)
//   2. the neighbor-finder specification (consumed by instantiateNeighborFinder)
//   3. the number of target dimensions
//   options: -seed [n]          reseeds the random number generator
//            -continue [file]   loads hint data that is handed to
//                               setPreprocessedData; its relation size must equal
//                               the target dimensions and its row count must
//                               equal that of the dataset
//            -scalerate [d]     value passed to setSquishingRate (default 0.999)
// Throws Ex on an unrecognized option or on mismatching hint data.
// The transformed matrix is printed to cout.
void ManifoldSculpting(GArgReader& args)
{
	// Positional arguments
	GMatrix* pInput = loadData(args.pop_string());
	Holder<GMatrix> hInput(pInput);
	// Default seed mixes the process id with the current time
	unsigned int defaultSeed = getpid() * (unsigned int)time(NULL);
	GRand prng(defaultSeed);
	GNeighborFinder* pFinder = instantiateNeighborFinder(pInput, &prng, args);
	Holder<GNeighborFinder> hFinder(pFinder);
	size_t targetDims = args.pop_uint();

	// Optional flags
	const char* szHintFile = NULL;
	double squishRate = 0.999;
	while(args.size() > 0)
	{
		if(args.if_pop("-seed"))
		{
			prng.setSeed(args.pop_uint());
			continue;
		}
		if(args.if_pop("-continue"))
		{
			szHintFile = args.pop_string();
			continue;
		}
		if(args.if_pop("-scalerate"))
		{
			squishRate = args.pop_double();
			continue;
		}
		throw Ex("Invalid option: ", args.peek());
	}

	// Load and validate the hint data, if a file was named
	GMatrix* pHint = NULL;
	Holder<GMatrix> hHint(NULL);
	if(szHintFile)
	{
		pHint = loadData(szHintFile);
		hHint.reset(pHint);
		if(pHint->relation()->size() != targetDims)
			throw Ex("Wrong number of dims in the hint data");
		if(pHint->rows() != pInput->rows())
			throw Ex("Wrong number of patterns in the hint data");
	}

	// Configure and run the transform, then emit the result
	GManifoldSculpting sculptor(pFinder->neighborCount(), targetDims, &prng);
	sculptor.setSquishingRate(squishRate);
	if(pHint)
	{
		// The holder gives up the hint matrix when handing it to the transform
		sculptor.setPreprocessedData(hHint.release());
	}
	sculptor.setNeighborFinder(pFinder);
	GMatrix* pSculpted = sculptor.doit(*pInput);
	Holder<GMatrix> hSculpted(pSculpted);
	pSculpted->print(cout);
}
// Command-line handler that blends two embeddings of the same dataset with
// GManifold::blendEmbeddings, weighting every row 0.5.
// Arguments, consumed in order from args:
//   1. filename of the original dataset (passed to loadData)
//   2. the neighbor-finder specification (consumed by instantiateNeighborFinder)
//   3. filename of embedding A
//   4. filename of embedding B
//   options: -seed [n]  reseeds the random number generator
// Throws Ex if either embedding's row count differs from the original's, if the
// two embeddings differ in column count, or on an unrecognized option.
// The blended matrix is printed to cout.
void blendEmbeddings(GArgReader& args)
{
	// Positional arguments
	GMatrix* pOriginal = loadData(args.pop_string());
	Holder<GMatrix> hOriginal(pOriginal);
	// Default seed mixes the process id with the current time
	unsigned int defaultSeed = getpid() * (unsigned int)time(NULL);
	GRand prng(defaultSeed);
	GNeighborFinder* pNF = instantiateNeighborFinder(pOriginal, &prng, args);
	Holder<GNeighborFinder> hNF(pNF);
	GMatrix* pEmbedA = loadData(args.pop_string());
	Holder<GMatrix> hEmbedA(pEmbedA);
	GMatrix* pEmbedB = loadData(args.pop_string());
	Holder<GMatrix> hEmbedB(pEmbedB);

	// Both embeddings must line up with the original data and with each other
	if(pEmbedA->rows() != pOriginal->rows() || pEmbedB->rows() != pOriginal->rows())
		throw Ex("mismatching number of rows");
	if(pEmbedA->cols() != pEmbedB->cols())
		throw Ex("mismatching number of cols");

	// Optional flags: "-seed" is the only one recognized
	while(args.size() > 0)
	{
		if(!args.if_pop("-seed"))
			throw Ex("Invalid option: ", args.peek());
		prng.setSeed(args.pop_uint());
	}

	// Make sure the neighbor finder is a caching one, wrapping it if it is not,
	// so that a neighbor table can be obtained from it
	if(!pNF->isCached())
	{
		GNeighborFinderCacheWrapper* pWrapper = new GNeighborFinderCacheWrapper(hNF.release(), true);
		hNF.reset(pWrapper);
		pNF = pWrapper;
	}
	GNeighborFinderCacheWrapper* pCaching = (GNeighborFinderCacheWrapper*)pNF;
	pCaching->fillCache();
	size_t* pNeighborTable = pCaching->cache();

	// Pick a starting row with the generator, give every row an even ratio, and blend
	size_t startPoint = (size_t)prng.next(pEmbedA->rows());
	double* pRatios = new double[pEmbedA->rows()];
	ArrayHolder<double> hRatios(pRatios);
	GVec::setAll(pRatios, 0.5, pEmbedA->rows());
	GMatrix* pBlended = GManifold::blendEmbeddings(pEmbedA, pRatios, pEmbedB, pNF->neighborCount(), pNeighborTable, startPoint);
	Holder<GMatrix> hBlended(pBlended);
	pBlended->print(cout);
}
// virtual void GNeighborTransducer::transduce(GData* pDataLabeled, GData* pDataUnlabeled, int labelDims) { if(labelDims != 1) ThrowError("Only 1 nominal label is supported"); if(!pDataLabeled->relation()->areNominal(pDataLabeled->relation()->size() - 1, 1)) ThrowError("Only nominal labels are supported"); if(!pDataLabeled->relation()->areContinuous(0, pDataLabeled->relation()->size() - 1)) ThrowError("Only continuous features are supported"); if(pDataLabeled->cols() != pDataUnlabeled->cols()) ThrowError("relations don't match"); // Make a dataset containing all rows GData dataAll(pDataLabeled->relation()); dataAll.reserve(pDataLabeled->rows() + pDataUnlabeled->rows()); GReleaseDataHolder hDataAll(&dataAll); for(size_t i = 0; i < pDataUnlabeled->rows(); i++) dataAll.takeRow(pDataUnlabeled->row(i)); for(size_t i = 0; i < pDataLabeled->rows(); i++) dataAll.takeRow(pDataLabeled->row(i)); int featureDims = pDataLabeled->cols() - labelDims; sp_relation pRelInputs = new GUniformRelation(featureDims, 0); dataAll.setRelation(pRelInputs); // Find friends GNeighborFinder* pNF; if(m_intrinsicDims == 0) pNF = new GNeighborFinderCacheWrapper(new GKdTree(&dataAll, 0, m_friendCount, NULL, true), true); else pNF = new GManifoldNeighborFinder( &dataAll, m_friendCount, // littleK m_friendCount * 4, // bigK m_intrinsicDims, // intrinsicDims m_alpha, // alpha m_beta, // beta false, // prune? 
m_pRand); Holder<GNeighborFinder> hNF(pNF); GTEMPBUF(size_t, neighbors, m_friendCount); int labelValues = pDataLabeled->relation()->valueCount(featureDims); GTEMPBUF(double, tallys, labelValues); // Label the unlabeled patterns GBitTable labeled(pDataUnlabeled->rows()); GData labelList(3); // pattern index, most likely label, confidence labelList.newRows(pDataUnlabeled->rows()); for(size_t i = 0; i < pDataUnlabeled->rows(); i++) labelList.row(i)[0] = i; while(labelList.rows() > 0) { // Compute the most likely label and the confidence for each pattern for(size_t i = 0; i < labelList.rows(); i++) { // Find the most common label double* pRow = labelList.row(i); size_t index = (size_t)pRow[0]; pNF->neighbors(neighbors, index); GVec::setAll(tallys, 0.0, labelValues); for(int j = 0; j < m_friendCount; j++) { if(neighbors[j] >= dataAll.rows()) continue; double* pFriend = dataAll.row(neighbors[j]); if(neighbors[j] >= pDataUnlabeled->rows()) { if((int)pFriend[featureDims] >= 0 && (int)pFriend[featureDims] < labelValues) tallys[(int)pFriend[featureDims]] += 1.0; } else if(labeled.bit(neighbors[j])) { if((int)pFriend[featureDims] >= 0 && (int)pFriend[featureDims] < labelValues) tallys[(int)pFriend[featureDims]] += 0.6; } } int label = GVec::indexOfMax(tallys, labelValues, m_pRand); double conf = tallys[label]; // Penalize for dissenting votes for(int j = 0; j < m_friendCount; j++) { if(neighbors[j] >= dataAll.rows()) continue; double* pFriend = dataAll.row(neighbors[j]); if(neighbors[j] >= pDataUnlabeled->rows()) { if((int)pFriend[featureDims] != label) conf *= 0.5; } else if(labeled.bit(neighbors[j])) { if((int)pFriend[featureDims] != label) conf *= 0.8; } } pRow[1] = label; pRow[2] = conf; } labelList.sort(2); // Assign the labels to the patterns we are most confident about size_t maxCount = MAX((size_t)5, pDataLabeled->rows() / 5); size_t count = 0; for(size_t i = labelList.rows() - 1; i < labelList.rows(); i--) { double* pRow = labelList.row(i); size_t index = 
(size_t)pRow[0]; int label = (int)pRow[1]; pDataUnlabeled->row(index)[featureDims] = label; labeled.set(index); labelList.deleteRow(i); if(count >= maxCount) break; count++; } } }