// Runs the GLLE transform on a dataset and prints the resulting matrix to cout.
// Arguments are consumed in this order: the dataset filename, the arguments
// consumed by instantiateNeighborFinder, the number of target dimensions, and
// then any options.
// Options:
//   -seed <uint>   Re-seeds the random number generator.
// Throws Ex when an unrecognized option is encountered.
void lle(GArgReader& args)
{
	// Load the dataset named by the first argument
	GMatrix* pInput = loadData(args.pop_string());
	Holder<GMatrix> hInput(pInput);

	// Default seed is derived from the process id and the clock
	unsigned int defaultSeed = getpid() * (unsigned int)time(NULL);
	GRand prng(defaultSeed);
	GNeighborFinder* pFinder = instantiateNeighborFinder(pInput, &prng, args);
	Holder<GNeighborFinder> hFinder(pFinder);
	int dims = args.pop_uint();

	// Everything that remains must be a recognized option
	while(args.size() > 0)
	{
		if(!args.if_pop("-seed"))
			throw Ex("Invalid option: ", args.peek());
		prng.setSeed(args.pop_uint());
	}

	// Transform the data and print the result
	GLLE embedder(pFinder->neighborCount(), dims, &prng);
	embedder.setNeighborFinder(pFinder);
	GMatrix* pOutput = embedder.doit(*pInput);
	Holder<GMatrix> hOutput(pOutput);
	pOutput->print(cout);
}
// Adds noise to a dataset and prints the result to cout. Every affected
// element is incremented by dev * prng.normal().
// Arguments are consumed in this order: the dataset filename, the value "dev",
// and then any options.
// Options:
//   -seed <uint>          Seed for the random number generator (the default is
//                         derived from the process id and the clock).
//   -excludelast <uint>   Number of trailing columns to leave untouched.
// Raises an error through ThrowError when an unrecognized option is given, or
// when -excludelast is larger than the number of columns in the dataset.
void addNoise(GArgReader& args)
{
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	double dev = args.pop_double();

	// Parse the options
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	size_t excludeLast = 0;
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else if(args.if_pop("-excludelast"))
			excludeLast = args.pop_uint();
		else
			ThrowError("Invalid option: ", args.peek());
	}

	// Without this check the subtraction below would wrap around and the
	// inner loop would write far past the end of every row.
	if(excludeLast > pData->cols())
		ThrowError("The value given for -excludelast exceeds the number of columns in the dataset");

	GRand prng(seed);
	size_t cols = pData->cols() - excludeLast;
	for(size_t r = 0; r < pData->rows(); r++)
	{
		double* pRow = pData->row(r);
		for(size_t c = 0; c < cols; c++)
			*(pRow++) += dev * prng.normal();
	}
	pData->print(cout);
}
// Runs the GBreadthFirstUnfolding transform on a dataset and prints the
// resulting matrix to cout.
// Arguments are consumed in this order: the dataset filename, the arguments
// consumed by instantiateNeighborFinder, the number of target dimensions, and
// then any options.
// Options:
//   -seed <uint>   Seed for the random number generators (the default is
//                  derived from the process id and the clock).
//   -reps <uint>   Value passed as the first constructor argument of
//                  GBreadthFirstUnfolding (defaults to 1).
// Throws Ex when an unrecognized option is encountered.
void breadthFirstUnfolding(GArgReader& args)
{
	// Load the file and params
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	size_t nSeed = getpid() * (unsigned int)time(NULL);
	GRand prng(nSeed);
	GNeighborFinder* pNF = instantiateNeighborFinder(pData, &prng, args);
	Holder<GNeighborFinder> hNF(pNF);
	int targetDims = args.pop_uint();

	// Parse Options
	size_t reps = 1;
	while(args.size() > 0)
	{
		if(args.if_pop("-seed"))
			nSeed = args.pop_uint();
		else if(args.if_pop("-reps"))
			reps = args.pop_uint();
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// Apply the requested seed to the generator that was handed to the
	// neighbor finder as well, so that -seed covers every source of randomness
	// in this command (presumably the neighbor finder keeps the pointer it was
	// given -- verify against instantiateNeighborFinder).
	prng.setSeed(nSeed);

	// Transform the data
	GBreadthFirstUnfolding transform(reps, pNF->neighborCount(), targetDims);
	transform.rand().setSeed(nSeed);
	transform.setNeighborFinder(pNF);
	GMatrix* pDataAfter = transform.reduce(*pData);
	Holder<GMatrix> hDataAfter(pDataAfter);
	pDataAfter->print(cout);
}
// Trains a GNominalToCat transform on a dataset, applies it to that same
// dataset with transformBatch, and prints the result to cout.
// Arguments: the dataset filename followed by any options.
// Options:
//   -maxvalues <uint>   Value passed to the GNominalToCat constructor
//                       (defaults to 12).
// Raises an error through ThrowError when an unrecognized option is given.
void nominalToCat(GArgReader& args)
{
	// Load the dataset
	GMatrix* pInput = loadData(args.pop_string());
	Holder<GMatrix> hInput(pInput);

	// Every remaining argument must be a recognized option
	int limit = 12;
	while(args.size() > 0)
	{
		if(!args.if_pop("-maxvalues"))
			ThrowError("Invalid option: ", args.peek());
		else
			limit = args.pop_uint();
	}

	// Train on the data, then convert it
	GNominalToCat converter(limit);
	converter.train(*pInput);
	GMatrix* pConverted = converter.transformBatch(*pInput);
	Holder<GMatrix> hConverted(pConverted);

	// Print results
	pConverted->print(cout);
}
// Prepends a column named "index" to a dataset and prints the merged result
// to cout. Row i receives the value start + i * increment.
// Arguments: the dataset filename followed by any options.
// Options:
//   -start <double>       Value for the first row (defaults to 0.0).
//   -increment <double>   Difference between consecutive rows (defaults to 1.0).
// Raises an error through ThrowError when an unrecognized option is given.
void AddIndexAttribute(GArgReader& args)
{
	// All arguments are parsed before the dataset is loaded
	const char* szFile = args.pop_string();
	double first = 0.0;
	double step = 1.0;
	while(args.size() > 0)
	{
		if(args.if_pop("-start"))
		{
			first = args.pop_double();
			continue;
		}
		if(args.if_pop("-increment"))
		{
			step = args.pop_double();
			continue;
		}
		ThrowError("Invalid option: ", args.peek());
	}

	GMatrix* pInput = loadData(filenameOrSelf(szFile));
	Holder<GMatrix> hInput(pInput);

	// Build a one-column matrix that holds the index values
	GArffRelation* pRel = new GArffRelation();
	pRel->addAttribute("index", 0, NULL);
	sp_relation spRel = pRel;
	GMatrix indexColumn(spRel);
	size_t rowCount = pInput->rows();
	indexColumn.newRows(rowCount);
	for(size_t i = 0; i < pInput->rows(); i++)
		indexColumn.row(i)[0] = first + i * step;

	// Put the index column in front of the original columns and print
	GMatrix* pMerged = GMatrix::mergeHoriz(&indexColumn, pInput);
	Holder<GMatrix> hMerged(pMerged);
	pMerged->print(cout);
}
// Rewrites one column of a dataset and prints the dataset to cout.
// Arguments: the dataset filename, then the index of the column.
// If the relation reports a value count greater than zero for the column, the
// relation is told (through setAttrValueCount) that the column has zero
// values. Otherwise every distinct value in the column is replaced with an
// integer id: ids start at 0 and are handed out in order of first appearance.
void enumerateValues(GArgReader& args)
{
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	size_t col = args.pop_uint();

	if(pData->relation()->valueCount(col) > 0)
	{
		((GArffRelation*)pData->relation().get())->setAttrValueCount(col, 0);
	}
	else
	{
		// Maps each value seen so far to the id it was given
		map<double,size_t> ids;
		for(size_t i = 0; i < pData->rows(); i++)
		{
			double* pRow = pData->row(i);
			map<double,size_t>::iterator found = ids.find(pRow[col]);
			size_t id;
			if(found != ids.end())
				id = found->second;
			else
			{
				// The next unused id always equals the number of ids issued
				id = ids.size();
				ids[pRow[col]] = id;
			}
			pRow[col] = (double)id;
		}
	}
	pData->print(cout);
}
// Runs the GIsomap transform on a dataset and prints the resulting matrix to
// cout.
// Arguments are consumed in this order: the dataset filename, the arguments
// consumed by instantiateNeighborFinder, the number of target dimensions, and
// then any options.
// Options:
//   -seed <uint>   Re-seeds the random number generator.
//   -tolerant      Calls dropDisconnectedPoints() on the transform before
//                  reducing.
// Throws Ex when an unrecognized option is encountered.
void isomap(GArgReader& args)
{
	// Load the dataset named by the first argument
	GMatrix* pInput = loadData(args.pop_string());
	Holder<GMatrix> hInput(pInput);

	// Default seed is derived from the process id and the clock
	unsigned int defaultSeed = getpid() * (unsigned int)time(NULL);
	GRand prng(defaultSeed);
	GNeighborFinder* pFinder = instantiateNeighborFinder(pInput, &prng, args);
	Holder<GNeighborFinder> hFinder(pFinder);
	int dims = args.pop_uint();

	// Parse the options
	bool dropDisconnected = false;
	while(args.size() > 0)
	{
		if(args.if_pop("-tolerant"))
			dropDisconnected = true;
		else if(args.if_pop("-seed"))
			prng.setSeed(args.pop_uint());
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// Transform the data and print the result
	GIsomap reducer(pFinder->neighborCount(), dims, &prng);
	reducer.setNeighborFinder(pFinder);
	if(dropDisconnected)
		reducer.dropDisconnectedPoints();
	GMatrix* pOutput = reducer.reduce(*pInput);
	Holder<GMatrix> hOutput(pOutput);
	pOutput->print(cout);
}
// Loads the dataset named by the next argument, calls GMatrix::cholesky() on
// it (which modifies the matrix in place), and prints the matrix to cout.
void cholesky(GArgReader& args)
{
	const char* szFile = args.pop_string();
	GMatrix* pMatrix = loadData(szFile);
	Holder<GMatrix> hMatrix(pMatrix);

	pMatrix->cholesky();
	pMatrix->print(cout);
}
// Generates precision-recall data for a collaborative filter and prints it to
// cout.
// Arguments are consumed in this order: any options, the dataset filename, and
// then the arguments consumed by InstantiateAlgorithm.
// Options:
//   -seed <uint>   Seed applied to the model's random number generator (the
//                  default is derived from the process id and the clock).
//   -ideal         Passed through as the second argument of
//                  GCollaborativeFilter::precisionRecall.
// Throws Ex on an unrecognized option, when no dataset is named, or when
// arguments remain after the algorithm has been instantiated.
void GRecommenderLib::precisionRecall(GArgReader& args)
{
	// Parse options
	bool useIdeal = false;
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	while(args.next_is_flag())
	{
		if(args.if_pop("-ideal"))
			useIdeal = true;
		else if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// Load the data
	if(args.size() < 1)
		throw Ex("No dataset specified.");
	GMatrix data;
	loadData(data, args.pop_string());

	// Instantiate the recommender; nothing may follow its arguments
	GCollaborativeFilter* pFilter = InstantiateAlgorithm(args);
	std::unique_ptr<GCollaborativeFilter> hFilter(pFilter);
	if(args.size() > 0)
		throw Ex("Superfluous argument: ", args.peek());
	pFilter->rand().setSeed(seed);

	// Generate precision-recall data
	GMatrix* pCurve = pFilter->precisionRecall(data, useIdeal);
	std::unique_ptr<GMatrix> hCurve(pCurve);
	pCurve->deleteColumns(2, 1); // we don't need the false-positive rate column
	pCurve->print(cout);
}
// Generates precision-recall data for a collaborative filter and prints it to
// cout.
// Arguments are consumed in this order: any options, the dataset filename, and
// then the arguments consumed by InstantiateAlgorithm.
// Options:
//   -seed <uint>   Seed for the random number generator handed to
//                  InstantiateAlgorithm (the default is derived from the
//                  process id and the clock).
//   -ideal         Passed through as the second argument of
//                  GCollaborativeFilter::precisionRecall.
// Raises an error through ThrowError on an unrecognized option, when no
// dataset is named, or when arguments remain after the algorithm has been
// instantiated.
void precisionRecall(GArgReader& args)
{
	// Parse options
	bool useIdeal = false;
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	while(args.next_is_flag())
	{
		if(args.if_pop("-ideal"))
			useIdeal = true;
		else if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else
			ThrowError("Invalid option: ", args.peek());
	}

	// Load the data
	if(args.size() < 1)
		ThrowError("No dataset specified.");
	GMatrix* pRatings = loadData(args.pop_string());
	Holder<GMatrix> hRatings(pRatings);

	// Instantiate the recommender; nothing may follow its arguments
	GRand prng(seed);
	GCollaborativeFilter* pFilter = InstantiateAlgorithm(prng, args);
	Holder<GCollaborativeFilter> hFilter(pFilter);
	if(args.size() > 0)
		ThrowError("Superfluous argument: ", args.peek());

	// Generate precision-recall data
	GMatrix* pCurve = pFilter->precisionRecall(*pRatings, useIdeal);
	Holder<GMatrix> hCurve(pCurve);
	pCurve->deleteColumn(2); // we don't need the false-positive rate column
	pCurve->print(cout);
}
// Loads the dataset named by the next argument, calls
// GMatrix::toReducedRowEchelonForm() on it (which modifies the matrix in
// place), and prints the matrix to cout.
void reducedRowEchelonForm(GArgReader& args)
{
	const char* szFile = args.pop_string();
	GMatrix* pMatrix = loadData(szFile);
	Holder<GMatrix> hMatrix(pMatrix);

	pMatrix->toReducedRowEchelonForm();
	pMatrix->print(cout);
}
// Replaces the missing values in every attribute of a dataset and prints the
// dataset to cout.
// Arguments: the dataset filename followed by any options.
// Options:
//   -seed <uint>   Seed for the random number generator (the default is
//                  derived from the process id and the clock).
//   -random        Use GMatrix::replaceMissingValuesRandomly instead of
//                  GMatrix::replaceMissingValuesWithBaseline.
// Raises an error through ThrowError when an unrecognized option is given.
void fillMissingValues(GArgReader& args)
{
	// Load
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);

	// Parse options
	bool fillRandomly = false;
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	while(args.size() > 0)
	{
		if(args.if_pop("-random"))
			fillRandomly = true;
		else if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else
			ThrowError("Invalid option: ", args.peek());
	}

	// Replace missing values, one attribute at a time, and print
	GRand prng(seed);
	for(size_t attr = 0; attr < pData->relation()->size(); attr++)
	{
		if(fillRandomly)
			pData->replaceMissingValuesRandomly(attr, &prng);
		else
			pData->replaceMissingValuesWithBaseline(attr);
	}
	pData->print(cout);
}
// Builds a matrix out of one column taken from each ".arff" file returned by
// GFile::fileList, and prints that matrix to cout.
// Arguments: the index of the column to copy out of every file.
// Column i of the result holds the requested column of the i-th .arff file,
// in the order in which GFile::fileList reported the files. The number of
// rows in the result is taken from the first .arff file.
// If no .arff file is found, a message is written to std::cerr and nothing is
// printed to cout.
void aggregateCols(GArgReader& args)
{
	size_t c = args.pop_uint();
	vector<string> files;
	GFile::fileList(files);

	// Pick out the .arff files first, so the result can be allocated with
	// exactly one column per dataset rather than one per directory entry.
	vector<string> arffFiles;
	for(vector<string>::iterator it = files.begin(); it != files.end(); it++)
	{
		PathData pd;
		GFile::parsePath(it->c_str(), &pd);
		if(strcmp(it->c_str() + pd.extStart, ".arff") == 0)
			arffFiles.push_back(*it);
	}

	// With nothing to aggregate there is no result matrix to print
	if(arffFiles.empty())
	{
		std::cerr << "No .arff files were found, so there is nothing to aggregate\n";
		return;
	}

	GMatrix* pResults = NULL;
	Holder<GMatrix> hResults;
	for(size_t i = 0; i < arffFiles.size(); i++)
	{
		GMatrix* pData = loadData(arffFiles[i].c_str());
		Holder<GMatrix> hData(pData);
		if(!pResults)
		{
			// The first dataset determines the number of rows
			pResults = new GMatrix(pData->rows(), arffFiles.size());
			hResults.reset(pResults);
		}
		pResults->copyColumns(i, pData, c, 1);
	}
	pResults->print(cout);
}
// Runs GKMeans on a dataset and prints the matrix returned by reduce() to
// cout.
// Arguments are consumed in this order: the dataset filename, the number of
// clusters, and then any options.
// Options:
//   -seed <uint>   Seed for the random number generator (the default is
//                  derived from the process id and the clock).
//   -reps <uint>   Value passed to GKMeans::setReps (defaults to 1).
// Throws Ex when an unrecognized option is encountered.
void kmeans(GArgReader& args)
{
	// Load the file and params
	GMatrix data;
	loadData(data, args.pop_string());
	int clusterCount = args.pop_uint();

	// Parse options
	size_t repetitions = 1;
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	while(args.size() > 0)
	{
		if(args.if_pop("-reps"))
			repetitions = args.pop_uint();
		else if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// Do the clustering and print the result
	GRand prng(seed);
	GKMeans km(clusterCount, &prng);
	km.setReps(repetitions);
	GMatrix* pResult = km.reduce(data);
	std::unique_ptr<GMatrix> hResult(pResult);
	pResult->print(cout);
}
// Deletes every row of a dataset that contains a missing value and prints the
// remaining rows to cout.
// Arguments: the dataset filename.
// An element counts as missing when it equals UNKNOWN_REAL_VALUE in an
// attribute whose valueCount is 0, or UNKNOWN_DISCRETE_VALUE in any other
// attribute.
void DropMissingValues(GArgReader& args)
{
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	GRelation* pRel = pData->relation().get();
	size_t attrCount = pRel->size();

	// Visit the rows from last to first, so that deleting a row never
	// disturbs a row that has not been examined yet.
	size_t i = pData->rows();
	while(i > 0)
	{
		i--;
		double* pRow = pData->row(i);

		// Stop scanning the row as soon as one missing value is found
		bool incomplete = false;
		for(size_t j = 0; j < attrCount && !incomplete; j++)
		{
			if(pRel->valueCount(j) == 0)
				incomplete = (pRow[j] == UNKNOWN_REAL_VALUE);
			else
				incomplete = (pRow[j] == UNKNOWN_DISCRETE_VALUE);
		}
		if(incomplete)
			pData->deleteRow(i);
	}
	pData->print(cout);
}
// Loads the dataset named by the next argument and prints the matrix returned
// by GMatrix::transpose() to cout.
void Transpose(GArgReader& args)
{
	const char* szFile = args.pop_string();
	GMatrix* pInput = loadData(szFile);
	Holder<GMatrix> hInput(pInput);

	GMatrix* pResult = pInput->transpose();
	Holder<GMatrix> hResult(pResult);
	pResult->print(cout);
}
// Loads the dataset named by the next argument and prints the matrix returned
// by GMatrix::pseudoInverse() to cout.
void pseudoInverse(GArgReader& args)
{
	const char* szFile = args.pop_string();
	GMatrix* pInput = loadData(szFile);
	Holder<GMatrix> hInput(pInput);

	GMatrix* pResult = pInput->pseudoInverse();
	Holder<GMatrix> hResult(pResult);
	pResult->print(cout);
}
// Loads the dataset named by the next argument, calls
// GMatrix::centerMeanAtOrigin() on it, and prints the matrix to cout.
// Raises an error through ThrowError if any argument follows the filename.
void zeroMean(GArgReader& args)
{
	GMatrix* pMatrix = loadData(args.pop_string());
	Holder<GMatrix> hMatrix(pMatrix);

	// This command accepts no options
	bool hasExtraArgs = (args.size() > 0);
	if(hasExtraArgs)
		ThrowError("Superfluous arg: ", args.pop_string());

	pMatrix->centerMeanAtOrigin();
	pMatrix->print(cout);
}
// Truncates a dataset to at most a given number of rows and prints it to cout.
// Arguments: the dataset filename, then the number of rows to keep.
// Rows are removed from the end, one at a time, until no more than the
// requested number remain. A dataset that is already small enough is printed
// unchanged.
void dropRows(GArgReader& args)
{
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	size_t keep = args.pop_uint();

	// Always delete the current last row
	for(size_t remaining = pData->rows(); remaining > keep; remaining = pData->rows())
		pData->deleteRow(remaining - 1);

	pData->print(cout);
}
// Loads two datasets, calls add(second, false) on the first, and prints the
// first to cout.
// Arguments: the filename of the first dataset, then the filename of the
// second.
void addMatrices(GArgReader& args)
{
	// The left operand receives the result
	GMatrix* pLeft = loadData(args.pop_string());
	Holder<GMatrix> hLeft(pLeft);

	GMatrix* pRight = loadData(args.pop_string());
	Holder<GMatrix> hRight(pRight);

	pLeft->add(pRight, false);
	pLeft->print(cout);
}
// Loads a dataset, calls GMatrix::multiply(scale) on it, and prints the
// matrix to cout.
// Arguments: the dataset filename, then the scalar.
// Raises an error through ThrowError if any argument follows the scalar.
void multiplyScalar(GArgReader& args)
{
	GMatrix* pMatrix = loadData(args.pop_string());
	Holder<GMatrix> hMatrix(pMatrix);
	double factor = args.pop_double();

	// This command accepts no options
	bool hasExtraArgs = (args.size() > 0);
	if(hasExtraArgs)
		ThrowError("Superfluous arg: ", args.pop_string());

	pMatrix->multiply(factor);
	pMatrix->print(cout);
}
// Loads two datasets and prints the matrix returned by
// GMatrix::align(first, second) to cout.
// Arguments: the filename of the first dataset, then the filename of the
// second.
void align(GArgReader& args)
{
	GMatrix* pFirst = loadData(args.pop_string());
	Holder<GMatrix> hFirst(pFirst);

	GMatrix* pSecond = loadData(args.pop_string());
	Holder<GMatrix> hSecond(pSecond);

	GMatrix* pAligned = GMatrix::align(pFirst, pSecond);
	Holder<GMatrix> hAligned(pAligned);
	pAligned->print(cout);
}
// Deletes a list of columns from a dataset and prints the result to cout.
// Arguments: the dataset filename, then the column list consumed by
// parseAttributeList.
void dropColumns(GArgReader& args)
{
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);

	vector<size_t> doomed;
	size_t colCount = pData->cols();
	parseAttributeList(doomed, args, colCount);

	// Sorting through the reverse iterators leaves the indexes in descending
	// order. Deleting the highest index first keeps the remaining, lower
	// indexes valid.
	std::sort(doomed.rbegin(), doomed.rend());
	for(vector<size_t>::iterator it = doomed.begin(); it != doomed.end(); it++)
		pData->deleteColumn(*it);

	pData->print(cout);
}
// Runs GAgglomerativeClusterer on a dataset and prints the matrix returned by
// reduce() to cout.
// Arguments: the dataset filename, then the number of clusters.
void agglomerativeclusterer(GArgReader& args)
{
	// Load the file and params
	GMatrix data;
	loadData(data, args.pop_string());
	int clusterCount = args.pop_uint();

	// Do the clustering and print the result
	GAgglomerativeClusterer agglomerator(clusterCount);
	GMatrix* pResult = agglomerator.reduce(data);
	std::unique_ptr<GMatrix> hResult(pResult);
	pResult->print(cout);
}
// Swaps two columns of a dataset and prints the result to cout.
// Arguments: the dataset filename, then the indexes of the two columns.
// Raises an error through ThrowError if either index is not smaller than the
// number of attributes in the relation.
void SwapAttributes(GArgReader& args)
{
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	size_t first = args.pop_uint();
	size_t second = args.pop_uint();

	// Both indexes must refer to existing attributes
	size_t limit = pData->relation()->size();
	if(first >= limit || second >= limit)
		ThrowError("Index out of range");

	pData->swapColumns(first, second);
	pData->print(cout);
}
///TODO: this command should be documented void center(GArgReader& args) { GMatrix* pData = loadData(args.pop_string()); Holder<GMatrix> hData(pData); unsigned int r = args.pop_uint(); size_t cols = pData->cols(); double* pRow = pData->row(r); for(size_t i = 0; i < r; ++i) GVec::subtract(pData->row(i), pRow, cols); for(size_t i = r + 1; i < pData->rows(); ++i) GVec::subtract(pData->row(i), pRow, cols); GVec::setAll(pRow, 0.0, cols); pData->print(cout); }
// Runs the GManifoldSculpting transform on a dataset and prints the resulting
// matrix to cout.
// Arguments are consumed in this order: the dataset filename, the arguments
// consumed by instantiateNeighborFinder, the number of target dimensions, and
// then any options.
// Options:
//   -seed <uint>          Re-seeds the random number generator.
//   -continue <file>      Dataset handed to setPreprocessedData. It must have
//                         exactly as many attributes as there are target
//                         dimensions, and as many rows as the input dataset.
//   -scalerate <double>   Value passed to setSquishingRate (defaults to 0.999).
// Throws Ex on an unrecognized option or when the -continue dataset has the
// wrong shape.
void ManifoldSculpting(GArgReader& args)
{
	// Load the file and params
	GMatrix* pInput = loadData(args.pop_string());
	Holder<GMatrix> hInput(pInput);
	unsigned int defaultSeed = getpid() * (unsigned int)time(NULL);
	GRand prng(defaultSeed);
	GNeighborFinder* pFinder = instantiateNeighborFinder(pInput, &prng, args);
	Holder<GNeighborFinder> hFinder(pFinder);
	size_t targetDims = args.pop_uint();

	// Parse options
	double squishRate = 0.999;
	const char* szHintFile = NULL;
	while(args.size() > 0)
	{
		if(args.if_pop("-scalerate"))
			squishRate = args.pop_double();
		else if(args.if_pop("-continue"))
			szHintFile = args.pop_string();
		else if(args.if_pop("-seed"))
			prng.setSeed(args.pop_uint());
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// Load the hint data, if any, and check that its shape fits
	GMatrix* pHint = NULL;
	Holder<GMatrix> hHint(NULL);
	if(szHintFile)
	{
		pHint = loadData(szHintFile);
		hHint.reset(pHint);
		if(pHint->relation()->size() != targetDims)
			throw Ex("Wrong number of dims in the hint data");
		if(pHint->rows() != pInput->rows())
			throw Ex("Wrong number of patterns in the hint data");
	}

	// Transform the data
	GManifoldSculpting sculptor(pFinder->neighborCount(), targetDims, &prng);
	sculptor.setSquishingRate(squishRate);
	if(pHint)
	{
		// The holder gives up the hint data here, so it will not delete it
		sculptor.setPreprocessedData(hHint.release());
	}
	sculptor.setNeighborFinder(pFinder);
	GMatrix* pOutput = sculptor.doit(*pInput);
	Holder<GMatrix> hOutput(pOutput);
	pOutput->print(cout);
}
// Adds a constant to selected columns of a dataset and prints the result to
// cout.
// Arguments are consumed in this order: the dataset filename, the column list
// consumed by parseAttributeList, and the offset to add.
void shiftColumns(GArgReader& args)
{
	GMatrix* pMatrix = loadData(args.pop_string());
	Holder<GMatrix> hMatrix(pMatrix);
	vector<size_t> selected;
	parseAttributeList(selected, args, pMatrix->cols());
	double delta = args.pop_double();

	// Apply the offset to each selected element, row by row
	for(size_t r = 0; r < pMatrix->rows(); r++)
	{
		double* pRow = pMatrix->row(r);
		for(size_t k = 0; k < selected.size(); k++)
			pRow[selected[k]] += delta;
	}
	pMatrix->print(cout);
}
// Replaces a randomly chosen portion of the elements of a dataset with
// "unknown" markers and prints the result to cout.
// Arguments are consumed in this order: the dataset filename, the portion of
// elements to drop, and then any options.
// Options:
//   -seed <uint>   Seed for the random number generator (the default is
//                  derived from the process id and the clock).
// Exactly size_t(portion * rows * cols) elements are replaced when portion is
// between 0 and 1. Elements in attributes whose valueCount is 0 become
// UNKNOWN_REAL_VALUE; all others become UNKNOWN_DISCRETE_VALUE.
// Raises an error through ThrowError when an unrecognized option is given.
void dropRandomValues(GArgReader& args)
{
	GMatrix* pData = loadData(args.pop_string());
	// Take ownership right away so that the matrix is released on every exit
	// path, including the error paths below.
	Holder<GMatrix> hData(pData);
	double portion = args.pop_double();

	// Parse the options
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else
			ThrowError("Invalid option: ", args.peek());
	}

	GRand rand(seed);

	// Selection sampling: n counts the elements not yet visited and k counts
	// the elements that still have to be dropped, so each element is dropped
	// with probability k / n.
	size_t n = pData->rows() * pData->cols();
	size_t k = size_t(portion * n);
	for(size_t i = 0; i < pData->cols(); i++)
	{
		// The marker depends only on the type of the column
		size_t vals = pData->relation()->valueCount(i);
		double unknown = (vals == 0 ? UNKNOWN_REAL_VALUE : UNKNOWN_DISCRETE_VALUE);
		for(size_t j = 0; j < pData->rows(); j++)
		{
			if(rand.next(n) < k)
			{
				pData->row(j)[i] = unknown;
				k--;
			}
			n--;
		}
	}
	pData->print(cout);
}
// Turns selected columns of a dataset into running totals and prints the
// result to cout.
// Arguments: the dataset filename, then the column list consumed by
// parseAttributeList.
// The first row is left as it is. In every later row, each selected element
// has the (already accumulated) element directly above it added to it. A
// dataset with fewer than two rows is printed unchanged.
void cumulativeColumns(GArgReader& args)
{
	GMatrix* pA = loadData(args.pop_string());
	Holder<GMatrix> hA(pA);
	vector<size_t> cols;
	parseAttributeList(cols, args, pA->cols());

	// The previous row is fetched inside the loop, so that row(0) is never
	// requested from a dataset that has no rows.
	for(size_t i = 1; i < pA->rows(); i++)
	{
		double* pPrevRow = pA->row(i - 1);
		double* pRow = pA->row(i);
		for(vector<size_t>::iterator it = cols.begin(); it != cols.end(); it++)
			pRow[*it] += pPrevRow[*it];
	}
	pA->print(cout);
}