// Splits a dataset into one ARFF file per value of a nominal attribute.
//
// Arguments consumed from args, in order:
//   1. the dataset filename (loaded via loadData),
//   2. the zero-based index of the attribute to split on,
//   3. optionally "-dropclass", which removes that attribute's column from
//      each output file.
// Any other trailing argument raises an error through ThrowError.
//
// Each output file is named "<stem>_<value>.arff", where <stem> is the part of
// the input filename between pd.fileStart and pd.extStart (presumably the file
// name without its folder or extension -- confirm against GFile::parsePath).
void splitClass(GArgReader& args)
{
	// Load the dataset; hData holds the matrix for the rest of the function
	// (presumably releasing it on scope exit -- confirm against Holder).
	const char* filename = args.pop_string();
	GMatrix* pData = loadData(filename);
	Holder<GMatrix> hData(pData);
	size_t classAttr = args.pop_uint();

	// Parse the optional flags
	bool dropClass = false;
	while(args.size() > 0)
	{
		if(args.if_pop("-dropclass"))
			dropClass = true;
		else
			ThrowError("Invalid option: ", args.peek());
	}

	// The output file stem depends only on the input filename, so compute it
	// once here rather than on every pass through the loop below.
	PathData pd;
	GFile::parsePath(filename, &pd);
	std::string baseName;
	baseName.assign(filename + pd.fileStart, pd.extStart - pd.fileStart);

	// Emit one file per value of the chosen attribute
	for(size_t i = 0; i < pData->relation()->valueCount(classAttr); i++)
	{
		// Pull the rows carrying value i into a scratch matrix
		GMatrix tmp(pData->relation(), pData->heap());
		pData->splitByNominalValue(&tmp, classAttr, i);

		// Build "<stem>_<value>.arff"
		std::ostringstream oss;
		oss << baseName << "_";
		pData->relation()->printAttrValue(oss, classAttr, (double)i);
		oss << ".arff";
		const std::string outName = oss.str();

		if(dropClass)
			tmp.deleteColumn(classAttr);
		tmp.saveArff(outName.c_str());
	}
}
// virtual void GBag::trainInnerInner(GMatrix& features, GMatrix& labels) { // Train all the models size_t nLearnerCount = m_models.size(); size_t nDrawSize = size_t(m_trainSize * features.rows()); GMatrix drawnFeatures(features.relation(), features.heap()); GMatrix drawnLabels(labels.relation(), labels.heap()); drawnFeatures.reserve(nDrawSize); drawnLabels.reserve(nDrawSize); { for(size_t i = 0; i < nLearnerCount; i++) { if(m_pCB) m_pCB(m_pThis, i, nLearnerCount); // Randomly draw some data (with replacement) GReleaseDataHolder hDrawnFeatures(&drawnFeatures); GReleaseDataHolder hDrawnLabels(&drawnLabels); for(size_t j = 0; j < nDrawSize; j++) { size_t r = (size_t)m_rand.next(features.rows()); drawnFeatures.takeRow(features[r]); drawnLabels.takeRow(labels[r]); } // Train the learner with the drawn data m_models[i]->m_pModel->train(drawnFeatures, drawnLabels); } if(m_pCB) m_pCB(m_pThis, nLearnerCount, nLearnerCount); } // Determine the weights determineWeights(features, labels); normalizeWeights(); }