// virtual void GResamplingAdaBoost::trainInnerInner(const GMatrix& features, const GMatrix& labels) { clear(); // Initialize all instances with uniform weights GVec pDistribution(features.rows()); pDistribution.fill(1.0 / features.rows()); size_t drawRows = size_t(m_trainSize * features.rows()); size_t* pDrawnIndexes = new size_t[drawRows]; std::unique_ptr<size_t[]> hDrawnIndexes(pDrawnIndexes); // Train the ensemble size_t labelDims = labels.cols(); double penalty = 1.0 / labelDims; GVec prediction(labelDims); for(size_t es = 0; es < m_ensembleSize; es++) { // Draw a training set from the distribution GCategoricalSamplerBatch csb(features.rows(), pDistribution, m_rand); csb.draw(drawRows, pDrawnIndexes); GMatrix drawnFeatures(features.relation().clone()); GReleaseDataHolder hDrawnFeatures(&drawnFeatures); GMatrix drawnLabels(labels.relation().clone()); GReleaseDataHolder hDrawnLabels(&drawnLabels); size_t* pIndex = pDrawnIndexes; for(size_t i = 0; i < drawRows; i++) { drawnFeatures.takeRow((GVec*)&features[*pIndex]); drawnLabels.takeRow((GVec*)&labels[*pIndex]); pIndex++; } // Train an instance of the model and store a clone of it m_pLearner->train(drawnFeatures, drawnLabels); GDom doc; GSupervisedLearner* pClone = m_pLoader->loadLearner(m_pLearner->serialize(&doc)); // Compute model weight double err = 0.5; for(size_t i = 0; i < features.rows(); i++) { pClone->predict(features[i], prediction); const GVec& target = labels[i]; for(size_t j = 0; j < labelDims; j++) { if((int)target[j] != (int)prediction[j]) err += penalty; } } err /= features.rows(); if(err >= 0.5) { delete(pClone); break; } double weight = 0.5 * log((1.0 - err) / err); m_models.push_back(new GWeightedModel(weight, pClone)); // Update the distribution to favor mis-classified instances for(size_t i = 0; i < features.rows(); i++) { err = 0.0; pClone->predict(features[i], prediction); const GVec& target = labels[i]; for(size_t j = 0; j < labelDims; j++) { if((int)target[j] != (int)prediction[j]) err += penalty; } err /= labelDims; pDistribution[i] *= exp(weight * (err * 2.0 - 1.0)); } pDistribution.sumToOne(); } normalizeWeights(); }
// virtual void GResamplingAdaBoost::trainInnerInner(GMatrix& features, GMatrix& labels) { clear(); // Initialize all instances with uniform weights double* pDistribution = new double[features.rows()]; ArrayHolder<double> hDistribution(pDistribution); GVec::setAll(pDistribution, 1.0 / features.rows(), features.rows()); size_t drawRows = size_t(m_trainSize * features.rows()); size_t* pDrawnIndexes = new size_t[drawRows]; ArrayHolder<size_t> hDrawnIndexes(pDrawnIndexes); // Train the ensemble size_t labelDims = labels.cols(); double penalty = 1.0 / labelDims; GTEMPBUF(double, prediction, labelDims); for(size_t es = 0; es < m_ensembleSize; es++) { // Draw a training set from the distribution GCategoricalSamplerBatch csb(features.rows(), pDistribution, m_rand); csb.draw(drawRows, pDrawnIndexes); GMatrix drawnFeatures(features.relation()); GReleaseDataHolder hDrawnFeatures(&drawnFeatures); GMatrix drawnLabels(labels.relation()); GReleaseDataHolder hDrawnLabels(&drawnLabels); size_t* pIndex = pDrawnIndexes; for(size_t i = 0; i < drawRows; i++) { drawnFeatures.takeRow(features[*pIndex]); drawnLabels.takeRow(labels[*pIndex]); pIndex++; } // Train an instance of the model and store a clone of it m_pLearner->train(drawnFeatures, drawnLabels); GDom doc; GSupervisedLearner* pClone = m_pLoader->loadSupervisedLearner(m_pLearner->serialize(&doc)); // Compute model weight double err = 0.0; for(size_t i = 0; i < features.rows(); i++) { pClone->predict(features[i], prediction); double* pTarget = labels[i]; double* pPred = prediction; for(size_t j = 0; j < labelDims; j++) { if((int)*(pTarget++) != (int)*(pPred++)) err += penalty; } } err /= features.rows(); if(err >= 0.5) { delete(pClone); break; } double weight = 0.5 * log((1.0 - err) / err); m_models.push_back(new GWeightedModel(weight, pClone)); // Update the distribution to favor mis-classified instances double* pDist = pDistribution; for(size_t i = 0; i < features.rows(); i++) { err = 0.0; pClone->predict(features[i], prediction); double* pTarget = labels[i]; double* pPred = prediction; for(size_t j = 0; j < labelDims; j++) { if((int)*(pTarget++) != (int)*(pPred++)) err += penalty; } err /= labelDims; *pDist *= exp(weight * (err * 2.0 - 1.0)); pDist++; } GVec::sumToOne(pDistribution, features.rows()); } normalizeWeights(); }