/** Optimizes the objective function and writes the learned weights into params.
 *  \param params The initial params, which must have correct size.
 */
FloatT optimize(DataObjectiveFunction<FloatT>& objFct, FloatT* params, FeatureId numParams) {
  const std::size_t numExamples = objFct.getNumExamples();
  SDL_INFO(OnlineOptimizer, "Starting online optimization on " << numExamples << " training examples with "
                                << opts_.numEpochs << " epochs");

  // Create a vector of indices that we can shuffle, for processing
  // the training examples in random order:
  std::vector<std::size_t> randomOrder;
  randomOrder.reserve(numExamples);
  for (std::size_t i = 0; i < numExamples; ++i) randomOrder.push_back(i);

  const std::size_t numUpdates = opts_.numEpochs * numExamples;
  shared_ptr<ILearningRate> pLearningRate = makeLearningRate(numUpdates, opts_.learningRateOptions);

  bool useAdagrad = opts_.learningRateOptions.method == kAdagrad;
  bool useAdagradL1 = opts_.learningRateOptions.adagradL1Strength > 0.0f;
  boost::scoped_ptr<ParameterUpdate<FloatT> > update;
  if (useAdagrad) {
    if (useAdagradL1)
      update.reset(new AdagradL1ParameterUpdate<FloatT>(params, numParams,
                                                        opts_.learningRateOptions.adagradRate,
                                                        opts_.learningRateOptions.adagradL1Strength));
    else
      update.reset(new AdagradParameterUpdate<FloatT>(params, numParams,
                                                      opts_.learningRateOptions.adagradRate));
  } else
    update.reset(new ParameterUpdate<FloatT>(params, numParams));

  // Iterate over all training examples opts_.numEpochs times:
  std::size_t cntSteps = 0;
  for (std::size_t epoch = 0; epoch < opts_.numEpochs; ++epoch) {
    std::random_shuffle(randomOrder.begin(), randomOrder.end());
    objFct.initFunctionValue();
    for (std::size_t i = 0; i < numExamples; ++i, ++cntSteps) {
      std::size_t const example = randomOrder[i];  // process examples in shuffled order
      objFct.setFeatureWeights(example, example + 1, params, numParams);
      if (!useAdagrad)  // AdaGrad sets its own learning rate
        update->setRate(static_cast<FloatT>((*pLearningRate)(cntSteps)));
      FloatT fctDiff = objFct.getUpdates(example, example + 1, *update);
      objFct.increaseFunctionValue(fctDiff);
      update->incTimeStep();
    }
    SDL_INFO(OnlineOptimizer, "Epoch " << epoch << ", function value: " << objFct.getFunctionValue());
  }  // epochs

  SDL_INFO(OnlineOptimizer,
           "Finished online optimization, function value: " << objFct.getFunctionValue());
  return objFct.getFunctionValue();
}
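// The loop above is a standard stochastic-gradient skeleton: shuffle the example
// indices each epoch, visit every example once, and either pull the step size
// from an ILearningRate schedule or let AdaGrad set its own per-coordinate rate.
// Below is a minimal self-contained sketch of the same skeleton on a toy
// least-squares objective, with a 1/(1 + t) decay standing in for ILearningRate;
// every name and constant here is illustrative, not part of the library.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <numeric>
#include <random>
#include <vector>

int main() {
  // Toy data: fit w so that w * x[i] ~ y[i], per-example loss 0.5 * (w*x - y)^2.
  std::vector<double> x = {1.0, 2.0, 3.0, 4.0}, y = {2.1, 3.9, 6.2, 7.8};
  double w = 0.0;
  std::vector<std::size_t> order(x.size());
  std::iota(order.begin(), order.end(), 0);
  std::mt19937 rng(42);

  std::size_t step = 0;
  for (int epoch = 0; epoch < 20; ++epoch) {
    std::shuffle(order.begin(), order.end(), rng);  // random example order, as above
    double fval = 0.0;
    for (std::size_t i = 0; i < order.size(); ++i, ++step) {
      std::size_t const ex = order[i];
      double const residual = w * x[ex] - y[ex];
      double const rate = 0.1 / (1.0 + 0.01 * step);  // stand-in for ILearningRate
      w -= rate * residual * x[ex];  // per-example gradient step
      fval += 0.5 * residual * residual;
    }
    std::cout << "epoch " << epoch << " f=" << fval << " w=" << w << '\n';
  }
}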
void writeLabelsFile(std::string const& fname) {
  SDL_INFO(CrfDemo, "Writing labels file '" << fname << "'");
  Util::Output output(fname);
  for (Sym labelId : allLabels_) {
    *output << pVoc_->str(labelId) << '\n';
  }
}
// TODO: test
AdagradL1ParameterUpdate(FloatT* params, FeatureId numParams, FloatT eta, FloatT l1Strength)
    : ParameterUpdate<FloatT>(params, numParams)
    , eta_(eta)
    , l1Strength_(l1Strength)
    , timeStep_(1)
    , prevGrads_(numParams, (FloatT)0)
    , prevGradsSquared_(numParams, (FloatT)0) {
  SDL_INFO(OnlineOptimizer, "Adagrad L1 strength: " << l1Strength_ << ", eta: " << eta_);
}
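// The member layout above (a running sum of gradients, a running sum of squared
// gradients, and a time step) matches the dual-averaging AdaGrad-L1 rule of
// Duchi et al. (2011): soft-threshold the average gradient by the L1 strength,
// then scale by eta * t over the root of the accumulated squared gradients. The
// class's actual update method is not shown here, so the standalone sketch below
// is a plausible reading, not the library's code.
#include <cmath>
#include <cstddef>
#include <vector>

void adagradL1Step(std::vector<double>& w, std::vector<double> const& grad,
                   std::vector<double>& sumGrads, std::vector<double>& sumGradsSquared,
                   std::size_t t, double eta, double l1Strength) {
  for (std::size_t i = 0; i < w.size(); ++i) {
    sumGrads[i] += grad[i];                   // cf. prevGrads_
    sumGradsSquared[i] += grad[i] * grad[i];  // cf. prevGradsSquared_
    double const h = std::sqrt(sumGradsSquared[i]);
    if (h == 0.0) continue;  // coordinate never touched by any gradient
    double const avgGrad = sumGrads[i] / t;
    double const shrunk = std::fabs(avgGrad) - l1Strength;  // soft threshold
    // Closed-form minimizer: zero if the average gradient is within the L1 ball,
    // otherwise step against its sign with a per-coordinate rate:
    w[i] = shrunk <= 0.0 ? 0.0 : (avgGrad < 0.0 ? 1.0 : -1.0) * eta * t * shrunk / h;
  }
}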
void loadFeatureWeightsFile(std::string const& filename, Map* pMap) {
  SDL_INFO(Optimization, "Loading feature weights from '" << filename << "'");
  Util::Input input(filename);
  typename Map::key_type featureId;
  typename Map::mapped_type featureWeight;
  std::istream& inStream = *input;
  std::size_t linenum = 1;
  while (inStream >> featureId) {
    // Check the extraction of the weight itself; testing eof() after reading the
    // ID would miss a missing weight whenever the file ends in trailing whitespace.
    if (!(inStream >> featureWeight)) {
      SDL_THROW_LOG(Optimization, ParseException,
                    filename << ":" << linenum << ": No feature weight found for ID " << featureId);
    }
    (*pMap)[featureId] = featureWeight;
    ++linenum;
  }
  SDL_INFO(Optimization,
           "Loaded " << (linenum - 1) << " feature weights from '" << filename << "'");
}
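// The expected file format is whitespace-separated (featureId, featureWeight)
// pairs, one pair per line. A hypothetical call, assuming Map is instantiated as
// a std::map; the filename and values below are made up for illustration.
#include <map>

void exampleLoadWeights() {
  // weights.txt contains, one pair per line:
  //   0 0.25
  //   7 -1.5
  //   12 3.0
  std::map<unsigned, double> weights;
  loadFeatureWeightsFile("weights.txt", &weights);
  // Afterwards weights[7] == -1.5 and the log reports loading 3 feature weights.
}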
void toHypergraph(std::string const& line, IMutableHypergraph<A>* phg, std::size_t lineNum = 0) const {
  Strings words = parseTokens(line, static_cast<ParseTokensOptions const&>(*this));
  SDL_DEBUG(Hypergraph.HgConvertString, lineNum << ": " << printer(words, Util::RangeSep(" ", "", "")));
  SDL_INFO(Hypergraph.HgConvertString, lineNum << ": len=" << words.size());
  phg->clear(properties());
  assert(phg->storesArcs());
  assert(phg->getVocabulary());
  stringToHypergraph(words, phg);
}
void writeLabelsPerPosFile(std::string const& fname) {
  SDL_INFO(CrfDemo, "Writing labels-per-pos file '" << fname << "'");
  Util::Output output(fname);
  for (LabelsPerPosMap::const_iterator it = labelsPerPos_.begin(); it != labelsPerPos_.end(); ++it) {
    *output << pVoc_->str(it->first);
    for (Sym labelId : it->second) {
      *output << '\t' << pVoc_->str(labelId);
    }
    *output << '\n';
  }
}
/** Remove all symbols added via addSymbol since the last freeze (if there was no
    freeze, all of them except whatever was permanent at vocab creation, e.g. from
    a grammar db). */
void clearSinceFreeze() {
  SymInt sz = size();
  assert(freezeEndIndex_ <= sz);
  if (sz != freezeEndIndex_) {
    SDL_INFO(evict.Vocabulary, "Shrinking " << (SymbolType)type_ << " vocabulary from " << sz << " to "
                                   << freezeEndIndex_ << " symbols (these " << (sz - freezeEndIndex_)
                                   << " removed symbols should all be novel words seen in inputs recently"
                                      " processed - if not, call IVocabulary::freeze() to keep your"
                                      " permanent symbols permanent)");
    symbols_.shrink(freezeEndIndex_);
  }
}
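// Intended lifecycle, judging from the doc comment and the log message: add the
// permanent symbols, call IVocabulary::freeze(), then call clearSinceFreeze()
// after each processed input to evict the novel words added for that input.
// Hypothetical sketch; voc's concrete type, processInput, and the availability
// of these methods on the pointer are assumptions for illustration:
//
//   voc->add("the", kTerminal);  // permanent symbols, e.g. from a grammar db
//   voc->freeze();               // everything added so far survives eviction
//   while (processInput(voc))    // may add novel input words as symbols
//     voc->clearSinceFreeze();   // evict only the per-input symbols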
void readLabelsPerPosFile(std::string const& fname) {
  SDL_INFO(CrfDemo, "Reading labels-per-pos file '" << fname << "'");
  Util::Input input(fname);
  std::string line;
  while (std::getline(*input, line)) {
    std::stringstream ss(line);
    std::string pos, label;
    ss >> pos;
    std::set<Sym>& s = labelsPerPos_[pVoc_->add(pos, kTerminal)];
    while (ss >> label) {
      s.insert(pVoc_->add(label, kTerminal));
    }
  }
}
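// Together with writeLabelsPerPosFile above, this implies one record per line: a
// POS tag followed by the labels permitted at that POS. The writer emits tabs,
// but since the reader tokenizes with operator>>, any whitespace parses. An
// illustrative file (the tagset is made up):
//
//   NN   B-NP  I-NP
//   VBZ  B-VP
//   DT   B-NP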
// TODO: test
AdagradParameterUpdate(FloatT* params, FeatureId numParams, FloatT eta)
    : ParameterUpdate<FloatT>(params, numParams), eta_(eta), prevGrads_(numParams, (FloatT)0) {
  SDL_INFO(OnlineOptimizer, "Adagrad eta: " << eta_);
}
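// For reference, the standard (unregularized) AdaGrad step such a class typically
// implements: accumulate per-coordinate squared gradients and divide the step by
// their root. The update method is not shown above, so this standalone sketch is
// only a plausible reading of what prevGrads_ accumulates.
#include <cmath>
#include <cstddef>
#include <vector>

void adagradStep(std::vector<double>& w, std::vector<double> const& grad,
                 std::vector<double>& sumSquaredGrads, double eta) {
  for (std::size_t i = 0; i < w.size(); ++i) {
    sumSquaredGrads[i] += grad[i] * grad[i];  // cf. prevGrads_, one slot per feature
    if (sumSquaredGrads[i] > 0.0)
      w[i] -= eta * grad[i] / std::sqrt(sumSquaredGrads[i]);  // per-coordinate rate
  }
}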