/**
 * @brief Train the labeler: read corpora, build alphabets/embeddings, run
 *        mini-batch AdaGrad-style training, and evaluate on dev/test after
 *        every iteration, saving the model whenever dev accuracy improves.
 * @param trainFile   training corpus (required).
 * @param devFile     dev corpus; empty string disables dev evaluation.
 * @param testFile    test corpus; empty string disables test evaluation.
 * @param modelFile   path the best model is written to.
 * @param optionFile  hyper-parameter file; empty string keeps current m_options.
 * @param wordEmbFile pretrained word embeddings; empty string means random init.
 *
 * NOTE(review): `decodeInstResults`, `curDecodeInst`, `eval`, `metric_dev`,
 * `metric_test` and `subExamples` are function-local statics, so their state
 * persists across calls — presumably a buffer-reuse optimization; this makes
 * the method non-reentrant and not thread-safe.
 */
void Labeler::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile, const string& wordEmbFile) {
  if (optionFile != "")
    m_options.load(optionFile);
  m_options.showOptions();
  vector<Instance> trainInsts, devInsts, testInsts;
  static vector<Instance> decodeInstResults;
  static Instance curDecodeInst;
  bool bCurIterBetter = false;
  // Load corpora (dev/test only if a path was given).
  m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
  if (devFile != "")
    m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
  if (testFile != "")
    m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);
  //Ensure that each file in m_options.testFiles exists!
  vector<vector<Instance> > otherInsts(m_options.testFiles.size());
  for (int idx = 0; idx < m_options.testFiles.size(); idx++) {
    m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_options.maxInstance);
  }
  //std::cout << "Training example number: " << trainInsts.size() << std::endl;
  //std::cout << "Dev example number: " << trainInsts.size() << std::endl;
  //std::cout << "Test example number: " << trainInsts.size() << std::endl;
  createAlphabet(trainInsts);
  // When embeddings are frozen, dev/test words can safely be added to the
  // alphabet (their vectors will never be updated from training data).
  if (!m_options.wordEmbFineTune) {
    addTestWordAlpha(devInsts);
    addTestWordAlpha(testInsts);
    for (int idx = 0; idx < otherInsts.size(); idx++) {
      addTestWordAlpha(otherInsts[idx]);
    }
    cout << "Remain words num: " << m_wordAlphabet.size() << endl;
  }
  // Word embeddings: pretrained file if provided, otherwise fixed-seed random.
  NRMat<dtype> wordEmb;
  if (wordEmbFile != "") {
    readWordEmbeddings(wordEmbFile, wordEmb);
  } else {
    wordEmb.resize(m_wordAlphabet.size(), m_options.wordEmbSize);
    wordEmb.randu(1000);
  }
  // One randomly initialized embedding table per tag alphabet
  // (seed 1002+idx gives each table a distinct deterministic init).
  NRVec<NRMat<dtype> > tagEmbs(m_tagAlphabets.size());
  for (int idx = 0; idx < tagEmbs.size(); idx++) {
    tagEmbs[idx].resize(m_tagAlphabets[idx].size(), m_options.tagEmbSize);
    tagEmbs[idx].randu(1002 + idx);
  }
  m_classifier.init(m_labelAlphabet.size(), m_featAlphabet.size());
  m_classifier.setDropValue(m_options.dropProb);
  // Convert instances to classifier-ready examples.
  vector<Example> trainExamples, devExamples, testExamples;
  initialExamples(trainInsts, trainExamples);
  initialExamples(devInsts, devExamples);
  initialExamples(testInsts, testExamples);
  vector<int> otherInstNums(otherInsts.size());
  vector<vector<Example> > otherExamples(otherInsts.size());
  for (int idx = 0; idx < otherInsts.size(); idx++) {
    initialExamples(otherInsts[idx], otherExamples[idx]);
    otherInstNums[idx] = otherExamples[idx].size();
  }
  dtype bestDIS = 0;  // best dev accuracy seen so far
  int inputSize = trainExamples.size();
  // Number of mini-batches per iteration (round up for the partial last batch).
  int batchBlock = inputSize / m_options.batchSize;
  if (inputSize % m_options.batchSize != 0)
    batchBlock++;
  srand(0);  // fixed seed so the shuffle order (and thus training) is reproducible
  std::vector<int> indexes;
  for (int i = 0; i < inputSize; ++i)
    indexes.push_back(i);
  static Metric eval, metric_dev, metric_test;
  static vector<Example> subExamples;
  int devNum = devExamples.size(), testNum = testExamples.size();
  for (int iter = 0; iter < m_options.maxIter; ++iter) {
    std::cout << "##### Iteration " << iter << std::endl;
    random_shuffle(indexes.begin(), indexes.end());
    eval.reset();
    // ---- one pass over the shuffled training data, batch by batch ----
    for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
      subExamples.clear();
      int start_pos = updateIter * m_options.batchSize;
      int end_pos = (updateIter + 1) * m_options.batchSize;
      if (end_pos > inputSize)
        end_pos = inputSize;  // clamp the last (possibly partial) batch
      for (int idy = start_pos; idy < end_pos; idy++) {
        subExamples.push_back(trainExamples[indexes[idy]]);
      }
      int curUpdateIter = iter * batchBlock + updateIter;  // global update counter
      dtype cost = m_classifier.process(subExamples, curUpdateIter);
      // Accumulate per-batch counts into the per-iteration training metric.
      eval.overall_label_count += m_classifier._eval.overall_label_count;
      eval.correct_label_count += m_classifier._eval.correct_label_count;
      if ((curUpdateIter + 1) % m_options.verboseIter == 0) {
        //m_classifier.checkgrads(subExamples, curUpdateIter+1);
        std::cout << "current: " << updateIter + 1 << ", total block: " << batchBlock << std::endl;
        std::cout << "Cost = " << cost << ", Tag Correct(%) = " << eval.getAccuracy() << std::endl;
      }
      m_classifier.updateParams(m_options.regParameter, m_options.adaAlpha, m_options.adaEps);
    }
    // ---- end-of-iteration evaluation (only when a dev set exists) ----
    if (devNum > 0) {
      bCurIterBetter = false;
      if (!m_options.outBest.empty())
        decodeInstResults.clear();
      metric_dev.reset();
      for (int idx = 0; idx < devExamples.size(); idx++) {
        vector<string> result_labels;
        predict(devExamples[idx].m_features, result_labels, devInsts[idx].words);
        // Segmentation-style or plain token-level evaluation, per options.
        if (m_options.seg)
          devInsts[idx].SegEvaluate(result_labels, metric_dev);
        else
          devInsts[idx].Evaluate(result_labels, metric_dev);
        if (!m_options.outBest.empty()) {
          curDecodeInst.copyValuesFrom(devInsts[idx]);
          curDecodeInst.assignLabel(result_labels);
          decodeInstResults.push_back(curDecodeInst);
        }
      }
      metric_dev.print();
      // Dump dev decodes only when this iteration beats the best dev score;
      // bCurIterBetter gates the test/other dumps below.
      if (!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS) {
        m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults);
        bCurIterBetter = true;
      }
      if (testNum > 0) {
        if (!m_options.outBest.empty())
          decodeInstResults.clear();
        metric_test.reset();
        for (int idx = 0; idx < testExamples.size(); idx++) {
          vector<string> result_labels;
          predict(testExamples[idx].m_features, result_labels, testInsts[idx].words);
          if (m_options.seg)
            testInsts[idx].SegEvaluate(result_labels, metric_test);
          else
            testInsts[idx].Evaluate(result_labels, metric_test);
          if (bCurIterBetter && !m_options.outBest.empty()) {
            curDecodeInst.copyValuesFrom(testInsts[idx]);
            curDecodeInst.assignLabel(result_labels);
            decodeInstResults.push_back(curDecodeInst);
          }
        }
        std::cout << "test:" << std::endl;
        metric_test.print();
        if (!m_options.outBest.empty() && bCurIterBetter) {
          m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults);
        }
      }
      // Evaluate every extra test file listed in m_options.testFiles.
      for (int idx = 0; idx < otherExamples.size(); idx++) {
        std::cout << "processing " << m_options.testFiles[idx] << std::endl;
        if (!m_options.outBest.empty())
          decodeInstResults.clear();
        metric_test.reset();
        for (int idy = 0; idy < otherExamples[idx].size(); idy++) {
          vector<string> result_labels;
          predict(otherExamples[idx][idy].m_features, result_labels, otherInsts[idx][idy].words);
          if (m_options.seg)
            otherInsts[idx][idy].SegEvaluate(result_labels, metric_test);
          else
            otherInsts[idx][idy].Evaluate(result_labels, metric_test);
          if (bCurIterBetter && !m_options.outBest.empty()) {
            curDecodeInst.copyValuesFrom(otherInsts[idx][idy]);
            curDecodeInst.assignLabel(result_labels);
            decodeInstResults.push_back(curDecodeInst);
          }
        }
        std::cout << "test:" << std::endl;
        metric_test.print();
        if (!m_options.outBest.empty() && bCurIterBetter) {
          m_pipe.outputAllInstances(m_options.testFiles[idx] + m_options.outBest, decodeInstResults);
        }
      }
      // Save model whenever dev accuracy improves (and saving is enabled).
      // NOTE(review): bestDIS is only advanced here, i.e. when
      // saveIntermediate is on — confirm that is intended.
      if (m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS) {
        std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl;
        bestDIS = metric_dev.getAccuracy();
        writeModelFile(modelFile);
      }
    }
    // Clear gradients
  }
}
void Segmentor::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile, const string& wordEmbFile, const string& charEmbFile, const string& bicharEmbFile) { if (optionFile != "") m_options.load(optionFile); m_options.showOptions(); vector<Instance> trainInsts, devInsts, testInsts; m_pipe.readInstances(trainFile, trainInsts, m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance); if (devFile != "") m_pipe.readInstances(devFile, devInsts, m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance); if (testFile != "") m_pipe.readInstances(testFile, testInsts, m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance); vector<vector<Instance> > otherInsts(m_options.testFiles.size()); for (int idx = 0; idx < m_options.testFiles.size(); idx++) { m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_classifier.MAX_SENTENCE_SIZE - 2, m_options.maxInstance); } createAlphabet(trainInsts); addTestWordAlpha(devInsts); addTestWordAlpha(testInsts); NRMat<dtype> wordEmb, allwordEmb; if (wordEmbFile != "") { allWordAlphaEmb(wordEmbFile, allwordEmb); } else { std::cout << "must not be here, allword must be pretrained." 
<< std::endl; } wordEmb.resize(m_classifier.fe._wordAlphabet.size(), m_options.wordEmbSize); wordEmb.randu(1000); cout << "word emb dim is " << wordEmb.ncols() << std::endl; NRMat<dtype> charEmb; if (charEmbFile != "") { readEmbeddings(m_classifier.fe._charAlphabet, charEmbFile, charEmb); } else { charEmb.resize(m_classifier.fe._charAlphabet.size(), m_options.charEmbSize); charEmb.randu(2000); } cout << "char emb dim is " << charEmb.ncols() << std::endl; NRMat<dtype> bicharEmb; if (bicharEmbFile != "") { readEmbeddings(m_classifier.fe._bicharAlphabet, bicharEmbFile, bicharEmb); } else { bicharEmb.resize(m_classifier.fe._bicharAlphabet.size(), m_options.bicharEmbSize); bicharEmb.randu(2000); } cout << "bichar emb dim is " << bicharEmb.ncols() << std::endl; NRMat<dtype> actionEmb; actionEmb.resize(m_classifier.fe._actionAlphabet.size(), m_options.actionEmbSize); actionEmb.randu(3000); cout << "action emb dim is " << actionEmb.ncols() << std::endl; NRMat<dtype> lengthEmb; lengthEmb.resize(6, m_options.lengthEmbSize); lengthEmb.randu(3000); cout << "length emb dim is " << actionEmb.ncols() << std::endl; m_classifier.init(wordEmb, allwordEmb, lengthEmb, m_options.wordNgram, m_options.wordHiddenSize, m_options.wordRNNHiddenSize, charEmb, bicharEmb, m_options.charcontext, m_options.charHiddenSize, m_options.charRNNHiddenSize, actionEmb, m_options.actionNgram, m_options.actionHiddenSize, m_options.actionRNNHiddenSize, m_options.sepHiddenSize, m_options.appHiddenSize, m_options.delta); m_classifier.setDropValue(m_options.dropProb); m_classifier.setOOVFreq(m_options.wordCutOff); m_classifier.setOOVRatio(m_options.oovRatio); m_classifier.setWordFreq(m_word_stat); vector<vector<CAction> > trainInstGoldactions; getGoldActions(trainInsts, trainInstGoldactions); double bestFmeasure = 0; int inputSize = trainInsts.size(); std::vector<int> indexes; for (int i = 0; i < inputSize; ++i) indexes.push_back(i); static Metric eval, metric_dev, metric_test; int maxIter = m_options.maxIter 
* (inputSize / m_options.batchSize + 1); int oneIterMaxRound = (inputSize + m_options.batchSize -1) / m_options.batchSize; std::cout << "maxIter = " << maxIter << std::endl; int devNum = devInsts.size(), testNum = testInsts.size(); static vector<vector<string> > decodeInstResults; static vector<string> curDecodeInst; static bool bCurIterBetter; static vector<vector<string> > subInstances; static vector<vector<CAction> > subInstGoldActions; for (int iter = 0; iter < maxIter; ++iter) { std::cout << "##### Iteration " << iter << std::endl; srand(iter); random_shuffle(indexes.begin(), indexes.end()); std::cout << "random: " << indexes[0] << ", " << indexes[indexes.size() - 1] << std::endl; bool bEvaluate = false; if(m_options.batchSize == 1){ eval.reset(); bEvaluate = true; for (int idy = 0; idy < inputSize; idy++) { subInstances.clear(); subInstGoldActions.clear(); subInstances.push_back(trainInsts[indexes[idy]].chars); subInstGoldActions.push_back(trainInstGoldactions[indexes[idy]]); double cost = m_classifier.train(subInstances, subInstGoldActions); eval.overall_label_count += m_classifier._eval.overall_label_count; eval.correct_label_count += m_classifier._eval.correct_label_count; if ((idy + 1) % (m_options.verboseIter*10) == 0) { std::cout << "current: " << idy + 1 << ", Cost = " << cost << ", Correct(%) = " << eval.getAccuracy() << std::endl; } m_classifier.updateParams(m_options.regParameter, m_options.adaAlpha, m_options.adaEps, m_options.clip); } std::cout << "current: " << iter + 1 << ", Correct(%) = " << eval.getAccuracy() << std::endl; } else{ if(iter == 0)eval.reset(); subInstances.clear(); subInstGoldActions.clear(); for (int idy = 0; idy < m_options.batchSize; idy++) { subInstances.push_back(trainInsts[indexes[idy]].chars); subInstGoldActions.push_back(trainInstGoldactions[indexes[idy]]); } double cost = m_classifier.train(subInstances, subInstGoldActions); eval.overall_label_count += m_classifier._eval.overall_label_count; eval.correct_label_count += 
m_classifier._eval.correct_label_count; if ((iter + 1) % (m_options.verboseIter) == 0) { std::cout << "current: " << iter + 1 << ", Cost = " << cost << ", Correct(%) = " << eval.getAccuracy() << std::endl; eval.reset(); bEvaluate = true; } m_classifier.updateParams(m_options.regParameter, m_options.adaAlpha, m_options.adaEps, m_options.clip); } if (bEvaluate && devNum > 0) { bCurIterBetter = false; if (!m_options.outBest.empty()) decodeInstResults.clear(); metric_dev.reset(); for (int idx = 0; idx < devInsts.size(); idx++) { predict(devInsts[idx], curDecodeInst); devInsts[idx].evaluate(curDecodeInst, metric_dev); if (!m_options.outBest.empty()) { decodeInstResults.push_back(curDecodeInst); } } std::cout << "dev:" << std::endl; metric_dev.print(); if (!m_options.outBest.empty() && metric_dev.getAccuracy() > bestFmeasure) { m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults); bCurIterBetter = true; } if (testNum > 0) { if (!m_options.outBest.empty()) decodeInstResults.clear(); metric_test.reset(); for (int idx = 0; idx < testInsts.size(); idx++) { predict(testInsts[idx], curDecodeInst); testInsts[idx].evaluate(curDecodeInst, metric_test); if (bCurIterBetter && !m_options.outBest.empty()) { decodeInstResults.push_back(curDecodeInst); } } std::cout << "test:" << std::endl; metric_test.print(); if (!m_options.outBest.empty() && bCurIterBetter) { m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults); } } for (int idx = 0; idx < otherInsts.size(); idx++) { std::cout << "processing " << m_options.testFiles[idx] << std::endl; if (!m_options.outBest.empty()) decodeInstResults.clear(); metric_test.reset(); for (int idy = 0; idy < otherInsts[idx].size(); idy++) { predict(otherInsts[idx][idy], curDecodeInst); otherInsts[idx][idy].evaluate(curDecodeInst, metric_test); if (bCurIterBetter && !m_options.outBest.empty()) { decodeInstResults.push_back(curDecodeInst); } } std::cout << "test:" << std::endl; metric_test.print(); if 
(!m_options.outBest.empty() && bCurIterBetter) { m_pipe.outputAllInstances(m_options.testFiles[idx] + m_options.outBest, decodeInstResults); } } if (m_options.saveIntermediate && metric_dev.getAccuracy() > bestFmeasure) { std::cout << "Exceeds best previous DIS of " << bestFmeasure << ". Saving model file.." << std::endl; bestFmeasure = metric_dev.getAccuracy(); writeModelFile(modelFile); } } } }
/**
 * @brief Train the segmentor (feature-based variant, no embedding files):
 *        read corpora, build alphabets, then run mini-batch training with
 *        dev/test evaluation after every iteration, saving the model on each
 *        new best dev score.
 * @param trainFile  training corpus (required).
 * @param devFile    dev corpus; empty string disables dev evaluation.
 * @param testFile   test corpus; empty string disables test evaluation.
 * @param modelFile  path the best model is written to.
 * @param optionFile hyper-parameter file; empty string keeps current m_options.
 * @param wordEmbFile unused in this overload (m_classifier.init() takes no
 *        embeddings here) — kept for signature compatibility.
 *
 * NOTE(review): eval/metric/decode buffers are function-local statics, so
 * state persists across calls — the method is non-reentrant.
 */
void Segmentor::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile, const string& wordEmbFile) {
  if (optionFile != "")
    m_options.load(optionFile);
  m_options.showOptions();
  vector<Instance> trainInsts, devInsts, testInsts;
  // Sentences longer than MAX_SENTENCE_SIZE are excluded at read time.
  m_pipe.readInstances(trainFile, trainInsts, m_classifier.MAX_SENTENCE_SIZE, m_options.maxInstance);
  if (devFile != "")
    m_pipe.readInstances(devFile, devInsts, m_classifier.MAX_SENTENCE_SIZE, m_options.maxInstance);
  if (testFile != "")
    m_pipe.readInstances(testFile, testInsts, m_classifier.MAX_SENTENCE_SIZE, m_options.maxInstance);
  vector<vector<Instance> > otherInsts(m_options.testFiles.size());
  for (int idx = 0; idx < m_options.testFiles.size(); idx++) {
    m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_classifier.MAX_SENTENCE_SIZE, m_options.maxInstance);
  }
  createAlphabet(trainInsts);
  addTestWordAlpha(devInsts);
  addTestWordAlpha(testInsts);
  for (int idx = 0; idx < otherInsts.size(); idx++) {
    addTestWordAlpha(otherInsts[idx]);
  }
  m_classifier.init();
  m_classifier.setDropValue(m_options.dropProb);
  // Gold action sequences for every training instance.
  vector<vector<CAction> > trainInstGoldactions;
  getGoldActions(trainInsts, trainInstGoldactions);
  double bestFmeasure = 0;  // best dev score seen so far
  int inputSize = trainInsts.size();
  std::vector<int> indexes;
  for (int i = 0; i < inputSize; ++i)
    indexes.push_back(i);
  static Metric eval, metric_dev, metric_test;
  // Iteration count scaled by batches per epoch; oneIterMaxRound is the
  // number of batches in one full pass (last batch may be partial).
  int maxIter = m_options.maxIter * (inputSize / m_options.batchSize + 1);
  int oneIterMaxRound = (inputSize + m_options.batchSize -1) / m_options.batchSize;
  std::cout << "maxIter = " << maxIter << std::endl;
  int devNum = devInsts.size(), testNum = testInsts.size();
  static vector<vector<string> > decodeInstResults;
  static vector<string> curDecodeInst;
  static bool bCurIterBetter;
  static vector<vector<string> > subInstances;
  static vector<vector<CAction> > subInstGoldActions;
  //m_classifier.setAlphaIncreasing(true);
  for (int iter = 0; iter < maxIter; ++iter) {
    std::cout << "##### Iteration " << iter << std::endl;
    srand(iter);
    // NOTE(review): the shuffle is commented out, so indexes stay in their
    // original order and the "random:" line always prints 0 and inputSize-1
    // — confirm this was intentional (e.g. for debugging).
    //random_shuffle(indexes.begin(), indexes.end());
    std::cout << "random: " << indexes[0] << ", " << indexes[indexes.size() - 1] << std::endl;
    eval.reset();
    // ---- one full pass over the training data, batch by batch ----
    for (int updateIter = 0; updateIter < oneIterMaxRound; updateIter++) {
      int start_pos = updateIter * m_options.batchSize;
      int end_pos = (updateIter + 1) * m_options.batchSize;
      if (end_pos > inputSize)
        end_pos = inputSize;  // clamp the last (possibly partial) batch
      subInstances.clear();
      subInstGoldActions.clear();
      for (int idy = start_pos; idy < end_pos; idy++) {
        subInstances.push_back(trainInsts[indexes[idy]].chars);
        subInstGoldActions.push_back(trainInstGoldactions[indexes[idy]]);
      }
      double cost = m_classifier.train(subInstances, subInstGoldActions);
      // Accumulate per-batch counts into the per-iteration training metric.
      eval.overall_label_count += m_classifier._eval.overall_label_count;
      eval.correct_label_count += m_classifier._eval.correct_label_count;
      //if ((updateIter + 1) % (m_options.verboseIter*10) == 0) {
      //std::cout << "current: " << updateIter + 1 << ", Cost = " << cost << ", Correct(%) = " << eval.getAccuracy() << std::endl;
      //}
      m_classifier.updateParams(m_options.regParameter, m_options.adaAlpha, m_options.adaEps);
    }
    std::cout << "current: " << iter + 1 << ", Correct(%) = " << eval.getAccuracy() << std::endl;
    // ---- end-of-iteration evaluation (only when a dev set exists) ----
    if (devNum > 0) {
      bCurIterBetter = false;
      if (!m_options.outBest.empty())
        decodeInstResults.clear();
      metric_dev.reset();
      for (int idx = 0; idx < devInsts.size(); idx++) {
        predict(devInsts[idx], curDecodeInst);
        devInsts[idx].evaluate(curDecodeInst, metric_dev);
        if (!m_options.outBest.empty()) {
          decodeInstResults.push_back(curDecodeInst);
        }
      }
      std::cout << "dev:" << std::endl;
      metric_dev.print();
      // Dump dev decodes only on a new best; bCurIterBetter then gates the
      // test/other dumps below.
      if (!m_options.outBest.empty() && metric_dev.getAccuracy() > bestFmeasure) {
        m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults);
        bCurIterBetter = true;
      }
      if (testNum > 0) {
        if (!m_options.outBest.empty())
          decodeInstResults.clear();
        metric_test.reset();
        for (int idx = 0; idx < testInsts.size(); idx++) {
          predict(testInsts[idx], curDecodeInst);
          testInsts[idx].evaluate(curDecodeInst, metric_test);
          if (bCurIterBetter && !m_options.outBest.empty()) {
            decodeInstResults.push_back(curDecodeInst);
          }
        }
        std::cout << "test:" << std::endl;
        metric_test.print();
        if (!m_options.outBest.empty() && bCurIterBetter) {
          m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults);
        }
      }
      // Evaluate every extra test file listed in m_options.testFiles.
      for (int idx = 0; idx < otherInsts.size(); idx++) {
        std::cout << "processing " << m_options.testFiles[idx] << std::endl;
        if (!m_options.outBest.empty())
          decodeInstResults.clear();
        metric_test.reset();
        for (int idy = 0; idy < otherInsts[idx].size(); idy++) {
          predict(otherInsts[idx][idy], curDecodeInst);
          otherInsts[idx][idy].evaluate(curDecodeInst, metric_test);
          if (bCurIterBetter && !m_options.outBest.empty()) {
            decodeInstResults.push_back(curDecodeInst);
          }
        }
        std::cout << "test:" << std::endl;
        metric_test.print();
        if (!m_options.outBest.empty() && bCurIterBetter) {
          m_pipe.outputAllInstances(m_options.testFiles[idx] + m_options.outBest, decodeInstResults);
        }
      }
      // Save model whenever dev score improves (and saving is enabled).
      if (m_options.saveIntermediate && metric_dev.getAccuracy() > bestFmeasure) {
        std::cout << "Exceeds best previous DIS of " << bestFmeasure << ". Saving model file.." << std::endl;
        bestFmeasure = metric_dev.getAccuracy();
        writeModelFile(modelFile);
      }
    }
  }
}
/**
 * @brief Train the sentence-level labeler (word + char embedding variant):
 *        read corpora, build alphabets and embedding tables, train with
 *        either pure-online (batchSize == 1) or sampled mini-batch updates,
 *        evaluate dev/test after every iteration, then run a final evaluation
 *        pass after training and save the model on dev improvements.
 * @param trainFile   training corpus (required).
 * @param devFile     dev corpus; empty string disables dev evaluation.
 * @param testFile    test corpus; empty string disables test evaluation.
 * @param modelFile   path the model is written to.
 * @param optionFile  hyper-parameter file; empty string keeps current m_options.
 * @param wordEmbFile pretrained word embeddings; empty string means random init.
 * @param charEmbFile pretrained char embeddings; empty string means random init.
 *
 * NOTE(review): decode/metric buffers are function-local statics, so state
 * persists across calls — the method is non-reentrant.
 */
void Labeler::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile, const string& wordEmbFile, const string& charEmbFile) {
  if (optionFile != "")
    m_options.load(optionFile);
  m_options.showOptions();
  m_linearfeat = 0;
  vector<Instance> trainInsts, devInsts, testInsts;
  static vector<Instance> decodeInstResults;
  static Instance curDecodeInst;
  bool bCurIterBetter = false;
  // Load corpora (dev/test only if a path was given).
  m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
  if (devFile != "")
    m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
  if (testFile != "")
    m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);
  //Ensure that each file in m_options.testFiles exists!
  vector<vector<Instance> > otherInsts(m_options.testFiles.size());
  for (int idx = 0; idx < m_options.testFiles.size(); idx++) {
    m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_options.maxInstance);
  }
  //std::cout << "Training example number: " << trainInsts.size() << std::endl;
  //std::cout << "Dev example number: " << trainInsts.size() << std::endl;
  //std::cout << "Test example number: " << trainInsts.size() << std::endl;
  createAlphabet(trainInsts);
  // When word embeddings are frozen, dev/test words can safely join the
  // alphabet (their vectors will never be updated from training data).
  if (!m_options.wordEmbFineTune) {
    addTestWordAlpha(devInsts);
    addTestWordAlpha(testInsts);
    for (int idx = 0; idx < otherInsts.size(); idx++) {
      addTestWordAlpha(otherInsts[idx]);
    }
    cout << "Remain words num: " << m_textWordAlphabet.size() << endl;
  }
  // Same treatment for the character alphabet.
  if (!m_options.charEmbFineTune) {
    addTestCharAlpha(devInsts);
    addTestCharAlpha(testInsts);
    for (int idx = 0; idx < otherInsts.size(); idx++) {
      addTestCharAlpha(otherInsts[idx]);
    }
    cout << "Remain char num: " << m_charAlphabet.size() << endl;
  }
  // Word embeddings: pretrained file if provided, otherwise seed-1000 random.
  NRMat<double> wordEmb;
  if (wordEmbFile != "") {
    readWordEmbeddings(wordEmbFile, wordEmb);
  } else {
    wordEmb.resize(m_textWordAlphabet.size(), m_options.wordEmbSize);
    wordEmb.randu(1000);
  }
  // Char embeddings: pretrained file if provided, otherwise seed-1001 random.
  NRMat<double> charEmb;
  if (charEmbFile != "") {
    readWordEmbeddings(charEmbFile, charEmb);
  } else {
    charEmb.resize(m_charAlphabet.size(), m_options.charEmbSize);
    charEmb.randu(1001);
  }
  m_classifier.init(wordEmb, m_options.wordcontext, charEmb, m_options.charcontext, m_headWordAlphabet.size(), m_options.wordHiddenSize, m_options.charHiddenSize, m_options.hiddenSize);
  m_classifier.resetRemove(m_options.removePool, m_options.removeCharPool);
  m_classifier.setDropValue(m_options.dropProb);
  m_classifier.setWordEmbFinetune(m_options.wordEmbFineTune, m_options.charEmbFineTune);
  // Convert instances to classifier-ready examples.
  vector<Example> trainExamples, devExamples, testExamples;
  initialExamples(trainInsts, trainExamples);
  initialExamples(devInsts, devExamples);
  initialExamples(testInsts, testExamples);
  vector<int> otherInstNums(otherInsts.size());
  vector<vector<Example> > otherExamples(otherInsts.size());
  for (int idx = 0; idx < otherInsts.size(); idx++) {
    initialExamples(otherInsts[idx], otherExamples[idx]);
    otherInstNums[idx] = otherExamples[idx].size();
  }
  double bestDIS = 0;  // best dev accuracy seen so far
  int inputSize = trainExamples.size();
  srand(0);  // fixed seed so shuffle order (and thus training) is reproducible
  std::vector<int> indexes;
  for (int i = 0; i < inputSize; ++i)
    indexes.push_back(i);
  static Metric eval, metric_dev, metric_test;
  static vector<Example> subExamples;
  int devNum = devExamples.size(), testNum = testExamples.size();
  int maxIter = m_options.maxIter;
  if (m_options.batchSize > 1)
    maxIter = m_options.maxIter * (inputSize / m_options.batchSize + 1);
  double cost = 0.0;
  std::cout << "maxIter = " << maxIter << std::endl;
  // NOTE(review): the loop bound is m_options.maxIter, not the computed
  // maxIter printed above — for batchSize > 1 the printed value differs
  // from the actual iteration count; confirm which was intended.
  for (int iter = 0; iter < m_options.maxIter; ++iter) {
    std::cout << "##### Iteration " << iter << std::endl;
    eval.reset();
    if (m_options.batchSize == 1) {
      // Pure online training: one full shuffled pass, one example per update.
      random_shuffle(indexes.begin(), indexes.end());
      for (int updateIter = 0; updateIter < inputSize; updateIter++) {
        subExamples.clear();
        int start_pos = updateIter;
        int end_pos = (updateIter + 1);
        if (end_pos > inputSize)
          end_pos = inputSize;
        for (int idy = start_pos; idy < end_pos; idy++) {
          subExamples.push_back(trainExamples[indexes[idy]]);
        }
        int curUpdateIter = iter * inputSize + updateIter;  // global update counter
        cost = m_classifier.process(subExamples, curUpdateIter);
        eval.overall_label_count += m_classifier._eval.overall_label_count;
        eval.correct_label_count += m_classifier._eval.correct_label_count;
        if ((curUpdateIter + 1) % m_options.verboseIter == 0) {
          //m_classifier.checkgrads(subExamples, curUpdateIter+1);
          std::cout << "current: " << updateIter + 1 << ", total instances: " << inputSize << std::endl;
          std::cout << "Cost = " << cost << ", SA Correct(%) = " << eval.getAccuracy() << std::endl;
        }
        m_classifier.updateParams(m_options.regParameter, m_options.adaAlpha, m_options.adaEps);
      }
    } else {
      // Mini-batch training: verboseIter batches per iteration, each batch
      // sampled as the first batchSize items of a fresh shuffle.
      cost = 0.0;
      for (int updateIter = 0; updateIter < m_options.verboseIter; updateIter++) {
        random_shuffle(indexes.begin(), indexes.end());
        subExamples.clear();
        for (int idy = 0; idy < m_options.batchSize; idy++) {
          subExamples.push_back(trainExamples[indexes[idy]]);
        }
        int curUpdateIter = iter * m_options.verboseIter + updateIter;
        cost += m_classifier.process(subExamples, curUpdateIter);
        //m_classifier.checkgrads(subExamples, curUpdateIter);
        eval.overall_label_count += m_classifier._eval.overall_label_count;
        eval.correct_label_count += m_classifier._eval.correct_label_count;
        m_classifier.updateParams(m_options.regParameter, m_options.adaAlpha, m_options.adaEps);
      }
      std::cout << "current iter: " << iter + 1 << ", total iter: " << maxIter << std::endl;
      std::cout << "Cost = " << cost << ", SA Correct(%) = " << eval.getAccuracy() << std::endl;
    }
    // ---- end-of-iteration evaluation (only when a dev set exists) ----
    if (devNum > 0) {
      bCurIterBetter = false;
      if (!m_options.outBest.empty())
        decodeInstResults.clear();
      metric_dev.reset();
      for (int idx = 0; idx < devExamples.size(); idx++) {
        string result_label;
        double confidence = predict(devExamples[idx].m_features, result_label);
        devInsts[idx].Evaluate(result_label, metric_dev);
        if (!m_options.outBest.empty()) {
          curDecodeInst.copyValuesFrom(devInsts[idx]);
          curDecodeInst.assignLabel(result_label, confidence);
          decodeInstResults.push_back(curDecodeInst);
        }
      }
      metric_dev.print();
      // Dump dev decodes only on a new best; bCurIterBetter then gates the
      // test/other dumps below.
      if ((!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS)) {
        m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults);
        bCurIterBetter = true;
      }
      if (testNum > 0) {
        if (!m_options.outBest.empty())
          decodeInstResults.clear();
        metric_test.reset();
        for (int idx = 0; idx < testExamples.size(); idx++) {
          string result_label;
          double confidence = predict(testExamples[idx].m_features, result_label);
          testInsts[idx].Evaluate(result_label, metric_test);
          if (bCurIterBetter && !m_options.outBest.empty()) {
            curDecodeInst.copyValuesFrom(testInsts[idx]);
            curDecodeInst.assignLabel(result_label, confidence);
            decodeInstResults.push_back(curDecodeInst);
          }
        }
        std::cout << "test:" << std::endl;
        metric_test.print();
        if ((!m_options.outBest.empty() && bCurIterBetter)) {
          m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults);
        }
      }
      // Evaluate every extra test file listed in m_options.testFiles.
      for (int idx = 0; idx < otherExamples.size(); idx++) {
        std::cout << "processing " << m_options.testFiles[idx] << std::endl;
        if (!m_options.outBest.empty())
          decodeInstResults.clear();
        metric_test.reset();
        for (int idy = 0; idy < otherExamples[idx].size(); idy++) {
          string result_label;
          double confidence = predict(otherExamples[idx][idy].m_features, result_label);
          otherInsts[idx][idy].Evaluate(result_label, metric_test);
          if (bCurIterBetter && !m_options.outBest.empty()) {
            curDecodeInst.copyValuesFrom(otherInsts[idx][idy]);
            curDecodeInst.assignLabel(result_label, confidence);
            decodeInstResults.push_back(curDecodeInst);
          }
        }
        std::cout << "test:" << std::endl;
        metric_test.print();
        if ((!m_options.outBest.empty() && bCurIterBetter)) {
          m_pipe.outputAllInstances(m_options.testFiles[idx] + m_options.outBest, decodeInstResults);
        }
      }
      // Save model on dev improvement.
      // NOTE(review): the inner accuracy check is redundant — the outer
      // condition already requires metric_dev.getAccuracy() > bestDIS.
      if ((m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS)) {
        if (metric_dev.getAccuracy() > bestDIS) {
          std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl;
          bestDIS = metric_dev.getAccuracy();
        }
        writeModelFile(modelFile);
      }
    }
    // Clear gradients
  }
  // ---- final evaluation pass after training completes (same logic as the
  // per-iteration block above); with no dev set, just save the model. ----
  if (devNum > 0) {
    bCurIterBetter = false;
    if (!m_options.outBest.empty())
      decodeInstResults.clear();
    metric_dev.reset();
    for (int idx = 0; idx < devExamples.size(); idx++) {
      string result_label;
      double confidence = predict(devExamples[idx].m_features, result_label);
      devInsts[idx].Evaluate(result_label, metric_dev);
      if (!m_options.outBest.empty()) {
        curDecodeInst.copyValuesFrom(devInsts[idx]);
        curDecodeInst.assignLabel(result_label, confidence);
        decodeInstResults.push_back(curDecodeInst);
      }
    }
    metric_dev.print();
    if ((!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS)) {
      m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults);
      bCurIterBetter = true;
    }
    if (testNum > 0) {
      if (!m_options.outBest.empty())
        decodeInstResults.clear();
      metric_test.reset();
      for (int idx = 0; idx < testExamples.size(); idx++) {
        string result_label;
        double confidence = predict(testExamples[idx].m_features, result_label);
        testInsts[idx].Evaluate(result_label, metric_test);
        if (bCurIterBetter && !m_options.outBest.empty()) {
          curDecodeInst.copyValuesFrom(testInsts[idx]);
          curDecodeInst.assignLabel(result_label, confidence);
          decodeInstResults.push_back(curDecodeInst);
        }
      }
      std::cout << "test:" << std::endl;
      metric_test.print();
      if ((!m_options.outBest.empty() && bCurIterBetter)) {
        m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults);
      }
    }
    for (int idx = 0; idx < otherExamples.size(); idx++) {
      std::cout << "processing " << m_options.testFiles[idx] << std::endl;
      if (!m_options.outBest.empty())
        decodeInstResults.clear();
      metric_test.reset();
      for (int idy = 0; idy < otherExamples[idx].size(); idy++) {
        string result_label;
        double confidence = predict(otherExamples[idx][idy].m_features, result_label);
        otherInsts[idx][idy].Evaluate(result_label, metric_test);
        if (bCurIterBetter && !m_options.outBest.empty()) {
          curDecodeInst.copyValuesFrom(otherInsts[idx][idy]);
          curDecodeInst.assignLabel(result_label, confidence);
          decodeInstResults.push_back(curDecodeInst);
        }
      }
      std::cout << "test:" << std::endl;
      metric_test.print();
      if ((!m_options.outBest.empty() && bCurIterBetter)) {
        m_pipe.outputAllInstances(m_options.testFiles[idx] + m_options.outBest, decodeInstResults);
      }
    }
    if ((m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS)) {
      if (metric_dev.getAccuracy() > bestDIS) {
        std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl;
        bestDIS = metric_dev.getAccuracy();
      }
      writeModelFile(modelFile);
    }
  } else {
    // No dev data: save the final model unconditionally.
    writeModelFile(modelFile);
  }
}