void Labeler::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile, const string& wordEmbFile) { if (optionFile != "") m_options.load(optionFile); m_options.showOptions(); vector<Instance> trainInsts, devInsts, testInsts; static vector<Instance> decodeInstResults; static Instance curDecodeInst; bool bCurIterBetter = false; m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance); if (devFile != "") m_pipe.readInstances(devFile, devInsts, m_options.maxInstance); if (testFile != "") m_pipe.readInstances(testFile, testInsts, m_options.maxInstance); //Ensure that each file in m_options.testFiles exists! vector<vector<Instance> > otherInsts(m_options.testFiles.size()); for (int idx = 0; idx < m_options.testFiles.size(); idx++) { m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_options.maxInstance); } //std::cout << "Training example number: " << trainInsts.size() << std::endl; //std::cout << "Dev example number: " << trainInsts.size() << std::endl; //std::cout << "Test example number: " << trainInsts.size() << std::endl; createAlphabet(trainInsts); if (!m_options.wordEmbFineTune) { addTestWordAlpha(devInsts); addTestWordAlpha(testInsts); for (int idx = 0; idx < otherInsts.size(); idx++) { addTestWordAlpha(otherInsts[idx]); } cout << "Remain words num: " << m_wordAlphabet.size() << endl; } NRMat<dtype> wordEmb; if (wordEmbFile != "") { readWordEmbeddings(wordEmbFile, wordEmb); } else { wordEmb.resize(m_wordAlphabet.size(), m_options.wordEmbSize); wordEmb.randu(1000); } NRVec<NRMat<dtype> > tagEmbs(m_tagAlphabets.size()); for (int idx = 0; idx < tagEmbs.size(); idx++) { tagEmbs[idx].resize(m_tagAlphabets[idx].size(), m_options.tagEmbSize); tagEmbs[idx].randu(1002 + idx); } m_classifier.init(m_labelAlphabet.size(), m_featAlphabet.size()); m_classifier.setDropValue(m_options.dropProb); vector<Example> trainExamples, devExamples, testExamples; initialExamples(trainInsts, trainExamples); initialExamples(devInsts, devExamples); initialExamples(testInsts, testExamples); vector<int> otherInstNums(otherInsts.size()); vector<vector<Example> > otherExamples(otherInsts.size()); for (int idx = 0; idx < otherInsts.size(); idx++) { initialExamples(otherInsts[idx], otherExamples[idx]); otherInstNums[idx] = otherExamples[idx].size(); } dtype bestDIS = 0; int inputSize = trainExamples.size(); int batchBlock = inputSize / m_options.batchSize; if (inputSize % m_options.batchSize != 0) batchBlock++; srand(0); std::vector<int> indexes; for (int i = 0; i < inputSize; ++i) indexes.push_back(i); static Metric eval, metric_dev, metric_test; static vector<Example> subExamples; int devNum = devExamples.size(), testNum = testExamples.size(); for (int iter = 0; iter < m_options.maxIter; ++iter) { std::cout << "##### Iteration " << iter << std::endl; random_shuffle(indexes.begin(), indexes.end()); eval.reset(); for (int updateIter = 0; updateIter < batchBlock; updateIter++) { subExamples.clear(); int start_pos = updateIter * m_options.batchSize; int end_pos = (updateIter + 1) * m_options.batchSize; if (end_pos > inputSize) end_pos = inputSize; for (int idy = start_pos; idy < end_pos; idy++) { subExamples.push_back(trainExamples[indexes[idy]]); } int curUpdateIter = iter * batchBlock + updateIter; dtype cost = m_classifier.process(subExamples, curUpdateIter); eval.overall_label_count += m_classifier._eval.overall_label_count; eval.correct_label_count += m_classifier._eval.correct_label_count; if ((curUpdateIter + 1) % m_options.verboseIter == 0) { //m_classifier.checkgrads(subExamples, curUpdateIter+1); std::cout << "current: " << updateIter + 1 << ", total block: " << batchBlock << std::endl; std::cout << "Cost = " << cost << ", Tag Correct(%) = " << eval.getAccuracy() << std::endl; } m_classifier.updateParams(m_options.regParameter, m_options.adaAlpha, m_options.adaEps); } if (devNum > 0) { bCurIterBetter = false; if (!m_options.outBest.empty()) decodeInstResults.clear(); metric_dev.reset(); for (int idx = 0; idx < devExamples.size(); idx++) { vector<string> result_labels; predict(devExamples[idx].m_features, result_labels, devInsts[idx].words); if (m_options.seg) devInsts[idx].SegEvaluate(result_labels, metric_dev); else devInsts[idx].Evaluate(result_labels, metric_dev); if (!m_options.outBest.empty()) { curDecodeInst.copyValuesFrom(devInsts[idx]); curDecodeInst.assignLabel(result_labels); decodeInstResults.push_back(curDecodeInst); } } metric_dev.print(); if (!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS) { m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults); bCurIterBetter = true; } if (testNum > 0) { if (!m_options.outBest.empty()) decodeInstResults.clear(); metric_test.reset(); for (int idx = 0; idx < testExamples.size(); idx++) { vector<string> result_labels; predict(testExamples[idx].m_features, result_labels, testInsts[idx].words); if (m_options.seg) testInsts[idx].SegEvaluate(result_labels, metric_test); else testInsts[idx].Evaluate(result_labels, metric_test); if (bCurIterBetter && !m_options.outBest.empty()) { curDecodeInst.copyValuesFrom(testInsts[idx]); curDecodeInst.assignLabel(result_labels); decodeInstResults.push_back(curDecodeInst); } } std::cout << "test:" << std::endl; metric_test.print(); if (!m_options.outBest.empty() && bCurIterBetter) { m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults); } } for (int idx = 0; idx < otherExamples.size(); idx++) { std::cout << "processing " << m_options.testFiles[idx] << std::endl; if (!m_options.outBest.empty()) decodeInstResults.clear(); metric_test.reset(); for (int idy = 0; idy < otherExamples[idx].size(); idy++) { vector<string> result_labels; predict(otherExamples[idx][idy].m_features, result_labels, otherInsts[idx][idy].words); if (m_options.seg) otherInsts[idx][idy].SegEvaluate(result_labels, metric_test); else otherInsts[idx][idy].Evaluate(result_labels, metric_test); if (bCurIterBetter && !m_options.outBest.empty()) { curDecodeInst.copyValuesFrom(otherInsts[idx][idy]); curDecodeInst.assignLabel(result_labels); decodeInstResults.push_back(curDecodeInst); } } std::cout << "test:" << std::endl; metric_test.print(); if (!m_options.outBest.empty() && bCurIterBetter) { m_pipe.outputAllInstances(m_options.testFiles[idx] + m_options.outBest, decodeInstResults); } } if (m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS) { std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl; bestDIS = metric_dev.getAccuracy(); writeModelFile(modelFile); } } // Clear gradients } }
void Labeler::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile, const string& wordEmbFile, const string& charEmbFile) { if (optionFile != "") m_options.load(optionFile); m_options.showOptions(); m_linearfeat = 0; vector<Instance> trainInsts, devInsts, testInsts; static vector<Instance> decodeInstResults; static Instance curDecodeInst; bool bCurIterBetter = false; m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance); if (devFile != "") m_pipe.readInstances(devFile, devInsts, m_options.maxInstance); if (testFile != "") m_pipe.readInstances(testFile, testInsts, m_options.maxInstance); //Ensure that each file in m_options.testFiles exists! vector<vector<Instance> > otherInsts(m_options.testFiles.size()); for (int idx = 0; idx < m_options.testFiles.size(); idx++) { m_pipe.readInstances(m_options.testFiles[idx], otherInsts[idx], m_options.maxInstance); } //std::cout << "Training example number: " << trainInsts.size() << std::endl; //std::cout << "Dev example number: " << trainInsts.size() << std::endl; //std::cout << "Test example number: " << trainInsts.size() << std::endl; createAlphabet(trainInsts); if (!m_options.wordEmbFineTune) { addTestWordAlpha(devInsts); addTestWordAlpha(testInsts); for (int idx = 0; idx < otherInsts.size(); idx++) { addTestWordAlpha(otherInsts[idx]); } cout << "Remain words num: " << m_textWordAlphabet.size() << endl; } if (!m_options.charEmbFineTune) { addTestCharAlpha(devInsts); addTestCharAlpha(testInsts); for (int idx = 0; idx < otherInsts.size(); idx++) { addTestCharAlpha(otherInsts[idx]); } cout << "Remain char num: " << m_charAlphabet.size() << endl; } NRMat<double> wordEmb; if (wordEmbFile != "") { readWordEmbeddings(wordEmbFile, wordEmb); } else { wordEmb.resize(m_textWordAlphabet.size(), m_options.wordEmbSize); wordEmb.randu(1000); } NRMat<double> charEmb; if (charEmbFile != "") { readWordEmbeddings(charEmbFile, charEmb); } else { charEmb.resize(m_charAlphabet.size(), m_options.charEmbSize); charEmb.randu(1001); } m_classifier.init(wordEmb, m_options.wordcontext, charEmb, m_options.charcontext, m_headWordAlphabet.size(), m_options.wordHiddenSize, m_options.charHiddenSize, m_options.hiddenSize); m_classifier.resetRemove(m_options.removePool, m_options.removeCharPool); m_classifier.setDropValue(m_options.dropProb); m_classifier.setWordEmbFinetune(m_options.wordEmbFineTune, m_options.charEmbFineTune); vector<Example> trainExamples, devExamples, testExamples; initialExamples(trainInsts, trainExamples); initialExamples(devInsts, devExamples); initialExamples(testInsts, testExamples); vector<int> otherInstNums(otherInsts.size()); vector<vector<Example> > otherExamples(otherInsts.size()); for (int idx = 0; idx < otherInsts.size(); idx++) { initialExamples(otherInsts[idx], otherExamples[idx]); otherInstNums[idx] = otherExamples[idx].size(); } double bestDIS = 0; int inputSize = trainExamples.size(); srand(0); std::vector<int> indexes; for (int i = 0; i < inputSize; ++i) indexes.push_back(i); static Metric eval, metric_dev, metric_test; static vector<Example> subExamples; int devNum = devExamples.size(), testNum = testExamples.size(); int maxIter = m_options.maxIter; if (m_options.batchSize > 1) maxIter = m_options.maxIter * (inputSize / m_options.batchSize + 1); double cost = 0.0; std::cout << "maxIter = " << maxIter << std::endl; for (int iter = 0; iter < m_options.maxIter; ++iter) { std::cout << "##### Iteration " << iter << std::endl; eval.reset(); if (m_options.batchSize == 1) { random_shuffle(indexes.begin(), indexes.end()); for (int updateIter = 0; updateIter < inputSize; updateIter++) { subExamples.clear(); int start_pos = updateIter; int end_pos = (updateIter + 1); if (end_pos > inputSize) end_pos = inputSize; for (int idy = start_pos; idy < end_pos; idy++) { subExamples.push_back(trainExamples[indexes[idy]]); } int curUpdateIter = iter * inputSize + updateIter; cost = m_classifier.process(subExamples, curUpdateIter); eval.overall_label_count += m_classifier._eval.overall_label_count; eval.correct_label_count += m_classifier._eval.correct_label_count; if ((curUpdateIter + 1) % m_options.verboseIter == 0) { //m_classifier.checkgrads(subExamples, curUpdateIter+1); std::cout << "current: " << updateIter + 1 << ", total instances: " << inputSize << std::endl; std::cout << "Cost = " << cost << ", SA Correct(%) = " << eval.getAccuracy() << std::endl; } m_classifier.updateParams(m_options.regParameter, m_options.adaAlpha, m_options.adaEps); } } else { cost = 0.0; for (int updateIter = 0; updateIter < m_options.verboseIter; updateIter++) { random_shuffle(indexes.begin(), indexes.end()); subExamples.clear(); for (int idy = 0; idy < m_options.batchSize; idy++) { subExamples.push_back(trainExamples[indexes[idy]]); } int curUpdateIter = iter * m_options.verboseIter + updateIter; cost += m_classifier.process(subExamples, curUpdateIter); //m_classifier.checkgrads(subExamples, curUpdateIter); eval.overall_label_count += m_classifier._eval.overall_label_count; eval.correct_label_count += m_classifier._eval.correct_label_count; m_classifier.updateParams(m_options.regParameter, m_options.adaAlpha, m_options.adaEps); } std::cout << "current iter: " << iter + 1 << ", total iter: " << maxIter << std::endl; std::cout << "Cost = " << cost << ", SA Correct(%) = " << eval.getAccuracy() << std::endl; } if (devNum > 0) { bCurIterBetter = false; if (!m_options.outBest.empty()) decodeInstResults.clear(); metric_dev.reset(); for (int idx = 0; idx < devExamples.size(); idx++) { string result_label; double confidence = predict(devExamples[idx].m_features, result_label); devInsts[idx].Evaluate(result_label, metric_dev); if (!m_options.outBest.empty()) { curDecodeInst.copyValuesFrom(devInsts[idx]); curDecodeInst.assignLabel(result_label, confidence); decodeInstResults.push_back(curDecodeInst); } } metric_dev.print(); if ((!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS)) { m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults); bCurIterBetter = true; } if (testNum > 0) { if (!m_options.outBest.empty()) decodeInstResults.clear(); metric_test.reset(); for (int idx = 0; idx < testExamples.size(); idx++) { string result_label; double confidence = predict(testExamples[idx].m_features, result_label); testInsts[idx].Evaluate(result_label, metric_test); if (bCurIterBetter && !m_options.outBest.empty()) { curDecodeInst.copyValuesFrom(testInsts[idx]); curDecodeInst.assignLabel(result_label, confidence); decodeInstResults.push_back(curDecodeInst); } } std::cout << "test:" << std::endl; metric_test.print(); if ((!m_options.outBest.empty() && bCurIterBetter)) { m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults); } } for (int idx = 0; idx < otherExamples.size(); idx++) { std::cout << "processing " << m_options.testFiles[idx] << std::endl; if (!m_options.outBest.empty()) decodeInstResults.clear(); metric_test.reset(); for (int idy = 0; idy < otherExamples[idx].size(); idy++) { string result_label; double confidence = predict(otherExamples[idx][idy].m_features, result_label); otherInsts[idx][idy].Evaluate(result_label, metric_test); if (bCurIterBetter && !m_options.outBest.empty()) { curDecodeInst.copyValuesFrom(otherInsts[idx][idy]); curDecodeInst.assignLabel(result_label, confidence); decodeInstResults.push_back(curDecodeInst); } } std::cout << "test:" << std::endl; metric_test.print(); if ((!m_options.outBest.empty() && bCurIterBetter)) { m_pipe.outputAllInstances(m_options.testFiles[idx] + m_options.outBest, decodeInstResults); } } if ((m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS)) { if (metric_dev.getAccuracy() > bestDIS) { std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl; bestDIS = metric_dev.getAccuracy(); } writeModelFile(modelFile); } } // Clear gradients } if (devNum > 0) { bCurIterBetter = false; if (!m_options.outBest.empty()) decodeInstResults.clear(); metric_dev.reset(); for (int idx = 0; idx < devExamples.size(); idx++) { string result_label; double confidence = predict(devExamples[idx].m_features, result_label); devInsts[idx].Evaluate(result_label, metric_dev); if (!m_options.outBest.empty()) { curDecodeInst.copyValuesFrom(devInsts[idx]); curDecodeInst.assignLabel(result_label, confidence); decodeInstResults.push_back(curDecodeInst); } } metric_dev.print(); if ((!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS)) { m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults); bCurIterBetter = true; } if (testNum > 0) { if (!m_options.outBest.empty()) decodeInstResults.clear(); metric_test.reset(); for (int idx = 0; idx < testExamples.size(); idx++) { string result_label; double confidence = predict(testExamples[idx].m_features, result_label); testInsts[idx].Evaluate(result_label, metric_test); if (bCurIterBetter && !m_options.outBest.empty()) { curDecodeInst.copyValuesFrom(testInsts[idx]); curDecodeInst.assignLabel(result_label, confidence); decodeInstResults.push_back(curDecodeInst); } } std::cout << "test:" << std::endl; metric_test.print(); if ((!m_options.outBest.empty() && bCurIterBetter)) { m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults); } } for (int idx = 0; idx < otherExamples.size(); idx++) { std::cout << "processing " << m_options.testFiles[idx] << std::endl; if (!m_options.outBest.empty()) decodeInstResults.clear(); metric_test.reset(); for (int idy = 0; idy < otherExamples[idx].size(); idy++) { string result_label; double confidence = predict(otherExamples[idx][idy].m_features, result_label); otherInsts[idx][idy].Evaluate(result_label, metric_test); if (bCurIterBetter && !m_options.outBest.empty()) { curDecodeInst.copyValuesFrom(otherInsts[idx][idy]); curDecodeInst.assignLabel(result_label, confidence); decodeInstResults.push_back(curDecodeInst); } } std::cout << "test:" << std::endl; metric_test.print(); if ((!m_options.outBest.empty() && bCurIterBetter)) { m_pipe.outputAllInstances(m_options.testFiles[idx] + m_options.outBest, decodeInstResults); } } if ((m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS)) { if (metric_dev.getAccuracy() > bestDIS) { std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl; bestDIS = metric_dev.getAccuracy(); } writeModelFile(modelFile); } } else { writeModelFile(modelFile); } }