// Runs test cases through run().
//
// cases: the collection of cases; each element exposes `.first` and `.second`,
//        which are handed to run() together with the case index.
// i:     index of the single case to run, or -1 to run every case in order.
void runner(Cases& cases, int i)
{
    if (i == -1) {
        // Sentinel value: sweep the whole collection front to back.
        for (int idx = 0; idx < static_cast<int>(cases.size()); ++idx)
            run(idx, cases[idx].first, cases[idx].second);
        return;
    }

    // A specific case was requested; the index is used as given (no bounds check).
    run(i, cases[i].first, cases[i].second);
}
// train(S, T) train M on S, T times void PA::train(Cases& cases, int T) { unsigned _S_ = cases.size(); vector<int> perm(_S_); for (int t = 0; t < T; ++t) { rand_permutation(perm); int updates = 0; for (int i = 0; i < _S_; ++i) { Case& c = cases[perm[i]]; X& xt = c.first; Y yt = c.second; // Find the highest ranked irrelevant label s. // The lowest ranked relevant label r = yt (there is just one). int s = -1; Float score_r = score(yt, xt); Float score_s = 0.0; for (int l = 0; l < k; ++l) { if (l != yt) { // xi in Yt Float xs = score(l, xt); if (xs > score_r && xs > score_s) { s = l; score_s = xs; } } } // update r and s rows Float loss = margin - (score_r - score_s); // special handling of most popular label (S). if (yt == 0) loss = 1.0 - (score_r - score_s); if (loss > 0.0) { updates++; int xtNormSq = xt.size(); Float tau = loss / (2 * xtNormSq); update(yt, tau, xt); if (s >= 0) update(s, -tau, xt); } } float updPercent = (100.0*updates) / _S_; if (verbose) cerr << "\tupds_" << t << " = " << updates << " (" << updPercent << "%)" << endl; if (updates == 0 || updPercent < updatePercent) break; # ifdef DEBUG save(cerr, true); # endif } }
// Handles a `typcase_class` node.
//
// Walks every branch of the case expression and
//   * reports a semantic error when a declared branch type appears more than
//     once, and
//   * collects the type of each branch expression that is non-empty and has
//     a type.
// The node's type is then set to class_table.join() of the collected types.
void TypeChecker::after(typcase_class *node)
{
    map<Symbol, int> seen_decl_types;
    vector<Symbol> branch_types;

    Cases branches = node->get_cases();
    branches->traverse([this, &seen_decl_types, &branch_types](Case branch) {
        Symbol decl_type = branch->get_type_decl();

        // insert() leaves an existing entry alone and reports whether a new
        // one was created, so a failed insert means a repeated declared type.
        const bool first_occurrence =
            seen_decl_types.insert(map<Symbol, int>::value_type(decl_type, 1)).second;
        if (!first_occurrence) {
            semant_error.semant_error(type_env.current_class, branch)
                << "Branch with type '" << decl_type
                << "' declared multiple times in the same case statement" << endl;
        }

        // Only branches with a real, typed expression take part in the join.
        if (!branch->get_expr()->is_empty()) {
            Symbol body_type = branch->get_expr()->get_type();
            if (!body_type) {
                return;
            }
            branch_types.push_back(body_type);
        }
    });

    node->set_type(type_env.class_table.join(branch_types));
}
int main(int argc, char* argv[]) { #ifdef TESTING_ cases.push_back(make_pair( "3 3\n" "111\n" "110\n" "101\n" , "4\n" )); cases.push_back(make_pair( "5 10\n" "1011011111\n" "0111111110\n" "1111111111\n" "1011111111\n" "1101110111\n" , "21\n" )); cases.push_back(make_pair( "3 3\n" "111\n" "111\n" "111\n" , "9\n" )); cases.push_back(make_pair( "7 7\n" "1101101\n" "1111110\n" "1010100\n" "0011100\n" "1000010\n" "1100111\n" "1001110\n" , "6\n" )); cases.push_back(make_pair( "7 7\n" "1101101\n" "1111110\n" "1011100\n" "0011100\n" "1000010\n" "1100111\n" "1001110\n" , "9\n" )); cases.push_back(make_pair( "2 2\n" "00\n" "10\n" , "1\n" )); cases.push_back(make_pair( "2 2\n" "00\n" "00\n" , "0\n" )); cases.push_back(make_pair( "2 2\n" "11\n" "11\n" , "4\n" )); cases.push_back(make_pair( "1 1\n" "1\n" , "1\n" )); cases.push_back(make_pair( "1 1\n" "0\n" , "0\n" )); cases.push_back(make_pair( "4 4\n" "1111\n" "1011\n" "1111\n" "1111\n" , "8\n" )); cases.push_back(make_pair( "4 4\n" "1111\n" "1111\n" "1111\n" "1111\n" , "16\n" )); cases.push_back(make_pair( "2 2\n" "00\n" "01\n" , "1\n" )); cases.push_back(make_pair( "2 2\n" "01\n" "00\n" , "1\n" )); cases.push_back(make_pair( "2 2\n" "10\n" "00\n" , "1\n" )); cases.push_back(make_pair( "2 2\n" "01\n" "10\n" , "1\n" )); cases.push_back(make_pair( "4 4\n" "1011\n" "1111\n" "1111\n" "1111\n" , "12\n" )); runner(cases, -1); getchar(); #else ifstream is("INPUT.TXT"); ofstream os("OUTPUT.TXT"); solve(is, os); #endif }
void ApParser::train(SentenceReader* sentenceReader, char const* modelFile) { WordIndex labelIndex; vector<string> labels; vector<string> predLabels; // collect events list<Tanl::Classifier::Event*> events; WordCounts predCount; // count predicate occurrences int evCount = 0; Tanl::Classifier::PID pID = 1; // leave 0 for bias // create inverted index of predicate names // used to create vector of pIDs EventStream eventStream(sentenceReader, &info); while (eventStream.hasNext()) { Tanl::Classifier::Event* ev = eventStream.next(); events.push_back(ev); evCount++; // count them explicitly, since size() is costly if (config.verbose) { if (evCount % 10000 == 0) cerr << '+' << flush; else if (evCount % 1000 == 0) cerr << '.' << flush; } vector<string>& ec = ev->features; // ec = {p1, ... , pn} for (unsigned j = 0; j < ec.size(); j++) { string& pred = ec[j]; // decide whether to retain it (# occurrences > cutoff) if (predIndex.find(pred.c_str()) == predIndex.end()) { // not yet among those retained WordCounts::iterator wcit = predCount.find(pred); // increment # of occurrences int count; if (wcit == predCount.end()) count = predCount[pred] = 1; else count = ++wcit->second; if (count >= config.featureCutoff) { predLabels.push_back(pred); // accept it into predLabels predIndex[pred.c_str()] = pID++; predCount.erase(pred); } } } } if (config.verbose) cerr << endl; // build cases Cases cases; cases.reserve(evCount); int n = 0; Tanl::Classifier::ClassID oID = 0; while (!events.empty()) { Tanl::Classifier::Event* ev = events.front(); events.pop_front(); cases.push_back(Case()); X& x = cases[n].first; // features // add features vector<string>& ec = ev->features; // ec = {p1, ... 
, pn} char const* c = ev->className.c_str(); for (unsigned j = 0; j < ec.size(); j++) { string& pred = ec[j]; WordIndex::const_iterator pit = predIndex.find(pred.c_str()); if (pit != predIndex.end()) { x.push_back(pit->second); } } if (x.size()) { if (labelIndex.find(c) == labelIndex.end()) { labelIndex[c] = oID++; labels.push_back(c); } cases[n].second = labelIndex[c]; n++; if (config.verbose) { if (n % 10000 == 0) cerr << '+' << flush; else if (n % 1000 == 0) cerr << '.' << flush; } x.push_back(0); // bias } delete ev; } cases.resize(n); if (config.verbose) cerr << endl; int predSize = predLabels.size(); predSize++; // bias APSV ap(labels.size(), predSize); ofstream ofs(modelFile, ios::binary | ios::trunc); // dump configuration settings config.writeHeader(ofs); // dump labels ofs << labels.size() << endl; FOR_EACH (vector<string>, labels, pit) ofs << *pit << endl; // dump predLabels ofs << predLabels.size() << endl; FOR_EACH (vector<string>, predLabels, pit) ofs << *pit << endl; // free memory predIndex.clear(); WordIndex().swap(predIndex); // STL map do not deallocate. resize(0) has no effect labelIndex.clear(); WordIndex().swap(labelIndex); // clear memory for unfrequent entities info.clearRareEntities(); // perform training ap.train(cases, iter); // dump parameters ap.save(ofs); // dump global info info.save(ofs); }