Beispiel #1
0
// Executes test cases through run().
//   cases - collection of (first, second) pairs handed to run()
//   i     - index of the single case to execute, or -1 to execute all of them
//           in index order
void runner(Cases& cases, int i) {
  if (i == -1) {
    // Run everything; the size is re-read on every iteration.
    int idx = 0;
    while (idx < (int)cases.size()) {
      run(idx, cases[idx].first, cases[idx].second);
      ++idx;
    }
    return;
  }
  run(i, cases[i].first, cases[i].second);
}
Beispiel #2
0
// train(S, T): train the model on the cases S for at most T epochs.
//
//   cases - training set; each case is a (features, label) pair
//   T     - maximum number of passes over the training set
//
// Every epoch visits the cases in the order given by rand_permutation().
// For each case the score of the correct label r is compared with the best
// scoring wrong label s (only wrong labels that beat both r and 0 qualify).
// When the resulting loss is positive, row r is moved towards the features
// and row s (if any) away from them by tau = loss / (2 * |x|^2).
//
// Training stops early when an epoch performs no update or when the share of
// updated cases falls below updatePercent.
void PA::train(Cases& cases, int T) {
  const unsigned numCases = cases.size();
  vector<int> perm(numCases);
  for (int t = 0; t < T; ++t) {
    rand_permutation(perm);
    int updates = 0;
    for (unsigned i = 0; i < numCases; ++i) {
      Case& c = cases[perm[i]];
      X& xt = c.first;
      Y yt = c.second;
      // Find the highest ranked irrelevant label s.
      // The lowest ranked relevant label r = yt (there is just one).
      int s = -1;
      Float score_r = score(yt, xt);
      Float score_s = 0.0;
      for (int l = 0; l < k; ++l) {
        if (l == yt)
          continue;             // only irrelevant labels are candidates
        Float xs = score(l, xt);
        if (xs > score_r && xs > score_s) {
          s = l;
          score_s = xs;
        }
      }
      // update r and s rows
      Float loss = margin - (score_r - score_s);
      // special handling of most popular label (S).
      if (yt == 0)
        loss = 1.0 - (score_r - score_s);
      if (loss > 0.0) {
        // The squared norm is taken to be the number of features in xt.
        int xtNormSq = xt.size();
        // A case without features would make the step size below a division
        // by zero (inf/NaN tau), so it is skipped and not counted as an
        // update.
        if (xtNormSq == 0)
          continue;
        updates++;
        Float tau = loss / (2 * xtNormSq);
        update(yt, tau, xt);
        if (s >= 0)
          update(s, -tau, xt);
      }
    }
    // Guard against 0/0 when the training set is empty.
    float updPercent = numCases ? (100.0*updates) / numCases : 0.0;
    if (verbose)
      cerr << "\tupds_" << t << " = " << updates
           << " (" << updPercent << "%)" << endl;
    if (updates == 0 || updPercent < updatePercent)
      break;
#  ifdef DEBUG
    save(cerr, true);
#  endif
  }
}
Beispiel #3
0
// Types a case expression after its branches have been visited.
//
// Reports an error for every branch whose declared type was already used by
// an earlier branch of the same case, collects the types of the non-empty,
// typed branch bodies, and sets the node's type to the join of those types.
void TypeChecker::after(typcase_class *node) {
    map<Symbol, int> seen_decl_types;   // declared type -> 1 once encountered
    vector<Symbol> body_types;          // types of the branch bodies
    Cases branches = node->get_cases();
    branches->traverse([this, &body_types, &seen_decl_types](Case branch) {
        Symbol decl_type = branch->get_type_decl();
        if (seen_decl_types.count(decl_type) == 0) {
            seen_decl_types[decl_type] = 1;
        } else {
            semant_error.semant_error(type_env.current_class, branch)
                << "Branch with type '" << decl_type
                << "' declared multiple times in the same case statement" << endl;
        }

        // Only branches with a body that already carries a type contribute
        // to the join.
        if (!branch->get_expr()->is_empty()) {
            Symbol body_type = branch->get_expr()->get_type();
            if (!body_type) {
                return;
            }
            body_types.push_back(body_type);
        }
    });
    node->set_type(type_env.class_table.join(body_types));
}
Beispiel #4
0
// Program entry point.
//
// With TESTING_ defined: registers the built-in sample cases (input text and
// expected output) in `cases`, runs all of them through runner() and waits
// for a key press. Otherwise: solves the problem read from INPUT.TXT and
// writes the answer to OUTPUT.TXT.
int main(int argc, char* argv[]) {
#ifdef TESTING_

  // Each row is { input text, expected output }.
  static const char* const kSamples[][2] = {
    { "3 3\n"
      "111\n"
      "110\n"
      "101\n",
      "4\n" },

    { "5 10\n"
      "1011011111\n"
      "0111111110\n"
      "1111111111\n"
      "1011111111\n"
      "1101110111\n",
      "21\n" },

    { "3 3\n"
      "111\n"
      "111\n"
      "111\n",
      "9\n" },

    { "7 7\n"
      "1101101\n"
      "1111110\n"
      "1010100\n"
      "0011100\n"
      "1000010\n"
      "1100111\n"
      "1001110\n",
      "6\n" },

    { "7 7\n"
      "1101101\n"
      "1111110\n"
      "1011100\n"
      "0011100\n"
      "1000010\n"
      "1100111\n"
      "1001110\n",
      "9\n" },

    { "2 2\n"
      "00\n"
      "10\n",
      "1\n" },

    { "2 2\n"
      "00\n"
      "00\n",
      "0\n" },

    { "2 2\n"
      "11\n"
      "11\n",
      "4\n" },

    { "1 1\n"
      "1\n",
      "1\n" },

    { "1 1\n"
      "0\n",
      "0\n" },

    { "4 4\n"
      "1111\n"
      "1011\n"
      "1111\n"
      "1111\n",
      "8\n" },

    { "4 4\n"
      "1111\n"
      "1111\n"
      "1111\n"
      "1111\n",
      "16\n" },

    { "2 2\n"
      "00\n"
      "01\n",
      "1\n" },

    { "2 2\n"
      "01\n"
      "00\n",
      "1\n" },

    { "2 2\n"
      "10\n"
      "00\n",
      "1\n" },

    { "2 2\n"
      "01\n"
      "10\n",
      "1\n" },

    { "4 4\n"
      "1011\n"
      "1111\n"
      "1111\n"
      "1111\n",
      "12\n" },
  };

  // Register the samples in table order.
  const int sampleCount = (int)(sizeof(kSamples) / sizeof(kSamples[0]));
  for (int idx = 0; idx < sampleCount; ++idx)
    cases.push_back(make_pair(kSamples[idx][0], kSamples[idx][1]));

  runner(cases, -1);
  getchar();

#else

  ifstream is("INPUT.TXT");
  ofstream os("OUTPUT.TXT");
  solve(is, os);

#endif

}
Beispiel #5
0
// Trains a model from the events produced by an EventStream over
// sentenceReader and writes the result to modelFile.
//
//   sentenceReader - source handed to the EventStream that yields the events
//   modelFile      - path of the output file (opened binary, truncated)
//
// Steps, in order:
//   1. Read all events; give a predicate ID to every feature string once its
//      occurrence count reaches config.featureCutoff.
//   2. Turn each event into a case (list of predicate IDs + class ID),
//      deleting the event afterwards. Events without any retained feature
//      produce no case.
//   3. Write header, labels and predicate labels to the model file.
//   4. Release the indexes, train, then append the trained parameters and
//      the global info to the model file.
//
// NOTE(review): the output stream is never checked for a failed open.
// NOTE(review): `iter` and `predIndex` are not declared in this function —
// presumably class members; confirm.
void ApParser::train(SentenceReader* sentenceReader, char const* modelFile)
{
  WordIndex		labelIndex;	// class name -> class ID
  vector<string>	labels;		// class names, in order of first use

  vector<string>	predLabels;	// retained feature strings, in ID order

  // collect events
  list<Tanl::Classifier::Event*>	events;

  WordCounts		predCount; // count predicate occurrences
  int evCount = 0;
  Tanl::Classifier::PID pID = 1;		// leave 0 for bias
  // create inverted index of predicate names
  // used to create vector of pIDs
  EventStream eventStream(sentenceReader, &info);
  while (eventStream.hasNext()) {
    Tanl::Classifier::Event* ev = eventStream.next();
    events.push_back(ev);
    evCount++;		      // count them explicitly, since size() is costly
    // progress: '+' every 10000 events, '.' every other 1000
    if (config.verbose) {
      if (evCount % 10000 == 0)
	cerr << '+' << flush;
      else if (evCount % 1000 == 0)
	cerr << '.' << flush;
    }
    vector<string>& ec = ev->features; // ec = {p1, ... , pn}
    for (unsigned j = 0; j < ec.size(); j++) {
      string& pred = ec[j];
      // decide whether to retain it (# occurrences > cutoff)
      if (predIndex.find(pred.c_str()) == predIndex.end()) {
	// not yet among those retained
	WordCounts::iterator wcit = predCount.find(pred);
	// increment # of occurrences
	int count;
	if (wcit == predCount.end())
	  count = predCount[pred] = 1;
	else
	  count = ++wcit->second;
	// retained as soon as the count reaches the cutoff
	if (count >= config.featureCutoff) {
	  predLabels.push_back(pred); // accept it into predLabels
	  predIndex[pred.c_str()] = pID++;
	  predCount.erase(pred);	// its count is no longer needed
	}
      }
    }
  }
  if (config.verbose)
    cerr << endl;

  // build cases
  Cases cases;
  cases.reserve(evCount);
  int n = 0;			// number of cases kept so far
  Tanl::Classifier::ClassID oID = 0;
  while (!events.empty()) {
    Tanl::Classifier::Event* ev = events.front();
    events.pop_front();
    // A new element is appended for every event, but the one being filled is
    // cases[n]: if the previous event was skipped, n did not advance and its
    // (still empty) slot is filled instead. Surplus elements are dropped by
    // the resize(n) after the loop.
    cases.push_back(Case());
    X& x = cases[n].first;	// features
    // add features
    vector<string>& ec = ev->features; // ec = {p1, ... , pn}
    char const* c = ev->className.c_str();
    for (unsigned j = 0; j < ec.size(); j++) {
      string& pred = ec[j];
      WordIndex::const_iterator pit = predIndex.find(pred.c_str());
      // features that were not retained are silently dropped
      if (pit != predIndex.end()) {
	x.push_back(pit->second);
      }
    }
    // keep the case only if at least one feature was retained
    if (x.size()) {
      // assign the next class ID to a class name seen for the first time
      if (labelIndex.find(c) == labelIndex.end()) {
	labelIndex[c] = oID++;
	labels.push_back(c);
      }
      cases[n].second = labelIndex[c];
      n++;
      if (config.verbose) {
	if (n % 10000 == 0)
	  cerr << '+' << flush;
	else if (n % 1000 == 0)
	  cerr << '.' << flush;
      }
      x.push_back(0);		// bias
    }
    delete ev;
  }
  cases.resize(n);		// drop the unused trailing elements
  if (config.verbose)
    cerr << endl;

  int predSize = predLabels.size();
  predSize++;			// bias
  APSV ap(labels.size(), predSize);
  
  ofstream ofs(modelFile, ios::binary | ios::trunc);
  // dump configuration settings
  config.writeHeader(ofs);
  // dump labels
  ofs << labels.size() << endl;
  FOR_EACH (vector<string>, labels, pit)
    ofs << *pit << endl;
  // dump predLabels
  ofs << predLabels.size() << endl;
  FOR_EACH (vector<string>, predLabels, pit)
    ofs << *pit << endl;
  // free memory
  predIndex.clear();
  WordIndex().swap(predIndex); // STL map do not deallocate. resize(0) has no effect
  labelIndex.clear();
  WordIndex().swap(labelIndex);
  // clear memory for unfrequent entities
  info.clearRareEntities();
  // perform training
  ap.train(cases, iter);
  // dump parameters
  ap.save(ofs);
  // dump global info
  info.save(ofs);
}