Exemplo n.º 1
0
// Entry point.
//
// Built with -DTESTING_ the program runs a built-in regression suite:
// every case pairs a solver input ("rows cols" header followed by rows
// of '0'/'1' characters) with the exact text the solver must print.
// Without TESTING_ it reads INPUT.TXT and writes OUTPUT.TXT via solve().
int main(int argc, char* argv[]) {
#ifdef TESTING_

  // Register one regression case: `in` is fed to the solver,
  // `out` is the expected output, verbatim.
  auto add = [](char const* in, char const* out) {
    cases.push_back(make_pair(in, out));
  };

  add("3 3\n"
      "111\n"
      "110\n"
      "101\n",
      "4\n");

  add("5 10\n"
      "1011011111\n"
      "0111111110\n"
      "1111111111\n"
      "1011111111\n"
      "1101110111\n",
      "21\n");

  add("3 3\n"
      "111\n"
      "111\n"
      "111\n",
      "9\n");

  add("7 7\n"
      "1101101\n"
      "1111110\n"
      "1010100\n"
      "0011100\n"
      "1000010\n"
      "1100111\n"
      "1001110\n",
      "6\n");

  add("7 7\n"
      "1101101\n"
      "1111110\n"
      "1011100\n"
      "0011100\n"
      "1000010\n"
      "1100111\n"
      "1001110\n",
      "9\n");

  add("2 2\n"
      "00\n"
      "10\n",
      "1\n");

  add("2 2\n"
      "00\n"
      "00\n",
      "0\n");

  add("2 2\n"
      "11\n"
      "11\n",
      "4\n");

  add("1 1\n"
      "1\n",
      "1\n");

  add("1 1\n"
      "0\n",
      "0\n");

  add("4 4\n"
      "1111\n"
      "1011\n"
      "1111\n"
      "1111\n",
      "8\n");

  add("4 4\n"
      "1111\n"
      "1111\n"
      "1111\n"
      "1111\n",
      "16\n");

  add("2 2\n"
      "00\n"
      "01\n",
      "1\n");

  add("2 2\n"
      "01\n"
      "00\n",
      "1\n");

  add("2 2\n"
      "10\n"
      "00\n",
      "1\n");

  add("2 2\n"
      "01\n"
      "10\n",
      "1\n");

  add("4 4\n"
      "1011\n"
      "1111\n"
      "1111\n"
      "1111\n",
      "12\n");

  // Run every registered case (-1 presumably means "all" — defined
  // by runner elsewhere), then wait for a key so the console stays open.
  runner(cases, -1);
  getchar();

#else

  // Production mode: classic file-based judge I/O.
  ifstream is("INPUT.TXT");
  ofstream os("OUTPUT.TXT");
  solve(is, os);

#endif

}
Exemplo n.º 2
0
/**
 * Train an averaged-perceptron model from the sentences produced by
 * @p sentenceReader and serialize the result to @p modelFile.
 *
 * Two passes:
 *  1. stream all events, counting predicate (feature) occurrences and
 *     assigning a numeric PID to every predicate that reaches
 *     config.featureCutoff occurrences (PID 0 is reserved for the bias);
 *  2. turn each buffered event into a training Case — the vector of
 *     retained PIDs plus a class-label id — dropping events whose
 *     features were all filtered out.
 *
 * The model file is written in this order: configuration header,
 * label strings, predicate strings, trained parameters, global info.
 * Any reader of the model must consume the sections in the same order.
 *
 * @param sentenceReader source of training sentences (not owned here)
 * @param modelFile      path of the binary model file to (over)write
 */
void ApParser::train(SentenceReader* sentenceReader, char const* modelFile)
{
  // label name -> class id, built lazily during pass 2
  WordIndex		labelIndex;
  vector<string>	labels;

  // retained predicate names, in PID order (predLabels[i] has PID i+1)
  vector<string>	predLabels;

  // collect events
  list<Tanl::Classifier::Event*>	events;

  WordCounts		predCount; // count predicate occurrences
  int evCount = 0;
  Tanl::Classifier::PID pID = 1;		// leave 0 for bias
  // create inverted index of predicate names
  // used to create vector of pIDs
  EventStream eventStream(sentenceReader, &info);
  // ---- pass 1: buffer events and build predIndex with frequency cutoff ----
  while (eventStream.hasNext()) {
    Tanl::Classifier::Event* ev = eventStream.next();
    events.push_back(ev);
    evCount++;		      // count them explicitly, since size() is costly
    if (config.verbose) {
      // progress ticks: '.' per 1000 events, '+' per 10000
      if (evCount % 10000 == 0)
	cerr << '+' << flush;
      else if (evCount % 1000 == 0)
	cerr << '.' << flush;
    }
    vector<string>& ec = ev->features; // ec = {p1, ... , pn}
    for (unsigned j = 0; j < ec.size(); j++) {
      string& pred = ec[j];
      // decide whether to retain it (# occurrences > cutoff)
      if (predIndex.find(pred.c_str()) == predIndex.end()) {
	// not yet among those retained
	WordCounts::iterator wcit = predCount.find(pred);
	// increment # of occurrences
	int count;
	if (wcit == predCount.end())
	  count = predCount[pred] = 1;
	else
	  count = ++wcit->second;
	if (count >= config.featureCutoff) {
	  // frequent enough: accept it, assign the next PID, and stop
	  // counting it (erase releases the counter's memory)
	  predLabels.push_back(pred); // accept it into predLabels
	  predIndex[pred.c_str()] = pID++;
	  predCount.erase(pred);
	}
      }
    }
  }
  if (config.verbose)
    cerr << endl;

  // ---- pass 2: build cases from the buffered events ----
  // build cases
  Cases cases;
  cases.reserve(evCount);
  int n = 0;
  Tanl::Classifier::ClassID oID = 0;
  while (!events.empty()) {
    Tanl::Classifier::Event* ev = events.front();
    events.pop_front();
    cases.push_back(Case());
    X& x = cases[n].first;	// features
    // add features
    vector<string>& ec = ev->features; // ec = {p1, ... , pn}
    char const* c = ev->className.c_str();
    // keep only predicates that survived the cutoff in pass 1
    for (unsigned j = 0; j < ec.size(); j++) {
      string& pred = ec[j];
      WordIndex::const_iterator pit = predIndex.find(pred.c_str());
      if (pit != predIndex.end()) {
	x.push_back(pit->second);
      }
    }
    // events with no retained features are skipped: n is not advanced,
    // so their slot gets reused and trimmed by the final resize(n)
    if (x.size()) {
      if (labelIndex.find(c) == labelIndex.end()) {
	// first time this class name is seen: assign the next class id
	labelIndex[c] = oID++;
	labels.push_back(c);
      }
      cases[n].second = labelIndex[c];
      n++;
      if (config.verbose) {
	if (n % 10000 == 0)
	  cerr << '+' << flush;
	else if (n % 1000 == 0)
	  cerr << '.' << flush;
      }
      x.push_back(0);		// bias
    }
    delete ev;
  }
  // drop the trailing slots left by skipped events
  cases.resize(n);
  if (config.verbose)
    cerr << endl;

  int predSize = predLabels.size();
  predSize++;			// bias
  APSV ap(labels.size(), predSize);
  
  ofstream ofs(modelFile, ios::binary | ios::trunc);
  // dump configuration settings
  config.writeHeader(ofs);
  // dump labels
  // NOTE(review): FOR_EACH is a project macro; presumably iterates pit
  // over the container — confirm against its definition
  ofs << labels.size() << endl;
  FOR_EACH (vector<string>, labels, pit)
    ofs << *pit << endl;
  // dump predLabels
  ofs << predLabels.size() << endl;
  FOR_EACH (vector<string>, predLabels, pit)
    ofs << *pit << endl;
  // free memory before the memory-hungry training phase
  predIndex.clear();
  WordIndex().swap(predIndex); // STL map do not deallocate. resize(0) has no effect
  labelIndex.clear();
  WordIndex().swap(labelIndex);
  // clear memory for unfrequent entities
  info.clearRareEntities();
  // perform training
  ap.train(cases, iter);
  // dump parameters
  ap.save(ofs);
  // dump global info
  info.save(ofs);
}