Exemple #1
0
// Runs the counting pass over the first `sc` trees of trainingData.
// Each tree is handed to makeSent() and then gatherFfCounts(tree, 0).
// When whichInt is TTCALC, an additional zero-width "STOP" tree placed at
// the sentence's finish position is wrapped in a TreeHist and passed to
// callProcG().
// The 1301 in the parameter declaration is not enforced by the language;
// the caller must make sure `sc` does not exceed the array it passes.
void
goThroughSents(InputTree* trainingData[1301], int sc)
{
  for(int idx = 0 ; idx < sc ; ++idx)
    {
      InputTree* sentTree = trainingData[idx];
      makeSent(sentTree);
      gatherFfCounts(sentTree,0);

      // Only the TTCALC pass gets the synthetic STOP event.
      // NOTE(review): under this guard the "^^" arm below can never be
      // selected; main()'s equivalent loop also admits WWCALC -- confirm
      // whether this guard was meant to do the same.
      if(whichInt != TTCALC) continue;

      list<InputTree*> noChildren;
      InputTree stopTree(sentTree->finish(),sentTree->finish(),
                         whichInt==TTCALC ? "" : "^^",
                         "STOP","",
                         noChildren,NULL,NULL);
      // The STOP tree is its own head.
      stopTree.headTree() = &stopTree;

      TreeHist stopHist(&stopTree,0);
      stopHist.hpos = 0;
      callProcG(&stopHist);
    }
}
// Tells whether the right edge of treeh->tree is an "effective end".
// Returns 1 when:
//   - the tree finishes exactly at endPos, or
//   - the token at the finish position has term "." or word ";", or
//   - that token is "," and the following token is "''".
// Returns 0 in every other case. A finish position beyond endPos also
// prints "Pos > endPos" on stdout before returning 0.
int
tree_effEnd(TreeHist* treeh)
{
    const int finishPos = treeh->tree->finish();

    if(finishPos > endPos)
    {
        cout << "Pos > endPos" << endl;
        return 0;
    }
    if(finishPos == endPos) return 1;

    ECString curWord = sentence[finishPos]->word();
    ECString curTerm = sentence[finishPos]->term();

    if(curTerm == "." || curWord == ";") return 1;

    // The comma rule looks one token ahead, so it is only tried when
    // finishPos+1 is still before endPos.
    if((finishPos+2) > endPos) return 0;

    if(!(curWord == ",")) return 0;

    // ,'' acts like end of sentence (an alternative version would
    // return 2 instead of 0 for a bare comma).
    return (sentence[finishPos+1]->word() == "''") ? 1 : 0;
}
// Buckets the span width of treeh->tree, i.e. finish() - start().
// Returns the index (0..8) of the first upper bound in
// {1, 3, 6, 10, 15, 21, 28, 36, 999} that is >= the width.
// A width above 999 is treated as a programming error: it trips the
// assertion in debug builds and returns -1 when assertions are disabled.
int
tree_size(TreeHist* treeh)
{
    static const int bucs[9] = {1, 3, 6, 10, 15, 21, 28, 36, 999};
    InputTree* tree = treeh->tree;
    const int sz = tree->finish() - tree->start();
    for(int i = 0 ; i < 9 ; i++)
        if(sz <= bucs[i]) return i;
    // A bare string literal is a non-null pointer and therefore always
    // true, so assert("...") could never fire; negating it makes the
    // check real while keeping the message in the failure output.
    assert(!"Never get here");
    return -1;
}
// Returns the integer id of the terminal found at a boundary position
// chosen from treeh->tree, treeh->pos and treeh->hpos:
//   - pos < 0, or the tree has no subtrees : tree->start()-1
//   - pos == number of subtrees            : tree->finish()
//   - otherwise, for the subtree at index pos:
//       pos < hpos                          : subtree->start()-1
//       pos > hpos                          : subtree->finish()
//       pos == hpos                         : start()-1 if blInd is
//                                             non-zero, else finish()
// If the chosen position is negative or equals endPos, the id of
// Term::stopTerm is returned; otherwise the id of the term of
// sentence[position].
int
tree_B(TreeHist* treeh, int blInd)
{
    InputTree* node = treeh->tree;
    const int childIdx = treeh->pos;
    const int headIdx = treeh->hpos;
    const int numKids = node->subTrees().size();
    assert(childIdx <= numKids);

    int boundary;
    if(childIdx < 0 || numKids == 0)
    {
        boundary = node->start()-1;
    }
    else if(childIdx == numKids)
    {
        boundary = node->finish();
    }
    else
    {
        // Walk to the childIdx-th subtree, never stepping past the end.
        InputTreesIter kidIt = node->subTrees().begin();
        int stepsLeft = childIdx;
        while(stepsLeft > 0 && kidIt != node->subTrees().end())
        {
            ++kidIt;
            --stepsLeft;
        }
        if(kidIt != node->subTrees().end())
        {
            InputTree* kid = *kidIt;
            // Left of the head (or at the head with blInd set) looks at
            // the position just before the subtree; otherwise at its end.
            const bool useLeftEdge =
                (childIdx < headIdx) || (childIdx == headIdx && blInd);
            if(useLeftEdge) boundary = kid->start()-1;
            else boundary = kid->finish();
        }
    }

    assert(boundary <= endPos);
    if(boundary < 0 || boundary == endPos)
        return Term::stopTerm->toInt();
    return Term::get(sentence[boundary]->term())->toInt();
}
// Entry point of the count-gathering tool.
//
// Positional arguments (exactly two):
//   arg 0 - the conditioned type; passed to Feature::assignCalc() and
//           Feature::init(), and used in the output file name
//   arg 1 - a path prefix handed to the data-loading routines and used
//           as the prefix of the output file name
// Options read here: N (sets numGram), L (sets Term::Language),
// m (sets FeatureTree::minCount, default 1).
//
// Reads InputTree objects from stdin until one of length 0 is read,
// gathers counts for each, runs finalProbComputation(), and writes every
// subtree of the FeatureTree root to "<arg1><arg0>.g". Summary totals go
// to stdout, progress messages to stderr.
//
// Returns 0 on success, 1 (with a message on stderr) when the argument
// count is wrong or the output file cannot be opened.
int
main(int argc, char *argv[])
{
   // Set both core-file size limits to zero for this process.
   struct rlimit 	core_limits;
   core_limits.rlim_cur = 0;
   core_limits.rlim_max = 0;
   setrlimit( RLIMIT_CORE, &core_limits );

   ECArgs args( argc, argv );
   // Checked explicitly rather than with assert(): an assert vanishes
   // under NDEBUG and args.arg(1) below would then be read unchecked.
   if(args.nargs() != 2)
     {
       cerr << "Expected 2 arguments (conditionedType path), got "
	    << args.nargs() << endl;
       return 1;
     }
   if(args.isset('N')) numGram = atoi(args.value('N').c_str());
   Feature::setLM();
   if(args.isset('L')) Term::Language = args.value('L');
   string  path( args.arg( 1 ) );
   if(Term::Language == "Ch") readHeadInfoCh(path);
   else readHeadInfo(path);

   string  conditionedType( args.arg(0) );
   cerr << "start kn3Counts " <<  conditionedType << endl;
   int minCount = 1;
   if(args.isset('m')) minCount = atoi(args.value('m').c_str());
   Feat::Usage = KNCOUNTS;
   FeatureTree::minCount = minCount;

   Term::init(path);
   // NOTE(review): readHeadInfo(path) was already called above unless the
   // language is "Ch"; this second, unconditional call is kept as-is
   // because the required initialisation order is not visible here --
   // confirm whether it is intentional.
   readHeadInfo(path);
   Pst pst(path);
   addSubFeatureFns();

   Feature::assignCalc(conditionedType);

   FeatureTree::root() = new FeatureTree();
   Feature::init(path, conditionedType);
   int wI = Feature::whichInt;
   int ceFunInt = Feature::conditionedFeatureInt[wI];

   Feature::conditionedEvent
     = SubFeature::Funs[ceFunInt];

   int sentenceCount = 0;
   for( ; ; sentenceCount++)
     {
       // Progress message on sentence 1, 10001, 20001, ...
       if(sentenceCount%10000 == 1)
	 {
	   cerr << "rCounts "
	     << sentenceCount << endl;
	 }
       InputTree     correct;
       cin >> correct;
       // A tree of length 0 ends the input loop.
       if(correct.length() == 0) break;
       EcSPairs wtList;
       correct.make(wtList);
       InputTree* par = &correct;

       makeSent(par);
       curS = par;
       gatherFfCounts(par, 0);
       if(wI == TTCALC || wI == WWCALC)
	 {
	   // Feed one extra zero-width "STOP" tree, located at the
	   // sentence's finish position, through callProcG(); its word
	   // is "" for TTCALC and "^^" for WWCALC.
	   list<InputTree*> dummy2;
	   InputTree stopInputTree(par->finish(),par->finish(),
				   wI==TTCALC ? "" : "^^",
				   "STOP","",
				   dummy2,NULL,NULL);
	   stopInputTree.headTree() = &stopInputTree;
	   TreeHist treeh(&stopInputTree,0);
	   treeh.hpos = 0;
	   callProcG(&treeh);
	 }
     }
   finalProbComputation();

   string resS(path);
   resS += conditionedType;
   resS += ".g";
   ofstream res(resS.c_str());
   // Explicit check instead of assert(res): with NDEBUG the assert is
   // removed and the results would be silently discarded.
   if(!res)
     {
       cerr << "Could not open " << resS << " for writing" << endl;
       return 1;
     }
   FTreeMap& fts = FeatureTree::root()->subtree;
   for(FTreeMap::iterator fti = fts.begin() ; fti != fts.end() ; ++fti)
     {
       int asVal = (*fti).first;
       (*fti).second->printFTree(asVal, res);
     }
   res.close();
   cout << "Tot words: " << totWords << endl;
   cout << "Total params for " << conditionedType << " = "
	<< FeatureTree::totParams << endl;
   return 0;
}