Ejemplo n.º 1
0
int
main(int argc, char *argv[])
{
   ECArgs args( argc, argv );
   /* l = length of sentence to be proceeds 0-100 is default
      n = work on each #'th line.
      d = print out debugging info at level #
      t = report timings */

   params.init( args );
   TimeIt timeIt;
   ECString  path( args.arg( 0 ) );
   generalInit(path);

   int      sentenceCount = 0;  //counts all sentences so we can use e.g,1/50;
   int totUnparsed = 0;
   double log600 = log2(600.0);

   ECString flnm = "dummy";
   if(args.nargs()==2) flnm = args.arg(1);
   ewDciTokStrm* tokStream = NULL;
   if(Bchart::tokenize)
     {
       tokStream = new ewDciTokStrm(flnm);
       if(args.nargs() ==1) tokStream->useCin = 1;
     }
   istream* nontokStream = NULL;
   if(args.nargs()==2) nontokStream = new ifstream(args.arg(1).c_str());
   else nontokStream = &cin;
   
   for( ;  ; sentenceCount++)
     {
       SentRep* srp;
       if(Bchart::tokenize) srp = new SentRep(*tokStream, SentRep::SGML);
       else srp = new SentRep(*nontokStream, SentRep::SGML);
       int len = srp->length();
       if(len > params.maxSentLen) continue;
       if(len == 0) break;
       if( !params.field().in(sentenceCount) ) continue;

       if(args.isset('t')) timeIt.befSent();

       MeChart*	chart = new MeChart( *srp );
       curChart = chart;
       
       if(args.isset('t') ) timeIt.lastTime = clock();

       chart->parse( );

       Item* topS = chart->topS();
       if(!topS)
	 {
	   totUnparsed++;
	   cerr << "Parse failed" << endl;
	   cerr << *srp << endl;
	   delete chart;
	   continue;
	 }
       if(args.isset('t') ) timeIt.betweenSent(chart);

       // compute the outside probabilities on the items so that we can
       // skip doing detailed computations on the really bad ones 
       chart->set_Alphas();

       AnsTreeStr& at = chart->findMapParse();
       if( at.probs[0] <= 0 ) error( "mapProbs did not return answer" );

       if(Feature::isLM)
	 {
	   double lgram = log2(at.sum);
	   lgram -= (srp->length()*log600);
	   double pgram = pow(2,lgram);
	   double iptri =chart->triGram();;
	   double ltri = (log2(iptri)-srp->length()*log600);
	   double ptri = pow(2.0,ltri);
	   double pcomb = (0.667 * pgram)+(0.333 * ptri);
	   double lmix = log2(pcomb);
	   cout << lgram << "\t" << ltri << "\t" << lmix << endl;
	 }

       int numVersions = 0;
       for(numVersions = 0 ; numVersions < NTH ; numVersions++)
	 if(at.probs[numVersions] <= 0) break;
       if(NTH > 1)cout << sentenceCount << "\t" << numVersions << endl;
       for(int i = 0 ; i < numVersions ; i++)
	 {
	   short pos = 0;
	   InputTree*  mapparse = inputTreeFromAnsTree(&at.trees[i], pos ,*srp);
	   double logP =log(at.probs[i]);
	   logP -= (srp->length()*log600);
	   if(NTH > 1) cout <<  logP << endl;
	   cout << *mapparse << endl << endl;
	   delete mapparse;
	 }
       cout << endl;
       if(args.isset('t') ) timeIt.aftSent();

       delete chart;
     }
   if( args.isset('t') ) timeIt.finish(sentenceCount);
   return 0;
}
Ejemplo n.º 2
0
int
main(int argc, char *argv[])
{
   ECArgs args( argc, argv );
   //AnsTreeHeap::print = true;
   /* o = basic, but not debugging, output.
      l = length of sentence to be proceeds 0-40 is default
      n = work on each #'th line.
      d = print out debugging info at level #
      t = report timings (requires o)
   */

   int i;
   params.init( args );
   //cerr << "Starting wwBCTest " << Feature::sLM << endl;

   if( args.nargs() > 2 || args.nargs() < 2 )	// require path name 
     error( "Need exactly two args.");
   ECString  path( args.arg( 0 ) );
   generalInit(path);
   for(int i = 0 ; i < 500 ; i++) histPoints[i] = false;
   histPoints[0] = true;
   if (Bchart::Nth == 50 || Bchart::Nth == 500 || Bchart::Nth == 1000)
     histPoints[1] = histPoints[9] = histPoints[24] = histPoints[49] = true;
   if (Bchart::Nth == 500 || Bchart::Nth == 1000)
     histPoints[99] = histPoints[249] = histPoints[499] = true;
   if(Bchart::Nth == 1000)
     histPoints[749] = histPoints[999] = true;

   TimeIt timeIt;

   ECString testSString = args.arg(1);
   ifstream testSStream(testSString.c_str());
   if( !testSStream )
     {
       cerr << "Could not find " << testSString << endl;
       error( "No testSstream");
     }

    ECString      pstatStreamName( params.fileString());
    pstatStreamName  += "PStatInfo/pStat";
    pstatStreamName += params.numString();
    pstatStreamName += ".txt";
    ofstream    pstatStream( pstatStreamName.c_str(), ios::out);
    if( !pstatStream )
      {
	cerr << "Looking to output to " << pstatStreamName << endl;
	error( "unable to open pstat stream");
      }

   for(i = 0 ; i < MAXNUMTHREADS ; i++) globalGi[i] =NULL;

   pthread_t thread[MAXNUMTHREADS];
   loopArg lA[MAXNUMTHREADS];
   for(i = 0 ; i < numThreads  ; i++){
     lA[i].id = i;
     lA[i].inpt=&testSStream;
     lA[i].outpt=&pstatStream;
     pthread_create(&thread[i],0,mainLoop, (void*)&lA[i]);
   }
  for(i=0; i<numThreads; i++){
    pthread_join(thread[i],0);
  }

   for(int i = 0 ; i < Bchart::Nth ; i++)
     if(histPoints[i])
       {
	 cerr << i << " " << totPst[i].fMeasure() << "\t";
       }
   cerr << endl;
   if(Feature::isLM)
     {
       cerr << pow(2.0,totGram/(double)totWords);
       cerr <<"\t" <<  pow(2.0,totTri/(double)totWords);
       cerr << "\t" << pow(2.0,totMix/(double)(totWords));
       cerr << endl;
     }
   if( args.isset('t') ) timeIt.finish(sentenceCount);
   pthread_exit(0);

   return 0;
}