コード例 #1
0
/* Seconds between two clock() readings.  A negative difference is bumped
   by 2147 seconds (presumably to compensate for a wrapped 32-bit clock_t
   -- TODO confirm). */
static double
clockElapsedSec(clock_t from, clock_t to)
{
   double fromSec = static_cast<double>(from)/static_cast<double>(CLOCKS_PER_SEC);
   double toSec = static_cast<double>(to)/static_cast<double>(CLOCKS_PER_SEC);
   double elapsed = toSec - fromSec;
   if(elapsed < 0) elapsed += 2147;
   return elapsed;
}

/* Reads sentences from the token stream named by the second positional
   argument, parses each one with a MeChart built from the data found under
   the first positional argument (the path), and prints the resulting tree
   on stdout.  Parse failures are reported on stderr.

   Exactly two positional arguments are required: <path> <sentence file>.
   Returns 0; fatal problems are reported through error(). */
int
main(int argc, char *argv[])
{
   ECArgs args( argc, argv );
   /* Options read in this function:
      l = maximum sentence length to process (default 70)
      d = print out debugging info at level #
      t = report timings
      s = maximum sleep time (random start-up delay in [0,#) seconds)
      T = T# sets Bchart::timeFactor to #/10
      Options listed historically but not read here (presumably consumed
      by params.init -- TODO confirm):
      o = basic, but not debugging, output.
      n = work on each #'th line.
      W = use wwclasses
      R = use rwclasses
      f = f# says multiply ctl2 counts by #
      p = p# use prepFactor #
      P = which types of prob models to use */

   // prevent core file creation;
   struct rlimit core_limits;
   core_limits.rlim_cur = 0;
   core_limits.rlim_max = 0;
   setrlimit( RLIMIT_CORE, &core_limits );

   params.init( args );
   if(args.isset('s'))
     {
       int  maxDelay = atoi(args.value('s').c_str());
       srand(params.whichSent());
       int randN = rand();
       // atoi() yields 0 for "0" or non-numeric text; a zero modulus is
       // undefined behaviour, so treat that case as "no delay".
       int delay = (maxDelay != 0) ? randN%maxDelay : 0;
       sleep(delay);
     }

   if(args.isset('T'))
     {
       // The option value is given in tenths.
       int fac = atoi(args.value('T').c_str());
       float ffac = (float)fac;
       ffac /= 10;
       Bchart::timeFactor = ffac;
     }

   int maxSentLen = 70;
   if(args.isset('l'))
     {
       maxSentLen = atoi(args.value('l').c_str());
     }

   const bool reportTimings = args.isset('t');

   int    totEdges = 0;
   int    totPopedEdges = 0;
   double totAccessTime = 0;
   double totParseTime = 0;
   double totSemParseTime = 0;
   clock_t lastTime = 0;
   clock_t currTime = 0;

   endFactor = 1.2;
   midFactor = (1.0 - (.3684 * endFactor))/(1.0 - .3684);

   // Both arg(0) and arg(1) are read below, so anything other than
   // exactly two positional arguments is an error.
   if( args.nargs() != 2 )
     error( "Need exactly two arg." );
   ECString  path( args.arg( 0 ) );
   readHeadInfo(path);
   Term::init( path );
   InputTree::init();

   ECString testSString( args.arg(1) );

   ewDciTokStrm testSStream(testSString);
   //ifstream testSStream(testSString.c_str());
   if( !testSStream ) error( "No testSstream" );
   int      sentenceCount = 0;  //counts all sentences so we can use 1/50;

   // Only checked for existence here; the stream is not read in this function.
   ECString  probSumString( path );
   probSumString += "pSgT.txt";
   ifstream    probSumStream( probSumString.c_str() );
   if( !probSumStream ) error( "Failed to find probSum file" );

   Bchart::readTermProbs(path);

   if( args.isset('d') )
     {
       int lev = atoi(args.value('d').c_str());
       Bchart::printDebug() = lev;
     }
   int totSents = 0;
   int totUnparsed = 0;

   MeChart::init(path);
   Bchart::setPosStarts();
   while( !(!testSStream) )
     {
       SentRep sr(testSStream, SentRep::SGML);
       int len = sr.length();
       // Empty and over-long sentences are skipped without being counted.
       if(len == 0) continue;
       if(len > maxSentLen) continue;
       // Sentences outside the field selected by params are counted but
       // not parsed.
       if( !params.field().in(sentenceCount) )
         {
           sentenceCount++;
           continue;
         }
       if(len == 1)
         {
           if(sr[0].lexeme() == "</DOC>")
             {
               continue;
             }
         }
       sentenceCount++;

       //SentRep orgsr( wtList );  // used in precision calc;

       if( reportTimings )
         {
           // NOTE(review): both readings are taken back to back, after the
           // sentence has already been read, so this interval is ~0.
           // Confirm whether the first reading was meant to precede the
           // SentRep construction.
           lastTime = clock();
           currTime = clock();
           double elapsedTime = clockElapsedSec(lastTime, currTime);
           cerr << "Reading data time = " << elapsedTime << endl;
           totAccessTime += elapsedTime;
           lastTime = currTime;
         }

       MeChart* chart = new MeChart( sr );
       curChart = chart;
       chart->ruleCountTimeout() = 250000;

       totSents++;
       if( reportTimings )
         lastTime = clock();
       chart->parse( );
       Item* topS = chart->topS();

       if(!topS)
         {
           // One-word sentences are not retried.
           if(len == 1)
             {
               delete chart;
               continue;
             }
           // Second attempt on a fresh chart with a different Edge::DemFac
           // and a larger rule-count timeout; DemFac is set to .999 once
           // the retry has run.
           Edge::DemFac = .9;
           delete chart;
           chart = new MeChart(sr);
           chart->ruleCountTimeout() = 350000;
           curChart = chart;
           chart->parse( );
           topS = chart->topS();
           Edge::DemFac = .999;
           if(!topS)
             {
               totUnparsed++;
               cerr << "Parse failed on: " << sr << endl;

               delete chart;
               continue;
             }
         }

       // compute the outside probabilities on the items so that we can
       // skip doing detailed computations on the really bad ones
       if( reportTimings )
         {
           currTime = clock();
           double elapsedTime = clockElapsedSec(lastTime, currTime);
           cerr << "Parsing time = " << elapsedTime
                << "\tEdges created = " << chart->totEdgeCountAtS()
                << "\tEdges poped = " << chart->popedEdgeCountAtS() << endl;
           totParseTime += elapsedTime;
           totEdges += chart->totEdgeCountAtS();
           totPopedEdges += chart->popedEdgeCountAtS();
           lastTime = clock();
         }

       chart->set_Alphas();

       AnswerTree* at = chart->findMapParse();
       if( !at )
         {
           totUnparsed++;
           cerr << "MapParse failed on: " << sr << endl;
           delete chart;
           continue;
         }
       InputTree*  mapparse = inputTreeFromAnswerTree(at,topS);
       // NOTE(review): 'at' is never released here; the calls below were
       // already disabled in the original.
       //at->deleteSubTrees();
       //delete at;
       cout << *mapparse << endl;
       delete mapparse;

       if( reportTimings )
         {
           currTime = clock();
           double elapsedTime = clockElapsedSec(lastTime, currTime);
           cerr << "Sem Parsing time = " << elapsedTime << endl;
           totSemParseTime += elapsedTime;
         }

       delete chart;
     }
   if( reportTimings )
     {
       // Guard the averages against a run in which no sentence reached
       // the parser (totSents == 0).
       int divisor = (totSents > 0) ? totSents : 1;
       cout << "Av access time = " << totAccessTime/divisor
            << "\t Av parse time = "
            << totParseTime/divisor
            << "\t Av stats time = "
            << totSemParseTime/divisor
            << "\nAv edges created = "
            << (float)totEdges/divisor
            << "\tAv edges poped = "
            << (float)totPopedEdges/divisor
            << endl;
     }

   return 0;
}
コード例 #2
0
ファイル: parseAndEval.C プロジェクト: BLLIP/bllip-parser
int
main(int argc, char *argv[])
{
   ECArgs args( argc, argv );
   //AnsTreeHeap::print = true;
   /* o = basic, but not debugging, output.
      l = length of sentence to be proceeds 0-40 is default
      n = work on each #'th line.
      d = print out debugging info at level #
      t = report timings (requires o)
   */

   int i;
   params.init( args );
   //cerr << "Starting wwBCTest " << Feature::sLM << endl;

   if( args.nargs() > 2 || args.nargs() < 2 )	// require path name 
     error( "Need exactly two args.");
   ECString  path( args.arg( 0 ) );
   generalInit(path);
   for(int i = 0 ; i < 500 ; i++) histPoints[i] = false;
   histPoints[0] = true;
   if (Bchart::Nth == 50 || Bchart::Nth == 500 || Bchart::Nth == 1000)
     histPoints[1] = histPoints[9] = histPoints[24] = histPoints[49] = true;
   if (Bchart::Nth == 500 || Bchart::Nth == 1000)
     histPoints[99] = histPoints[249] = histPoints[499] = true;
   if(Bchart::Nth == 1000)
     histPoints[749] = histPoints[999] = true;

   TimeIt timeIt;

   ECString testSString = args.arg(1);
   ifstream testSStream(testSString.c_str());
   if( !testSStream )
     {
       cerr << "Could not find " << testSString << endl;
       error( "No testSstream");
     }

    ECString      pstatStreamName( params.fileString());
    pstatStreamName  += "PStatInfo/pStat";
    pstatStreamName += params.numString();
    pstatStreamName += ".txt";
    ofstream    pstatStream( pstatStreamName.c_str(), ios::out);
    if( !pstatStream )
      {
	cerr << "Looking to output to " << pstatStreamName << endl;
	error( "unable to open pstat stream");
      }

   for(i = 0 ; i < MAXNUMTHREADS ; i++) globalGi[i] =NULL;

   pthread_t thread[MAXNUMTHREADS];
   loopArg lA[MAXNUMTHREADS];
   for(i = 0 ; i < numThreads  ; i++){
     lA[i].id = i;
     lA[i].inpt=&testSStream;
     lA[i].outpt=&pstatStream;
     pthread_create(&thread[i],0,mainLoop, (void*)&lA[i]);
   }
  for(i=0; i<numThreads; i++){
    pthread_join(thread[i],0);
  }

   for(int i = 0 ; i < Bchart::Nth ; i++)
     if(histPoints[i])
       {
	 cerr << i << " " << totPst[i].fMeasure() << "\t";
       }
   cerr << endl;
   if(Feature::isLM)
     {
       cerr << pow(2.0,totGram/(double)totWords);
       cerr <<"\t" <<  pow(2.0,totTri/(double)totWords);
       cerr << "\t" << pow(2.0,totMix/(double)(totWords));
       cerr << endl;
     }
   if( args.isset('t') ) timeIt.finish(sentenceCount);
   pthread_exit(0);

   return 0;
}