Пример #1
0
// Builds a best-first chart for `sentence` on behalf of parser slot `id`.
//
// Besides the member initialisers, the constructor:
//   * resets pretermNum and allocates a fresh EdgeHeap for `heap`;
//   * records lastKnownWord in lastWord[id];
//   * lower-cases every lexeme with toLower(), converts it with wtoInt(),
//     and stores the result through sentence_[i].toInt();
//   * zeroes the whole curDemerits_ table.
//
// The sentence must not be longer than MAXSENTLEN (checked by assert only).
Bchart::
Bchart(SentRep & sentence, int id)
  : ChartBase( sentence,id ),
    depth(0),
    curDir(-1),
    gcurVal(NULL),
    alreadyPopedNum( 0 )
{
  // The Windows high-resolution timer calls that used to open this
  // constructor were removed: their results were never read, and they made
  // the constructor depend on <windows.h> for no benefit.
  pretermNum = 0;
  heap = new EdgeHeap();
  int len = sentence.length();
  lastWord[id]=lastKnownWord;
  assert(len <= MAXSENTLEN);

  // Scratch buffer handed to toLower().
  // NOTE(review): whether toLower() bounds its writes to 512 elements is not
  // visible from here -- confirm before feeding very long lexemes.
  Char temp[512];
  for(int i = 0 ; i < len ; i++)
    {
      ECString wl = toLower(sentence[i].lexeme().c_str(), temp);
      int val = wtoInt(wl);
      sentence_[i].toInt() = val;
    }

  // Clear the full table, not just the len x len corner.
  for(int i = 0 ; i < MAXSENTLEN ; i++)
    for(int j = 0 ; j < MAXSENTLEN ; j++) curDemerits_[i][j] = 0;
}
Пример #2
0
// Sets up the bookkeeping shared by all charts for `sentence`.
//
// wrd_count_ becomes the sentence length and endPos defaults to the same
// value.  endPos is then pulled back to the position of sentence-final
// punctuation (as judged by finalPunc) if such a token is found:
//   * at the last word, or
//   * for sentences of more than two words, at the second-to-last word,
//     or failing that at the third-to-last word.
// The first match wins; with no match endPos stays at wrd_count_.
ChartBase::
ChartBase(SentRep & sentence)
: crossEntropy_(0.0L), 
  wrd_count_(0),
  ruleiCounts_(0),
  popedEdgeCount_(0),
  sentence_( sentence )
{
#ifdef DEBUG
    extern int	rulei_high_water;
    rulei_high_water = 0;
#endif /* DEBUG */
    numItemsToDelete = 0;
    wrd_count_ = sentence.length();
    endPos = wrd_count_;

    // Last word: only looked at when the sentence is non-empty.
    if(wrd_count_ > 0)
      {
        const char* lastWrd = sentence_[wrd_count_-1].lexeme().c_str();
        if(lastWrd && finalPunc(lastWrd))
          {
            endPos = wrd_count_-1;
            return;
          }
      }

    // The two fallback probes need at least three words so that both
    // indices below stay non-negative.
    if(wrd_count_ <= 2) return;

    const char* secondLastWrd = sentence[wrd_count_-2].lexeme().c_str();
    if(finalPunc(secondLastWrd))
      {
        endPos = wrd_count_-2;
        return;
      }

    const char* thirdLastWrd = sentence[wrd_count_-3].lexeme().c_str();
    if(finalPunc(thirdLastWrd)) endPos = wrd_count_-3;
}
Пример #3
0
// Builds a best-first chart for `sentence`.
//
// Resets pretermNum, allocates the edge heap, stores for every word the
// integer obtained by lower-casing its lexeme (toLower) and passing the
// result to wtoInt, zeroes the whole curDemerits_ table, and finally calls
// initDenom().  The sentence must not exceed MAXSENTLEN words (assert only).
Bchart::
Bchart(SentRep & sentence)
: ChartBase( sentence )
{
  pretermNum = 0;
  heap = new EdgeHeap();

  const int numWords = sentence.length();
  assert(numWords <= MAXSENTLEN);

  // Scratch space passed to toLower() for each word.
  char lowerBuf[512];
  for(int w = 0 ; w < numWords ; ++w)
    {
      ECString lowered = toLower(sentence[w].lexeme().c_str(), lowerBuf);
      const int wordCode = wtoInt(lowered);
      sentence_[w].toInt() = wordCode;
    }

  // Every cell is cleared, independent of the actual sentence length.
  for(int row = 0 ; row < MAXSENTLEN ; ++row)
    {
      for(int col = 0 ; col < MAXSENTLEN ; ++col)
        {
          curDemerits_[row][col] = 0;
        }
    }

  initDenom();
}
Пример #4
0
// Builds a best-first chart for `sentence` on behalf of parser slot `id`.
//
// Resets pretermNum, allocates the edge heap, records lastKnownWord in
// lastWord[id], stores for every word the integer obtained by passing its
// lexeme through langAwareToLower and then wtoInt, and zeroes the whole
// curDemerits_ table.  The sentence must not exceed MAXSENTLEN words
// (assert only).
Bchart::
Bchart(SentRep & sentence, int id)
  : ChartBase( sentence,id ),
    depth(0),
    curDir(-1),
    gcurVal(NULL),
    alreadyPoppedNum( 0 )
{
  pretermNum = 0;
  heap = new EdgeHeap();

  const int numWords = sentence.length();
  lastWord[id]=lastKnownWord;
  assert(numWords <= MAXSENTLEN);

  for(int w = 0 ; w < numWords ; ++w)
    {
      ECString lowered = langAwareToLower(sentence[w].lexeme());
      const int wordCode = wtoInt(lowered);
      sentence_[w].toInt() = wordCode;
    }

  // Every cell is cleared, independent of the actual sentence length.
  for(int row = 0 ; row < MAXSENTLEN ; ++row)
    {
      for(int col = 0 ; col < MAXSENTLEN ; ++col)
        {
          curDemerits_[row][col] = 0;
        }
    }
}
Пример #5
0
// Command-line driver: reads sentences one at a time, parses each with a
// MeChart, and writes the resulting parse tree(s) to stdout.
//
// Input comes from the file named by the second argument when there are
// exactly two arguments, otherwise from stdin.  When Bchart::tokenize is set
// the input goes through an ewDciTokStrm; otherwise it is read directly.
// The loop ends at the first sentence of length zero.
//
// Each iteration owns one SentRep and (once constructed) one MeChart.  Both
// are released on every path out of the iteration; the chart is always
// deleted before the sentence it was built from.
//
// Returns 0.
int
main(int argc, char *argv[])
{
   ECArgs args( argc, argv );
   /* l = length of sentence to be processed; 0-100 is default
      n = work on each #'th line.
      d = print out debugging info at level #
      t = report timings */

   params.init( args );
   TimeIt timeIt;
   ECString  path( args.arg( 0 ) );
   generalInit(path);

   int      sentenceCount = 0;  //counts all sentences so we can use e.g,1/50;
   int totUnparsed = 0;
   double log600 = log2(600.0);

   ECString flnm = "dummy";
   if(args.nargs()==2) flnm = args.arg(1);
   ewDciTokStrm* tokStream = NULL;
   if(Bchart::tokenize)
     {
       tokStream = new ewDciTokStrm(flnm);
       if(args.nargs() ==1) tokStream->useCin = 1;
     }
   istream* nontokStream = NULL;
   if(args.nargs()==2) nontokStream = new ifstream(args.arg(1).c_str());
   else nontokStream = &cin;
   
   for( ;  ; sentenceCount++)
     {
       SentRep* srp;
       if(Bchart::tokenize) srp = new SentRep(*tokStream, SentRep::SGML);
       else srp = new SentRep(*nontokStream, SentRep::SGML);
       int len = srp->length();

       // Over-long sentences are skipped (and still counted) before the
       // end-of-input test, exactly as before; the sentence is now freed.
       if(len > params.maxSentLen)
         {
           delete srp;
           continue;
         }
       // A zero-length sentence terminates the run.
       if(len == 0)
         {
           delete srp;
           break;
         }
       if( !params.field().in(sentenceCount) )
         {
           delete srp;
           continue;
         }

       if(args.isset('t')) timeIt.befSent();

       MeChart*	chart = new MeChart( *srp );
       curChart = chart;
       
       if(args.isset('t') ) timeIt.lastTime = clock();

       chart->parse( );

       Item* topS = chart->topS();
       if(!topS)
         {
           totUnparsed++;
           cerr << "Parse failed" << endl;
           cerr << *srp << endl;
           delete chart;
           delete srp;
           continue;
         }
       if(args.isset('t') ) timeIt.betweenSent(chart);

       // compute the outside probabilities on the items so that we can
       // skip doing detailed computations on the really bad ones 
       chart->set_Alphas();

       AnsTreeStr& at = chart->findMapParse();
       if( at.probs[0] <= 0 ) error( "mapProbs did not return answer" );

       if(Feature::isLM)
         {
           // Language-model mode: print the log2 grammar score, the log2
           // trigram score and the log2 of their 0.667/0.333 mixture.
           double lgram = log2(at.sum);
           lgram -= (srp->length()*log600);
           double pgram = pow(2,lgram);
           double iptri =chart->triGram();
           double ltri = (log2(iptri)-srp->length()*log600);
           double ptri = pow(2.0,ltri);
           double pcomb = (0.667 * pgram)+(0.333 * ptri);
           double lmix = log2(pcomb);
           cout << lgram << "\t" << ltri << "\t" << lmix << endl;
         }

       // Count the leading entries of at.probs that are positive (at most NTH).
       int numVersions = 0;
       for(numVersions = 0 ; numVersions < NTH ; numVersions++)
         if(at.probs[numVersions] <= 0) break;
       if(NTH > 1)cout << sentenceCount << "\t" << numVersions << endl;
       for(int i = 0 ; i < numVersions ; i++)
         {
           short pos = 0;
           InputTree*  mapparse = inputTreeFromAnsTree(&at.trees[i], pos ,*srp);
           // NOTE(review): this takes the natural log of the probability but
           // subtracts a base-2 normaliser (log600 is log2(600)).  Left
           // unchanged so the emitted scores stay the same -- confirm intent.
           double logP =log(at.probs[i]);
           logP -= (srp->length()*log600);
           if(NTH > 1) cout <<  logP << endl;
           cout << *mapparse << endl << endl;
           delete mapparse;
         }
       cout << endl;
       if(args.isset('t') ) timeIt.aftSent();

       // `at` refers into the chart, so nothing may touch it past this point.
       delete chart;
       delete srp;
     }
   if( args.isset('t') ) timeIt.finish(sentenceCount);

   // Release the input streams allocated above; cin is not ours to delete.
   if(nontokStream != &cin) delete nontokStream;
   delete tokStream;
   return 0;
}