// Builds a chart for `sentence`; `id` is forwarded to ChartBase and is also
// used as the index into lastWord.
//
// Besides initialising the scalar members, the constructor
//   - allocates the edge heap,
//   - records lastKnownWord in lastWord[id],
//   - stores, for every word, the integer returned by wtoInt() for its
//     lower-cased lexeme into sentence_[i].toInt(),
//   - zeroes the whole MAXSENTLEN x MAXSENTLEN curDemerits_ table.
//
// The sentence must not be longer than MAXSENTLEN words (asserted).
Bchart::
Bchart(SentRep & sentence, int id)
  : ChartBase( sentence,id ),
    depth(0),
    curDir(-1),
    gcurVal(NULL),
    alreadyPopedNum( 0 )
{
  pretermNum = 0;
  heap = new EdgeHeap();
  int len = sentence.length();
  lastWord[id]=lastKnownWord;
  assert(len <= MAXSENTLEN);

  // Scratch buffer handed to toLower() for the lower-cased lexeme.
  // NOTE(review): the buffer is fixed at 512 elements; presumably toLower()
  // writes the whole lexeme into it, so a longer token would overflow --
  // confirm against toLower()'s implementation.
  Char temp[512];
  for(int w = 0 ; w < len ; w++)
    {
      ECString wl = toLower(sentence[w].lexeme().c_str(), temp);
      int val = wtoInt(wl);
      sentence_[w].toInt() = val;
    }

  // Clear every cell, not just the len x len corner that this sentence uses.
  for(int row = 0 ; row < MAXSENTLEN ; row++)
    for(int col = 0 ; col < MAXSENTLEN ; col++)
      curDemerits_[row][col] = 0;
}
// Sets up the bookkeeping shared by all charts for `sentence`.
//
// wrd_count_ becomes the sentence length.  endPos defaults to that length
// and is pulled back to the position of a word accepted by finalPunc():
// the last word is always examined; the two words before it are examined
// only when the sentence has more than two words.  The first match, scanning
// from the end, wins.
ChartBase::
ChartBase(SentRep & sentence)
  : crossEntropy_(0.0L),
    wrd_count_(0),
    ruleiCounts_(0),
    popedEdgeCount_(0),
    sentence_( sentence )
{
#ifdef DEBUG
  extern int rulei_high_water;
  rulei_high_water = 0;
#endif /* DEBUG */

  numItemsToDelete = 0;
  wrd_count_ = sentence.length();
  endPos = wrd_count_;

  if(wrd_count_ > 0)
    {
      // How many trailing words to inspect: three for sentences of three or
      // more words, otherwise only the very last one.
      const int maxBack = (wrd_count_ > 2) ? 3 : 1;
      for(int back = 1 ; back <= maxBack ; back++)
        {
          if(finalPunc(sentence_[wrd_count_-back].lexeme().c_str()))
            {
              endPos = wrd_count_-back;
              break;
            }
        }
    }
}
// Builds a chart for `sentence`.
//
// Allocates the edge heap, stores for every word the integer that wtoInt()
// returns for its lower-cased lexeme into sentence_[i].toInt(), zeroes the
// full MAXSENTLEN x MAXSENTLEN curDemerits_ table and finally calls
// initDenom().  The sentence must not exceed MAXSENTLEN words (asserted).
Bchart::
Bchart(SentRep & sentence)
  : ChartBase( sentence )
{
  pretermNum = 0;
  heap = new EdgeHeap();

  const int numWords = sentence.length();
  assert(numWords <= MAXSENTLEN);

  // Scratch space passed to toLower().
  // NOTE(review): fixed at 512 chars; presumably toLower() copies the whole
  // lexeme into it, so confirm that longer tokens cannot reach this point.
  char lowerBuf[512];
  for(int w = 0 ; w < numWords ; ++w)
    {
      ECString lowered = toLower(sentence[w].lexeme().c_str(), lowerBuf);
      int wordInt = wtoInt(lowered);
      sentence_[w].toInt() = wordInt;
    }

  // Reset every cell of the table, independent of the sentence length.
  for(int row = 0 ; row < MAXSENTLEN ; ++row)
    {
      for(int col = 0 ; col < MAXSENTLEN ; ++col)
        {
          curDemerits_[row][col] = 0;
        }
    }

  initDenom();
}
// Builds a chart for `sentence`; `id` is forwarded to ChartBase and is also
// used as the index into lastWord.
//
// Allocates the edge heap, records lastKnownWord in lastWord[id], stores for
// every word the integer that wtoInt() returns for langAwareToLower() of its
// lexeme into sentence_[i].toInt(), and zeroes the full
// MAXSENTLEN x MAXSENTLEN curDemerits_ table.  The sentence must not exceed
// MAXSENTLEN words (asserted).
Bchart::
Bchart(SentRep & sentence, int id)
  : ChartBase( sentence,id ),
    depth(0),
    curDir(-1),
    gcurVal(NULL),
    alreadyPoppedNum( 0 )
{
  pretermNum = 0;
  heap = new EdgeHeap();

  const int numWords = sentence.length();
  lastWord[id]=lastKnownWord;
  assert(numWords <= MAXSENTLEN);

  for(int w = 0 ; w < numWords ; ++w)
    {
      ECString lowered = langAwareToLower(sentence[w].lexeme());
      int wordInt = wtoInt(lowered);
      sentence_[w].toInt() = wordInt;
    }

  // Reset every cell of the table, independent of the sentence length.
  for(int row = 0 ; row < MAXSENTLEN ; ++row)
    {
      for(int col = 0 ; col < MAXSENTLEN ; ++col)
        {
          curDemerits_[row][col] = 0;
        }
    }
}
int main(int argc, char *argv[]) { ECArgs args( argc, argv ); /* l = length of sentence to be proceeds 0-100 is default n = work on each #'th line. d = print out debugging info at level # t = report timings */ params.init( args ); TimeIt timeIt; ECString path( args.arg( 0 ) ); generalInit(path); int sentenceCount = 0; //counts all sentences so we can use e.g,1/50; int totUnparsed = 0; double log600 = log2(600.0); ECString flnm = "dummy"; if(args.nargs()==2) flnm = args.arg(1); ewDciTokStrm* tokStream = NULL; if(Bchart::tokenize) { tokStream = new ewDciTokStrm(flnm); if(args.nargs() ==1) tokStream->useCin = 1; } istream* nontokStream = NULL; if(args.nargs()==2) nontokStream = new ifstream(args.arg(1).c_str()); else nontokStream = &cin; for( ; ; sentenceCount++) { SentRep* srp; if(Bchart::tokenize) srp = new SentRep(*tokStream, SentRep::SGML); else srp = new SentRep(*nontokStream, SentRep::SGML); int len = srp->length(); if(len > params.maxSentLen) continue; if(len == 0) break; if( !params.field().in(sentenceCount) ) continue; if(args.isset('t')) timeIt.befSent(); MeChart* chart = new MeChart( *srp ); curChart = chart; if(args.isset('t') ) timeIt.lastTime = clock(); chart->parse( ); Item* topS = chart->topS(); if(!topS) { totUnparsed++; cerr << "Parse failed" << endl; cerr << *srp << endl; delete chart; continue; } if(args.isset('t') ) timeIt.betweenSent(chart); // compute the outside probabilities on the items so that we can // skip doing detailed computations on the really bad ones chart->set_Alphas(); AnsTreeStr& at = chart->findMapParse(); if( at.probs[0] <= 0 ) error( "mapProbs did not return answer" ); if(Feature::isLM) { double lgram = log2(at.sum); lgram -= (srp->length()*log600); double pgram = pow(2,lgram); double iptri =chart->triGram();; double ltri = (log2(iptri)-srp->length()*log600); double ptri = pow(2.0,ltri); double pcomb = (0.667 * pgram)+(0.333 * ptri); double lmix = log2(pcomb); cout << lgram << "\t" << ltri << "\t" << lmix << endl; } int 
numVersions = 0; for(numVersions = 0 ; numVersions < NTH ; numVersions++) if(at.probs[numVersions] <= 0) break; if(NTH > 1)cout << sentenceCount << "\t" << numVersions << endl; for(int i = 0 ; i < numVersions ; i++) { short pos = 0; InputTree* mapparse = inputTreeFromAnsTree(&at.trees[i], pos ,*srp); double logP =log(at.probs[i]); logP -= (srp->length()*log600); if(NTH > 1) cout << logP << endl; cout << *mapparse << endl << endl; delete mapparse; } cout << endl; if(args.isset('t') ) timeIt.aftSent(); delete chart; } if( args.isset('t') ) timeIt.finish(sentenceCount); return 0; }