int main(int argc, char *argv[]) { ECArgs args( argc, argv ); /* o = basic, but not debugging, output. l = length of sentence to be proceeds 0-40 is default n = work on each #'th line. d = print out debugging info at level # W = use wwclasses R = use rwclasses t = report timings (requires o) s = maximum sleep time f = f# says multiply ctl2 counts by # p = p# use prepFactor # P = which types of prob models to use */ // prevent core file creation; struct rlimit core_limits; core_limits.rlim_cur = 0; core_limits.rlim_max = 0; setrlimit( RLIMIT_CORE, &core_limits ); params.init( args ); if(args.isset('s')) { int maxDelay = atoi(args.value('s').c_str()); srand(params.whichSent()); int randN = rand(); int delay = randN%maxDelay; sleep(delay); } if(args.isset('T')) { int fac = atoi(args.value('T').c_str()); float ffac = (float)fac; ffac /= 10; Bchart::timeFactor = ffac; } int maxSentLen = 70; if(args.isset('l')) { maxSentLen = atoi(args.value('l').c_str()); } int totEdges = 0; int totPopedEdges = 0; double totAccessTime = 0; double totParseTime = 0; double totSemParseTime = 0; clock_t lastTime, currTime; double lastTimeSec, currTimeSec, elapsedTime; endFactor = 1.2; midFactor = (1.0 - (.3684 * endFactor))/(1.0 - .3684); if( args.nargs() > 2 || args.nargs() == 0 ) // require path name error( "Need exactly two arg." ); ECString path( args.arg( 0 ) ); readHeadInfo(path); Term::init( path ); InputTree::init(); ECString testSString( args.arg(1) ); ewDciTokStrm testSStream(testSString); //ifstream testSStream(testSString.c_str()); if( !testSStream ) error( "No testSstream" ); int sentenceCount = 0; //counts all sentences so we can use 1/50; ECString probSumString( path ); probSumString += "pSgT.txt"; ifstream probSumStream( probSumString.c_str() ); if( !probSumStream ) error( "Failed to find probSum file" ); Bchart::readTermProbs(path); if( args.isset('d') ) { int lev = atoi(args.value('d').c_str()); Bchart::printDebug() = lev; } int totSents = 0; int totUnparsed = 0; MeChart::init(path); Bchart::setPosStarts(); for( ; !(!testSStream) ; ) { SentRep sr(testSStream, SentRep::SGML); int len = sr.length(); if(len == 0) continue; if(len > maxSentLen) continue; if( !params.field().in(sentenceCount) ) { sentenceCount++; continue; } if(len == 1) { if(sr[0].lexeme() == "</DOC>") { continue; } } sentenceCount++; //SentRep orgsr( wtList ); // used in precision calc; if( args.isset('t') ) lastTime = clock(); if(args.isset('t') ) { currTime = clock(); lastTimeSec = (double)lastTime/(double)CLOCKS_PER_SEC; currTimeSec = (double)currTime/(double)CLOCKS_PER_SEC; elapsedTime = currTimeSec - lastTimeSec; if(elapsedTime < 0) elapsedTime += 2147; cerr << "Reading data time = " << elapsedTime << endl; totAccessTime += elapsedTime; lastTime = currTime; } MeChart* chart = new MeChart( sr ); curChart = chart; chart->ruleCountTimeout() = 250000; totSents++; if(args.isset('t') ) lastTime = clock(); double tmpCrossEnt = chart->parse( ); Item* topS = chart->topS(); if(!topS) { if(len == 1) { delete chart; continue; } Edge::DemFac = .9; delete chart; chart = new MeChart(sr); chart->ruleCountTimeout() = 350000; curChart = chart; tmpCrossEnt = chart->parse( ); topS = chart->topS(); Edge::DemFac = .999; if(!topS) { totUnparsed++; cerr << "Parse failed on: " << sr << endl; delete chart; continue; } } // compute the outside probabilities on the items so that we can // skip doing detailed computations on the really bad ones if(args.isset('t') ) { currTime = clock(); lastTimeSec = (double)lastTime/(double)CLOCKS_PER_SEC; currTimeSec = (double)currTime/(double)CLOCKS_PER_SEC; elapsedTime = currTimeSec - lastTimeSec; if(elapsedTime < 0) elapsedTime += 2147; cerr << "Parsing time = " << elapsedTime << "\tEdges created = " << chart->totEdgeCountAtS() << "\tEdges poped = " << chart->popedEdgeCountAtS() << endl; totParseTime += elapsedTime; //totEdges += chart->totEdgeCountAtS(); //totPopedEdges += chart->popedEdgeCountAtS(); totEdges += chart->totEdgeCountAtS(); totPopedEdges += chart->popedEdgeCountAtS(); lastTime = clock(); } chart->set_Alphas(); AnswerTree* at = chart->findMapParse(); if( !at ) { totUnparsed++; cerr << "MapParse failed on: " << sr << endl; delete chart; continue; } InputTree* mapparse = inputTreeFromAnswerTree(at,topS); //at->deleteSubTrees(); //delete at; cout << *mapparse << endl; delete mapparse; if(args.isset('t') ) { currTime = clock(); lastTimeSec = (double)lastTime/(double)CLOCKS_PER_SEC; currTimeSec = (double)currTime/(double)CLOCKS_PER_SEC; elapsedTime = currTimeSec - lastTimeSec; if(elapsedTime < 0) elapsedTime += 2147; cerr << "Sem Parsing time = " << elapsedTime << endl; totSemParseTime += elapsedTime; } delete chart; } if( args.isset('t') ) cout << "Av access time = " << totAccessTime/totSents << "\t Av parse time = " << totParseTime/totSents << "\t Av stats time = " << totSemParseTime/totSents << "\nAv edges created = " << (float)totEdges/totSents << "\tAv edges poped = " << (float)totPopedEdges/totSents << endl; return 0; }
int main(int argc, char *argv[]) { ECArgs args( argc, argv ); /* l = length of sentence to be proceeds 0-100 is default n = work on each #'th line. d = print out debugging info at level # t = report timings */ params.init( args ); TimeIt timeIt; ECString path( args.arg( 0 ) ); generalInit(path); int sentenceCount = 0; //counts all sentences so we can use e.g,1/50; int totUnparsed = 0; double log600 = log2(600.0); ECString flnm = "dummy"; if(args.nargs()==2) flnm = args.arg(1); ewDciTokStrm* tokStream = NULL; if(Bchart::tokenize) { tokStream = new ewDciTokStrm(flnm); if(args.nargs() ==1) tokStream->useCin = 1; } istream* nontokStream = NULL; if(args.nargs()==2) nontokStream = new ifstream(args.arg(1).c_str()); else nontokStream = &cin; for( ; ; sentenceCount++) { SentRep* srp; if(Bchart::tokenize) srp = new SentRep(*tokStream, SentRep::SGML); else srp = new SentRep(*nontokStream, SentRep::SGML); int len = srp->length(); if(len > params.maxSentLen) continue; if(len == 0) break; if( !params.field().in(sentenceCount) ) continue; if(args.isset('t')) timeIt.befSent(); MeChart* chart = new MeChart( *srp ); curChart = chart; if(args.isset('t') ) timeIt.lastTime = clock(); chart->parse( ); Item* topS = chart->topS(); if(!topS) { totUnparsed++; cerr << "Parse failed" << endl; cerr << *srp << endl; delete chart; continue; } if(args.isset('t') ) timeIt.betweenSent(chart); // compute the outside probabilities on the items so that we can // skip doing detailed computations on the really bad ones chart->set_Alphas(); AnsTreeStr& at = chart->findMapParse(); if( at.probs[0] <= 0 ) error( "mapProbs did not return answer" ); if(Feature::isLM) { double lgram = log2(at.sum); lgram -= (srp->length()*log600); double pgram = pow(2,lgram); double iptri =chart->triGram();; double ltri = (log2(iptri)-srp->length()*log600); double ptri = pow(2.0,ltri); double pcomb = (0.667 * pgram)+(0.333 * ptri); double lmix = log2(pcomb); cout << lgram << "\t" << ltri << "\t" << lmix << endl; } int numVersions = 0; for(numVersions = 0 ; numVersions < NTH ; numVersions++) if(at.probs[numVersions] <= 0) break; if(NTH > 1)cout << sentenceCount << "\t" << numVersions << endl; for(int i = 0 ; i < numVersions ; i++) { short pos = 0; InputTree* mapparse = inputTreeFromAnsTree(&at.trees[i], pos ,*srp); double logP =log(at.probs[i]); logP -= (srp->length()*log600); if(NTH > 1) cout << logP << endl; cout << *mapparse << endl << endl; delete mapparse; } cout << endl; if(args.isset('t') ) timeIt.aftSent(); delete chart; } if( args.isset('t') ) timeIt.finish(sentenceCount); return 0; }
static void* mainLoop(void* arg) { int *id = reinterpret_cast<int *>(arg); PrintStack printStack; for( ; ; ) { SentRep* srp = new SentRep(params.maxSentLen); pthread_mutex_lock(&readlock); if(Bchart::tokenize) *tokStream >> *srp; else *nontokStream >> *srp; int locCount = sentenceCount++; ExtPos extPos; if(params.extPosIfstream) extPos.read(params.extPosIfstream,*srp); pthread_mutex_unlock(&readlock); if( !params.field().in(sentenceCount) ) continue; printStruct printS; printS.name = srp->getName(); printS.sentenceCount = locCount; printS.numDiff = 0; int len = srp->length(); if (len == 0) { break; } if (len > params.maxSentLen) { ECString msg("skipping sentence longer than specified limit of "); msg += intToString(params.maxSentLen); WARN( msg.c_str() ); printSkipped(srp,NULL,printStack,printS); continue; } // handle input containing reserved word Bchart::HEADWORD_S1; could probably do // better (like undo replacement before printing) but this seems sufficient. int i; for (i = 0; i < len; ++i) { ECString& w = ((*srp)[i]).lexeme(); if (w == Bchart::HEADWORD_S1) { ECString msg = ECString("Replacing reserved token \"") + Bchart::HEADWORD_S1; msg += "\" at index " + intToString(i) + " of input with token \"^^^\""; WARN( msg.c_str() ); w = "^^^"; } } MeChart* chart = new MeChart( *srp,extPos,*id ); chart->parse( ); Item* topS = chart->topS(); if(!topS) { if (extPos.hasExtPos()) { WARN("Parse failed: !topS -- reparsing without POS constraints"); chart = new MeChart(*srp, *id); chart->parse(); topS = chart->topS(); if (!topS) { WARN("Reparsing without POS constraints failed too: !topS"); printSkipped(srp, chart, printStack, printS); continue; } } else { WARN( "Parse failed: !topS" ); printSkipped(srp,chart,printStack,printS); continue; } } bool failed = decodeParses(len, locCount, srp, chart, printS, printStack); if (failed) { continue; } if( printS.numDiff == 0) { if (extPos.hasExtPos()) { WARN("Parse failed from 0, inf or nan probabililty -- reparsing without POS constraints"); chart = new MeChart(*srp, *id); chart->parse(); bool failed = decodeParses(len, locCount, srp, chart, printS, printStack); if (failed || printS.numDiff == 0) { WARN("Parse failed from 0, inf or nan probabililty -- failed even without POS constraints"); printSkipped(srp,chart,printStack,printS); continue; } } else { WARN("Parse failed from 0, inf or nan probabililty"); printSkipped(srp,chart,printStack,printS); continue; } } /* put the sentence with which we just finished at the end of the printStack*/ printStack.push_back(printS); workOnPrintStack(&printStack); delete chart; delete srp; }
/* the function called by each thread is "mainLoop" */ void* mainLoop(void* arg) { loopArg *loopA = (loopArg*)arg; istream* testSStream = loopA->inpt; ostream* pstatStream = loopA->outpt; int id = loopA->id; double log600 = log2(600.0); PrintStack printStack; for( ; ; ) { InputTree correct; InputTree* cuse; /* first lock to read in the material */ pthread_mutex_lock(&readlock); if( !*testSStream ) { pthread_mutex_unlock(&readlock); break; } *testSStream >> correct; if( !*testSStream ){ pthread_mutex_unlock(&readlock); break; } totWords += correct.length()+1; int locCount = sentenceCount++; list<ECString> wtList; correct.make(wtList); SentRep sr( wtList ); // used in precision calc ExtPos extPos; if(params.extPosIfstream) extPos.read(params.extPosIfstream,sr); pthread_mutex_unlock(&readlock); cuse = &correct; int len = correct.length(); if(len > params.maxSentLen) continue; //cerr << "Len = " << len << endl; /* if( !params.field().in(sentenceCount) ) { sentenceCount++; continue; } if(sentenceCount < -1) { sentenceCount++; continue; } sentenceCount++; */ vector<ECString> poslist; correct.makePosList(poslist); ScoreTree sc; sc.setEquivInts(poslist); MeChart* chart = new MeChart( sr,extPos,id ); chart->parse( ); Item* topS = chart->topS(); if(!topS) { cerr << "Parse failed" << endl; cerr << correct << endl; error(" could not parse "); delete chart; continue; } // compute the outside probabilities on the items so that we can // skip doing detailed computations on the really bad ones chart->set_Alphas(); Bst& bst = chart->findMapParse(); if( bst.empty()) error( "mapProbs did not return answer"); float bestF = -1; int i; int numVersions = 0; Link diffs(0); //cerr << "Need num diff: " << Bchart::Nth << endl; printStruct printS; printS.sentenceCount = locCount; printS.numDiff = 0; for(numVersions = 0 ; ; numVersions++) { short pos = 0; Val* val = bst.next(numVersions); if(!val) { //cerr << "Breaking" << endl; break; } InputTree* mapparse = inputTreeFromBsts(val,pos,sr); bool isU; int dummy = 0; diffs.is_unique(mapparse, isU, dummy); // cerr << "V " << isU << " " << numVersions << *mapparse << endl; if(isU) { printS.probs.push_back(val->prob()); printS.trees.push_back(mapparse); printS.numDiff++; } else { delete mapparse; } if(printS.numDiff >= Bchart::Nth) break; if(numVersions > 20000) break; } ParseStats* locPst = new ParseStats[Bchart::Nth]; ParseStats bestPs; for(i = 0 ; i <printS.numDiff ; i++) { InputTree *mapparse = printS.trees[i]; assert(mapparse); sc.trips.clear(); ParseStats pSt; sc.recordGold(cuse,pSt); sc.precisionRecall(mapparse,pSt); float newF = pSt.fMeasure(); cerr << printS.sentenceCount << "\t" << newF << endl; if(newF > bestF) { bestF = newF; bestPs = pSt; } if(histPoints[i]) { locPst[i] += bestPs; } } if(printS.numDiff < Bchart::Nth) { for(i = printS.numDiff ; i < Bchart::Nth ; i++) { if(histPoints[i]) locPst[i] += bestPs; } } pthread_mutex_lock(&scorelock); for(i = 0 ; i < Bchart::Nth ; i++) totPst[i]+=locPst[i]; pthread_mutex_unlock(&scorelock); int numPrinted; /* put the sentence with which we just finished at the end of the printStack*/ printStack.push_back(printS); PrintStack::iterator psi = printStack.begin(); /* now look at each item from the front of the print stack to see if it should be printed now */ pthread_mutex_lock(&writelock); for( numPrinted =0; psi != printStack.end(); numPrinted++ ) { printStruct& pstr=(*psi); if(pstr.sentenceCount != printCount) break; *pstatStream << pstr.sentenceCount << "\t" << pstr.numDiff << "\n"; printCount++; for(i = 0 ; i < pstr.numDiff ; i++) { InputTree* mapparse = pstr.trees[i]; assert(mapparse); double logP =log2(pstr.probs[i]); logP -= (sr.length()*log600); *pstatStream << logP << "\n"; if(Bchart::prettyPrint) *pstatStream << *mapparse << "\n\n"; else { mapparse->printproper(*pstatStream); *pstatStream << "\n"; } delete mapparse; } *pstatStream << endl; psi++; } pthread_mutex_unlock(&writelock); for(i = 0 ; i < numPrinted ; i++) printStack.pop_front(); if(Feature::isLM) { double lgram = log2(bst.sum()); lgram -= (sr.length()*log600); double pgram = pow(2,lgram); double iptri = chart->triGram();; double ltri = (log2(iptri)-sr.length()*log600); double ptri = pow(2.0,ltri); double pcomb1 = (0.667 * pgram)+(0.333 * ptri); double lcom1 = log2(pcomb1); totGram -= lgram; totTri -= ltri; totMix -= lcom1; if(locCount%10 == 9) { cerr << locCount << "\t"; cerr << pow(2.0,totGram/(double)totWords); cerr <<"\t" << pow(2.0,totTri/(double)totWords); cerr << "\t" << pow(2.0,totMix/(double)(totWords)); cerr << endl; } } if(locCount%50 == 1) { cerr << sentenceCount << "\t"; for(int i = 0 ; i < Bchart::Nth ; i++) if(histPoints[i]) { cerr << i << " " << totPst[i].fMeasure() << "\t"; } cerr << endl; } delete chart; delete [] locPst; } return 0; }