Ejemplo n.º 1
0
/*
  Returns the integer code to use for `tree`'s label, remapped when the
  node looks like a conjunction of same-labelled constituents.

  - If the label is not NP, S or VP, the plain Term integer is returned.
  - If the label is "NP", exactly two children carry the same label as the
    parent, and no CC child was seen after the first position, the result
    is Term::lastNTInt()+1.
  - If a CC, comma or colon child was seen after the first position and at
    least two children carry the parent's label, the result is
    tint + Term::lastNTInt().
  - Otherwise the plain Term integer is returned.
*/
int
ccIndFromTree(InputTree* tree)
{
    ECString trmNm = tree->term();
    const Term* trm = Term::get(trmNm);
    int tint = trm->toInt();
    /*Change next line to indicate which non-terminals get specially
      marked to indicate that they are conjoined together */
    if(!trm->isNP() && !trm->isS() && !trm->isVP()) return tint;

    bool sawComma = false;
    bool sawColen = false;
    bool sawCC = false;
    int numTrm = 0;   // children whose label equals the parent's label
    int pos = 0;      // index of the child currently inspected
    InputTreesIter subTreeIter = tree->subTrees().begin();
    for( ; subTreeIter != tree->subTrees().end() ; ++subTreeIter )
    {
        InputTree* subTree = *subTreeIter;
        ECString strmNm = subTree->term();
        const Term* strm = Term::get(strmNm);
        /* Punctuation/CC in the very first position is not counted.
           The original also recorded "saw some other non-terminal" in a
           flag that was never read; that dead branch has been removed. */
        if(pos != 0 && strm->isCC()) sawCC = true;
        else if(strmNm == trmNm) numTrm++;
        else if(pos != 0 && strm->isComma()) sawComma = true;
        else if(pos != 0 && strm->isColon()) sawColen = true;
        pos++;
    }
    if(trmNm == "NP" && numTrm == 2 && !sawCC) return Term::lastNTInt()+1;
    if((sawComma || sawColen || sawCC) && numTrm >= 2) return tint+Term::lastNTInt();
    return tint;
}
Ejemplo n.º 2
0
int main(int argc, char *argv[]) {
    ECArgs args(argc, argv);
    params.init(args);

    ECString path(args.arg(0));
    generalInit(path);

    // we don't use sentenceCount since it treeLogProb may parse the
    // sentence multiple times
    int index = 0;
    while (true) {
        if (!cin) {
            break;
        }
        InputTree correct;
        cin >> correct;
        int len = correct.length();
        if (len == 0) {
            break;
        }
        if (len > params.maxSentLen) {
            continue;
        }
        double logProb;
        try {
            logProb = treeLogProb(&correct);
        } catch (ParserError) {
            logProb = 0;
        }
        cout << index << "\t" << logProb << endl;
        index++;
    }
    return 0;
}
Ejemplo n.º 3
0
/*
  Tells whether `child` (a daughter of `tree`) is at an "effective end".

  Returns 1 when tree is NULL or tree's label is the root term.  Otherwise
  looks at the sibling right after child:
    - no such sibling, or it is the stop term: ask the same question one
      level up (is_effEnd(tree->parent(), tree));
    - colon or final term: 1;
    - comma: 0;
    - anything else: 1 only if the sibling after that one is labelled '',
      else 0.
  child must be one of tree's subtrees; this is asserted while searching.
*/
int
is_effEnd(InputTree* tree, InputTree* child)
{
    if(!tree) return 1;
    if(Term::get(tree->term())->isRoot()) return 1;

    /* Walk the daughters until we stand on child. */
    InputTreesIter cur = tree->subTrees().begin();
    while(true)
    {
        assert(cur != tree->subTrees().end());
        InputTree* nxt = (*cur);
        assert(nxt);
        if(nxt == child) break;
        ++cur;
    }

    /* Step to the sibling that follows child. */
    ++cur;
    if(cur == tree->subTrees().end())
        return is_effEnd(tree->parent(),tree);

    InputTree* following = (*cur);
    const Term* followingTerm = Term::get(following->term());
    if(followingTerm == Term::stopTerm)
        return is_effEnd(tree->parent(),tree);
    if(followingTerm->isColon() || followingTerm->isFinal()) return 1;
    if(followingTerm->isComma()) return 0;

    /* One more step: a '' label two positions after child counts as an end. */
    ++cur;
    if(cur == tree->subTrees().end()) return 0;
    InputTree* afterFollowing = (*cur);
    if(afterFollowing->term() == "''") return 1;
    return 0;
}
Ejemplo n.º 4
0
/*
  Returns the Pst integer of the (lower-cased) head word of the
  grandparent of treeh->tree.  When the node has no parent or no
  grandparent, returns the Pst integer of "^^" instead (looked up once and
  cached in a function-local static).
  Asserts that the head word is known to Pst and that its integer is
  non-negative; on an unknown word the tree is printed to cerr first.
*/
int
tree_grandparent_head(TreeHist* treeh)
{
    InputTree* tree = treeh->tree;
    InputTree* parent = tree->parent();

    static int topInt = -1;
    if(topInt < 0)
    {
        ECString topWord("^^");
        topInt = Pst::get(topWord)->toInt();
    }

    if(!parent) return topInt;
    InputTree* grandparent = parent->parent();
    if(!grandparent) return topInt;

    char lowered[1024];
    ECString wrdStr(langAwareToLower(grandparent->head().c_str(),lowered));
    const WordInfo* wi = Pst::get(wrdStr);
    if(!wi)
    {
        cerr << *tree << endl;
        assert(wi);
    }
    int ans = wi->toInt();
    assert(ans >= 0);
    return ans;
}
Ejemplo n.º 5
0
/*
  Returns the zero-based index of the subtree of `tree` chosen as head, or
  -1 if no subtree qualifies (in particular when there are no subtrees).

  Each child is scored with headPriority(parentLabel, childLabel,
  bestSoFar); a child replaces the current choice whenever its priority is
  <= the best so far, so on ties the rightmost child wins.  The initial
  "best" priority is 10.  An empty parent label is treated as "S1".
*/
int
headPosFromTree(InputTree* tree)
{
    ECString parentLabel(tree->term());
    if(parentLabel == "") parentLabel = "S1";

    int bestPriority = 10;
    int bestIndex = -1;
    int index = 0;
    ConstInputTreesIter child = tree->subTrees().begin();
    for( ; child != tree->subTrees().end() ; ++child, ++index )
    {
        ECString childLabel((*child)->term());
        int priority = headPriority(parentLabel, childLabel, bestPriority);
        if(priority > bestPriority) continue;
        bestIndex = index;
        bestPriority = priority;
    }
    return bestIndex;
}
Ejemplo n.º 6
0
/*
  Runs one pass over the first `sc` trees in trainingData.  For each tree
  it calls makeSent and gatherFfCounts(tree, 0).  When whichInt is TTCALC
  it additionally builds a childless "STOP" tree spanning
  [finish, finish], makes it its own head tree, and feeds it to callProcG
  through a TreeHist with hpos 0.
*/
void
goThroughSents(InputTree* trainingData[1301], int sc)
{
  for(int idx = 0 ; idx < sc ; ++idx)
    {
      InputTree* par = trainingData[idx];
      makeSent(par);
      gatherFfCounts(par,0);
      if(whichInt != TTCALC) continue;

      /* Extra STOP event.  The word argument is selected by the same
         whichInt test that guards this code. */
      list<InputTree*> noSubTrees;
      InputTree stopTree(par->finish(),par->finish(),
                         whichInt==TTCALC ? "" : "^^",
                         "STOP","",
                         noSubTrees,NULL,NULL);
      stopTree.headTree() = &stopTree;
      TreeHist stopHist(&stopTree,0);
      stopHist.hpos = 0;
      callProcG(&stopHist);
    }
}
Ejemplo n.º 7
0
/*
  Returns the Term integer of the head tag (hTag) of the grandparent of
  treeh->tree.  When there is no parent or no grandparent, returns the
  Term integer of "STOP" (cached in a function-local static).
  The head tag must name a terminal Term; otherwise a diagnostic is
  written to cerr and an assertion fails.
*/
int
tree_grandparent_pos(TreeHist* treeh)
{
    static int stopint = 0;
    if(!stopint)
    {
        ECString stopLabel("STOP");
        stopint = Term::get(stopLabel)->toInt();
    }

    InputTree* tree = treeh->tree;
    InputTree* parent = tree->parent();
    if(!parent) return stopint;
    InputTree* grandparent = parent->parent();
    if(!grandparent) return stopint;

    const Term* trm = Term::get(grandparent->hTag());
    assert(trm);
    if(!trm->terminal_p())
    {
        cerr << "Bad head Part of Speech: " << *trm << " in " <<endl;
        cerr << *tree << endl;
        assert(trm->terminal_p());
    }
    return trm->toInt();
}
Ejemplo n.º 8
0
/*
  Scans the subtrees of treeh->tree from the last one leftwards, stopping
  when index treeh->pos is reached (that subtree is not examined).
  Subtrees at indices greater than treeh->hpos are skipped.  Among the
  examined subtrees a flag is set by a "closed" term when it is clear and
  cleared by an "open" term when it is set.
  Returns 0 if the flag is still set at the end, 1 otherwise.
*/
int
tree_noopenQl(TreeHist* treeh)
{
    InputTree* tree = treeh->tree;
    const int pos = treeh->pos;
    const int hpos = treeh->hpos;

    InputTrees::reverse_iterator rit = tree->subTrees().rbegin();
    int i = tree->subTrees().size()-1;
    bool unmatched = false;

    while(i != pos)
    {
        if(i <= hpos)
        {
            assert(i >= 0);
            InputTree* subTree = *rit;
            const Term* trm = Term::get(subTree->term());

            if(trm->isClosed() && !unmatched) unmatched = true;
            else if(trm->isOpen() && unmatched) unmatched = false;
        }
        --i;
        ++rit;
    }
    return unmatched ? 0 : 1;
}
Ejemplo n.º 9
0
/*
  Recursively walks `tree` and counts unary rewrites parent -> child where
  the child is a non-terminal whose Term integer differs from the
  parent's.  Counts go into treeData_[parent][child], both indices offset
  by Term::lastTagInt()+1.
*/
void
UnitRules::
gatherData(InputTree* tree)
{
  const Term* trm = Term::get(tree->term());
  assert(trm);
  int parInt = trm->toInt();
  int rparI = parInt-( Term::lastTagInt() + 1);
  /* Whether this node has exactly one child does not change during the
     loop, so compute it once. */
  const bool isUnary = (tree->subTrees().size() == 1);
  InputTreesIter iti = tree->subTrees().begin();
  for( ; iti != tree->subTrees().end() ; ++iti)
    {
      InputTree* stree = (*iti);
      if(isUnary)
        {
          const Term* strm = Term::get(stree->term());
          /* Check the pointer before it is used; the original asserted
             only after strm->terminal_p() had already dereferenced it. */
          assert(strm);
          if(strm->terminal_p()) continue;
          int chiInt = strm->toInt();
          /* NOTE(review): this `continue` also skips the recursive call
             below, so the subtree under a same-labelled unary child is
             never visited -- confirm that is intended. */
          if(chiInt == parInt) continue;
          int rchiI = chiInt -( Term::lastTagInt() + 1);
          treeData_[rparI][rchiI]++;
          //cerr << "TD " << parInt<<" " << chiInt << " " << treeData_[rparI][rchiI] << endl;
        }
      gatherData(stree);
    }
}
Ejemplo n.º 10
0
/*
  Returns 1 if the position right after treeh->tree (tree->finish()) is an
  effective sentence end, 0 otherwise:
    - past endPos: prints "Pos > endPos" to cout and returns 0;
    - exactly endPos: 1;
    - the item at that position has term "." or word ";": 1;
    - fewer than two positions remain before endPos: 0;
    - the word there is "," and the next word is "''": 1;
    - anything else: 0.
*/
int
tree_effEnd(TreeHist* treeh)
{
    const int pos = treeh->tree->finish();
    if(pos > endPos)
    {
        cout << "Pos > endPos" << endl;
        return 0;
    }
    if(pos == endPos) return 1;

    ECString wrd = sentence[pos]->word();
    ECString trm = sentence[pos]->term();
    if(trm == "." || wrd == ";") return 1;
    if((pos+2) > endPos) return 0;
    // ,'' acts like end of sentence
    if(wrd == "," && sentence[pos+1]->word() == "''") return 1;
    return 0;
}
Ejemplo n.º 11
0
/*
  Returns the Term integer of the label of treeh->tree.
  Asserts that the label is known to Term.
*/
int
tree_term(TreeHist* treeh)
{
    const Term* trm = Term::get(treeh->tree->term());
    assert(trm);
    return trm->toInt();
}
Ejemplo n.º 12
0
/*
  Returns the parent of treeh->tree, but only when that parent's head
  tree differs from the node's own head tree.  Returns NULL when there is
  no parent or when both share the same head tree.
*/
InputTree*
tree_parent_tree(TreeHist* treeh)
{
    InputTree* self = treeh->tree;
    InputTree* up = self->parent();
    if(up && up->headTree() != self->headTree()) return up;
    return NULL;
}
Ejemplo n.º 13
0
/*
  Buckets the span width of treeh->tree (finish() - start()) and returns
  the index of the first bucket whose upper bound is >= the width.
  Bucket upper bounds are 1, 3, 6, 10, 15, 21, 28, 36, 999.
  A width above 999 is unexpected: it trips an assertion in debug builds
  and yields -1 otherwise.
*/
int
tree_size(TreeHist* treeh)
{
    static const int bucs[9] = {1, 3, 6, 10, 15, 21, 28, 36, 999};
    InputTree* tree = treeh->tree;
    int sz = tree->finish() - tree->start();
    for(int i = 0 ; i < 9 ; i++)
        if(sz <= bucs[i]) return i;
    /* A bare string literal is always true, so the original
       assert("...") could never fire; negate it to get the intended
       failure. */
    assert(!"Never get here");
    return -1;
}
Ejemplo n.º 14
0
/*
  Returns the Term integer of the label of the sibling immediately to the
  right of treeh->tree.  Returns the Term integer of "STOP" (cached in a
  function-local static) when the node has no parent or is the last
  daughter.  If the node is not found among its parent's subtrees,
  error("Should never get here") is called and -1 is returned.
*/
int
tree_term_after(TreeHist* treeh)
{
    static int stopint = 0;
    if(!stopint)
    {
        ECString stopLabel("STOP");
        stopint = Term::get(stopLabel)->toInt();
    }

    InputTree* tree = treeh->tree;
    InputTree* par = tree->parent();
    if(!par) return stopint;

    /* Locate this node among its parent's daughters. */
    InputTreesIter sib = par->subTrees().begin();
    while(sib != par->subTrees().end() && *sib != tree) ++sib;
    if(sib == par->subTrees().end())
    {
        error("Should never get here");
        return -1;
    }

    /* Move to the right neighbour, if any. */
    ++sib;
    if(sib == par->subTrees().end()) return stopint;
    const Term* trm = Term::get((*sib)->term());
    assert(trm);
    return trm->toInt();
}
Ejemplo n.º 15
0
/*
  Returns the Term integer of the head tag (hTag) of treeh->tree.
  The head tag must name a terminal Term; otherwise a diagnostic and the
  tree are written to cerr and an assertion fails.
*/
int
tree_pos(TreeHist* treeh)
{
    InputTree* tree = treeh->tree;
    const Term* trm = Term::get(tree->hTag());
    assert(trm);
    if(trm->terminal_p()) return trm->toInt();

    cerr << "Bad head Part of Speech: " << *trm << " in " <<endl;
    cerr << *tree << endl;
    assert(trm->terminal_p());
    return trm->toInt();
}
Ejemplo n.º 16
0
/*
  Returns the Term integer of the label of the parent of treeh->tree, or
  the Term integer of "S1" (cached in a function-local static) when the
  node has no parent.  Asserts that the parent's label is a known,
  non-terminal Term.
*/
int
tree_parent_term(TreeHist* treeh)
{
    InputTree* tree = treeh->tree;

    static int s1int = 0;
    if(!s1int)
    {
        ECString rootLabel("S1");
        s1int = Term::get(rootLabel)->toInt();
    }

    InputTree* par = tree->parent();
    if(!par) return s1int;

    const Term* trm = Term::get(par->term());
    assert(trm);
    assert(!trm->terminal_p());
    return trm->toInt();
}
Ejemplo n.º 17
0
/*
  Returns the Term integer of the label of the subtree at index
  m = treeh->pos + n*l of treeh->tree.

  Returns the Term integer of "STOP" (cached in a function-local static)
  when m is negative, when m is past the last subtree, or when l > 0 and
  m lies beyond treeh->hpos.
*/
int
tree_ngram(TreeHist* treeh, int n, int l)
{
    static int stopTermInt = -1;
    if(stopTermInt < 0)
    {
        ECString stopStr("STOP");
        const Term* stopTerm = Term::get(stopStr);
        stopTermInt = stopTerm->toInt();
    }

    int pos = treeh->pos;
    int hp = treeh->hpos;
    int m = pos + (n * l);
    if(m < 0) return stopTermInt;
    InputTree* tree = treeh->tree;
    /* m is known to be non-negative here; compare as ints explicitly
       instead of relying on an implicit signed/unsigned conversion. */
    if(m >= static_cast<int>(tree->subTrees().size())) return stopTermInt;
    if(m > hp && l > 0) return stopTermInt;

    InputTreesIter subTreeIter = tree->subTrees().begin();
    int i = 0;
    for( ; subTreeIter != tree->subTrees().end() ; ++subTreeIter )
    {
        if(i == m)
        {
            InputTree* subTree = *subTreeIter;
            const Term* trm = Term::get(subTree->term());
            return trm->toInt();
        }
        i++;
    }
    /* A bare string literal is always true, so the original
       assert("...") could never fire; negate it to get the intended
       failure. */
    assert(!"should never get here");
    return -1;
}
Ejemplo n.º 18
0
/*
  Returns an integer code for the parent of treeh->tree:
    - no parent: the Term integer of "S1" (cached in a static);
    - parent label differs from the node's own label: the parent's plain
      Term integer;
    - same label: whatever ccIndFromTree(parent) returns.
  Asserts that treeh, its tree and the parent's Term are non-null, and
  (in the same-label case) that the parent is not a terminal.
*/
int
tree_ccparent_term(TreeHist* treeh)
{
    static int s1int = 0;
    if(!s1int)
    {
        ECString rootLabel("S1");
        s1int = Term::get(rootLabel)->toInt();
    }

    assert(treeh);
    InputTree* tree = treeh->tree;
    assert(tree);

    InputTree* par = tree->parent();
    if(!par) return s1int;

    const ECString& parentLabel = par->term();
    const Term* trm = Term::get(parentLabel);
    assert(trm);
    int plainInt = trm->toInt();
    if(parentLabel != tree->term()) return plainInt; //??? new;

    assert(!trm->terminal_p());
    return ccIndFromTree(par);
}
Ejemplo n.º 19
0
/*
  Only defined for the position directly left (hpos-1) or directly right
  (hpos+1) of the head position; every other position yields NULL.

  Left of the head: takes the subtree at pos+1 (via tree_find) and returns
  its first child if that subtree's head position is > 0, otherwise its
  second child (NULL if it has fewer than two children).

  Right of the head: takes the subtree at pos-1 and returns its last child
  if that subtree's head position is before the last index, otherwise its
  second-to-last child (NULL if it has fewer than two children).
*/
InputTree*
tree_2rel_tree(TreeHist* treeh)
{
    const int pos = treeh->pos;
    const int hpos = treeh->hpos;
    if(pos == hpos || pos < hpos-1 || pos > hpos+1) return NULL;

    if(pos < hpos)
    {
        InputTree* sib = tree_find(treeh, +1);
        int sibhp = headPosFromTree(sib);
        if(sibhp > 0) return sib->subTrees().front();
        if(sib->subTrees().size() < 2) return NULL;
        InputTreesIter second = sib->subTrees().begin();
        ++second;
        return *second;
    }

    InputTree* sib = tree_find(treeh, -1);
    int sibhp = headPosFromTree(sib);
    if(sibhp < sib->subTrees().size()-1) return sib->subTrees().back();
    if(sib->subTrees().size() < 2) return NULL;
    InputTrees::reverse_iterator secondLast = sib->subTrees().rbegin();
    ++secondLast;
    return *secondLast;
}
Ejemplo n.º 20
0
/*
  Returns the subtree of treeh->tree at index treeh->pos + n.
  The index must be within [0, number of subtrees); this is asserted.
  If the index is nevertheless not found, NULL is returned.
*/
InputTree*
tree_find(TreeHist* treeh, int n)
{
    int pos = treeh->pos;
    int m = pos + n;
    assert(m >= 0);
    InputTree* tree = treeh->tree;
    assert(!(m >= tree->subTrees().size()));

    InputTreesIter subTreeIter = tree->subTrees().begin();
    int i = 0;
    for( ; subTreeIter != tree->subTrees().end() ; ++subTreeIter )
    {
        if(i == m) return *subTreeIter;
        i++;
    }
    /* A bare string literal is always true, so the original
       assert("...") could never fire; negate it to get the intended
       failure. */
    assert(!"should never get here");
    return NULL;
}
Ejemplo n.º 21
0
/*
  Scans the subtrees of treeh->tree from the first one up to (not
  including) index treeh->pos.  A flag is set by an "open" term when it is
  clear, and cleared by a "closed" term when it is set (both tests are
  applied to every subtree, in that order).
  Returns 0 if the flag is still set at the end, 1 otherwise.
  treeh->pos must not exceed the number of subtrees (asserted).
*/
int
tree_noopenQr(TreeHist* treeh)
{
    InputTree* tree = treeh->tree;
    int pos = treeh->pos;
    int sz = tree->subTrees().size();
    InputTreesIter subTreeIter = tree->subTrees().begin();
    int i = 0;
    bool sawOpen = false;

    for( ; ; ++subTreeIter )
    {
        if(i == pos) break;
        /* Bounds check first: the original dereferenced the iterator
           before asserting that it was still inside the list. */
        assert(i < sz);
        InputTree* subTree = *subTreeIter;
        const Term* trm = Term::get(subTree->term());
        if(trm->isOpen() && !sawOpen) sawOpen=true;
        if(trm->isClosed() && sawOpen ) sawOpen = false;
        i++;
    }
    if(sawOpen) return 0;
    else return 1;
}
Ejemplo n.º 22
0
/* the function called by each thread is "mainLoop" */
/*
  Worker loop.  `arg` is cast to a loopArg*, which supplies the input
  stream, the output stream and an id that is passed on to MeChart.

  Each iteration:
    1. under readlock: reads one gold tree from the input stream, bumps
       totWords and sentenceCount, and reads external POS data if
       params.extPosIfstream is set;
    2. parses the sentence with MeChart and collects up to Bchart::Nth
       distinct parses (distinctness decided by Link::is_unique);
    3. scores each parse against the gold tree and, under scorelock, adds
       the per-rank statistics into totPst;
    4. queues the result on a local print stack and, under writelock,
       writes out every queued sentence whose sentenceCount equals the
       shared printCount, so output appears in sentence order;
    5. optionally (Feature::isLM) updates the totGram/totTri/totMix
       accumulators and reports progress on cerr.

  The loop ends when the input stream is in a failed state before or
  after a read.  Always returns 0.
*/
void*
mainLoop(void* arg)
{
  loopArg *loopA = (loopArg*)arg;
  istream* testSStream = loopA->inpt;
  ostream* pstatStream = loopA->outpt;
  int id = loopA->id;
  double log600 = log2(600.0);
  /* results finished by this call but not yet written out */
  PrintStack printStack;
  for( ;  ; )
    {
      InputTree     correct;  
      InputTree*    cuse;

      /* first lock to read in the material */
      pthread_mutex_lock(&readlock);
      if( !*testSStream ) {
	pthread_mutex_unlock(&readlock);
	break;
      }
      *testSStream >> correct;
      if( !*testSStream ){
	pthread_mutex_unlock(&readlock);
	break;
      }
      totWords += correct.length()+1;
      /* locCount is this sentence's sequence number; it is what the
	 print loop below compares against printCount */
      int locCount = sentenceCount++;
      list<ECString>  wtList;
      correct.make(wtList);
      SentRep sr( wtList );  // used in precision calc

      ExtPos extPos;
      if(params.extPosIfstream)
	extPos.read(params.extPosIfstream,sr);
      pthread_mutex_unlock(&readlock);

      cuse = &correct;
      int len = correct.length();
      /* NOTE(review): a sequence number has already been taken for this
	 sentence, but nothing is queued for it and printCount is not
	 advanced here.  The print loop below stops at the first entry
	 whose sentenceCount != printCount, so it looks like output for
	 all later sentences would stall after an over-long sentence --
	 confirm. */
      if(len > params.maxSentLen) continue;
      //cerr << "Len = " << len << endl;
      /*
	if( !params.field().in(sentenceCount) )
	{
	sentenceCount++;
	continue;
	}
	if(sentenceCount < -1)
	{
	sentenceCount++;
	continue;
	}
	sentenceCount++;
      */
      vector<ECString> poslist;
      correct.makePosList(poslist);
      ScoreTree sc;
      sc.setEquivInts(poslist);
      MeChart*	chart = new MeChart( sr,extPos,id );
       
      chart->parse( );
      Item* topS = chart->topS();
      if(!topS)
	{
	  cerr << "Parse failed" << endl;
	  cerr << correct << endl;
	  error(" could not parse "); 
	  /* only reached if error() returns */
	  delete chart;
	  continue;
	}
       
      // compute the outside probabilities on the items so that we can
      // skip doing detailed computations on the really bad ones 

      chart->set_Alphas();

      Bst& bst = chart->findMapParse();
      if( bst.empty()) error( "mapProbs did not return answer");
      float bestF = -1;
      int i;
      int numVersions = 0;
      Link diffs(0);
      //cerr << "Need num diff: " << Bchart::Nth << endl;
      printStruct printS;
      printS.sentenceCount = locCount;
      printS.numDiff = 0;
      /* Enumerate candidate parses, keeping only those that is_unique
	 reports as new, until Bchart::Nth have been kept, the candidates
	 run out, or more than 20000 candidates have been tried. */
      for(numVersions = 0 ; ; numVersions++)
	{
	  short pos = 0;
	  Val* val = bst.next(numVersions);
	  if(!val)
	    {
	      //cerr << "Breaking" << endl;
	      break;
	    }
	  InputTree*  mapparse = inputTreeFromBsts(val,pos,sr);
	  bool isU;
	  int dummy = 0;
	  diffs.is_unique(mapparse, isU, dummy);
	  // cerr << "V " << isU << " " << numVersions << *mapparse << endl;
	  if(isU)
	    {
	      /* kept trees are deleted later, when they are printed */
	      printS.probs.push_back(val->prob());
	      printS.trees.push_back(mapparse);
	      printS.numDiff++;
	    }
	  else
	    {
	      delete mapparse;
	    }
	  if(printS.numDiff >= Bchart::Nth) break;
	  if(numVersions > 20000) break;
	}

      /* locPst[i] receives the best-so-far statistics after looking at
	 parses 0..i, for every i flagged in histPoints */
      ParseStats* locPst = new ParseStats[Bchart::Nth];
      ParseStats bestPs;
      for(i = 0 ; i <printS.numDiff ; i++)
	{
	  InputTree *mapparse = printS.trees[i];
	  assert(mapparse);
	  sc.trips.clear();
	  ParseStats pSt;
	  sc.recordGold(cuse,pSt);
	  sc.precisionRecall(mapparse,pSt);
	  float newF = pSt.fMeasure();
	  cerr << printS.sentenceCount << "\t" << newF << endl;
	  if(newF > bestF)
	    {
	      bestF = newF;
	      bestPs = pSt;
	    }
	  if(histPoints[i])
	    {
	      locPst[i] += bestPs;
	    }
	}
      /* fewer than Nth parses found: the remaining ranks reuse the
	 overall best */
      if(printS.numDiff < Bchart::Nth)
	{
	  for(i = printS.numDiff ; i < Bchart::Nth ; i++)
	    {
	      if(histPoints[i]) locPst[i] += bestPs;
	    }
	}

      pthread_mutex_lock(&scorelock);
      for(i = 0 ; i < Bchart::Nth ; i++) totPst[i]+=locPst[i];
      pthread_mutex_unlock(&scorelock);

      int numPrinted;

      /* put the sentence with which we just finished at the end of the printStack*/
      printStack.push_back(printS);
      PrintStack::iterator psi = printStack.begin();
      /* now look at each item from the front of the print stack
	 to see if it should be printed now */
      pthread_mutex_lock(&writelock);
      for( numPrinted =0; psi != printStack.end(); numPrinted++ )
	{
	  printStruct& pstr=(*psi);
	  if(pstr.sentenceCount != printCount) break;
	  *pstatStream << pstr.sentenceCount << "\t" << pstr.numDiff << "\n";
	  printCount++;
	  for(i = 0 ; i < pstr.numDiff ; i++)
	    {
	      InputTree*  mapparse = pstr.trees[i];
	      assert(mapparse);
	      double logP =log2(pstr.probs[i]);
	      /* NOTE(review): sr belongs to the sentence parsed in this
		 iteration, while pstr may be an entry queued by an
		 earlier iteration; the length used here may therefore
		 not be the length of the sentence being printed --
		 confirm. */
	      logP -= (sr.length()*log600);
	      *pstatStream <<  logP << "\n";
	      if(Bchart::prettyPrint) *pstatStream << *mapparse << "\n\n";
	      else
		{
		  mapparse->printproper(*pstatStream);
		  *pstatStream << "\n";
		}
	      delete mapparse;
	    }
	  *pstatStream << endl;
	  psi++;
	}
      pthread_mutex_unlock(&writelock);
      /* drop the entries that were just written */
      for(i = 0 ; i < numPrinted ; i++) printStack.pop_front();
      if(Feature::isLM)
	{
	  /* NOTE(review): totGram, totTri and totMix are updated here,
	     and totWords is read, with none of the mutexes used in this
	     function held -- confirm this is safe when several threads
	     run mainLoop. */
	  double lgram = log2(bst.sum());
	  lgram -= (sr.length()*log600);
	  double pgram = pow(2,lgram);
	  double iptri = chart->triGram();;
	  double ltri = (log2(iptri)-sr.length()*log600);
	  double ptri = pow(2.0,ltri);
	  /* fixed-weight mixture of the two probabilities */
	  double pcomb1 = (0.667 * pgram)+(0.333 * ptri);
	  double lcom1 = log2(pcomb1);
	  totGram -= lgram;
	  totTri -= ltri;
	  totMix -= lcom1;
	  if(locCount%10 == 9)
	    {
	      cerr << locCount << "\t";
	      cerr << pow(2.0,totGram/(double)totWords);
	      cerr <<"\t" <<  pow(2.0,totTri/(double)totWords);
	      cerr << "\t" << pow(2.0,totMix/(double)(totWords));
	      cerr << endl;
	    }
	}
      /* periodic progress report: running f-measure at each histPoint */
      if(locCount%50 == 1)
	{
	  cerr << sentenceCount << "\t";
	  for(int i = 0 ; i < Bchart::Nth ; i++)
	    if(histPoints[i])
	      {
		cerr << i << " " << totPst[i].fMeasure() << "\t";
	      }
	  cerr << endl;
	}

      delete chart;
      delete [] locPst;
    }
  return 0;
}
Ejemplo n.º 23
0
/*
  Entry point of the "pHsgt" step.  Takes exactly one argument, a path
  prefix.  Reads trees from standard input until the stream fails or a
  tree of length 0 is seen; for each tree it calls addWwData,
  incrWordData(S1, "^^") and UnitRules::gatherData.

  Then writes <path>pSgT.txt: a blank placeholder line, the number of
  distinct words, and for every word one line
      word \t (posInt p)... | count
  where p = count(word,pos) / numTerm[pos].  Finally calls
  ur.setData(path).

  Returns 0 on success (the original returned 1, which a shell treats as
  failure).
*/
int
main(int argc, char *argv[])
{
  ECArgs args( argc, argv );
  assert(args.nargs() == 1);
  ECString path(args.arg(0));
  cerr << "At start of pHsgt" << endl;

  for(int n = 0 ; n < MAXNUMNTS ; n++)
    numTerm[n] = 0;

  Term::init( path );
  readHeadInfo(path);

  int sentenceCount = 0;

  ECString s1lex("^^");
  ECString s1nm("S1");
  int s1Int = Term::get(s1nm)->toInt();

  UnitRules ur;
  ur.init();
  while(cin)
    {
      //if(sentenceCount > 4000) break;
      if(sentenceCount%10000 == 0) cerr << sentenceCount << endl;
      InputTree  parse;
      cin >> parse;
      //cerr << parse << endl;
      if(!cin) break;
      if(parse.length() == 0) break;
      EcSPairs wtList;
      parse.make(wtList);
      InputTree* par = &parse;

      addWwData(par);
      incrWordData(s1Int, s1lex);
      ur.gatherData(par);
      sentenceCount++;
    }
  ECString resultsString(path);
  resultsString += "pSgT.txt";
  ofstream     resultsStream(resultsString.c_str());
  assert(resultsStream);

  resultsStream << "       \n";  //leave space for number of words;
  resultsStream.precision(3);
  WordMap::iterator wmi = wordMap.begin();
  resultsStream << wordMap.size() << "\n\n";
  for( ; wmi != wordMap.end() ; ++wmi)
    {
      ECString w = (*wmi).first;
      resultsStream << w << "\t";
      PosD& posd = (*wmi).second;
      PosD::iterator pdi = posd.begin();
      int count = 0;   // total occurrences of this word over all tags
      for( ; pdi != posd.end(); ++pdi)
        {
          int posInt = (*pdi).first;
          int c = (*pdi).second;
          count += c;
          float p = (float)c/(float)numTerm[posInt];
          resultsStream << posInt << " " << p << " ";
        }
      resultsStream << "| " << count << "\n";
    }
  ur.setData(path);
  return 0;
}
Ejemplo n.º 24
0
/*
  Picks a word position (wpos) relative to treeh->tree and returns the
  Term integer of the tag at that position in `sentence`, or the stop
  term's integer when wpos is negative or equals endPos.

  wpos is chosen as follows (sz = number of subtrees):
    - pos < 0 or sz == 0 : tree->start()-1
    - pos == sz          : tree->finish()
    - otherwise, with st the subtree at index pos:
        pos < hpos              -> st->start()-1
        pos > hpos              -> st->finish()
        pos == hpos, blInd != 0 -> st->start()-1
        pos == hpos, blInd == 0 -> st->finish()
  Asserts pos <= sz and wpos <= endPos.
*/
int
tree_B(TreeHist* treeh, int blInd)
{
    InputTree* tree = treeh->tree;
    const int pos = treeh->pos;
    const int hpos = treeh->hpos;
    const int sz = tree->subTrees().size();
    assert(pos <= sz);

    int wpos;
    if(pos < 0 || sz == 0) wpos = tree->start()-1;
    else if(pos == sz) wpos = tree->finish();
    else
    {
        /* advance to the subtree at index pos */
        InputTreesIter iti = tree->subTrees().begin();
        int skipped = 0;
        while(iti != tree->subTrees().end() && skipped < pos)
        {
            ++iti;
            ++skipped;
        }
        if(iti != tree->subTrees().end())
        {
            InputTree* st = *iti;
            const bool useLeftEdge = (pos < hpos) || (pos == hpos && blInd);
            if(useLeftEdge) wpos = st->start()-1;
            else wpos = st->finish();
        }
    }

    assert(wpos <= endPos);
    if(wpos < 0 || wpos == endPos) return Term::stopTerm->toInt();
    return Term::get(sentence[wpos]->term())->toInt();
}
Ejemplo n.º 25
0
int
main(int argc, char *argv[])
{
   struct rlimit 	core_limits;
   core_limits.rlim_cur = 0;
   core_limits.rlim_max = 0;
   setrlimit( RLIMIT_CORE, &core_limits );

   ECArgs args( argc, argv );
   assert(args.nargs() == 2);
   conditionedType = args.arg(0);
   cerr << "start trainRs: " << conditionedType << endl;

   ECString  path( args.arg( 1 ) );
   if(args.isset('L')) Feature::setLM();

   Term::init(path);
   readHeadInfo(path);

   Pst pst(path);
   if(Feature::isLM) ClassRule::readCRules(path);

   addSubFeatureFns();
   Feature::init(path, conditionedType); 

   whichInt = Feature::whichInt;
   int ceFunInt = Feature::conditionedFeatureInt[Feature::whichInt];
   Feature::conditionedEvent
     = SubFeature::Funs[ceFunInt];

   Feat::Usage = PARSE;
   ECString ftstr(path);
   ftstr += conditionedType;
   ftstr += ".g";
   ifstream fts(ftstr.c_str());
   if(!fts)
     {
       cerr << "Could not find " << ftstr << endl;
       assert(fts);
     }
   tRoot = new FeatureTree(fts); //puts it in root;

   cout.precision(3);
   cerr.precision(3);

   lamInit();

   InputTree* trainingData[1001];
   int usedCount = 0;
   sentenceCount = 0;
   for( ;  ; sentenceCount++)
     {
       if(sentenceCount%10000 == 1)
	 {
	   // cerr << conditionedType << ".tr "
	   //<< sentenceCount << endl;
	 }
       if(usedCount >= 1000) break;
       InputTree*     correct = new InputTree;  
       cin >> (*correct);
       if(correct->length() == 0) break;
       if(!cin) break;
       EcSPairs wtList;
       correct->make(wtList); 
       InputTree* par;
       par = correct;
       trainingData[usedCount++] = par;
     }
   if(Feature::isLM) pickLogBases(trainingData,sentenceCount);
   procGSwitch = true;
   for(pass = 0 ; pass < 10 ; pass++)
     {
       if(pass%2 == 1) cout << "Pass " << pass << endl;
       goThroughSents(trainingData, sentenceCount);
       updateLambdas();
       //printLambdas(cout);
       zeroData();
     }
   ECString resS(path);
   resS += conditionedType;
   resS += ".lambdas";
   ofstream res(resS.c_str());
   res.precision(3);
   printLambdas(res);
   printLambdas(cout);
   cout << "Total params = " << FeatureTree::totParams << endl;
   cout << "Done: " << (int)sbrk(0) << endl;
}
Ejemplo n.º 26
0
int
main(int argc, char *argv[])
{
   struct rlimit 	core_limits;
   core_limits.rlim_cur = 0;
   core_limits.rlim_max = 0;
   setrlimit( RLIMIT_CORE, &core_limits );

   ECArgs args( argc, argv );
   assert(args.nargs() == 2);
   if(args.isset('N')) numGram = atoi(args.value('N').c_str());
   Feature::setLM();
   if(args.isset('L')) Term::Language = args.value('L');
   string  path( args.arg( 1 ) );
   if(Term::Language == "Ch") readHeadInfoCh(path);
   else readHeadInfo(path);

   string  conditionedType( args.arg(0) );
   cerr << "start kn3Counts " <<  conditionedType << endl;
   int minCount = 1;
   if(args.isset('m')) minCount = atoi(args.value('m').c_str());
   Feat::Usage = KNCOUNTS;
   FeatureTree::minCount = minCount;

   Term::init(path);
   readHeadInfo(path);
   Pst pst(path);
   addSubFeatureFns();

   Feature::assignCalc(conditionedType);
       
   FeatureTree::root() = new FeatureTree();
   Feature::init(path, conditionedType);
   int wI = Feature::whichInt;
   int ceFunInt = Feature::conditionedFeatureInt[wI];

   Feature::conditionedEvent
     = SubFeature::Funs[ceFunInt];
   string trainingString( path );

   int sentenceCount = 0;
   for( ; ; sentenceCount++)
     {
       if(sentenceCount%10000 == 1)
	 {
	   cerr << "rCounts "
	     << sentenceCount << endl;
	 }
       InputTree     correct;  
       cin >> correct;
       //if(sentenceCount > 1000) break;
       if(correct.length() == 0) break;
       //cerr <<sentenceCount << correct << endl;
       EcSPairs wtList;
       correct.make(wtList); 
       InputTree* par;
       int strt = 0;
       par = &correct;

       makeSent(par);
       curS = par;
       gatherFfCounts(par, 0);
       if(wI == TTCALC || wI == WWCALC)
	 {
	   list<InputTree*> dummy2;
	   InputTree stopInputTree(par->finish(),par->finish(),
				   wI==TTCALC ? "" : "^^",
				   "STOP","",
				   dummy2,NULL,NULL);
	   stopInputTree.headTree() = &stopInputTree;
	   TreeHist treeh(&stopInputTree,0);
	   treeh.hpos = 0;
	   callProcG(&treeh);
	 }
     }
   finalProbComputation();
   string resS(path);
   resS += conditionedType;
   resS += ".g";
   ofstream res(resS.c_str());
   assert(res);
   FTreeMap& fts = FeatureTree::root()->subtree;
   FTreeMap::iterator fti = fts.begin();
   for( ; fti != fts.end() ; fti++)
     {
       int asVal = (*fti).first;
       (*fti).second->printFTree(asVal, res);
     }
   res.close();
   cout << "Tot words: " << totWords << endl;
   cout << "Total params for " << conditionedType << " = "
	<< FeatureTree::totParams << endl;
}
Ejemplo n.º 27
0
/*
  Recursively adds to the chart the items and edges that correspond to
  `tree`, and returns the Item created for tree's own node, or NULL if an
  item or edge could not be created anywhere in the subtree.

  Terminal node: creates the tag item (lhs) and the word item (rhs), and
  an edge lhs -> stop rhs stop whose probability is pHst(word, tag).
  Non-terminal node: creates the lhs item, recurses into every subtree,
  and adds an edge lhs -> stop child... stop whose probability is set by
  assignRProb.
*/
Item*
Bchart::
edgesFromTree(InputTree* tree)
{
  int b0 = tree->num();
  const Term* trm = Term::get(tree->term());
  assert(trm);
  //cerr << "ARI " << *trm << " " << b0 << endl;
  if(printDebug() > 1005)
    cerr << "EFIE " << trm->name() << " " << b0 << endl;
  /* If this is a terminal node, the rhs will be a word; otherwise it
     will be a rule expansion consisting of several Item s.
   */
  if(trm->terminal_p())
    {
      ECString tmpW1 = tree->word();
      char chars[512];
      ECString tmpW = toLower(tmpW1.c_str(), chars);

      int wInt = wtoInt(tmpW);
      /* Check each item before touching it.  The original dereferenced
         both pointers first and then tested `!lhs && !rhs`, which could
         not protect either dereference. */
      Item* lhs = add_item(b0, trm, tree->start());
      if(!lhs) return NULL;
      lhs->start() = tree->start();
      lhs->finish() = tree->finish();
      Item* rhs = add_item2(b0, trm, wInt,tmpW);
      if(!rhs) return NULL;
      rhs->finish() = tree->finish();
      rhs->start() = tree->start();

      Items subItems;
      subItems.push_back(stops[tree->start()]);
      subItems.push_back(rhs);
      subItems.push_back(stops[tree->finish()]);
      Edge* edg = add_edge(lhs, subItems);
      if(!edg)
        {
          return NULL;
        }
      edg->prob() = pHst(wInt,trm->toInt());
      edg->num() = b0;
      if(printDebug() > 5)
        cerr << "LHS " << *lhs << " " << tmpW  << edg->prob() << endl;

      return lhs;
    }
  else
    {
      Item* lhs = add_item(b0, trm, -1);
      /* assert before use, not after */
      assert(lhs);
      lhs->start() = tree->start();
      lhs->finish() = tree->finish();
      Items subItems;
      subItems.push_back(stops[tree->start()]);
      InputTreesIter iti = tree->subTrees().begin();
      for( ; iti != tree->subTrees().end() ; ++iti)
        {
          InputTree* stree = (*iti);
          cerr << "WBA "<< stree->term() << *stree   << endl;
          Item* itm = edgesFromTree(stree);
          if(!itm)
            {
              return NULL;
            }
          subItems.push_back(itm);
        }
      subItems.push_back(stops[tree->finish()]);
      Edge* edg = add_edge(lhs, subItems);
      if(!edg)
        {
          /* this function returns a pointer; the original returned
             `false` here */
          return NULL;
        }
      edg->num() = b0;
      assignRProb(edg);
      if (printDebug() > 5)
        {
          cerr << "Saw edge " << *edg << ": p=" << edg->prob() << endl;
        }
      //cerr << "endeFE " << *edg << endl;
      /* NOTE(review): the original had a call to rPendFactor() placed
         after this return, where it could never execute; it is omitted
         here.  Confirm whether it was meant to run before returning. */
      return lhs;
    }
}
Ejemplo n.º 28
0
int
main(int argc, char *argv[])
{
  ECArgs args( argc, argv );
  ECString path(args.arg(0));
  cerr << "At start of pUgT" << endl;

  Term::init( path );  
  if(args.isset('L')) Term::Language = args.value('L');
  readHeadInfo(path);
  Pst pst(path);

  int sentenceCount = 0;

  int i, j;
  for(i = 0 ; i < MAXNUMTS ; i++)
    {
      posCounts[i] = 0;
      posCapCounts[i] = 0;
      posDenoms[i] = 0;
      posUCounts[i] = 0;
      posDashCounts[i] = 0;
    }
  for(i = 0 ; i < MAXNUMTS ; i++) totCounts[i] = 0;

  i = 0;
  for( ; ; )
    {
      if(i++%10000 == 1) cerr << i << endl;
      //if(i > 1000) break;
      InputTree  parse;
      cin >> parse;
      //cerr << parse << endl;
      if(parse.length() == 0) break;
      if(!cin) break;
      curSent = &parse;
      addWwData(&parse);
      sentenceCount++;
    }

  ECString resultsString(path);
  resultsString += "pUgT.txt";
  ofstream     resultsStream(resultsString.c_str());
  assert(resultsStream);
  /* we print out p(unknown|tag)    p(Capital|tag)   p(hasDash|tag, unknown)
     note for Capital the denom is different because we ignore the first
     two words of the sentence */
  int nm = Term::lastTagInt()+1;
  for(i = 0 ; i < nm ; i++)
    {
      resultsStream << i << "\t";
      float pugt = 0;
      float pudenom = (float)posDenoms[i];
      if(pudenom > 0) pugt = (float)posUCounts[i]/pudenom;
      resultsStream << pugt << "\t";
      if(posCounts[i] == 0) resultsStream << 0 << "\t";
      else
	resultsStream << (float) posCapCounts[i]/ (float)posCounts[i] << "\t";
      if(posUCounts[i] == 0) resultsStream << 0;
      else resultsStream << (float)posDashCounts[i]/posUCounts[i] ;
      resultsStream << endl;
    }
  ECString resultsString2(path);
  resultsString2 += "nttCounts.txt";
  ofstream     resultsStream2(resultsString2.c_str());
  assert(resultsStream2);
  for(i = 0 ; i <= Term::lastNTInt() ; i++)
    {
      resultsStream2 << i << "\t";
      resultsStream2 << totCounts[i] << "\n";
    }
  return 0;
}
Ejemplo n.º 29
0
/*
  Entry point of the "pSfgt" step.  Takes exactly one argument, a path
  prefix; option -L sets Term::Language.

  Zeroes numTerm[0..139], reads trees from standard input (skipping trees
  of length 0, stopping on stream failure) and calls addWwData on each.
  Then writes <path>endings.txt: numEndings on the first line, followed by
  one line "i \t ending \t cnt/numTerm[i]" for every entry of endData[i],
  i in [0, 140).  Finally prints the processed-sentence count and
  numEndings to cout.
*/
int
main(int argc, char *argv[])
{
  ECArgs args( argc, argv );
  assert(args.nargs() == 1);
  ECString path(args.arg(0));
  cerr << "At start of pSfgt" << endl;

  for(int t = 0 ; t < 140 ; ++t)
    numTerm[t] = 0;

  ECString resultsString(path);
  resultsString += "endings.txt";

  Term::init( path );
  if(args.isset('L')) Term::Language = args.value('L');
  readHeadInfo(path);
  Pst pst(path); //???;

  int sentenceCount = 0;
  int wordCount = 0;
  int processedCount = 0;

  int i = 0;
  while(cin)
    {
      /* test the old counter value, report the new one */
      const bool report = (i%5000 == 1);
      ++i;
      if(report) cerr << i << endl;

      InputTree  parse;
      cin >> parse;
      if(!cin) break;
      /* the stream is known to be good here, so an empty tree is simply
         skipped */
      if(parse.length() == 0) continue;
      addWwData(&parse);
      ++processedCount;
      wordCount += parse.length();
    }

  ofstream     resultsStream(resultsString.c_str());
  assert(resultsStream);
  resultsStream << numEndings << "\n";

  for(i = 0 ; i < 140 ; ++i)
    {
      for(endMap::iterator emi = endData[i].begin() ;
          emi != endData[i].end() ; ++emi)
        {
          ECString ending = (*emi).first;
          int cnt = (*emi).second;
          resultsStream << i << "\t" << ending << "\t"
                        << (float) cnt / (float) numTerm[i]
                        << endl;
        }
    }
  cout<<"totol sentence:"<<processedCount<<endl;
  cout<<"total suffix:"<<numEndings<<endl;

  return 0;
}
Ejemplo n.º 30
0
int
main(int argc, char *argv[])
{
  ECArgs args( argc, argv );
  assert(args.nargs() == 1);
  ECString path(args.arg(0));
  cerr << "At start of pTgNt" << endl;

  for(int n = 0 ; n < MAXNUMTS ; n++)
    numTerm[n] = 0;

  ECString resultsString(path);
  resultsString += "endings.txt";

  Term::init( path );  
  if(args.isset('L')) Term::Language = args.value('L');
  readHeadInfo(path);
  Pst pst(path);

  int sentenceCount = 0;
  int wordCount = 0;
  int processedCount = 0;

  int i, j;
  for(i = 0 ; i < MAXNUMTS ; i++)
    for(j = 0 ; j < MAXNUMNTS ; j++)
      data[i][j] = 0;

  i = 0;
  while(cin)
    {
      if(i%10000 == 0) cerr << i << endl;
      //if(i > 1000) break;
      InputTree  parse;
      cin >> parse;
      if(!cin) break;
      if(parse.length() == 0) break;
      const Term* resTerm = addWwData(&parse);
      processedCount++;
      wordCount += parse.length();
      i++;
    }
  ofstream     resultsStream(resultsString.c_str());
  assert(resultsStream);
  int  totNt[MAXNUMTS];
  for(i = 0 ; i < MAXNUMTS ; i++) totNt[i] = 0;
  for(i = 0 ; i <= Term::lastTagInt() ; i++)
    {
      for(j = 0 ; j < (Term::lastNTInt() - Term::lastTagInt()) ; j++)
	totNt[j] += data[i][j];
    }
  resultsStream << numEndings << "\n";
  for(i = 0 ; i < MAXNUMTS ; i++)
    {
      endMap::iterator emi = endData[i].begin();
      for( ; emi != endData[i].end() ; emi++)
	{
	  ECString ending = (*emi).first;
	  int cnt = (*emi).second;
	  resultsStream << i << "\t" << ending << "\t"
			<< (float) cnt / (float) numTerm[i]
			<< endl;
	    //<< "\n";
	}
    }
  return 0;
}