Example #1
void
Feature::
createLam(int which, ECString tmp, ECString path)
{
  ECString npath("/ltmp/discrim/FOLDS/");
  int b,f;
  int tot = Feature::total[which];
  for(int i = 0 ; i < 20 ; i++)
    {
      ECString ftstr(npath);
      ftstr += intToString(i) + "/";
      ftstr += tmp;
      ftstr += ".lambdas";
      ifstream fts(ftstr.c_str());
      assert(fts);
      for(b = 1; b < 15 ; b++)
	{
	  int bb;
	  if(!fts)
	    {
	      cerr << "Trouble reading lambs for " << which << " in " << ftstr
		   << endl;
	      assert(fts);
	    }
	  fts >> bb ;
	  //cerr << bb << endl;
	  if(bb != b)
	    {
	      cerr << tmp << " " << b << " " << bb << endl;
	      assert(bb == b);
	    }
	  for(f = 2; f <= tot ; f++)
	    {
	      float lam;
	      assert(fts);
	      fts >> lam;
	      //cerr << which << " " << f << " " << b << " " << lam << endl;
	      if(i == 0) Feature::setLambda(which,f,b,lam);
	      else lamVal(which,f,b) += lam;
	    }
	}
    }
  for(b = 1; b < 15 ; b++)
    for(f = 2; f <= tot ; f++) lamVal(which,f,b) /= 20.0;
  
  ECString ftstr(path);
  ftstr += tmp;
  ftstr += ".lambdas";
  ofstream res(ftstr.c_str());
  assert(res);
  res.precision(3);
  printLambdas(res);
} 
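createLam merges lambda tables trained on 20 separate folds: fold 0 initializes each cell with setLambda, the remaining folds are accumulated into lamVal, and the final double loop divides every cell by 20.0 before printLambdas writes the averaged table to <path><tmp>.lambdas. Below is a minimal, self-contained sketch of that accumulate-then-divide pattern; the FOLDS/ directory layout, the NF column count and the plain 2-D array are placeholder assumptions, not the parser's real data structures.

// Minimal sketch of createLam's fold averaging, under assumed names:
// FOLDS/<i>/r.lambdas is a hypothetical path layout and NF a placeholder
// feature count; the real code stores values via setLambda/lamVal instead.
#include <cassert>
#include <fstream>
#include <iostream>
#include <sstream>

const int FOLDS = 20, BUCKETS = 14, NF = 9;
double avg[BUCKETS + 1][NF + 1];              // zero-initialized, indexed as avg[b][f]

int main()
{
  for (int i = 0; i < FOLDS; i++)
    {
      std::ostringstream name;
      name << "FOLDS/" << i << "/r.lambdas";  // assumed per-fold file name
      std::ifstream fts(name.str().c_str());
      if (!fts)
        {
          std::cerr << "missing fold file " << name.str() << std::endl;
          return 1;
        }
      for (int b = 1; b <= BUCKETS; b++)
        {
          int bb;
          fts >> bb;
          assert(bb == b);                    // row index must match the bucket
          for (int f = 2; f <= NF; f++)
            {
              float lam;
              fts >> lam;
              avg[b][f] += lam;               // accumulate across folds
            }
        }
    }
  for (int b = 1; b <= BUCKETS; b++)          // average, as in createLam's last loop
    for (int f = 2; f <= NF; f++)
      avg[b][f] /= (double)FOLDS;
  std::cout.precision(3);
  std::cout << "avg[1][2] = " << avg[1][2] << std::endl;
  return 0;
}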
Example #2
void
Feature::
readLam(int which, ECString tmp, ECString path)
{
  ECString ftstr(path);
  ftstr += tmp;
  ftstr += ".lambdas";
  ifstream fts(ftstr.c_str());
  assert(fts);
  int b,f;
  // wul: the lambdas file has 14 rows; each row has the following format (the first column is the row index):
  // wul: 2  0  0.329  0.336  0.00178  0.353  0.00129  0.158  0.417
  for(b = 1; b < 15 ; b++)
     {
       int bb;
       assert(fts);
       fts >> bb ;
       //cerr << bb << endl;
       assert(bb == b);
       // wul: read the remaining columns; different values of which (e.g. ru, u, ...) have
       // different numbers of columns, determined by their feature counts
       for(f = 2; f <= Feature::total[which] ; f++)
	 {
	   float lam;
	   assert(fts);
	   fts >> lam;
	   //cerr << which << " " << f << " " << b << " " << lam << endl;
           // wul: the lambda table is a bit like a database; the values are recorded here. b denotes one entry (the row number)
	   Feature::setLambda(which,f,b,lam);
	 }
     }
} 
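The wul comments above describe the layout readLam expects in a .lambdas file: 14 bucket rows, each starting with its 1-based row index and followed by one smoothing weight per feature column, where the column count depends on which (Feature::total[which]). A small self-contained sketch of parsing that row format follows; it reads from an in-memory sample instead of a file, and the 9-column width plus the first sample row are illustrative only (the second row is the one quoted in the comment).

// Minimal sketch of parsing the .lambdas row format described above,
// using an in-memory sample instead of a real file; the column count (tot = 9)
// and the first sample row are illustrative, not taken from real training data.
#include <cassert>
#include <iostream>
#include <sstream>
#include <vector>

int main()
{
  // two sample rows: index column first, then one lambda per feature (f = 2..tot)
  std::istringstream fts(
      "1 0 0.301 0.310 0.002 0.340 0.001 0.150 0.400\n"
      "2 0 0.329 0.336 0.00178 0.353 0.00129 0.158 0.417\n");
  const int rows = 2, tot = 9;                  // a full file has 14 rows
  std::vector<std::vector<float> > lam(rows + 1, std::vector<float>(tot + 1, 0));
  for (int b = 1; b <= rows; b++)
    {
      int bb;
      fts >> bb;
      assert(bb == b);                          // row index must match
      for (int f = 2; f <= tot; f++)            // remaining columns
        fts >> lam[b][f];
    }
  std::cout << "lam[2][3] = " << lam[2][3] << std::endl;   // prints 0.329
  return 0;
}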
Example #3
void
MeChart::
init(ECString path)
{
  Feat::Usage = PARSE;
  addEdgeSubFeatureFns();
  addSubFeatureFns();

  ECString tmpA[MAXNUMCALCS] = {"r","h","u","m","l","lm","ru","rm","tt",
				"s","t","ww","dummy","dummy","dummy"};

  for(int which = 0 ; which < Feature::numCalcs ; which++)
    {
      ECString tmp = tmpA[which];
      Feature::init(path, tmp); 
      if(tmp == "s" || tmp == "t") continue;
      ECString ftstr(path);
      ftstr += tmp;
      ftstr += ".g";
      ifstream fts(ftstr.c_str());
      if(!fts) cerr << "could not find " << ftstr << endl;
      assert(fts);
      FeatureTree* ft = new FeatureTree(fts); //puts it in root;
      if(tmp == "ww") continue;
      Feature::readLam(which, tmp, path);
    }
  int cntxSzReq = Feature::total[TCALC];
  int scSz = Feature::total[SCALC];
  if(scSz > cntxSzReq) cntxSzReq = scSz;
  //assert(CntxArray::sz == cntxSzReq);
  //assert(CntxArray::sz == (Feature::total[UCALC] -1));
} 
Example #4
void
Feature::
readLam(int which, ECString tmp, ECString path)
{
  ECString ftstr(path);
  ftstr += tmp;
  ftstr += ".lambdas";
  ifstream fts(ftstr.c_str());
  assert(fts);
  int b,f;
  int tot = Feature::total[which];
  
    /* The standard training programs never read in lambdas.  Only
     getProbs does, and it uses the new bucketing, which uses the
     next for loop
  */

  for(f = 2 ; f <= tot ; f++)
    {
      float logBase;
      
      fts >> logBase;

      /*JT: this used to be  logFacs[f][which] = 1.0/log(logBase), but that
	occurs in bucket
       */
      logFacs[which][f] = logBase;  
    }
  
   
  for(b = 1; b < 15 ; b++)
     {
       int bb;
       if(!fts)
	 {
	   cerr << "Trouble reading lambs for " << which << " in " << ftstr
		<< endl;
	   assert(fts);
	 }
       fts >> bb ;
       //cerr << bb << endl;
       if(bb != b)
	 {
	   cerr << tmp << " " << b << " " << bb << endl;
	   assert(bb == b);
	 }
       for(f = 2; f <= tot ; f++)
	 {
	   float lam;
	   assert(fts);
	   fts >> lam;
	   //cerr << which << " " << f << " " << b << " " << lam << endl;
	   Feature::setLambda(which,f,b,lam);
	 }
     }
} 
Example #5
void
Feature::
readLam(int which, ECString tmp, ECString path)
{
  ECString ftstr(path);
  ftstr += tmp;
  ftstr += ".lambdas";
  ifstream fts(ftstr.c_str());
  assert(fts);
  int b,f;
  int tot = Feature::total[which];

  if(Feature::isLM or Feature::useExtraConditioning)
    {
      /* This for loop is removed for old bucketing; */
      for(f = 2 ; f <= tot ; f++)
	{
	  float logBase;
	  fts >> logBase;
	  logFacs[which][f] = 1.0/log(logBase);  
	}
    }
  for(b = 1; b < 15 ; b++)
     {
       int bb;
       if(!fts)
	 {
	   cerr << "Trouble reading lambs for " << which << " in " << ftstr
		<< endl;
	   assert(fts);
	 }
       fts >> bb ;
       //cerr << bb << endl;
       if(bb != b)
	 {
	   cerr << tmp << " " << b << " " << bb << endl;
	   assert(bb == b);
	 }
       for(f = 2; f <= tot ; f++)
	 {
	   float lam;
	   assert(fts);
	   fts >> lam;
	   //cerr << which << " " << f << " " << b << " " << lam << endl;
	   Feature::setLambda(which,f,b,lam);
	 }
     }
} 
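This variant converts the per-feature log bases in the file header to natural-log scale up front: since log_b(x) = ln(x) / ln(b), storing 1.0/log(logBase) in logFacs lets later code turn a natural log into a base-logBase log with a single multiply (Example #4 instead stores the raw base and leaves the conversion to the bucketing code). A tiny self-contained check of that change-of-base identity:

// Tiny check of the change-of-base factor stored in logFacs above:
// log_b(x) == ln(x) * (1.0 / ln(b)); here b = 2 and x = 8, so both sides print 3.
#include <cmath>
#include <iostream>

int main()
{
  double logBase = 2.0, x = 8.0;
  double fac = 1.0 / std::log(logBase);   // the factor this readLam precomputes
  std::cout << std::log(x) * fac << " == "
            << std::log(x) / std::log(logBase) << std::endl;
  return 0;
}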
Example #6
int
main(int argc, char *argv[])
{
   struct rlimit 	core_limits;
   core_limits.rlim_cur = 0;
   core_limits.rlim_max = 0;
   setrlimit( RLIMIT_CORE, &core_limits );

   ECArgs args( argc, argv );
   assert(args.nargs() == 2);
   conditionedType = args.arg(0);
   cerr << "start trainRs: " << conditionedType << endl;

   ECString  path( args.arg( 1 ) );
   if(args.isset('L')) Feature::setLM();

   Term::init(path);
   readHeadInfo(path);

   Pst pst(path);
   if(Feature::isLM) ClassRule::readCRules(path);

   addSubFeatureFns();
   Feature::init(path, conditionedType); 

   whichInt = Feature::whichInt;
   int ceFunInt = Feature::conditionedFeatureInt[Feature::whichInt];
   Feature::conditionedEvent
     = SubFeature::Funs[ceFunInt];

   Feat::Usage = PARSE;
   ECString ftstr(path);
   ftstr += conditionedType;
   ftstr += ".g";
   ifstream fts(ftstr.c_str());
   if(!fts)
     {
       cerr << "Could not find " << ftstr << endl;
       assert(fts);
     }
   tRoot = new FeatureTree(fts); //puts it in root;

   cout.precision(3);
   cerr.precision(3);

   lamInit();

   InputTree* trainingData[1001];
   int usedCount = 0;
   sentenceCount = 0;
   for( ;  ; sentenceCount++)
     {
       if(sentenceCount%10000 == 1)
	 {
	   // cerr << conditionedType << ".tr "
	   //<< sentenceCount << endl;
	 }
       if(usedCount >= 1000) break;
       InputTree*     correct = new InputTree;  
       cin >> (*correct);
       if(correct->length() == 0) break;
       if(!cin) break;
       EcSPairs wtList;
       correct->make(wtList); 
       InputTree* par;
       par = correct;
       trainingData[usedCount++] = par;
     }
   if(Feature::isLM) pickLogBases(trainingData,sentenceCount);
   procGSwitch = true;
   for(pass = 0 ; pass < 10 ; pass++)
     {
       if(pass%2 == 1) cout << "Pass " << pass << endl;
       goThroughSents(trainingData, sentenceCount);
       updateLambdas();
       //printLambdas(cout);
       zeroData();
     }
   ECString resS(path);
   resS += conditionedType;
   resS += ".lambdas";
   ofstream res(resS.c_str());
   res.precision(3);
   printLambdas(res);
   printLambdas(cout);
   cout << "Total params = " << FeatureTree::totParams << endl;
   cout << "Done: " << (int)sbrk(0) << endl;
}
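Judging from the argument handling above (assert(args.nargs() == 2), the optional 'L' flag, and the cin >> (*correct) loop), trainRs takes a conditioning type and a data-directory path, reads training trees on standard input, and writes <path><conditionedType>.lambdas. A hypothetical invocation, assuming ECArgs accepts the flag as -L and with placeholder path and input file names, might look like:

   trainRs -L r /path/to/DATA/ < training.trees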