Пример #1
0
int main(int argc, char** argv)
{
   FTAGArgs args;
   args.getArgs(argc, argv);
   srand48(args._seed);
   cout << "SEED=" << args._seed << endl;

   ofstream ofile(args._outFile.c_str());
   if (!ofile)
   {
      cerr << "Specifiy output file with ofile=<path>" << endl;
      exit(1);
   }

   ofstream pfile(args._fpFileOut.c_str());

   AxtReader ar;

   try
   {
      ar.read(args._inFile);
      
      const vector<AxtPair>& wholeThing = ar.getAlignments();
      Pairs allAlignments;
      for (size_t i = 0; i < wholeThing.size(); ++i)
      {
         allAlignments.push_back(wholeThing[i].getAlignment());
      }
      Pairs alignments = ar.sample(args._numPairs, args._maxLength / 2, 
                                   args._maxLength, 0, false);

      FTAGParams parAll = estParams(allAlignments, args._symmetric);
      FTAGParams parInput = estParams(alignments, args._symmetric);

      cout << "From Whole File: " << parAll << endl;
      cout << "From input: " << parInput << endl;

      ofile << alignments;

      if (pfile.is_open())
      {
         pfile << parInput;
      }
   }
   catch(string message)
   {
      cerr << message << endl;
      return 1;
   }

   return 0;
}
Пример #2
0
int main(int argc, char** argv)
{
   FTAGArgs args;
   args.getArgs(argc, argv);
   srand48(args._seed);
   cout << "SEED=" << args._seed << endl;

   FTAGParams params;
   ifstream pfile(args._fpFile.c_str());
   if (!pfile)
   {
      cout << "Specify params with fpfile=<path>" << endl;
   }
   pfile >> params;

   ContextModel em;
   em.setDoubleJC(params);
   TransitionModel tm;
   tm.setSimplified(params);

   FTAGGen sGen;
   sGen.setEmissionModel(em);
   sGen.setTransitionModel(tm);
      
   string a,b;
   deque<Trace> trace;
   sGen.genAlignment(a, b, trace, true, 10000);
   
   if (drand48() < args._flipProb)
   {
      reverse(a.begin(), a.end());
      reverse(b.begin(), b.end());
   }
   
   cout << a << endl << b << endl;
//   copy(trace.begin(), trace.end(), ostream_iterator<Trace>(cout, "\n"));
   for (deque<Trace>::iterator i = trace.begin(); i != trace.end(); ++i)
      cout << *i << endl;
      
   cout << trace.size() << " state transitions used" << endl;
   
   return 0;
}
Пример #3
0
int main(int argc, char** argv)
{
    FTAGArgs args;
    args.getArgs(argc, argv);

    srand48(args._seed);
    cout << "SEED=" << args._seed << endl;

    FTAGParams params;
    try
    {
        ifstream pfile(args._fpFile.c_str());
        if (!pfile)
        {
            cout << "Specify params with fpfile=<path>" << endl;
        }
        pfile >> params;
    }
    catch(string message)
    {
        cout << message << endl;
        return 1;
    }

    ContextModel em;
    em.setDoubleJC(params);
    TransitionModel tm;
    tm.setSimplified(params);

    AGDOptimizer optimizer;
    optimizer.setFixed(params);
    optimizer.setModels(em, tm);

    FTAGParams offset(params);
    offset.setAll(args._offset);
    FTAGParams parSeed(params);
    parSeed.setSymmetric(args._symmetric);
    parSeed.randomizeNonFixed();

    optimizer.setSavedInit(parSeed);
    optimizer.setRelative(args._relative);
    optimizer.setRandomOrder(args._randOrder);
    optimizer.setLoopParams(args._numOptTrials, args._maxOptIt,
                            args._optThreshold, offset);

    double mse = optimizer.optimize();

    double mymse = params.mse(optimizer.getParams());

    TransitionModel tmEst;
    tmEst.setSimplified(optimizer.getParams());
    cout << "ORIGINAL\n" << tm << "\nESTIMATED\n" << tmEst << endl << endl;

    cout << "ORIGINAL\n" << params
         << "\nNEW\n" << optimizer.getParams()
         << "\nDELTA\n" << params - optimizer.getParams()
         << endl;
    cout << "MSE=" << mse << endl;


    return 0;
}
Пример #4
0
int main(int argc, char** argv)
{
   FTAGArgs args;
   args.getArgs(argc, argv);
   
   srand48(args._seed);
   cout << "SEED=" << args._seed << endl;

   ofstream logFile;
   FTAGTrain::EMTrace emTrace;
   if (!args._outFile.empty())
   {
      logFile.open(args._outFile.c_str());
   }

   ofstream distFile;
   vector<size_t> dist, dist_nc;
   if (!args._distFile.empty())
   {
      distFile.open(args._distFile.c_str());
      dist = vector<size_t>(args._maxLength, 0);
      dist_nc = vector<size_t>(args._maxLength, 0);
   }

   AxtReader ar;
   FTAGParams parSeed;
   
   try
   {
      ar.read(args._inFile);
      ifstream pfile(args._fpFile.c_str());
      if (!pfile)
      {
         cout << "Specify params with fpfile=<path>" << endl;
      }
      pfile >> parSeed;
   }
   catch(string message)
   {
      cout << message << endl;
      return 1;
   }
   if (args._context == false)
   {
      parSeed.setRMD(0.);
      parSeed.setRMI(0.);
      parSeed.setPCD(1.);
      parSeed.setPCI(1.);
      parSeed.setRMDFixed(true);
      parSeed.setRMIFixed(true);
      parSeed.setPCDFixed(true);
      parSeed.setPCIFixed(true);
   }


//   vector<pair<string, string> > alignments = filter(ar.getAlignments(),
//                                                     minLength, maxLength);
   const std::vector<AxtPair>& wholeThing = ar.getAlignments();
   vector<pair<string, string> > allAlignments;
   for (size_t i = 0; i < wholeThing.size(); ++i)
   {
      allAlignments.push_back(wholeThing[i].getAlignment());
   }
   vector<pair<string, string> > alignments 
      = ar.sample(args._numPairs, args._maxLength / 2, args._maxLength, 0, 
                  false);

   for (size_t i = 0; i < alignments.size(); ++i)
   {
      cout << i << "a) " << alignments[i].first << endl
           << i << "b) " << alignments[i].second << endl;
   }
   FTAGParams parAll = estParams(allAlignments);
   FTAGParams parInput = estParams(alignments);
   ContextModel emInput;
   emInput.setDoubleJC(parInput);
   TransitionModel tmInput;
   tmInput.setSimplified(parInput);

   parSeed.setSymmetric(args._symmetric);
   parSeed.setUniGap(args._unigap);
   parSeed.randomizeNonFixed();

   cout << "From Whole File: " << parAll << endl;
   cout << "From input: " << parInput << endl;
   cout << "Random Seed: " << parSeed << endl;

   FTAGParams offset;
   offset.setAll(args._offset);

   AGDOptimizer optimizer;
   optimizer.setRelative(args._relative);
   optimizer.setSavedInit(parSeed);
   optimizer.setLoopParams(args._numOptTrials, args._maxOptIt, 
                           args._optThreshold, offset);

   EmissionEstimator ee;
   TransitionEstimator te;
   ee.setBias(args._emBias);
   te.setBias(args._tmBias);

   FTAGTrain* trainer = new FTAGTrain();
   trainer->initialize(optimizer, ee, te);
   trainer->setSequences(alignments, args._winSize);
   trainer->emLoop(args._maxEMIt, args._emThreshold, args._emConvRepeats,
                   args._emCRThreshold, &emTrace);

   FTAGParams params = trainer->getParams();

   TransitionModel tm;
   ContextModel em;
   tm.setSimplified(params);
   em.setDoubleJC(params);

   delete trainer;
   trainer = NULL;
   double pr = 0.;
   double prInput = 0.;
   double tdiff = 0;
   size_t tlength = 0;

   for (size_t i = 0; i < alignments.size(); ++i)
   {
      FTAGModel ftag;
      ftag.setSequences(alignments[i].first, alignments[i].second,
                        args._winSize, args._winSize);

      ftag.setTransitionModel(tm);
      ftag.setEmissionModel(em);
      pr += log(ftag.forward());
      string a,b;
      deque<Trace> trace;
      ftag.viterbi();
      ftag.viterbiTrace(a, b, trace);
      cout << i << "a) " << a << endl 
           << i << "b) " << b << endl;
      AxtPair apEstimated(a, b);
      AxtPair apOriginal(alignments[i].first, alignments[i].second);
      size_t adiff = apOriginal.diff(apEstimated);

      ftag.setTransitionModel(tmInput);
      ftag.setEmissionModel(emInput);
      prInput += log(ftag.forward());

      tdiff += adiff;
      tlength += alignments[i].first.length();

      makeLenDistSimpleSymmetric(trace, dist, dist_nc);
   }

   FTAGParams parDelta = parInput - params;
   parDelta = parDelta.abs();
   double parMSE = params.mse(parInput);

   cout << "Pr Input= " << prInput << "\nPar_Input= " << parInput << endl
        << "Par_Delta= " << parDelta << "\nPar_MSE= " << parMSE << endl
        << "Num Alignments=" << alignments.size() 
        << " Length=[" << args._maxLength/2 << ", " << args._maxLength << "]"
        << " Pr = " << pr  << " tdiff = " << tdiff
        << "\nParamEst =\n" << params << endl;

   if (logFile)
      makeEmGraph(emTrace, prInput, params, tlength, logFile);

   if (distFile)
   {
      distFile << "len  freq  tot  freqNC  totNC\n";
      for (size_t i = 1; i < dist.size(); ++i)
      {
         distFile << i << "  " << dist[i] << "  " << dist[0] << "  "
                  << dist_nc[i] << "  " << dist_nc[0] << "\n";
      }
   }
   return 0;
}