int main(int argc, char** argv) { FTAGArgs args; args.getArgs(argc, argv); srand48(args._seed); cout << "SEED=" << args._seed << endl; ofstream ofile(args._outFile.c_str()); if (!ofile) { cerr << "Specifiy output file with ofile=<path>" << endl; exit(1); } ofstream pfile(args._fpFileOut.c_str()); AxtReader ar; try { ar.read(args._inFile); const vector<AxtPair>& wholeThing = ar.getAlignments(); Pairs allAlignments; for (size_t i = 0; i < wholeThing.size(); ++i) { allAlignments.push_back(wholeThing[i].getAlignment()); } Pairs alignments = ar.sample(args._numPairs, args._maxLength / 2, args._maxLength, 0, false); FTAGParams parAll = estParams(allAlignments, args._symmetric); FTAGParams parInput = estParams(alignments, args._symmetric); cout << "From Whole File: " << parAll << endl; cout << "From input: " << parInput << endl; ofile << alignments; if (pfile.is_open()) { pfile << parInput; } } catch(string message) { cerr << message << endl; return 1; } return 0; }
int main(int argc, char** argv) { FTAGArgs args; args.getArgs(argc, argv); srand48(args._seed); cout << "SEED=" << args._seed << endl; FTAGParams params; ifstream pfile(args._fpFile.c_str()); if (!pfile) { cout << "Specify params with fpfile=<path>" << endl; } pfile >> params; ContextModel em; em.setDoubleJC(params); TransitionModel tm; tm.setSimplified(params); FTAGGen sGen; sGen.setEmissionModel(em); sGen.setTransitionModel(tm); string a,b; deque<Trace> trace; sGen.genAlignment(a, b, trace, true, 10000); if (drand48() < args._flipProb) { reverse(a.begin(), a.end()); reverse(b.begin(), b.end()); } cout << a << endl << b << endl; // copy(trace.begin(), trace.end(), ostream_iterator<Trace>(cout, "\n")); for (deque<Trace>::iterator i = trace.begin(); i != trace.end(); ++i) cout << *i << endl; cout << trace.size() << " state transitions used" << endl; return 0; }
int main(int argc, char** argv) { FTAGArgs args; args.getArgs(argc, argv); srand48(args._seed); cout << "SEED=" << args._seed << endl; FTAGParams params; try { ifstream pfile(args._fpFile.c_str()); if (!pfile) { cout << "Specify params with fpfile=<path>" << endl; } pfile >> params; } catch(string message) { cout << message << endl; return 1; } ContextModel em; em.setDoubleJC(params); TransitionModel tm; tm.setSimplified(params); AGDOptimizer optimizer; optimizer.setFixed(params); optimizer.setModels(em, tm); FTAGParams offset(params); offset.setAll(args._offset); FTAGParams parSeed(params); parSeed.setSymmetric(args._symmetric); parSeed.randomizeNonFixed(); optimizer.setSavedInit(parSeed); optimizer.setRelative(args._relative); optimizer.setRandomOrder(args._randOrder); optimizer.setLoopParams(args._numOptTrials, args._maxOptIt, args._optThreshold, offset); double mse = optimizer.optimize(); double mymse = params.mse(optimizer.getParams()); TransitionModel tmEst; tmEst.setSimplified(optimizer.getParams()); cout << "ORIGINAL\n" << tm << "\nESTIMATED\n" << tmEst << endl << endl; cout << "ORIGINAL\n" << params << "\nNEW\n" << optimizer.getParams() << "\nDELTA\n" << params - optimizer.getParams() << endl; cout << "MSE=" << mse << endl; return 0; }
int main(int argc, char** argv) { FTAGArgs args; args.getArgs(argc, argv); srand48(args._seed); cout << "SEED=" << args._seed << endl; ofstream logFile; FTAGTrain::EMTrace emTrace; if (!args._outFile.empty()) { logFile.open(args._outFile.c_str()); } ofstream distFile; vector<size_t> dist, dist_nc; if (!args._distFile.empty()) { distFile.open(args._distFile.c_str()); dist = vector<size_t>(args._maxLength, 0); dist_nc = vector<size_t>(args._maxLength, 0); } AxtReader ar; FTAGParams parSeed; try { ar.read(args._inFile); ifstream pfile(args._fpFile.c_str()); if (!pfile) { cout << "Specify params with fpfile=<path>" << endl; } pfile >> parSeed; } catch(string message) { cout << message << endl; return 1; } if (args._context == false) { parSeed.setRMD(0.); parSeed.setRMI(0.); parSeed.setPCD(1.); parSeed.setPCI(1.); parSeed.setRMDFixed(true); parSeed.setRMIFixed(true); parSeed.setPCDFixed(true); parSeed.setPCIFixed(true); } // vector<pair<string, string> > alignments = filter(ar.getAlignments(), // minLength, maxLength); const std::vector<AxtPair>& wholeThing = ar.getAlignments(); vector<pair<string, string> > allAlignments; for (size_t i = 0; i < wholeThing.size(); ++i) { allAlignments.push_back(wholeThing[i].getAlignment()); } vector<pair<string, string> > alignments = ar.sample(args._numPairs, args._maxLength / 2, args._maxLength, 0, false); for (size_t i = 0; i < alignments.size(); ++i) { cout << i << "a) " << alignments[i].first << endl << i << "b) " << alignments[i].second << endl; } FTAGParams parAll = estParams(allAlignments); FTAGParams parInput = estParams(alignments); ContextModel emInput; emInput.setDoubleJC(parInput); TransitionModel tmInput; tmInput.setSimplified(parInput); parSeed.setSymmetric(args._symmetric); parSeed.setUniGap(args._unigap); parSeed.randomizeNonFixed(); cout << "From Whole File: " << parAll << endl; cout << "From input: " << parInput << endl; cout << "Random Seed: " << parSeed << endl; FTAGParams offset; offset.setAll(args._offset); AGDOptimizer optimizer; optimizer.setRelative(args._relative); optimizer.setSavedInit(parSeed); optimizer.setLoopParams(args._numOptTrials, args._maxOptIt, args._optThreshold, offset); EmissionEstimator ee; TransitionEstimator te; ee.setBias(args._emBias); te.setBias(args._tmBias); FTAGTrain* trainer = new FTAGTrain(); trainer->initialize(optimizer, ee, te); trainer->setSequences(alignments, args._winSize); trainer->emLoop(args._maxEMIt, args._emThreshold, args._emConvRepeats, args._emCRThreshold, &emTrace); FTAGParams params = trainer->getParams(); TransitionModel tm; ContextModel em; tm.setSimplified(params); em.setDoubleJC(params); delete trainer; trainer = NULL; double pr = 0.; double prInput = 0.; double tdiff = 0; size_t tlength = 0; for (size_t i = 0; i < alignments.size(); ++i) { FTAGModel ftag; ftag.setSequences(alignments[i].first, alignments[i].second, args._winSize, args._winSize); ftag.setTransitionModel(tm); ftag.setEmissionModel(em); pr += log(ftag.forward()); string a,b; deque<Trace> trace; ftag.viterbi(); ftag.viterbiTrace(a, b, trace); cout << i << "a) " << a << endl << i << "b) " << b << endl; AxtPair apEstimated(a, b); AxtPair apOriginal(alignments[i].first, alignments[i].second); size_t adiff = apOriginal.diff(apEstimated); ftag.setTransitionModel(tmInput); ftag.setEmissionModel(emInput); prInput += log(ftag.forward()); tdiff += adiff; tlength += alignments[i].first.length(); makeLenDistSimpleSymmetric(trace, dist, dist_nc); } FTAGParams parDelta = parInput - params; parDelta = parDelta.abs(); double parMSE = params.mse(parInput); cout << "Pr Input= " << prInput << "\nPar_Input= " << parInput << endl << "Par_Delta= " << parDelta << "\nPar_MSE= " << parMSE << endl << "Num Alignments=" << alignments.size() << " Length=[" << args._maxLength/2 << ", " << args._maxLength << "]" << " Pr = " << pr << " tdiff = " << tdiff << "\nParamEst =\n" << params << endl; if (logFile) makeEmGraph(emTrace, prInput, params, tlength, logFile); if (distFile) { distFile << "len freq tot freqNC totNC\n"; for (size_t i = 1; i < dist.size(); ++i) { distFile << i << " " << dist[i] << " " << dist[0] << " " << dist_nc[i] << " " << dist_nc[0] << "\n"; } } return 0; }