/**
 * Builds a UYiV candidate for every ordered pair of words (u, v) taken from
 * the given set (a word is also paired with itself) and returns the candidate
 * with the highest score.
 *
 * A pair whose position list is non-null but holds fewer than two entries is
 * dropped and its list is deleted. A pair whose position list is null, or
 * whose list of inside words is empty, still gets a candidate built but is
 * never selected. On equal scores the candidate found later wins.
 *
 * @param ws set of words
 * @return best UYiV, or an empty pointer when no pair qualifies
 */
shared_ptr<UYiV> Dencoder::getBestUYiV(unordered_set< shared_ptr<Word> > * ws) const
{
    shared_ptr<Sequence> sequence = getSeq();
    // Declared outside the loops so the previous candidate is only released
    // when the next one is assigned.
    shared_ptr<UYiV> candidate;
    shared_ptr<UYiV> winner;

    for (shared_ptr<Word> prefix : *ws)
    {
        for (shared_ptr<Word> suffix : *ws)
        {
            // May be null.
            list< std::pair<int,int> > * occurrences = sequence->wordPosPairList(prefix, suffix);
            bool noOccurrences = (occurrences == nullptr);

            // Fewer than two entries: discard the pair.
            if (!noOccurrences && occurrences->size() < 2)
            {
                delete occurrences;
                continue;
            }

            WordList * insideWords = sequence->obtainWordsInside(occurrences, prefix->getSize());
            // NOTE(review): both raw pointers are handed to UYiV and never freed
            // here; presumably UYiV takes ownership - confirm.
            candidate = make_shared<UYiV>(prefix, suffix, insideWords, occurrences);

            if (noOccurrences || insideWords->getSize() == 0)
            {
                continue;
            }

            if (score(winner) <= score(candidate))
            {
                winner = candidate;
            }
        }
    }

    return winner;
}
/** * Given the current best Yield and a word, try to find a Yield with the suffix * and prefix of this word that's better than the current one. * @param best current best yield * @param mr word */ void Dencoder::getUVFromMaximalRepeat(shared_ptr<UYiV> &best, shared_ptr<Word> mr) { shared_ptr<Word> u; shared_ptr<Word> v; shared_ptr<Sequence> seq = getSeq(); int size = mr->getSize(); shared_ptr<UYiV> newUYV; WordList* y; list <std::pair<int,int> > * wordPosPairs; for(int i = 1; i<size; ++i) { u = seq->createSubWord(mr, 0, i); for(int j = i; j<size; ++j) { v = seq->createSubWord(mr, j, size); wordPosPairs = seq->wordPosPairList(u,v); clock_t t; t = clock(); y = seq->obtainWordsInside(wordPosPairs, u->getSize()); t = clock() - t; printf("obtainWordsInside: It took me %ld clicks (%f seconds).\n",t,((float)t)/CLOCKS_PER_SEC); newUYV = make_shared<UYiV>(u, v, y, wordPosPairs); if(!(wordPosPairs == NULL || y->getSize() == 0)) { if(score(best)<score(newUYV)) { // cout << "Score best: " << score(best) << endl; best = newUYV; // cout << "Score New: " << score(newUYV) << endl; } } } // cout << "Score primer palabra: " << score(best) << endl; } }
/**
 * Replaces the given yield and generates the two productions it needs.
 *
 * Two non-terminals, I and N, are requested from symbol_. The yield is
 * replaced through replaceNonTerminal(uyv, N), and the following productions
 * are added to the grammar:
 *   - N -> u I v  (the symbols of u, the non-terminal I, the symbols of v)
 *   - I -> w1 2 w2 2 ...  (the symbols of each inside word, each followed by
 *     the value 2, which acts as the separator ("pipe") between words)
 *
 * An empty election queue is also registered for I in prodElectionOrder_.
 *
 * @param uyv yield to replace; its inside word list must not be null
 */
void Dencoder::replaceYield(shared_ptr<UYiV> uyv)
{
    shared_ptr<Word> prefix = uyv->getU();
    int * prefixSymbols = prefix->getSubSequence();
    shared_ptr<Word> suffix = uyv->getV();
    int * suffixSymbols = suffix->getSubSequence();
    int prefixLen = prefix->getSize();
    int suffixLen = suffix->getSize();

    WordList* y = uyv->getYList();
    assert(y!= NULL);
    vector< pair<shared_ptr<Word>, int> > insideWords = y->getWordVec();
    int insideSymbolCount = y->sumWordsLengthWithoutRepeats();

    // I is requested before N.
    int insideNonTerminal = symbol_->getNonTerminal();
    int yieldNonTerminal = symbol_->getNonTerminal();

    std::queue<int> emptyElectionOrder;
    prodElectionOrder_[insideNonTerminal] = emptyElectionOrder;

    replaceNonTerminal(uyv, yieldNonTerminal);

    // Right-hand side of N: u symbols + the I non-terminal + v symbols.
    int * yieldRhs = new int[prefixLen + 1 + suffixLen];
    // Right-hand side of I: inside symbols + one separator slot per word.
    int * insideRhs = new int[insideSymbolCount + insideWords.size()];

    for (int k = 0; k < prefixLen; ++k)
    {
        yieldRhs[k] = prefixSymbols[k];
    }
    yieldRhs[prefixLen] = insideNonTerminal;
    for (int k = 0; k < suffixLen; ++k)
    {
        yieldRhs[prefixLen + 1 + k] = suffixSymbols[k];
    }

    // Append every inside word followed by the separator value 2.
    int writePos = 0;
    for (const pair<shared_ptr<Word>, int> & entry : insideWords)
    {
        shared_ptr<Word> insideWord = entry.first;
        int * wordSymbols = insideWord->getSubSequence();
        int wordLen = insideWord->getSize();

        for (int k = 0; k < wordLen; ++k)
        {
            insideRhs[writePos + k] = wordSymbols[k];
        }
        insideRhs[writePos + wordLen] = 2;
        writePos += wordLen + 1;
    }

    // The length given for I is one less than the slots allocated above,
    // assuming the word set and the word vector hold the same number of
    // entries (TODO confirm); the trailing separator is then left out.
    // NOTE(review): the new[] arrays are handed to Production and never freed
    // here; presumably Production takes ownership - confirm.
    shared_ptr<Production> insideProd =
        make_shared<Production>(insideNonTerminal, insideRhs,
                                insideSymbolCount + y->getWordSet()->size() - 1);
    shared_ptr<Production> yieldProd =
        make_shared<Production>(yieldNonTerminal, yieldRhs, prefixLen + 1 + suffixLen);

    grammar_->addProd(insideProd);
    grammar_->addProd(yieldProd);
}