/**
 * Compute the score of a candidate UYiV, following the formulas given in the
 * project's written report.
 *
 * With
 *   p = number of word/position pairs of the candidate,
 *   q = number of entries in the Y list's word vector,
 *   k = size of U, l = size of V,
 * the value returned is
 *
 *   p*k + p*l + (Y lengths with repeats) - (Y lengths without repeats)
 *       - encoding - p - 1 - k - l - 1 - I - (q - 1)
 *
 * where `encoding` is currently fixed to 0 and `I` to 1. Terms added are
 * gains, terms subtracted are penalties.
 *
 * @param uyv candidate to evaluate
 * @return the score; -1 if uyv is null; 0 if the candidate has no Y list
 *         (that case trips an assertion first when assertions are enabled)
 */
double Dencoder::score(shared_ptr<UYiV> uyv) const
{
    if (!uyv) {
        return -1;
    }

    WordList* yList = uyv->getYList();
    assert(yList != NULL);
    if (!yList) {
        // Only reachable with assertions compiled out: a missing Y list scores 0.
        return 0;
    }

    const int occurrences = uyv->getWordsPosPairs()->size();   // p
    const int yWordCount  = yList->getWordVec().size();         // q
    const int uLength     = uyv->getU()->getSize();             // k
    const int vLength     = uyv->getV()->getSize();             // l

    // Fixed-cost terms of the formula.
    const int encodingCost   = 0;               // encoding of the new candidate is not counted yet
    const int insideRuleCost = 1;               // the `I` term
    const int pipeCost       = yWordCount - 1;  // one pipe between consecutive Y words

    const int result = occurrences * uLength
                     + occurrences * vLength
                     + yList->sumWordsLengthWithRepeats()
                     - yList->sumWordsLengthWithoutRepeats()
                     - encodingCost
                     - occurrences
                     - 1
                     - uLength
                     - vLength
                     - 1
                     - insideRuleCost
                     - pipeCost;

    // Sanity check: counting repeats can never yield a smaller sum.
    assert(yList->sumWordsLengthWithRepeats() >= yList->sumWordsLengthWithoutRepeats());

    return result;
}
/**
 * Replace the given yield in the sequence and add the productions that
 * describe it to the grammar.
 *
 * Two fresh non-terminals are requested from symbol_ (call them I and N, in
 * that order). An empty queue is registered for I in prodElectionOrder_, and
 * replaceNonTerminal(uyv, N) is invoked. Then two productions are built and
 * added to grammar_:
 *
 *   I -> y1 2 y2 2 ... yn      (every word of the Y list, separated by the
 *                               value 2, which the comments call "pipes")
 *   N -> u I v
 *
 * NOTE(review): the right-hand-side buffers are allocated with new[] and
 * handed to Production; who releases them is not visible from here.
 * NOTE(review): the I buffer is sized with getWordVec().size() while the
 * length passed to Production uses getWordSet()->size() - 1; presumably both
 * containers hold the same number of words, so the declared length simply
 * excludes the trailing separator -- confirm.
 *
 * @param uyv the yield to replace; its Y list must not be null
 */
void Dencoder::replaceYield(shared_ptr<UYiV> uyv)
{
    typedef vector< pair<shared_ptr<Word>, int> > WordVec;

    // Left and right context of the yield.
    shared_ptr<Word> prefix = uyv->getU();
    const int *prefixSymbols = prefix->getSubSequence();
    shared_ptr<Word> suffix = uyv->getV();
    const int *suffixSymbols = suffix->getSubSequence();
    const int prefixLen = prefix->getSize();
    const int suffixLen = suffix->getSize();

    // The alternatives that can appear between the two contexts.
    WordList *yields = uyv->getYList();
    assert(yields != NULL);
    WordVec words = yields->getWordVec();
    const int insideLen = yields->sumWordsLengthWithoutRepeats();

    // Fresh non-terminals: first the one for the alternatives, then the one
    // for the "u I v" rule.
    int insideNT = symbol_->getNonTerminal();
    int outerNT = symbol_->getNonTerminal();

    prodElectionOrder_[insideNT] = std::queue<int>();

    replaceNonTerminal(uyv, outerNT);

    // u symbols + the inside non-terminal + v symbols.
    int *outerRhs = new int[prefixLen + 1 + suffixLen];
    // All word symbols plus one separator slot per word.
    int *insideRhs = new int[insideLen + words.size()];

    // Fill the outer rule: u, then I, then v.
    int *outerCursor = outerRhs;
    for (int n = 0; n < prefixLen; ++n) {
        *outerCursor++ = prefixSymbols[n];
    }
    *outerCursor++ = insideNT;
    for (int n = 0; n < suffixLen; ++n) {
        *outerCursor++ = suffixSymbols[n];
    }

    // Fill the inside rule: each word followed by the separator value 2.
    int *insideCursor = insideRhs;
    for (WordVec::size_type w = 0; w < words.size(); ++w) {
        const shared_ptr<Word> &word = words[w].first;
        const int *wordSymbols = word->getSubSequence();
        const int wordLen = word->getSize();
        for (int s = 0; s < wordLen; ++s) {
            *insideCursor++ = wordSymbols[s];
        }
        *insideCursor++ = 2;
    }

    // Create the productions; the inside rule's length leaves out the last separator.
    shared_ptr<Production> insideProd =
        make_shared<Production>(insideNT, insideRhs, insideLen + yields->getWordSet()->size() - 1);
    shared_ptr<Production> outerProd =
        make_shared<Production>(outerNT, outerRhs, prefixLen + 1 + suffixLen);

    grammar_->addProd(insideProd);
    grammar_->addProd(outerProd);
}