Beispiel #1
0
/**
 * Cross every ordered pair (u, v) of words in the set, build the UYiV yield
 * for each pair and return the best-scoring one.
 *
 * A pair is only considered when the sequence reports at least two position
 * pairs for it and the list of words found inside is non-empty. The
 * comparison uses <=, so on equal scores the candidate visited last wins.
 *
 * @param ws set of words; each word is paired with every word of the set,
 *           itself included
 * @return best UYiV, or an empty pointer when no pair qualifies
 */
shared_ptr<UYiV> Dencoder::getBestUYiV(unordered_set< shared_ptr<Word> > * ws) const
{
  shared_ptr<Sequence> seq = getSeq();
  shared_ptr<UYiV> best;

  for(shared_ptr<Word> u : *ws)
  {
    for(shared_ptr<Word> v : *ws)
    {
      // wordPosPairList may hand back NULL.
      list <std::pair<int,int> > * wordPosPairs = seq->wordPosPairList(u,v);

      // Reject missing lists and lists with fewer than two occurrences.
      // The NULL test must short-circuit on this side so that a NULL list is
      // never forwarded to obtainWordsInside / UYiV.
      if(wordPosPairs == NULL || wordPosPairs->size() < 2)
      {
        delete wordPosPairs; // deleting a null pointer is a no-op
        continue;
      }

      WordList* y = seq->obtainWordsInside(wordPosPairs, u->getSize());

      // The candidate is built before the emptiness check, as before, so that
      // y and wordPosPairs are handed to a UYiV in every case.
      // NOTE(review): presumably UYiV takes over both pointers - confirm.
      shared_ptr<UYiV> candidate = make_shared<UYiV>(u, v, y, wordPosPairs);

      if(y != NULL && y->getSize() != 0 && score(best) <= score(candidate))
      {
        best = candidate;
      }
    }
  }
  return best;
}
Beispiel #2
0
/**
 * Given the current best yield and a word, try every prefix/suffix split of
 * that word as (u, v) and keep the yield that scores strictly higher than the
 * current best.
 *
 * For i in [1, size) the prefix is createSubWord(mr, 0, i); for j in [i, size)
 * the suffix is createSubWord(mr, j, size). A candidate is only compared when
 * its position-pair list is not NULL and its list of inside words is
 * non-empty.
 *
 * The time spent in obtainWordsInside is printed to stdout for every pair.
 *
 * @param best current best yield; overwritten in place when a better one is
 *             found
 * @param mr   word whose prefixes and suffixes are tried
 */
void Dencoder::getUVFromMaximalRepeat(shared_ptr<UYiV> &best, shared_ptr<Word> mr)
{
  shared_ptr<Sequence> seq = getSeq();
  int size = mr->getSize();

  for(int i = 1; i < size; ++i)
  {
    shared_ptr<Word> u = seq->createSubWord(mr, 0, i);
    for(int j = i; j < size; ++j)
    {
      shared_ptr<Word> v = seq->createSubWord(mr, j, size);
      list <std::pair<int,int> > * wordPosPairs = seq->wordPosPairList(u,v);

      // Time the call to obtainWordsInside.
      clock_t t = clock();
      WordList* y = seq->obtainWordsInside(wordPosPairs, u->getSize());
      t = clock() - t;
      // clock_t has an implementation-defined type; cast explicitly so the
      // argument always matches the %ld conversion.
      printf("obtainWordsInside: It took me %ld clicks (%f seconds).\n",
             static_cast<long>(t), static_cast<float>(t)/CLOCKS_PER_SEC);

      // Built before the validity check, as before, so that y and
      // wordPosPairs are handed to a UYiV in every case.
      // NOTE(review): presumably UYiV takes over both pointers - confirm.
      shared_ptr<UYiV> candidate = make_shared<UYiV>(u, v, y, wordPosPairs);

      if(wordPosPairs != NULL && y != NULL && y->getSize() != 0
         && score(best) < score(candidate))
      {
        best = candidate;
      }
    }
  }
}
Beispiel #3
0
/**
 * Replace the given yield and add the two productions that describe it.
 *
 * Two non-terminals, I and N, are requested from symbol_. An empty queue is
 * stored under prodElectionOrder_[I], the yield is replaced through
 * replaceNonTerminal(uyv, N), and two productions are added to the grammar
 * in this order:
 *   I -> the symbols of every word in the Y list, each word followed by the
 *        separator symbol 2
 *   N -> symbols of u, then I, then symbols of v
 *
 * The length passed for the I rule is
 * sumWordsLengthWithoutRepeats() + getWordSet()->size() - 1.
 * NOTE(review): this drops the trailing separator only if the word set and
 * the word vector hold the same number of entries - confirm.
 *
 * @param uyv yield to replace; its Y list must not be NULL (asserted)
 */
void Dencoder::replaceYield(shared_ptr<UYiV> uyv)
{
  shared_ptr<Word> prefix = uyv->getU();
  int * prefixSyms = prefix->getSubSequence();
  shared_ptr<Word> suffix = uyv->getV();
  int * suffixSyms = suffix->getSubSequence();
  int prefixLen = prefix->getSize();
  int suffixLen = suffix->getSize();

  WordList* y = uyv->getYList();
  assert(y!= NULL);
  vector< pair<shared_ptr<Word>, int> > wordEntries = y->getWordVec();

  int insideLen = y->sumWordsLengthWithoutRepeats();

  int I = symbol_->getNonTerminal();
  int N = symbol_->getNonTerminal();

  // The new I non-terminal starts with an empty election queue.
  prodElectionOrder_[I] = std::queue<int>();

  replaceNonTerminal(uyv, N);

  // Right-hand side of N: u symbols + the I non-terminal + v symbols.
  int * nRHS = new int[prefixLen+1+suffixLen];
  // Right-hand side of I: inside symbols + one separator slot per word.
  int * iRHS = new int[insideLen+wordEntries.size()];

  // N -> u I v
  for(int k = 0; k < prefixLen; ++k)
  {
    nRHS[k] = prefixSyms[k];
  }
  nRHS[prefixLen] = I;
  const int suffixStart = prefixLen+1;
  for(int k = 0; k < suffixLen; ++k)
  {
    nRHS[suffixStart+k] = suffixSyms[k];
  }

  // I -> every inside word, each followed by the separator symbol.
  const int separator = 2;
  int writePos = 0;
  for(const pair<shared_ptr<Word>, int> &entry : wordEntries)
  {
    const shared_ptr<Word> &word = entry.first;
    int * wordSyms = word->getSubSequence();
    int wordLen = word->getSize();
    for(int k = 0; k < wordLen; ++k)
    {
      iRHS[writePos+k] = wordSyms[k];
    }
    iRHS[writePos+wordLen] = separator;
    writePos += wordLen+1;
  }

  // Build both productions first, then register them (I rule, then N rule).
  shared_ptr<Production> insideProd = make_shared<Production>(I, iRHS, insideLen+y->getWordSet()->size()-1);
  shared_ptr<Production> yieldProd = make_shared<Production>(N, nRHS, prefixLen+1+suffixLen);
  grammar_->addProd(insideProd);
  grammar_->addProd(yieldProd);

}