void DynSuffixArray::Delete(unsigned index, unsigned num2del) { int ltmp = m_L->at(m_ISA->at(index)); int true_pos = LastFirstFunc(m_ISA->at(index)); // track cycle shift (newIndex - 1) for(size_t q = 0; q < num2del; ++q) { int row = m_ISA->at(index); // gives the position of index in SA and m_F //std::cerr << "row = " << row << std::endl; //std::cerr << "SA[r]/index = " << m_SA->at(row) << "/" << index << std::endl; true_pos -= (row <= true_pos ? 1 : 0); // track changes m_L->erase(m_L->begin() + row); m_F->erase(m_F->begin() + row); m_ISA->erase(m_ISA->begin() + index); // order is important for (vuint_t::iterator itr = m_ISA->begin(); itr != m_ISA->end(); ++itr) { if((int)*itr > row) --(*itr); } m_SA->erase(m_SA->begin() + row); for (vuint_t::iterator itr = m_SA->begin(); itr != m_SA->end(); ++itr) { if(*itr > index) --(*itr); } } m_L->at(m_ISA->at(index))= ltmp; Reorder(LastFirstFunc(m_ISA->at(index)), true_pos); //PrintAuxArrays(); }
void DynSuffixArray::Reorder(unsigned j, unsigned jprime) { //cerr << "j=" << j << "\tj'=" << jprime << endl; while(j != jprime) { //cerr << "j=" << j << "\tj'=" << jprime << endl; int tmp, isaIdx(-1); int new_j = LastFirstFunc(j); // for SA, L, and F, the element at pos j is moved to j' tmp = m_L->at(j); // L m_L->at(j) = m_L->at(jprime); m_L->at(jprime) = tmp; tmp = m_SA->at(j); // SA m_SA->at(j) = m_SA->at(jprime); m_SA->at(jprime) = tmp; // all ISA values between (j...j'] decremented for(size_t i = 0; i < m_ISA->size(); ++i) { if((m_ISA->at(i) == j) && (isaIdx == -1)) isaIdx = i; // store index of ISA[i] = j if((m_ISA->at(i) > j) && (m_ISA->at(i) <= jprime)) --(*m_ISA)[i]; } // replace j with j' in ISA //isa[isaIdx] = jprime; m_ISA->at(isaIdx) = jprime; j = new_j; jprime = LastFirstFunc(jprime); } }
// Inserts the tokens of newSent into the index at text position newIndex.
//
// Stage 2: find the row k of the cycle that starts at newIndex (row of
// position 0 is used when newIndex is the current size), remember its L
// value and overwrite it with the last token of the new sentence.
// Stage 3: walk the new sentence backwards; for every token compute the
// target row via LastFirstFunc, insert the token into F, its predecessor
// (or the remembered L value for the first token) into L, and a new
// SA/ISA entry, renumbering the existing SA and ISA values on the way.
// Stage 4: call Reorder() with the tracked position and the computed
// position of cycle (newIndex - 1).
//
// newSent  - tokens to insert
// newIndex - text position at which the first new token will sit; CHECKed to
//            be at most the current SA size
void DynSuffixArray::Insert(vuint_t* newSent, unsigned newIndex)
{
  CHECK(newIndex <= m_SA->size());

  int kprime = -1;
  // k is the index of the cycle that starts at newIndex.
  int k = -1;
  if (newIndex < m_SA->size()) {
    k = m_ISA->at(newIndex);
  } else {
    k = m_ISA->at(0);
  }

  // Tracks the cycle shift (newIndex - 1) while rows are added.
  int trackedPos = LastFirstFunc(k);

  // Save the L value of row k, then make cycle k end with the correct word.
  int savedL = m_L->at(k);
  m_L->at(k) = newSent->at(newSent->size() - 1);

  // All words of the new sentence are inserted backwards.
  for (int pos = newSent->size() - 1; pos >= 0; --pos) {
    // Find the cycle that starts with (newIndex - 1).
    kprime = LastFirstFunc(k);
    // Only the terminal char can be 0, so new vocabulary goes at the end.
    if (kprime <= 0) {
      kprime = m_SA->size();
    }

    // A row inserted at or above the tracked position pushes it down.
    if (kprime <= trackedPos) {
      ++trackedPos;
    }

    // F receives the token itself.
    m_F->insert(m_F->begin() + kprime, newSent->at(pos));

    // L receives the preceding token, or the saved value for the first one.
    int lWord;
    if (pos == 0) {
      lWord = savedL;
    } else {
      lWord = newSent->at(pos - 1);
    }
    m_L->insert(m_L->begin() + kprime, lWord);

    // Make room for text position newIndex in SA, then add the new row.
    for (size_t i = 0; i < m_SA->size(); ++i) {
      if ((*m_SA)[i] >= newIndex) {
        ++(*m_SA)[i];
      }
    }
    m_SA->insert(m_SA->begin() + kprime, newIndex);

    // Make room for row kprime in ISA, then add the new entry.
    for (size_t i = 0; i < m_ISA->size(); ++i) {
      if (static_cast<int>((*m_ISA)[i]) >= kprime) {
        ++(*m_ISA)[i];
      }
    }
    m_ISA->insert(m_ISA->begin() + newIndex, kprime);

    k = kprime;
  }

  // Stage 4: actual position vs computed position of cycle (newIndex - 1).
  Reorder(trackedPos, LastFirstFunc(kprime));
}
void DynSuffixArray::Reorder(unsigned j, unsigned jprime) { set<pair<unsigned, unsigned> > seen; while(j != jprime) { // this 'seenit' check added for data with many loops. will remove after double // checking. bool seenit = seen.insert(std::make_pair(j, jprime)).second; if(seenit) { for(size_t i=1; i < m_SA->size(); ++i) { if(m_corpus->at(m_SA->at(i)) < m_corpus->at(m_SA->at(i-1))) { cerr << "PROBLEM WITH SUFFIX ARRAY REORDERING. EXITING...\n"; exit(1); } } return; } //cerr << "j=" << j << "\tj'=" << jprime << endl; int isaIdx(-1); int new_j = LastFirstFunc(j); CHECK(j <= jprime); // for SA and L, the element at pos j is moved to pos j' m_L->insert(m_L->begin() + jprime + 1, m_L->at(j)); m_L->erase(m_L->begin() + j); m_SA->insert(m_SA->begin() + jprime + 1, m_SA->at(j)); m_SA->erase(m_SA->begin() + j); // all ISA values between (j...j'] decremented for(size_t i = 0; i < m_ISA->size(); ++i) { if((m_ISA->at(i) == j) && (isaIdx == -1)) isaIdx = i; // store index of ISA[i] = j if((m_ISA->at(i) > j) && (m_ISA->at(i) <= jprime)) --(*m_ISA)[i]; } // replace j with j' in ISA //isa[isaIdx] = jprime; m_ISA->at(isaIdx) = jprime; j = new_j; jprime = LastFirstFunc(jprime); } //cerr << "j=" << j << "\tj'=" << jprime << endl; }