bool PhraseScorer::skipTo(int32_t target) { for (PhrasePositions* pp = first; more && pp != NULL; pp = pp->_next) { more = pp->skipTo(target); } if (more) sort(); // re-sort return doNext(); }
//Func - Counts how many times all PhrasePositions in the list line up on the
//       same position and returns that count as the phrase frequency
//Pre  - first != NULL
//       last  != NULL
//       pq    != NULL
//       pq is empty
//Post - The number of matches found has been returned, converted to qreal
qreal ExactPhraseScorer::phraseFreq() {
    CND_PRECONDITION(first != NULL,"first is NULL");
    CND_PRECONDITION(last != NULL,"last is NULL");
    CND_PRECONDITION(pq != NULL,"pq is NULL");
    CND_PRECONDITION(pq->size()==0,"pq is not empty");

    //Position every node on its first term position and hand it to the
    //queue, which is then used to rebuild the list
    PhrasePositions* node = first;
    while (node != NULL) {
        node->firstPosition();
        pq->put(node);
        node = node->_next;
    }

    //Rebuild the linked list (first .. last) from the contents of pq
    pqToList();

    int32_t matches = 0;

    for (;;) {
        //As long as the head of the list is behind the tail, keep advancing
        //the head; once it has caught up it is rotated to the tail
        while (first->position < last->position) {
            do {
                if (!first->nextPosition()) {
                    //The head ran out of positions: no further match possible
                    return static_cast<qreal>(matches);
                }
            } while (first->position < last->position);

            //Make the current first node the last node in the list
            firstToLast();
        }

        //Head is no longer behind tail: a match has been found
        ++matches;

        //Continue only while the tail can still be advanced
        if (!last->nextPosition()) {
            break;
        }
    }

    return static_cast<qreal>(matches);
}
//Func - Returns the frequency of the phrase, where every window whose length
//       is at most slop contributes 1 / (length + 1)
//Pre  - first != NULL
//       last  != NULL
//       pq    != NULL
//Post - The accumulated frequency has been returned
//NOTE(review): pq->top() is dereferenced right after pq->pop() without a NULL
//       check, so this presumably relies on the list holding at least two
//       PhrasePositions -- confirm against the callers.
float_t SloppyPhraseScorer::phraseFreq() {
    CND_PRECONDITION(first != NULL,"first is NULL");
    CND_PRECONDITION(last != NULL,"last is NULL");
    CND_PRECONDITION(pq != NULL,"pq is NULL");

    //Start from an empty queue
    pq->clear();

    //Largest position seen so far; marks the end of the current window
    int32_t windowEnd = 0;

    //Position every node on its first term position, track the maximum
    //position and load the node into the queue
    PhrasePositions* current = first;
    while (current != NULL) {
        current->firstPosition();
        if (windowEnd < current->position) {
            windowEnd = current->position;
        }
        pq->put(current);
        current = current->_next;
    }

    float_t total = 0.0f;
    bool exhausted = false;

    //exhausted starts out false, so the body always runs at least once
    while (!exhausted) {
        //Take the top element out of the queue
        current = pq->pop();

        int32_t windowStart = current->position;
        const int32_t nextTop = pq->top()->position;

        //Advance current for as long as it does not pass the new top,
        //remembering the last position that was still within reach
        int32_t pos = windowStart;
        while (pos <= nextTop) {
            windowStart = pos;
            if (!current->nextPosition()) {
                //ran out of a term -- this is the final round
                exhausted = true;
                break;
            }
            pos = current->position;
        }

        //Score the window if it fits within the allowed slop;
        //longer matches are penalized
        const int32_t matchLength = windowEnd - windowStart;
        if (matchLength <= slop) {
            total += 1.0 / (matchLength + 1);
        }

        //The advanced element may extend the window
        if (windowEnd < current->position) {
            windowEnd = current->position;
        }

        //Put the element back to restore the queue
        pq->put(current);
    }

    return total;
}
void PhraseScorer::init() { for (PhrasePositions* pp = first; more && pp != NULL; pp = pp->_next) more = pp->next(); if(more) sort(); }