Esempio n. 1
0
	qreal ExactPhraseScorer::phraseFreq(){
	// Purpose : Computes the frequency of the phrase.
	// Requires: first != NULL
	//           last  != NULL
	//           pq    != NULL
	//           the PhraseQueue pq holds no entries
	// Returns : the number of matches found, converted to qreal.

		CND_PRECONDITION(first != NULL,"first is NULL");
		CND_PRECONDITION(last  != NULL,"last is NULL");
		CND_PRECONDITION(pq    != NULL,"pq is NULL");
		CND_PRECONDITION(pq->size()==0,"pq is not empty");

		// Step 1: walk the linked list of PhrasePositions, move each node to
		// its first term position and hand it to the queue.
		PhrasePositions* node = first;
		while (node != NULL) {
			node->firstPosition();
			pq->put(node);
			node = node->_next;
		}

		// Step 2: rebuild the linked list from the queue. pqToList() is
		// expected to reset first/last itself before relinking; every node is
		// still referenced by pq at this point, so nothing is lost.
		pqToList();

		// Step 3: count the positions at which all terms line up.
		int32_t matches = 0;

		for (;;) {
			// While the head of the list lags behind the tail, push it forward.
			while (first->position < last->position){
				do{
					if (!first->nextPosition()){
						// The head term has no more positions: counting is over.
						return static_cast<qreal>(matches);
					}
				} while (first->position < last->position);
				// The former head caught up; rotate it to the end of the list.
				firstToLast();
			}

			// Head and tail agree here, which the algorithm treats as a match.
			++matches;

			// Advance the tail; stop once it has no further positions.
			if (!last->nextPosition()){
				break;
			}
		}

		return static_cast<qreal>(matches);
	}
  float_t SloppyPhraseScorer::phraseFreq() {
  //Func - Returns the freqency of the phrase
  //Pre  - first != NULL
  //       last  != NULL
  //       pq    != NULL
  //Post - The frequency of the phrase has been returned

	  CND_PRECONDITION(first != NULL,"first is NULL");
	  CND_PRECONDITION(last  != NULL,"last is NULL");
	  CND_PRECONDITION(pq    != NULL,"pq is NULL");

	  //Clear the PhraseQueue pq;
      pq->clear();

      int32_t end = 0;

	  //declare iterator
      PhrasePositions* pp = NULL;

      // build pq from list

	  //Sort the list of PhrasePositions using pq
      for (pp = first; pp != NULL; pp = pp->_next) {
		  //Read the first TermPosition of the current PhrasePositions pp
          pp->firstPosition();
		  //Check if the position of the pp is bigger than end
		  if (pp->position > end){
              end = pp->position;
		      }
          //Store the current PhrasePositions pp into the PhraseQueue pp
          pq->put(pp);		
          }

     float_t freq = 0.0f;
     
	 bool done = false;
     
	 do {
		 //Pop a PhrasePositions pp from the PhraseQueue pp
         pp = pq->pop();
         //Get start position
         int32_t start = pp->position;
		 //Get next position
		 int32_t next = pq->top()->position;

         for (int32_t pos = start; pos <= next; pos = pp->position) {
             //advance pp to min window
             start = pos;				  
             
             if (!pp->nextPosition()) {
				 //ran out of a term -- done
                 done = true;
                 break;
                 }
             }
         
         //Calculate matchLength
		 int32_t matchLength = end - start;
         //Check if matchLength is smaller than slop
         if (matchLength <= slop){
             // penalize longer matches
             freq += 1.0 / (matchLength + 1);	  
             }

		 if (pp->position > end){
             end = pp->position;
             }
         
         //restore pq
		 pq->put(pp);				  
	 }while (!done);

     return freq;
  }