void RetManager::retrieval(unsigned retNum) { retDocID = new unsigned[retNum]; retDocScore = new float[retNum]; topDocs = new MinHeap(retNum); DecisionTree *DT = new DecisionTree(r,num,retNum); while((curDoc = findNextDoc())!=MaxNum) { DT->putDoc(curDoc,r); //float score = grade(); //if(score > topDocs->smallest) topDocs->push(curDoc,score); } int i,l,n; const float okapiK1=1.2; const float okapiB=0.2; DecisionTreeNode **blockList = DT->getBlockList(); vector<pair<unsigned,unsigned*> >::iterator it; for(i=0;i<DT->getBlockNum();i++) { n=blockList[i]->termScores.size(); for(it=blockList[i]->content->record.begin();it!=blockList[i]->content->record.end();it++) { unsigned* theTF=it->second; float score=0; float docLength = theIndex -> getDocLength(it->first); for(l=0;l<n;l++) { float tf = theTF[l]; float weight = ((okapiK1+1.0)*tf) / (okapiK1*(1.0-okapiB+okapiB*docLength/theIndex->docLengthAvg)+tf); score+=weight*blockList[i]->termScores[l]; } if(score > topDocs->smallest) topDocs->push(it->first,score); evalCounter++; } } retN = topDocs->n; for(i=retN-1;i>=0;i--) { retDocID[i] = topDocs->pop(retDocScore[i]); } delete(topDocs); delete(DT); }
uint RetManager::retrieval(unsigned retNum, unsigned* pages, profilerC& p) { // p.start(CONSTS::ALLQS); cout << retNum << endl; retDocID = new unsigned[retNum]; retDocScore = new float[retNum]; // p.start(CONSTS::STEP1); DecisionTree *DT = new DecisionTree(r,num,retNum); theHead = DT->getHead(); // p.end(CONSTS::STEP1); // cout << "qp processing" << endl; int i,l,n; // p.start(CONSTS::STEP2); curDoc = findNextDoc(); uint Totaldocs_decompressed = 1; while(curDoc < CONSTS::MAXD) { for(i=0; i<num; i++) if(r[i].curDocID<curDoc) r[i].curDocID = r[i].PR->nextGEQ(curDoc); DT->putDoc(curDoc,r);;//DT->putDoc(curDoc,r); curDoc = findNextDoc(); Totaldocs_decompressed ++; } // cout <<"Totaldocs_decompressed: "<< Totaldocs_decompressed <<endl; // p.end(CONSTS::STEP2); // cout << "qp processing done" << endl; // cout << "look up" << endl; const float okapiK1=1.2; const float okapiB=0.2; DecisionTreeNode **blockList = DT->getBlockList(); vector<pair<unsigned,unsigned*> >::iterator it; // p.start(CONSTS::STEP3); cout << "Number of Blocks: " <<DT->getBlockNum()<<endl; for(i=0; i<DT->getBlockNum(); i++) { cout << "i: "<< i << endl; n=blockList[i]->termScores.size(); cout << "n: "<< n << endl; int theSize = blockList[i]->content->record.size(); cout<< "theSize of current lock: "<<theSize << endl; if(theSize+retN>retNum) theSize = retNum-retN; cout<< "TopK: "<< retNum <<" retN: "<<retN<<" theSize needed to reach topK: " <<theSize<<endl; if(theSize==0) continue; topDocs = new MinHeap(theSize); cout<< "start doing look up in block: "<<i<< endl; for(it=blockList[i]->content->record.begin(); it!=blockList[i]->content->record.end(); it++) { unsigned* theTF=it->second; float score=0; float docLength = pages[it->first];//float docLength = theIndex -> getDocLength(it->first); for(l=0; l<n; l++) { float tf = theTF[l]; float weight = ((okapiK1+1.0)*tf) / (okapiK1 * (1.0-okapiB+okapiB * docLength /CONSTS::AVGD)+tf);//float weight = ((okapiK1+1.0)*tf) / (okapiK1*(1.0-okapiB+okapiB*docLength/theIndex->docLengthAvg)+tf); score+=weight*blockList[i]->termScores[l]; } if(score > topDocs->smallest) topDocs->push(it->first,score); evalCounter++; } cout<< "Doc evaluated by far: "<<evalCounter<<endl; cout<< "doing look up in block: "<<i<<" done"<< endl; // p.end(CONSTS::STEP3); for(l=retN+theSize-1; l>=retN; l--) { // cout << l << endl; retDocID[l] = topDocs->pop(retDocScore[l]); } retN+=theSize; cout << "retN: "<<retN<<endl; delete(topDocs); } delete(DT); cout << "look up done" << endl; // p.end(CONSTS::ALLQS); cout << "docs decompressed: "<< Totaldocs_decompressed << endl; return Totaldocs_decompressed; }