Пример #1
0
void RetManager::retrieval(unsigned retNum)
{
	retDocID = new unsigned[retNum];
	retDocScore = new float[retNum];
	topDocs = new MinHeap(retNum);
	DecisionTree *DT = new DecisionTree(r,num,retNum);
	
	while((curDoc = findNextDoc())!=MaxNum)
	{
		DT->putDoc(curDoc,r);
		//float score = grade();
		//if(score > topDocs->smallest) topDocs->push(curDoc,score);
	}
	int i,l,n;
	const float okapiK1=1.2;
	const float okapiB=0.2;
	DecisionTreeNode **blockList = DT->getBlockList();
	vector<pair<unsigned,unsigned*> >::iterator it;
	for(i=0;i<DT->getBlockNum();i++)
	{
		n=blockList[i]->termScores.size();
		for(it=blockList[i]->content->record.begin();it!=blockList[i]->content->record.end();it++)
		{
			unsigned* theTF=it->second;
			float score=0;
			float docLength = theIndex -> getDocLength(it->first);
			for(l=0;l<n;l++)
			{
				float tf = theTF[l];
				float weight = ((okapiK1+1.0)*tf) / (okapiK1*(1.0-okapiB+okapiB*docLength/theIndex->docLengthAvg)+tf);
				score+=weight*blockList[i]->termScores[l];
			}
			if(score > topDocs->smallest) topDocs->push(it->first,score);
			evalCounter++;
		}
	}

	retN = topDocs->n;
	for(i=retN-1;i>=0;i--)
	{
		retDocID[i] = topDocs->pop(retDocScore[i]);
	}
	delete(topDocs);
	delete(DT);
}
Пример #2
0
uint RetManager::retrieval(unsigned retNum, unsigned* pages, profilerC& p)
{
    // p.start(CONSTS::ALLQS);
    cout << retNum << endl;
    retDocID = new unsigned[retNum];
    retDocScore = new float[retNum];
    // p.start(CONSTS::STEP1);
    DecisionTree *DT = new DecisionTree(r,num,retNum);
    theHead = DT->getHead();
    // p.end(CONSTS::STEP1);
    // cout << "qp processing" << endl;
    int i,l,n;
    // p.start(CONSTS::STEP2);
    curDoc = findNextDoc();
    uint Totaldocs_decompressed = 1;
    while(curDoc < CONSTS::MAXD)
    {
        for(i=0; i<num; i++) if(r[i].curDocID<curDoc) r[i].curDocID = r[i].PR->nextGEQ(curDoc);
        DT->putDoc(curDoc,r);;//DT->putDoc(curDoc,r);
        curDoc = findNextDoc();
        Totaldocs_decompressed ++;
    }
    // cout <<"Totaldocs_decompressed: "<< Totaldocs_decompressed <<endl;
    // p.end(CONSTS::STEP2);
    // cout << "qp processing done" << endl;

    // cout << "look up" << endl;
    const float okapiK1=1.2;
    const float okapiB=0.2;
    DecisionTreeNode **blockList = DT->getBlockList();
    vector<pair<unsigned,unsigned*> >::iterator it;
    // p.start(CONSTS::STEP3);
    cout << "Number of Blocks: " <<DT->getBlockNum()<<endl;
    for(i=0; i<DT->getBlockNum(); i++)
    {
        cout << "i: "<< i << endl;
        n=blockList[i]->termScores.size();
        cout << "n: "<< n << endl;
        int theSize = blockList[i]->content->record.size();
        cout<< "theSize of current lock: "<<theSize << endl;
        if(theSize+retN>retNum) theSize = retNum-retN;
        cout<< "TopK: "<< retNum <<" retN: "<<retN<<" theSize needed to reach topK: " <<theSize<<endl;
        if(theSize==0) continue;
        topDocs = new MinHeap(theSize);
        cout<< "start doing look up in block: "<<i<< endl;
        for(it=blockList[i]->content->record.begin(); it!=blockList[i]->content->record.end(); it++)
        {
            unsigned* theTF=it->second;
            float score=0;
            float docLength = pages[it->first];//float docLength = theIndex -> getDocLength(it->first);
            for(l=0; l<n; l++)
            {
                float tf = theTF[l];
                float weight = ((okapiK1+1.0)*tf) / (okapiK1 * (1.0-okapiB+okapiB * docLength /CONSTS::AVGD)+tf);//float weight = ((okapiK1+1.0)*tf) / (okapiK1*(1.0-okapiB+okapiB*docLength/theIndex->docLengthAvg)+tf);
                score+=weight*blockList[i]->termScores[l];
            }
            if(score > topDocs->smallest) topDocs->push(it->first,score);
            evalCounter++;
        }
        cout<< "Doc evaluated by far: "<<evalCounter<<endl;
        cout<< "doing look up in block: "<<i<<" done"<< endl;
        // p.end(CONSTS::STEP3);
        for(l=retN+theSize-1; l>=retN; l--)
        {
            // cout << l << endl;
            retDocID[l] = topDocs->pop(retDocScore[l]);
        }
        retN+=theSize;
        cout << "retN: "<<retN<<endl;
        delete(topDocs);
    }
    delete(DT);
    cout << "look up done" << endl;
    // p.end(CONSTS::ALLQS);
    cout << "docs decompressed: "<< Totaldocs_decompressed << endl;
    return Totaldocs_decompressed;
}