Пример #1
0
// -----------------------------------------------------------------------------
// CreateTablesL
// Builds the column set for KTable and creates that table through the given
// database session.
//
// Columns added, in this order:
//   KColFilename  EDbColLongText16            not null
//   KColPosition  EDbColUint16                not null
//   KColCid       EDbColLongText16            not null
//   KColGroupId   EDbColLongText16
//   KColTtid      EDbColText16, max KTtidLen
//
// @param aDatabase  database session in which the table is created
// @leave  with the error code from building the column set, or with the error
//         code returned by RDbDatabase::CreateTable(). KErrAlreadyExists is
//         tolerated, so calling this on a database that already has the table
//         is not an error.
// -----------------------------------------------------------------------------
//
LOCAL_C void CreateTablesL(RDbDatabase& aDatabase)
{
#ifdef _DRM_TESTING
    WriteL(_L8("CreateTablesL"));
#endif

    // NewLC leaves the column set on the cleanup stack, so it is released
    // automatically if any of the AddL calls below leaves.
    CDbColSet* columns = CDbColSet::NewLC();

    // file name column
    TDbCol filename(KColFilename, EDbColLongText16);
    filename.iAttributes = TDbCol::ENotNull;
    columns->AddL(filename);

    // position column
    TDbCol position(KColPosition, EDbColUint16);
    position.iAttributes = TDbCol::ENotNull;
    columns->AddL(position);

    // content id column
    TDbCol cid(KColCid, EDbColLongText16);
    cid.iAttributes = TDbCol::ENotNull;
    columns->AddL(cid);

    // content group id column
    TDbCol group(KColGroupId, EDbColLongText16);
    columns->AddL(group);

    // transaction id column
    TDbCol ttid(KColTtid, EDbColText16, KTtidLen);
    columns->AddL(ttid);

    // CreateTable reports failure through its return value; capture it so a
    // failed creation is not silently ignored.
    const TInt err = aDatabase.CreateTable(KTable, *columns);
    CleanupStack::PopAndDestroy(columns); //columns

    if (err != KErrNone && err != KErrAlreadyExists)
    {
        User::Leave(err);
    }
}
// -----------------------------------------------------------------------------
// ModelDB::sparseOutput
// Writes the result line(s) for one test document to *pout.
//
// For every model in acm the per-class scores are collected through
// updateOutputVector(), then the lines are emitted:
//   * args.classic set: one line per model -
//       "<trueLabel> <prob> <prob> ... <class name for maxPCid>"
//   * otherwise: one line per (model, class) -
//       "<docID> <model> <class> <score(s)> [pred:<label>] [true:<label>]"
//     where <score(s)> is all five scales when sc == SC_all, or the single
//     scale selected by sc.
// The class named by args.omit (if any) is skipped in both layouts.
//
// @param v          not used by this function
// @param docID      document identifier written on each non-classic line and
//                   used (upper-cased) as the key into labelTable
// @param trueLabel  true label read from the test vector; printed first in
//                   classic mode, and used for "true:" when no label file is
//                   given and there is a single model
// @param crit       CT_* criterion used to derive the default threshold and
//                   passed on to updateOutputVector()
// @param sc         SC_* scale selecting which score(s) to print
// @param pout       destination stream; must not be null
//
// @throws MyException if args.printTrue is set, no label file is given and
//         acm holds more than one model.
//
// Side effects: sets the precision of *pout and cout to 8, and may overwrite
// args.threshold with a computed default.
// -----------------------------------------------------------------------------
void ModelDB::sparseOutput(TMMM & v, string & docID, string & trueLabel, int crit, int sc, ostream * pout){
    // set the precision in output to 8
    pout->precision(8);
    cout.precision(8);

    map<int, TOUTPUT> vd;

    // integer index for the omitted class name; only consulted when
    // args.omit is non-empty, but initialised so it never holds garbage
    int omitCid = 0;
    if(args.omit != "")
        omitCid = getIntID(args.omit, cateKeys, reverseCateKeys, maxCateID);

    // TACM: the two level map: hash_map on mid, then map on cid; keep different scores
    for(TACM::iterator acmit = acm.begin(); acmit != acm.end(); ++acmit){

        TCM & cm = acmit->second;

        // calculate the default threshold, if --includethresh not specified.
        // NOTE(review): args.threshold is overwritten here, so once a non-zero
        // default has been stored this branch is skipped for later models -
        // confirm that a single shared threshold is intended.
        if (args.threshold == 0.0 && args.criterion != "") {
            int nClasses = cm.size();
            double probthresh = (double)1/(double)nClasses;
            // NOTE(review): with a single class this divides by zero - confirm
            // that one-class models cannot reach this point.
            double oddsthresh = (double)1/(double)(nClasses-1);
            switch(crit) {
                case CT_prob:       args.threshold = probthresh; break;
                case CT_logProb:    args.threshold = log(probthresh); break;
                case CT_odds:       args.threshold = oddsthresh; break;
                case CT_logOdds:    args.threshold = log(oddsthresh); break;
                case CT_linear:     args.threshold = 0.0; break;
            }
        }

        // create (or fetch) the output record of this model and reset it
        TOUTPUT & ot = ((vd.insert(make_pair(acmit->first, TOUTPUT()))).first)->second;
        ot.used = false;
        ot.maxP = 0.0;

        // for each class of the model, calculate the different scores
        for(TCM::iterator cmit = cm.begin(); cmit != cm.end(); ++cmit){
            updateOutputVector(cmit, crit, vd);
        }

    } // end of processing every model


    // generate the results line(s)
    for(map<int, TOUTPUT>::iterator toit = vd.begin(); toit != vd.end(); ++toit){
        // --common part first
        int mid = toit->first;
        TOUTPUT & to = toit->second;

        // if --classic is specified, output the true label first
        if(args.classic)
            (*pout) << trueLabel << " ";

        vector<TCM::iterator>::iterator vcitit;
        for(vcitit = to.v.begin(); vcitit != to.v.end(); ++vcitit){
            TST & st = (*vcitit)->second;

            // omit the class
            if(args.omit != "" && st.cid == omitCid)
                continue;

            // if --classic is specified, output the prob of this class, then process the next class.
            if(args.classic) {
                (*pout) << st.prob << " ";
                continue;
            }

            // if no --classic specified,
            // FIRST output the "DOCID MODEL LINECLASS"
            (*pout) << docID << " " << getModelStringID(mid) << " " << getCateStringID(st.cid) << " ";

            // ALL_SCALE_LINE: "PROB LOGPROB ODDS LOGODDS LINEAR"
            if(sc == SC_all){
                (*pout) << st.prob << " " << st.logprob << " "
                        << st.odds << " " << st.logodds << " " << st.innprod << " ";
            }
            // ONE_SCALE_LINE: "SCORE"
            else{
                // initialised so an unrecognised scale prints a defined value
                // instead of reading an indeterminate double
                double score = 0.0;
                switch(sc){
                    case SC_prob:       score = st.prob;        break;
                    case SC_logProb:    score = st.logprob;     break;
                    case SC_odds:       score = st.odds;        break;
                    case SC_logOdds:    score = st.logodds;     break;
                    case SC_linear:     score = st.innprod;     break;
                }
                (*pout) << score << " " ;
            }

            // PRED_CLASS_ENTRY?
            // if the model is a BBRtrain model, use thresh value for predict label.
            if(args.printPred){
                // search the mid in the BBRtrain model id list
                map<int, double>::iterator mit_BBRModelThresh = m_BBRModelThresh.find(mid);
                // if this is a BBRtrain model,
                // the predicted label is "1" if the score is larger than or equal to the threshold value;
                // otherwise, the predicted label is "-1"
                if (mit_BBRModelThresh != m_BBRModelThresh.end()) {
                    double ppredCls1;
                    string cid = getCateStringID(to.maxPPredCid);
                    // get the ppred for class +1
                    ppredCls1 = (cid=="1"||cid=="+1") ? to.maxPPred : 1-to.maxPPred;
                    if (ppredCls1 >= mit_BBRModelThresh->second)
                        (*pout) << "pred:" << "1" << " ";
                    else
                        (*pout) << "pred:" << "-1" << " ";
                }
                // if this is not a BBRtrain model
                else
                    (*pout) << "pred:" << getCateStringID(to.maxPPredCid) << " ";
            }

            // TRUE_CLASS_ENTRY?
            if(args.printTrue){

                string trueCate = "@unknown";

                // if label file is specified, look the document up in labelTable and
                // pick the entry recorded for the current model; the label stays
                // "@unknown" when the document or the model is not listed
                if(args.labelFile != "") {
                    THLL::iterator ltit = labelTable.find(toUpperCase(docID));
                    if(ltit != labelTable.end()){
                        vector<TID> & vl = ltit->second;
                        TID ttid(mid, 0);
                        vector<TID>::iterator vtit = lower_bound(vl.begin(), vl.end(), ttid, less<TID>());
                        if(vtit != vl.end() && vtit->modelID == mid){
                            trueCate = getCateStringID(vtit->cateID);
                        }
                    }
                }
                // if no label file is specified, and there is only one model, use the true label in test vector file.
                else {
                    if(acm.size()>1) {
                        // ERROR MESSAGE 53
                        string emsg = "Cannot include true labels in results if there are multiple models but no label file.";
                        if(args.errorFile!="")
                            (*errout)<< emsg << endl;
                        throw MyException(emsg);
                    }
                    else
                        trueCate = trueLabel;
                }

                (*pout) << "true:" << trueCate<<" ";
            }

            (*pout) << endl;

        } // end of processing all the classes for this model

        // if -k/--classic specified, output predclassname
        if(args.classic)
            (*pout) << getCateStringID(to.maxPCid) << endl;
    } // end of processing every model's output
}