int DepPipe::writeInstance(FILE *featFile, DepInstance *pInstance) { // cerr << endl; int instanceLength = pInstance->size(); for(int w1 = 0; w1 < instanceLength; w1++) { for(int w2 = w1+1; w2 < instanceLength; w2++) { for(int ph = 0; ph < 2; ph++) { bool attR = ph == 0 ? true : false; FeatureVec prodFV; addArcFeature(pInstance,w1,w2,attR,prodFV); vector<int> vecKeys; prodFV.getKeys(vecKeys); // cerr << vecKeys.size() << " "; ::writeObject(featFile, vecKeys); } } // cerr << endl; } ::writeObject(featFile, (int)-1); if(options.m_isLabeled) { for(int w1 = 0; w1 < instanceLength; w1++) { for(int t = 0; t < m_vecTypes.size(); t++) { const string &type = m_vecTypes[t]; for(int ph = 0; ph < 2; ph++) { bool attR = ph == 0 ? true : false; for(int ch = 0; ch < 2; ch++) { bool child = ch == 0 ? true : false; FeatureVec prodFV; addLabelFeature(pInstance, w1, type, child, attR, prodFV); vector<int> vecKeys; prodFV.getKeys(vecKeys); // cerr << vecKeys.size() << " "; // copy(vecKeys.begin(), vecKeys.end(), ostream_iterator<int>(cerr, " ")); // cerr << endl; ::writeObject(featFile, vecKeys); } } } // cerr << endl; } ::writeObject(featFile, int(-2)); } // exit(0); writeExtendedFeatures(pInstance,featFile); vector<int> vecKeys; pInstance->fv.getKeys(vecKeys); ::writeObject(featFile, vecKeys); ::writeObject(featFile, int(-3)); // cerr << pInstance->actParseTree.size() << endl; writeObject(featFile, pInstance->actParseTree); writeObject(featFile, int(-4)); return 0; }
/**
 * Scores a feature vector against two weight vectors and returns the margin.
 *
 * Both scores are std::inner_product over [feature_vec.begin(),
 * feature_vec.end()) paired with the weight vector starting at its begin();
 * accumulation is done in double (initial value 0.0).
 *
 * NOTE(review): each weight vector is read for as many elements as
 * feature_vec has; nothing here checks that it is long enough.
 *
 * @param feature_vec   features to score
 * @param weights_plus  weights of the "plus" side
 * @param weights_minus weights of the "minus" side
 * @return score(weights_plus) - score(weights_minus)
 */
double MultiClassExp::getConfidence(const FeatureVec& feature_vec,
                                    const std::vector<double>& weights_plus,
                                    const std::vector<double>& weights_minus)
{
    const double plus_score = std::inner_product(
        feature_vec.begin(), feature_vec.end(), weights_plus.begin(), 0.0);
    const double minus_score = std::inner_product(
        feature_vec.begin(), feature_vec.end(), weights_minus.begin(), 0.0);

    return plus_score - minus_score;
}
/*
 * Dot product between a sparse document vector and a dense centroid.
 *
 * The document is kept in index:value form because it has few features; the
 * centroid is a dense array so that each weight can be fetched directly by
 * feature index.
 *
 * Since 2011-07-22 the similarity measure is cosine. This function only
 * supplies the un-normalised dot product; the division by the vector lengths
 * is done by the caller (see MaxProbCat). Tip from @zhqm: the centroid's
 * self inner product can be computed ahead of time instead of per call.
 *
 * document  sparse feature entries, each exposing .index and .weight
 * centorid  dense centroid weights, addressed by feature index
 * catIndex  category index; not used by this function
 * returns   sum of weight * centorid[index] over the document entries whose
 *           index lies inside centorid
 */
float KNN::InnerProduct(FeatureVec & document,vector<float> & centorid, int catIndex)
{
	float similarity = 0.0;

	for (FeatVecIte it = document.begin(); it != document.end(); ++it)
	{
		// A feature index outside the centroid would read past the end of the
		// vector; treat such a feature as having zero centroid weight. The
		// unsigned conversion also rejects a negative index.
		const vector<float>::size_type slot =
			static_cast<vector<float>::size_type>((*it).index);
		if (slot >= centorid.size())
		{
			continue;
		}

		similarity += ((*it).weight * centorid[slot]); // direct access into the dense vector
	}

	return similarity;
}
int KNN::MaxProbCat(FeatureVec & document,vector<vector<float> > & Centoroid,float & score) { float MaxScoreNow=0.0; int MaxCatIndex=-1; int currentCatIndex=0; float docDotProduct=0.0; FeatVecIte dit; for(dit=document.begin();dit!=document.end();dit++) { docDotProduct+=(*dit).weight*(*dit).weight; //tip:@zhqm totalDoc += } docDotProduct = sqrt(docDotProduct); if (docDotProduct == 0){ score = 0.0; return MaxCatIndex; } vector<vector<float> >::iterator it; for(it=Centoroid.begin();it!=Centoroid.end();it++,currentCatIndex++) { float score=InnerProduct(document,(*it), currentCatIndex); score=(float)score/(docDotProduct * m_centoroidFeatureDotProduct.at(currentCatIndex)); //cout<<"cat="<<currentCatIndex<<"\tscore="<<score<<endl; if(score>MaxScoreNow)//a better class { MaxScoreNow=score; MaxCatIndex=currentCatIndex; } } score=MaxScoreNow; return MaxCatIndex; }
int DepPipe::readInstance(FILE *featFile, int length, MultiArray<FeatureVec> &fvs, MultiArray<double> &probs, MultiArray<FeatureVec> &fvs_trips, MultiArray<double> &probs_trips, MultiArray<FeatureVec> &fvs_sibs, MultiArray<double> &probs_sibs, MultiArray<FeatureVec> &nt_fvs, MultiArray<double> &nt_probs, FeatureVec &fv, string &actParseTree, const Parameter ¶ms) { // cerr << "read instance" << endl; // cerr << endl; vector<unsigned int> fvs_dim; unsigned int fvs_pos; MyVector<int> vecKeys; // Get production crap. for(int w1 = 0; w1 < length; w1++) { int w2 = w1 + 1; if (w2 >= length) continue; fvs.setDemisionVal(fvs_dim, w1, w2, 0); fvs.getElement(fvs_dim, fvs_pos); for(; w2 < length; w2++) { for(int ph = 0; ph < 2; ph++) { FeatureVec &prodFV = fvs.getElement(fvs_pos); ::readObject(featFile, vecKeys); // cerr << vecKeys.size() << " "; prodFV.setKeys(vecKeys); probs.getElement(fvs_pos) = params.getScore(prodFV); ++fvs_pos; } } // cerr << endl; } int last; ::readObject(featFile, last); if(last != -1) { cerr << "DepPipe::readInstance() Error reading file. -1" << endl; return -1; } // cerr << endl; if(options.m_isLabeled) { vector<unsigned int> nt_dim(4); unsigned int nt_pos; nt_fvs.setDemisionVal(nt_dim, 0, 0, 0, 0); nt_fvs.getElement(nt_dim, nt_pos); for(int w1 = 0; w1 < length; w1++) { for(int t = 0; t < m_vecTypes.size(); t++) { const string &type = m_vecTypes[t]; for(int ph = 0; ph < 2; ph++) { for(int ch = 0; ch < 2; ch++) { FeatureVec &prodFV = nt_fvs.getElement(nt_pos); ::readObject(featFile, vecKeys); prodFV.setKeys(vecKeys); // cerr << vecKeys.size() << " "; nt_probs.getElement(nt_pos) = params.getScore(prodFV); ++nt_pos; } } } // cerr << endl; } ::readObject(featFile, last); if(last != -2) { cerr << "DepPipe::readInstance() Error reading file.-2 vs. " << last << endl; return -1; } } // exit(0); ::readObject(featFile, vecKeys); fv.setKeys(vecKeys); ::readObject(featFile, last); if(last != -3) { cerr << "DepPipe::readInstance() Error reading file.-3 vs. 
" << last << endl; return -1; } MyVector<char> my_str; ::readObject(featFile, my_str); actParseTree = my_str.begin(); // cerr << actParseTree << endl; // cerr << actParseTree.size() << endl; ::readObject(featFile, last); if(last != -4) { cerr << "DepPipe::readInstance() Error reading file.-4 vs. " << last << endl; return -1; } return 0; }
/**
 * Adds one named feature to fv.
 *
 * The name is resolved through m_featAlphabet.lookupIndex(); when that
 * returns a negative index the feature is dropped and fv is left unchanged.
 *
 * @param feat feature name to look up
 * @param val  value stored for the feature
 * @param fv   feature vector receiving the (index, val) entry
 */
void DepPipe::add(const string &feat, double val, FeatureVec &fv)
{
	int featIndex = m_featAlphabet.lookupIndex(feat);
	if (featIndex < 0)
	{
		return;
	}
	fv.add(featIndex, val);
}