int MetalabelFeature::ExtractFeature(const vector<TokenCitation*>& tokenVector, CitationSet& citationSet, UniGramFeature& uniGrams, BiGramFeature& biGrams, JournalSet& journalSet, FeatureSet& allFeatures, int printLog) { int rtn = 0; allFeatures.mFeatures.clear(); allFeatures.mMaxIndex = uniGrams.mDictionary.rbegin()->first + 1; allFeatures.mFeatures.resize(tokenVector.size()); FeatureSet biFeatures; biFeatures.mMaxIndex = biGrams.mDictionary.rbegin()->first + 1; biFeatures.mFeatures.resize(tokenVector.size()); FeatureSet jourFeatures; jourFeatures.mMaxIndex = journalSet.mJournals.rbegin()->first + 1; jourFeatures.mFeatures.resize(tokenVector.size()); int numThreads = omp_get_num_procs(); if (printLog != SILENT) clog << "CPU number: " << numThreads << endl; omp_set_num_threads(numThreads); if (printLog != SILENT) clog << "Start Parallel Extract Features" << endl; #pragma omp parallel for schedule(dynamic) for (int i = 0; i < tokenVector.size(); i++) { uniGrams.Extract(*tokenVector[i], allFeatures.mFeatures[i]); } #pragma omp parallel for schedule(dynamic) for (int i = 0; i < tokenVector.size(); i++) { biGrams.Extract(*tokenVector[i], biFeatures.mFeatures[i]); } #pragma omp parallel for schedule(dynamic) for (int i = 0; i < tokenVector.size(); ++i) { Journal* ptrJournal = NULL; ptrJournal = journalSet.SearchJournalTitle(citationSet[tokenVector[i]->mPmid]->mJournalTitle); if (ptrJournal != NULL) { jourFeatures.mFeatures[i][ptrJournal->mJournalId] = 1.0; } else { cerr << "Error: can't find \"" << citationSet[tokenVector[i]->mPmid]->mJournalTitle << " in pmid " << tokenVector[i]->mPmid << endl; } } rtn = allFeatures.Merge(biFeatures); CHECK_RTN(rtn); rtn = allFeatures.Merge(jourFeatures); CHECK_RTN(rtn); rtn = allFeatures.Normalize(); CHECK_RTN(rtn); return 0; }
int MetalabelFeature::ExtractFeature(const vector<TokenCitation*>& tokenVector, UniGramFeature& uniGrams, BiGramFeature& biGrams, feature_node** &featureSpace, int printLog) { int numThreads = omp_get_num_procs(); if (printLog != SILENT) clog << "CPU number: " << numThreads << endl; omp_set_num_threads(numThreads); if (printLog != SILENT) clog << "Extract unigram & bigram" << endl; if (printLog != SILENT) clog << "Make Feature table" << endl; int featureNum = (int)tokenVector.size(); featureSpace = NULL; featureSpace = Malloc(feature_node*, featureNum); memset(featureSpace, 0, sizeof(feature_node*)* featureNum); int uniMaxIndex = uniGrams.mDictionary.rbegin()->first + 1; int biMaxIndex = biGrams.mDictionary.rbegin()->first + 1; if (printLog != SILENT) clog << "Extract features parallel" << endl; #pragma omp parallel for schedule(dynamic) for (int i = 0; i < (int)tokenVector.size(); i++) { FeatureSet tabAllFeatures; tabAllFeatures.mMaxIndex = uniMaxIndex; tabAllFeatures.mFeatures.resize(1); FeatureSet tabBiFeatures; tabBiFeatures.mMaxIndex = biMaxIndex; tabBiFeatures.mFeatures.resize(1); uniGrams.Extract(*tokenVector[i], tabAllFeatures.mFeatures[0]); biGrams.Extract(*tokenVector[i], tabBiFeatures.mFeatures[0]); tabAllFeatures.Merge(tabBiFeatures); tabAllFeatures.Normalize(); featureSpace[i] = NULL; LinearMachine::TransFeatures(featureSpace[i], tabAllFeatures.mFeatures[0]); } return 0; }
int MetalabelFeature::ExtractFeature(const vector<TokenCitation*>& tokenVector, UniGramFeature& uniGrams, BiGramFeature& biGrams, FeatureSet& allFeatures, int printLog) { int rtn = 0; allFeatures.mFeatures.clear(); allFeatures.mMaxIndex = uniGrams.mDictionary.rbegin()->first + 1; allFeatures.mFeatures.resize(tokenVector.size()); FeatureSet biFeatures; biFeatures.mMaxIndex = biGrams.mDictionary.rbegin()->first + 1; biFeatures.mFeatures.resize(tokenVector.size()); int numThreads = omp_get_num_procs(); if (printLog != SILENT) clog << "CPU number: " << numThreads << endl; omp_set_num_threads(numThreads); if (printLog != SILENT) clog << "Start Parallel Extract Features" << endl; #pragma omp parallel for schedule(dynamic) for (int i = 0; i < tokenVector.size(); i++) { uniGrams.Extract(*tokenVector[i], allFeatures.mFeatures[i]); } #pragma omp parallel for schedule(dynamic) for (int i = 0; i < tokenVector.size(); i++) { biGrams.Extract(*tokenVector[i], biFeatures.mFeatures[i]); } allFeatures.Merge(biFeatures); rtn = allFeatures.Normalize(); CHECK_RTN(rtn); return 0; }
int MetalabelFeature::ExtractFeature(const vector<TokenCitation*>& tokenVector, CitationSet& citationSet, UniGramFeature& uniGrams, BiGramFeature& biGrams, JournalSet& journalSet, feature_node** &featureSpace, int printLog) { int numThreads = omp_get_num_procs(); if (printLog != SILENT) clog << "CPU number: " << numThreads << endl; omp_set_num_threads(numThreads); if (printLog != SILENT) clog << "Extract unigram & bigram" << endl; if (printLog != SILENT) clog << "Make Feature table" << endl; int featureNum = (int)tokenVector.size(); featureSpace = NULL; featureSpace = Malloc(feature_node*, featureNum); memset(featureSpace, 0, sizeof(feature_node*)* featureNum); int uniMaxIndex = uniGrams.mDictionary.rbegin()->first + 1; int biMaxIndex = biGrams.mDictionary.rbegin()->first + 1; int jourMaxIndex = journalSet.mJournals.rbegin()->first + 1; if (printLog != SILENT) clog << "Extract features parallel" << endl; #pragma omp parallel for schedule(dynamic) for (int i = 0; i < (int)tokenVector.size(); i++) { FeatureSet tabAllFeatures; tabAllFeatures.mMaxIndex = uniMaxIndex; tabAllFeatures.mFeatures.resize(1); FeatureSet tabBiFeatures; tabBiFeatures.mMaxIndex = biMaxIndex; tabBiFeatures.mFeatures.resize(1); uniGrams.Extract(*tokenVector[i], tabAllFeatures.mFeatures[0]); biGrams.Extract(*tokenVector[i], tabBiFeatures.mFeatures[0]); FeatureSet tabJourFeatures; tabJourFeatures.mMaxIndex = jourMaxIndex; tabJourFeatures.mFeatures.resize(1); if (citationSet[tokenVector[i]->mPmid]->mJournalTitle != NULL) { Journal* ptrJournal = journalSet.SearchJournalTitle(citationSet[tokenVector[i]->mPmid]->mJournalTitle); if (ptrJournal != NULL) { tabJourFeatures.mFeatures[0][ptrJournal->mJournalId] = 1.0; } else cerr << "Error: \"" << citationSet[tokenVector[i]->mPmid]->mJournalTitle << "\" can't find journal in journal set in pmid " << tokenVector[i]->mPmid << endl; } else cerr << "Error: " << tokenVector[i]->mPmid << " can't find journal title in citation" << endl; tabAllFeatures.Merge(tabBiFeatures); tabAllFeatures.Merge(tabJourFeatures); tabAllFeatures.Normalize(); featureSpace[i] = NULL; LinearMachine::TransFeatures(featureSpace[i], tabAllFeatures.mFeatures[0]); } return 0; }