Beispiel #1
0
int MetalabelFeature::ExtractFeature(const vector<TokenCitation*>& tokenVector, CitationSet& citationSet, UniGramFeature& uniGrams, BiGramFeature& biGrams, JournalSet& journalSet, FeatureSet& allFeatures, int printLog)
{
	int rtn = 0;
	allFeatures.mFeatures.clear();
	allFeatures.mMaxIndex = uniGrams.mDictionary.rbegin()->first + 1;
	allFeatures.mFeatures.resize(tokenVector.size());

	FeatureSet biFeatures;
	biFeatures.mMaxIndex = biGrams.mDictionary.rbegin()->first + 1;
	biFeatures.mFeatures.resize(tokenVector.size());

	FeatureSet jourFeatures;
	jourFeatures.mMaxIndex = journalSet.mJournals.rbegin()->first + 1;
	jourFeatures.mFeatures.resize(tokenVector.size());

	int numThreads = omp_get_num_procs();
	if (printLog != SILENT)
		clog << "CPU number: " << numThreads << endl;

	omp_set_num_threads(numThreads);
	if (printLog != SILENT)
		clog << "Start Parallel Extract Features" << endl;
#pragma omp parallel for schedule(dynamic) 
	for (int i = 0; i < tokenVector.size(); i++)
	{
		uniGrams.Extract(*tokenVector[i], allFeatures.mFeatures[i]);
	}

#pragma omp parallel for schedule(dynamic) 
	for (int i = 0; i < tokenVector.size(); i++)
	{
		biGrams.Extract(*tokenVector[i], biFeatures.mFeatures[i]);
	}

#pragma omp parallel for schedule(dynamic)
	for (int i = 0; i < tokenVector.size(); ++i)
	{
		Journal* ptrJournal = NULL;
		ptrJournal = journalSet.SearchJournalTitle(citationSet[tokenVector[i]->mPmid]->mJournalTitle);
		if (ptrJournal != NULL)
		{
			jourFeatures.mFeatures[i][ptrJournal->mJournalId] = 1.0;
		}
		else
		{
			cerr << "Error: can't find \"" << citationSet[tokenVector[i]->mPmid]->mJournalTitle << " in pmid " << tokenVector[i]->mPmid << endl;
		}
	}

	rtn = allFeatures.Merge(biFeatures);
	CHECK_RTN(rtn);
	rtn = allFeatures.Merge(jourFeatures);
	CHECK_RTN(rtn);
	rtn = allFeatures.Normalize();
	CHECK_RTN(rtn);
	return 0;
}
Beispiel #2
0
// Extracts unigram and bigram features for every citation in tokenVector and
// stores them as feature_node arrays in featureSpace (entry i belongs to
// tokenVector[i]).
//
// Parameters:
//   tokenVector  - citations to process; each entry is dereferenced, so no
//                  entry may be NULL.
//   uniGrams / biGrams - feature dictionaries. rbegin() is dereferenced on
//                  both, so they must be non-empty when tokenVector is not.
//   featureSpace - output; overwritten with a newly allocated array of
//                  tokenVector.size() pointers. The previous value is not
//                  released here. Entries are filled by
//                  LinearMachine::TransFeatures.
//   printLog     - progress messages go to clog unless this equals SILENT.
//
// Returns 0 on success. Returns -1 if the pointer array cannot be allocated
// (NOTE(review): -1 is assumed to be an acceptable error code here -- confirm
// against the project's convention). If Merge or Normalize fails for some
// row, that row's entry stays NULL and one of the failing return codes is
// returned; featureSpace itself stays allocated for the caller to release.
int MetalabelFeature::ExtractFeature(const vector<TokenCitation*>& tokenVector, UniGramFeature& uniGrams, BiGramFeature& biGrams, feature_node** &featureSpace, int printLog)
{
	int numThreads = omp_get_num_procs();
	if (printLog != SILENT)
		clog << "CPU number: " << numThreads << endl;
	omp_set_num_threads(numThreads);
	if (printLog != SILENT)
		clog << "Extract unigram & bigram" << endl;

	if (printLog != SILENT)
		clog << "Make Feature table" << endl;
	int featureNum = (int)tokenVector.size();
	featureSpace = Malloc(feature_node*, featureNum);
	if (featureSpace == NULL)
	{
		// A zero-sized request may legitimately yield NULL; only a failed
		// non-empty allocation is an error. Either way there is nothing to fill.
		return featureNum > 0 ? -1 : 0;
	}
	// Every entry starts as NULL so rows that fail below are recognizable.
	memset(featureSpace, 0, sizeof(feature_node*)* featureNum);

	int uniMaxIndex = uniGrams.mDictionary.rbegin()->first + 1;
	int biMaxIndex = biGrams.mDictionary.rbegin()->first + 1;

	if (printLog != SILENT)
		clog << "Extract features parallel" << endl;

	// Shared across threads; only written inside the critical section below.
	int errorCode = 0;

#pragma omp parallel for schedule(dynamic)
	for (int i = 0; i < featureNum; i++)
	{
		// Per-iteration, single-row feature sets: nothing is shared between threads.
		FeatureSet tabAllFeatures;
		tabAllFeatures.mMaxIndex = uniMaxIndex;
		tabAllFeatures.mFeatures.resize(1);

		FeatureSet tabBiFeatures;
		tabBiFeatures.mMaxIndex = biMaxIndex;
		tabBiFeatures.mFeatures.resize(1);

		uniGrams.Extract(*tokenVector[i], tabAllFeatures.mFeatures[0]);
		biGrams.Extract(*tokenVector[i], tabBiFeatures.mFeatures[0]);

		int rtn = tabAllFeatures.Merge(tabBiFeatures);
		if (rtn == 0)
			rtn = tabAllFeatures.Normalize();
		if (rtn != 0)
		{
			// Leaving an OpenMP loop early is not allowed, so remember the
			// failure and report it after the loop.
#pragma omp critical(metalabel_feature_error)
			{
				if (errorCode == 0)
					errorCode = rtn;
			}
			continue;
		}

		featureSpace[i] = NULL;
		LinearMachine::TransFeatures(featureSpace[i], tabAllFeatures.mFeatures[0]);
	}
	return errorCode;
}
Beispiel #3
0
// Extracts unigram and bigram features for every citation in tokenVector and
// combines them into allFeatures (row i belongs to tokenVector[i]).
//
// Unigram features are written straight into allFeatures, bigram features are
// built in a temporary FeatureSet and merged in, then the result is normalized.
//
// Parameters:
//   tokenVector  - citations to process; each entry is dereferenced, so no
//                  entry may be NULL.
//   uniGrams / biGrams - feature dictionaries. rbegin() is dereferenced on
//                  both, so they must be non-empty.
//   allFeatures  - output; any previous content is cleared.
//   printLog     - progress messages go to clog unless this equals SILENT.
//
// Returns 0 on success. Failures of Merge/Normalize are handed to CHECK_RTN
// (presumably it returns the error code to the caller -- confirm against the
// macro definition).
int MetalabelFeature::ExtractFeature(const vector<TokenCitation*>& tokenVector, UniGramFeature& uniGrams, BiGramFeature& biGrams, FeatureSet& allFeatures, int printLog)
{
	int rtn = 0;
	// Signed copy of the size: the OpenMP loops below use an int counter and
	// must not compare it against an unsigned value.
	const int citationNum = (int)tokenVector.size();

	allFeatures.mFeatures.clear();
	allFeatures.mMaxIndex = uniGrams.mDictionary.rbegin()->first + 1;
	allFeatures.mFeatures.resize(tokenVector.size());

	FeatureSet biFeatures;
	biFeatures.mMaxIndex = biGrams.mDictionary.rbegin()->first + 1;
	biFeatures.mFeatures.resize(tokenVector.size());

	int numThreads = omp_get_num_procs();
	if (printLog != SILENT)
		clog << "CPU number: " << numThreads << endl;

	omp_set_num_threads(numThreads);
	if (printLog != SILENT)
		clog << "Start Parallel Extract Features" << endl;

	// Each iteration writes only to its own row, so rows need no locking.
#pragma omp parallel for schedule(dynamic)
	for (int i = 0; i < citationNum; i++)
	{
		uniGrams.Extract(*tokenVector[i], allFeatures.mFeatures[i]);
	}

#pragma omp parallel for schedule(dynamic)
	for (int i = 0; i < citationNum; i++)
	{
		biGrams.Extract(*tokenVector[i], biFeatures.mFeatures[i]);
	}

	// A failed merge must not be normalized and reported as success.
	rtn = allFeatures.Merge(biFeatures);
	CHECK_RTN(rtn);
	rtn = allFeatures.Normalize();
	CHECK_RTN(rtn);

	return 0;
}
Beispiel #4
0
// Extracts unigram, bigram and journal features for every citation in
// tokenVector and stores them as feature_node arrays in featureSpace (entry i
// belongs to tokenVector[i]).
//
// Parameters:
//   tokenVector  - citations to process; each entry is dereferenced, so no
//                  entry may be NULL.
//   citationSet  - looked up by mPmid to obtain the journal title.
//   uniGrams / biGrams / journalSet - feature dictionaries. rbegin() is
//                  dereferenced on each, so they must be non-empty when
//                  tokenVector is not.
//   featureSpace - output; overwritten with a newly allocated array of
//                  tokenVector.size() pointers. The previous value is not
//                  released here. Entries are filled by
//                  LinearMachine::TransFeatures.
//   printLog     - progress messages go to clog unless this equals SILENT.
//
// Returns 0 on success. Returns -1 if the pointer array cannot be allocated
// (NOTE(review): -1 is assumed to be an acceptable error code here -- confirm
// against the project's convention). If Merge or Normalize fails for some
// row, that row's entry stays NULL and one of the failing return codes is
// returned; featureSpace itself stays allocated for the caller to release.
// A citation without a journal title, or with a title that is not in
// journalSet, is reported on cerr and simply gets no journal feature.
int MetalabelFeature::ExtractFeature(const vector<TokenCitation*>& tokenVector, CitationSet& citationSet, UniGramFeature& uniGrams, BiGramFeature& biGrams, JournalSet& journalSet, feature_node** &featureSpace, int printLog)
{
	int numThreads = omp_get_num_procs();
	if (printLog != SILENT)
		clog << "CPU number: " << numThreads << endl;
	omp_set_num_threads(numThreads);
	if (printLog != SILENT)
		clog << "Extract unigram & bigram" << endl;

	if (printLog != SILENT)
		clog << "Make Feature table" << endl;
	int featureNum = (int)tokenVector.size();
	featureSpace = Malloc(feature_node*, featureNum);
	if (featureSpace == NULL)
	{
		// A zero-sized request may legitimately yield NULL; only a failed
		// non-empty allocation is an error. Either way there is nothing to fill.
		return featureNum > 0 ? -1 : 0;
	}
	// Every entry starts as NULL so rows that fail below are recognizable.
	memset(featureSpace, 0, sizeof(feature_node*)* featureNum);

	int uniMaxIndex = uniGrams.mDictionary.rbegin()->first + 1;
	int biMaxIndex = biGrams.mDictionary.rbegin()->first + 1;
	int jourMaxIndex = journalSet.mJournals.rbegin()->first + 1;

	if (printLog != SILENT)
		clog << "Extract features parallel" << endl;

	// Shared across threads; only written inside the critical section below.
	int errorCode = 0;

#pragma omp parallel for schedule(dynamic)
	for (int i = 0; i < featureNum; i++)
	{
		// Per-iteration, single-row feature sets: nothing is shared between threads.
		FeatureSet tabAllFeatures;
		tabAllFeatures.mMaxIndex = uniMaxIndex;
		tabAllFeatures.mFeatures.resize(1);

		FeatureSet tabBiFeatures;
		tabBiFeatures.mMaxIndex = biMaxIndex;
		tabBiFeatures.mFeatures.resize(1);

		uniGrams.Extract(*tokenVector[i], tabAllFeatures.mFeatures[0]);
		biGrams.Extract(*tokenVector[i], tabBiFeatures.mFeatures[0]);

		FeatureSet tabJourFeatures;
		tabJourFeatures.mMaxIndex = jourMaxIndex;
		tabJourFeatures.mFeatures.resize(1);
		if (citationSet[tokenVector[i]->mPmid]->mJournalTitle != NULL)
		{
			Journal* ptrJournal = journalSet.SearchJournalTitle(citationSet[tokenVector[i]->mPmid]->mJournalTitle);
			if (ptrJournal != NULL)
			{
				// Indicator feature: 1.0 at the index of the citation's journal.
				tabJourFeatures.mFeatures[0][ptrJournal->mJournalId] = 1.0;
			}
			else
			{
				// Serialize diagnostics so lines from different threads do not interleave.
#pragma omp critical(metalabel_feature_log)
				{
					cerr << "Error: \"" << citationSet[tokenVector[i]->mPmid]->mJournalTitle << "\" can't find journal in journal set in pmid " << tokenVector[i]->mPmid << endl;
				}
			}
		}
		else
		{
#pragma omp critical(metalabel_feature_log)
			{
				cerr << "Error: " << tokenVector[i]->mPmid << " can't find journal title in citation" << endl;
			}
		}

		int rtn = tabAllFeatures.Merge(tabBiFeatures);
		if (rtn == 0)
			rtn = tabAllFeatures.Merge(tabJourFeatures);
		if (rtn == 0)
			rtn = tabAllFeatures.Normalize();
		if (rtn != 0)
		{
			// Leaving an OpenMP loop early is not allowed, so remember the
			// failure and report it after the loop.
#pragma omp critical(metalabel_feature_error)
			{
				if (errorCode == 0)
					errorCode = rtn;
			}
			continue;
		}

		featureSpace[i] = NULL;
		LinearMachine::TransFeatures(featureSpace[i], tabAllFeatures.mFeatures[0]);
	}
	return errorCode;
}