コード例 #1
0
// Adds the integer parsed from a MyString to this object's value.
//
// rhs.getString() is handed to stoi, and the parsed number is added to
// mValue; the sum is converted to MyInt by the return statement.
// NOTE(review): if stoi resolves to std::stoi, text that is not a valid
// integer makes this throw (std::invalid_argument / std::out_of_range).
MyInt MyInt::operator+(const MyString& rhs) const
{
	const int parsedRhs = stoi(rhs.getString());
	return mValue + parsedRhs;
}
コード例 #2
0
// Registers `key` in a string<->id index pair unless it is already known.
//
// stringToId : associative container mapping a string to its integer id.
// idToString : sequence holding the strings in id order (id == position).
// key        : the string to register.
//
// A new key receives the next free id, i.e. the current size of idToString.
// insert() leaves the map untouched and reports `second == false` when the key
// already exists, so a single hash lookup covers both the "is it known?" test
// and the insertion.
template <typename StringToId, typename IdToString>
static void addIndexEntry(StringToId& stringToId, IdToString& idToString, const std::string& key)
{
	const std::pair<std::string, int> entry(key, static_cast<int>(idToString.size()));
	if(stringToId.insert(entry).second){
		idToString.push_back(key);
	}
}

// Builds the string<->id indices for a corpus.
//
// pCorpus : corpus to index; it is dereferenced unconditionally, so it must
//           not be null.
//
// Steps performed:
//   1. allocates one string->id map and one id->string vector per column and
//      fills them with every distinct column value seen in the corpus;
//   2. fills m_mapString2IntTag / m_mapInt2StringTag with every distinct tag;
//   3. fills m_mapString2IntBiTag / m_mapInt2StringBiTag with every distinct
//      merged (previous tag, current tag) pair of adjacent tokens;
//   4. allocates empty unigram and bigram feature containers;
//   5. calls printColCardinalities().
//
// The number of columns is taken from the first token found in the corpus. If
// the corpus holds no token at all, no per-column index is created and the
// remaining indices simply stay empty.
//
// NOTE(review): the containers created with `new` overwrite the member
// pointers without releasing earlier values, and the tag indices are not
// cleared first - calling this twice on one object presumably leaks and
// accumulates entries; confirm against the class's lifecycle.
void Indices::buildIndices(Corpus* pCorpus){
	cout << "Building indices using: " << pCorpus->getCorpusFile() << endl;
	const int sentenceNum = pCorpus->getSentenceNum();

	// Read the column count from the first token that actually exists instead
	// of blindly dereferencing sentence 0 / token 0.
	int colNum = 0;
	for(int i = 0; i < sentenceNum; ++ i){
		Sentence* pSentence = pCorpus->getSentence(i);
		if(pSentence->getTokenNum() > 0){
			colNum = pSentence->getToken(0)->getColumnNum();
			break;
		}
	}

	// One (string -> id, id -> string) pair per column.
	m_vecMapString2IntCol = new MyVector<unordered_map <string, int>>;
	m_vecMapInt2StringCol = new MyVector<vector<string>>;
	for(int k = 0; k < colNum; ++ k){
		m_vecMapString2IntCol->push_back(new unordered_map <string, int>);
		m_vecMapInt2StringCol->push_back(new vector<string>);
	}

	// Pass 1: column values and unigram tags.
	for(int i = 0; i < sentenceNum; ++ i){
		Sentence* pSentence = pCorpus->getSentence(i);
		const int tokenNum = pSentence->getTokenNum();
		for(int j = 0; j < tokenNum; ++ j){
			Token* pToken = pSentence->getToken(j);

			//column
			for(int k = 0; k < colNum; ++ k){
				addIndexEntry(*m_vecMapString2IntCol->getItem(k),
				              *m_vecMapInt2StringCol->getItem(k),
				              pToken->getColumn(k)->getString());
			}

			//unigram tag
			addIndexEntry(m_mapString2IntTag, m_mapInt2StringTag, pToken->getTag()->getString());
		}
	}

	// Pass 2: bigram tags. This must run after pass 1 has finished: every tag
	// is then guaranteed to be in m_mapString2IntTag, which is what makes the
	// unchecked find(...)->second lookups below valid.
	for(int i = 0; i < sentenceNum; ++ i){
		Sentence* pSentence = pCorpus->getSentence(i);
		const int tokenNum = pSentence->getTokenNum();
		for(int j = 1; j < tokenNum; ++ j){
			Token* pPreToken = pSentence->getToken(j - 1);
			Token* pCurrToken = pSentence->getToken(j);
			const int preTag = m_mapString2IntTag.find(pPreToken->getTag()->getString())->second;
			const int curTag = m_mapString2IntTag.find(pCurrToken->getTag()->getString())->second;
			addIndexEntry(m_mapString2IntBiTag, m_mapInt2StringBiTag, BiTag::mergeTag(preTag, curTag));
		}
	}

	//unigram feature index
	m_mapUnigramFeature = new std::tr1::unordered_map<string, Feature*>;
	m_vecUnigramFeatures = new MyVector<Feature>;

	//bigram feature index
	m_mapBigramFeature = new std::tr1::unordered_map<string, Feature*>;
	m_vecBigramFeatures = new MyVector<Feature>;

	printColCardinalities();
	cout << "Building indices finished!" << endl << endl;
}