Пример #1
0
/**
 * Builds the document vector for a query.
 *
 * Steps, in order:
 *   1. Segment every input string with app.cut( ..., METHOD_MIX ).
 *   2. Discard tokens that are empty, a single space or tab, accepted by
 *      api_commen.IsDigit2, shorter than two chars (std::string::size()),
 *      or present in `stopwords`.
 *   3. For each surviving token that exists in `mapDicWord`, store its
 *      vector and count its occurrences.
 *   4. Divide every count by the number of distinct in-dictionary tokens.
 *   5. Pass both maps to doc2vec_word2doc with mapQuaryDoc as the output.
 *
 * @param vecQuaryWord [in]  query strings; must not be empty.
 * @param mapQuaryDoc  [out] cleared right before doc2vec_word2doc is called;
 *                           not touched on the early -1 returns.
 * @return 0 when doc2vec_word2doc returns 0; -1 when the input is empty or
 *         no token is found in mapDicWord; otherwise the non-zero code
 *         returned by doc2vec_word2doc.
 */
int API_DOC2VEC::doc2vec( 
	vector< string > 							vecQuaryWord,	//[In]quary word
	map< vector< string >, vector< float > > 	&mapQuaryDoc )	//[Out]quary Doc
{
	if ( vecQuaryWord.empty() )
	{
		// size() is size_t; cast so the argument really matches %ld.
		printf( "API_DOC2VEC::doc2vec err!!vecQuaryWord.size:%ld, do not find KeyWord!!\n",
				static_cast<long>( vecQuaryWord.size() ) );
		return -1;
	}

	/*****************************Cut words + filter*****************************/
	vector<string> outQueryWords;	// tokens that survive filtering, in input order
	vector<string> queryCutWords;	// scratch buffer, reused for every input string
	for ( size_t j = 0; j < vecQuaryWord.size(); ++j )
	{
		queryCutWords.clear();
		app.cut( vecQuaryWord[j], queryCutWords, METHOD_MIX );

		for ( size_t i = 0; i < queryCutWords.size(); ++i )
		{
			string &token = queryCutWords[i];

			// Same checks, same order as before, so IsDigit2 is only reached
			// for tokens that are not empty, " " or a tab.
			if ( token.empty() || (token == " ") || (token == "\t")
					|| (api_commen.IsDigit2(token)) || (token.size() < 2) )
			{
				continue;
			}

			// Keep only tokens that are not stop words.
			if ( stopwords.find(token) == stopwords.end() )
			{
				outQueryWords.push_back( token );
			}
		}
	}

	/*****************************Word vectors + raw TF*****************************/
	map< string, vector< float > > 	mapQuaryWord;	// token -> vector copied from mapDicWord
	map< string, float > 			mapQuaryTf;		// token -> occurrence count, normalised below
	for ( size_t i = 0; i < outQueryWords.size(); ++i )
	{
		const string &word = outQueryWords[i];

		map< string, vector< float > >::iterator itDicWord = mapDicWord.find(word);
		if ( itDicWord == mapDicWord.end() )	// not in the dictionary: ignored entirely
			continue;

		mapQuaryWord[word] = itDicWord->second;
		// operator[] value-initialises a missing entry to 0, so the first hit yields 1.
		mapQuaryTf[word] += 1;
	}

	/*****************************Normalise TF*****************************/
	const size_t distinctCount = mapQuaryTf.size();
	if ( distinctCount == 0 )
	{
		printf( "idQuarySize:%ld, do not find KeyWord!!\n",
				static_cast<long>( outQueryWords.size() ) );
		return -1;
	}
	// NOTE(review): the divisor is the number of DISTINCT in-dictionary tokens,
	// not the total number of occurrences - confirm this is the intended TF.
	for ( map< string, float >::iterator itQuaryTf = mapQuaryTf.begin();
			itQuaryTf != mapQuaryTf.end(); ++itQuaryTf )
	{
		itQuaryTf->second = itQuaryTf->second * 1.0 / distinctCount;
	}

	/*****************************word2doc*****************************/
	mapQuaryDoc.clear();
	const int nRet = doc2vec_word2doc( mapQuaryWord, mapQuaryTf, mapQuaryDoc );
	if ( nRet != 0 )
	{
		printf("doc2vec_train:word2doc err!!\n");
	}

	return nRet;
}
Пример #2
0
/*************************************************************************
  > File Name: ./app.cpp
  > Author: 
  > Mail: 
  > Created Time: Mon 03 Aug 2015 10:13:19 AM CST
 ************************************************************************/

#include<iostream>
#include<string>
#include<vector>
// File-scope globals with external linkage (neither static nor in an
// anonymous namespace).
int a = 10;
std::string  str("hello");

// Global segmenter instance, constructed during static initialisation from
// five file paths under ./dict/.
// NOTE(review): judging by the file names the arguments are, in order, the main
// dictionary, the HMM model, the user dictionary, the IDF table and the stop-word
// list - confirm against the CppJieba::Application constructor.
// NOTE(review): no CppJieba header is included in the visible part of this file.
CppJieba::Application app("./dict/jieba.dict.utf8",
        "./dict/hmm_model.utf8",
        "./dict/user.dict.utf8",
        "./dict/idf.utf8",
        "./dict/stop_words.utf8");

// File-scope using-directive; the declarations above spell out std:: explicitly.
using namespace std;

/**
 * Smoke test for the global segmenter `app`: cuts a sample sentence and
 * prints every resulting token on its own line.
 *
 * NOTE(review): assumes CppJieba::Application::cut accepts
 * (const std::string&, std::vector<std::string>&) with the cut method
 * defaulted - confirm against the cppjieba version in use.
 */
void test()
{
    const std::string sentence("hello");
    std::vector<std::string> words;

    app.cut(sentence, words);

    for (std::vector<std::string>::size_type i = 0; i < words.size(); ++i)
    {
        std::cout << words[i] << '\n';
    }
}