Exemple #1
0
/*
 * Helper: URL-decode `encoded`, segment the decoded text with
 * ICTCLAS_ParagraphProcess(text, 0), and return a trimmed heap copy of the
 * segmenter output.
 *
 * Takes ownership of `encoded` (it is always freed here). Returns NULL when
 * `encoded` is NULL or when any intermediate step yields NULL; otherwise the
 * caller must free() the returned string.
 */
static char *httpcws_segment_encoded(char *encoded)
{
	char *decoded;
	const char *segmented;
	char *result = NULL;

	if (encoded == NULL) {
		return NULL;
	}

	decoded = urldecode(encoded);
	free(encoded);
	if (decoded == NULL) {
		return NULL;
	}

	segmented = ICTCLAS_ParagraphProcess(decoded, 0);
	if (segmented != NULL) {
		/* Copy before releasing the input so the copy never reads freed memory. */
		result = strdup(segmented);
	}
	free(decoded);

	if (result != NULL) {
		trim(result);
	}
	return result;
}

/*
 * Request handler: segments the text supplied by the client and replies with
 * the segmented words as text/plain.
 *
 * The POST body is used when the request's input buffer holds data;
 * otherwise the "w" query parameter is used. If neither is present, or if
 * processing fails, an empty body is sent.
 *
 * req: the incoming libevent HTTP request.
 * arg: unused.
 */
void httpcws_handler(struct evhttp_request *req, void *arg)
{
	struct evbuffer *buf = evbuffer_new();
	struct evkeyvalq httpcws_http_query;
	const char *tcsql_input_postbuffer;
	const char *httpcws_input_words;
	char *httpcws_output_words = NULL;

	(void) arg;

	/* Parse the URL query parameters. */
	evhttp_parse_query(evhttp_request_uri(req), &httpcws_http_query);

	/* Raw POST body; it is not NUL-terminated, so it is copied below. */
	tcsql_input_postbuffer = (const char *) EVBUFFER_DATA(req->input_buffer);

	/* GET parameter "w". */
	httpcws_input_words = evhttp_find_header(&httpcws_http_query, "w");

	if (tcsql_input_postbuffer != NULL) {
		size_t post_len = EVBUFFER_LENGTH(req->input_buffer);
		char *post_copy = (char *) malloc(post_len + 1);

		if (post_copy != NULL) {
			memcpy(post_copy, tcsql_input_postbuffer, post_len);
			post_copy[post_len] = '\0';
		}
		/* The helper tolerates NULL and frees post_copy itself. */
		httpcws_output_words = httpcws_segment_encoded(post_copy);
	} else if (httpcws_input_words != NULL) {
		httpcws_output_words = httpcws_segment_encoded(strdup(httpcws_input_words));
	}

	/* Send the result to the client. */
	evhttp_add_header(req->output_headers, "Server", "HTTPCWS/1.0.0");
	evhttp_add_header(req->output_headers, "Content-Type", "text/plain; charset=GB2312");
	evhttp_add_header(req->output_headers, "Connection", "close");
	if (buf != NULL) {
		evbuffer_add_printf(buf, "%s",
				httpcws_output_words != NULL ? httpcws_output_words : "");
	}
	/* NOTE(review): when evbuffer_new() failed this passes a NULL body buffer;
	 * confirm the libevent version in use accepts that. */
	evhttp_send_reply(req, HTTP_OK, "OK", buf);

	free(httpcws_output_words);
	evhttp_clear_headers(&httpcws_http_query);
	if (buf != NULL) {
		evbuffer_free(buf);
	}
}
Exemple #2
0
void MainWindow::textBagOfWords(std::set<std::string> &featureDic, std::map<std::string, int> &text,int &textSize)
{
    if(!ICTCLAS_Init()) //初始化分词组件。
    {
            QMessageBox::warning(this,"Warnning","Init fails",QMessageBox::Yes);
            return;
    }
    else
    {
            printf("Init ok\n");
    }
    ICTCLAS_SetPOSmap(2);

    QString myStr = ui->text->toPlainText();
    QByteArray ba = myStr.toLocal8Bit();
    char* sText;
    char* sSentence = (char*)malloc(ba.size()+10);
    sText = ba.data();

    int len = 0;
    //这样读入为了将换行符去掉,ASCII码10、13
    for(int i = 0; sText[i] != '\0'; i++)
    {
        if(sText[i] == '\n' || sText[i] == '\r')continue;
        sSentence[len++] = sText[i];
    }
    sSentence[len] = '\0';

    unsigned int nPaLen=strlen(sSentence); // 需要分词的长度
    char* sRst=0;   //用户自行分配空间,用于保存结果;
    sRst=(char*)malloc(nPaLen*6); //建议长度为字符串长度的6倍。

    int nRstLen=0; //分词结果的长度

    nRstLen = ICTCLAS_ParagraphProcess(sSentence,nPaLen,sRst,CODE_TYPE_UNKNOWN,0);  //字符串处理

    //free(sText);
    free(sSentence);

    //收集单词,形成字典
    std::string words;
    std::istringstream istream(sRst);
    std::set<std::string> txtWords;
    while(istream>>words)
    {
        txtWords.insert(words);
        if(featureDic.count(words))
        {
            text[words]++;
        }
    }
    textSize = txtWords.size();
    free(sRst);
    txtWords.clear();
    istream.clear();
    ICTCLAS_Exit();	//释放资源退出
    return;

}
/*
 * Interactive demo: reads whitespace-delimited words from stdin, segments
 * each one with ICTCLAS_ParagraphProcess and prints the result, until the
 * user enters "q" (case-insensitive) or input ends.
 *
 * Returns 0 on normal exit, 1 when ICTCLAS cannot be initialised.
 */
int main(int argc, char* argv[])
{
	//Sample1: Sentence or paragraph lexical analysis with only one result

	/* The result buffer is six times the input size, matching the sizing the
	   textBagOfWords example in this file uses for its result buffer. */
	enum { INPUT_CAP = 2000, RESULT_CAP = INPUT_CAP * 6 };
	char sSentence[INPUT_CAP] = "";
	char sSentenceResult[RESULT_CAP] = "";

	(void)argc;
	(void)argv;

	if(!ICTCLAS_Init())
	{
		printf("ICTCLAS INIT FAILED!\n");
		return 1;
	}
	printf("Input sentence now!\n");
	/* The field width must stay at INPUT_CAP - 1 so scanf cannot overflow the
	   buffer. Checking the return value stops the loop on EOF or read error
	   instead of reprocessing stale input forever. */
	while(scanf("%1999s",sSentence)==1 && _stricmp(sSentence,"q")!=0)
	{
		ICTCLAS_ParagraphProcess(sSentence,sSentenceResult);
		printf("%s\nInput string now!\n",sSentenceResult);
	}
    ICTCLAS_Exit();

	//Sample2: File segmentation and POS tagging
/* 
    ICTCLAS_Init();	
	ICTCLAS_FileProcess("E:\\Sample\\Corpus_NewPOS\\199802_Org.txt","E:\\Sample\\Corpus_NewPOS\\199802_Org_cla.txt");
	ICTCLAS_Exit();
*/

	//Sample3: Sentence segmentation and POS tagging with multiple result
/* 
    char sSentence[2000],**sSentenceResult;
	int i;
	sSentenceResult=new char*[5];
	for(i=0;i<5;i++)
		sSentenceResult[i]=new char[5000];		

	ICTCLAS_Init();
	printf("Input sentence now!\n");
	scanf("%s",sSentence);
	while(_stricmp(sSentence,"q")!=0)
	{
		ICTCLAS_SentenceProcess(sSentence,5,sSentenceResult);
		for(i=0;i<5;i++)
			printf("Result%d:%s\n",i+1,sSentenceResult[i]);
		printf("Input string now!\n");
		scanf("%s",sSentence);
	}
    ICTCLAS_Exit();
	
	for(i=0;i<5;i++)
		delete[] sSentenceResult[i];		
	delete [] sSentenceResult;
*/
	return 0;
}
/*
 * Segments sInput by forwarding it to ICTCLAS_ParagraphProcess with a second
 * argument of 0 and hands back whatever pointer that call returns.
 *
 * NOTE(review): nothing here checks or converts the encoding; the UTF-8 in
 * the name presumably reflects how the library was initialised — confirm.
 * Ownership/lifetime of the returned string is decided by the library.
 */
const char* SplitUTF8(const char* sInput)
{
    return ICTCLAS_ParagraphProcess(sInput, 0);
}
Exemple #5
0
/*
 * Segmentation demo. It runs these steps in order:
 *   1. initialises ICTCLAS and selects the ICT_POS_MAP_SECOND POS map;
 *   2. segments a built-in sample sentence, adding user words one at a time
 *      and printing the result after each addition;
 *   3. enters an interactive loop that segments words read from stdin until
 *      "q" (case-insensitive) is entered or input ends;
 *   4. segments sInput before and after importing "userdic.txt", and again
 *      after adding one more user word;
 *   5. segments the file "testGBK.txt" into "testGBK_result.txt";
 *   6. releases the segmenter.
 *
 * sInput: text to segment in step 4.
 */
void SplitGBK(const char *sInput)
{// Segmentation demo

	// Initialise the segmenter.
	if(!ICTCLAS_Init())// Data files are expected in the current directory; GBK segmentation by default.
	{
		printf("ICTCLAS INIT FAILED!\n");
		return ;
	}

	ICTCLAS_SetPOSmap(ICT_POS_MAP_SECOND);

	char sSentence[2000]="三枪拍案惊奇的主创人员包括孙红雷、小沈阳、闫妮等,导演为张艺谋";
	const char * sResult;

	int nCount = 0; // Initialised so the printf below never reads an indeterminate value.
	ICTCLAS_ParagraphProcessA(sSentence,&nCount);
	printf("nCount=%d\n",nCount);

	ICTCLAS_AddUserWord("孙红雷 yym");// Add an actor name as a user word.
	sResult = ICTCLAS_ParagraphProcess(sSentence,1);
	printf("%s\n", sResult);
	ICTCLAS_AddUserWord("小沈阳 yym");// Add an actor name as a user word.
	sResult = ICTCLAS_ParagraphProcess(sSentence,1);
	printf("%s\n", sResult);
	ICTCLAS_AddUserWord("闫妮 yym");// Add an actor name as a user word.
	sResult = ICTCLAS_ParagraphProcess(sSentence,1);
	printf("%s\n", sResult);
	ICTCLAS_AddUserWord("三枪拍案惊奇 dym");// Add a film title as a user word.
	sResult = ICTCLAS_ParagraphProcess(sSentence,1);
	printf("%s\n", sResult);
	

	while(_stricmp(sSentence,"q")!=0)
	{
		sResult = ICTCLAS_ParagraphProcess(sSentence,0);
		printf("%s\nInput string now('q' to quit)!\n", sResult);
		// The width (buffer size - 1) prevents overflowing sSentence. Stopping
		// on EOF or read error avoids looping forever on the previous input.
		if(scanf("%1999s",sSentence)!=1)
		{
			break;
		}
	}
	
	// Before importing the user dictionary.
	printf("未导入用户词典:\n");
	sResult = ICTCLAS_ParagraphProcess(sInput, 0);
	printf("%s\n", sResult);

	// After importing the user dictionary.
	printf("\n导入用户词典后:\n");
	nCount = ICTCLAS_ImportUserDict("userdic.txt");// userdic.txt replaces the previous user dictionary.
	// Save the user dictionary.
	ICTCLAS_SaveTheUsrDic();
	printf("导入%d个用户词。\n", nCount);
	
	sResult = ICTCLAS_ParagraphProcess(sInput, 1);
	printf("%s\n", sResult);

	// Add a user word dynamically.
	printf("\n动态添加用户词后:\n");
	ICTCLAS_AddUserWord("计算机学院   xueyuan");
	ICTCLAS_SaveTheUsrDic();
	sResult = ICTCLAS_ParagraphProcess(sInput, 1);
	printf("%s\n", sResult);


	// Segment a whole file.
	ICTCLAS_FileProcess("testGBK.txt","testGBK_result.txt",1);


	// Release the segmenter's resources.
	ICTCLAS_Exit();
}