/* 处理模块 */ void httpcws_handler(struct evhttp_request *req, void *arg) { struct evbuffer *buf; buf = evbuffer_new(); /* 分析URL参数 */ struct evkeyvalq httpcws_http_query; evhttp_parse_query(evhttp_request_uri(req), &httpcws_http_query); /* 接收POST表单信息 */ const char *tcsql_input_postbuffer = (const char*) EVBUFFER_DATA(req->input_buffer); /* 接收GET表单参数 */ const char *httpcws_input_words = evhttp_find_header (&httpcws_http_query, "w"); const char *httpcws_output_tmp = NULL; char *httpcws_output_words = "\0"; if (tcsql_input_postbuffer != NULL) { char *tcsql_input_postbuffer_tmp = (char *) malloc(EVBUFFER_LENGTH(req->input_buffer)+1); memset (tcsql_input_postbuffer_tmp, '\0', EVBUFFER_LENGTH(req->input_buffer)+1); strncpy(tcsql_input_postbuffer_tmp, tcsql_input_postbuffer, EVBUFFER_LENGTH(req->input_buffer)); char *decode_uri = urldecode(tcsql_input_postbuffer_tmp); free(tcsql_input_postbuffer_tmp); httpcws_output_tmp = ICTCLAS_ParagraphProcess(decode_uri, 0); free(decode_uri); httpcws_output_words = strdup(httpcws_output_tmp); trim (httpcws_output_words); } else if (httpcws_input_words != NULL) { char *httpcws_input_words_tmp = strdup(httpcws_input_words); char *decode_uri = urldecode(httpcws_input_words_tmp); free(httpcws_input_words_tmp); httpcws_output_tmp = ICTCLAS_ParagraphProcess(decode_uri, 0); free(decode_uri); httpcws_output_words = strdup(httpcws_output_tmp); trim (httpcws_output_words); } else { httpcws_output_words = strdup(""); } /* 输出内容给客户端 */ evhttp_add_header(req->output_headers, "Server", "HTTPCWS/1.0.0"); evhttp_add_header(req->output_headers, "Content-Type", "text/plain; charset=GB2312"); evhttp_add_header(req->output_headers, "Connection", "close"); evbuffer_add_printf(buf, "%s", httpcws_output_words); evhttp_send_reply(req, HTTP_OK, "OK", buf); free(httpcws_output_words); evhttp_clear_headers(&httpcws_http_query); evbuffer_free(buf); }
void MainWindow::textBagOfWords(std::set<std::string> &featureDic, std::map<std::string, int> &text,int &textSize) { if(!ICTCLAS_Init()) //初始化分词组件。 { QMessageBox::warning(this,"Warnning","Init fails",QMessageBox::Yes); return; } else { printf("Init ok\n"); } ICTCLAS_SetPOSmap(2); QString myStr = ui->text->toPlainText(); QByteArray ba = myStr.toLocal8Bit(); char* sText; char* sSentence = (char*)malloc(ba.size()+10); sText = ba.data(); int len = 0; //这样读入为了将换行符去掉,ASCII码10、13 for(int i = 0; sText[i] != '\0'; i++) { if(sText[i] == '\n' || sText[i] == '\r')continue; sSentence[len++] = sText[i]; } sSentence[len] = '\0'; unsigned int nPaLen=strlen(sSentence); // 需要分词的长度 char* sRst=0; //用户自行分配空间,用于保存结果; sRst=(char*)malloc(nPaLen*6); //建议长度为字符串长度的6倍。 int nRstLen=0; //分词结果的长度 nRstLen = ICTCLAS_ParagraphProcess(sSentence,nPaLen,sRst,CODE_TYPE_UNKNOWN,0); //字符串处理 //free(sText); free(sSentence); //收集单词,形成字典 std::string words; std::istringstream istream(sRst); std::set<std::string> txtWords; while(istream>>words) { txtWords.insert(words); if(featureDic.count(words)) { text[words]++; } } textSize = txtWords.size(); free(sRst); txtWords.clear(); istream.clear(); ICTCLAS_Exit(); //释放资源退出 return; }
int main(int argc, char* argv[]) { //Sample1: Sentence or paragraph lexical analysis with only one result char sSentence[2000],sSentenceResult[5000]; ICTCLAS_Init(); printf("Input sentence now!\n"); scanf("%s",sSentence); while(_stricmp(sSentence,"q")!=0) { ICTCLAS_ParagraphProcess(sSentence,sSentenceResult); printf("%s\nInput string now!\n",sSentenceResult); scanf("%s",sSentence); } ICTCLAS_Exit(); //Sample2: File segmentation and POS tagging /* ICTCLAS_Init(); ICTCLAS_FileProcess("E:\\Sample\\Corpus_NewPOS\\199802_Org.txt","E:\\Sample\\Corpus_NewPOS\\199802_Org_cla.txt"); ICTCLAS_Exit(); */ //Sample3: Sentence segmentation and POS tagging with multiple result /* char sSentence[2000],**sSentenceResult; int i; sSentenceResult=new char*[5]; for(i=0;i<5;i++) sSentenceResult[i]=new char[5000]; ICTCLAS_Init(); printf("Input sentence now!\n"); scanf("%s",sSentence); while(_stricmp(sSentence,"q")!=0) { ICTCLAS_SentenceProcess(sSentence,5,sSentenceResult); for(i=0;i<5;i++) printf("Result%d:%s\n",i+1,sSentenceResult[i]); printf("Input string now!\n"); scanf("%s",sSentence); } ICTCLAS_Exit(); for(i=0;i<5;i++) delete[] sSentenceResult[i]; delete [] sSentenceResult; */ return 0; }
/**
 * Segments `sInput` by forwarding it to ICTCLAS_ParagraphProcess with the
 * second argument set to 0, and returns that call's result unchanged.
 *
 * NOTE(review): the name suggests UTF-8 input and an already initialised
 * segmenter; neither is established in this function - confirm at the call
 * sites. The lifetime of the returned pointer is whatever
 * ICTCLAS_ParagraphProcess guarantees; nothing is copied here.
 */
const char* SplitUTF8(const char* sInput)
{
    return ICTCLAS_ParagraphProcess(sInput, 0);
}
void SplitGBK(const char *sInput) {//分词演示 //初始化分词组件 if(!ICTCLAS_Init())//数据在当前路径下,默认为GBK编码的分词 { printf("ICTCLAS INIT FAILED!\n"); return ; } ICTCLAS_SetPOSmap(ICT_POS_MAP_SECOND); char sSentence[2000]="三枪拍案惊奇的主创人员包括孙红雷、小沈阳、闫妮等,导演为张艺谋"; const char * sResult; int nCount; ICTCLAS_ParagraphProcessA(sSentence,&nCount); printf("nCount=%d\n",nCount); ICTCLAS_AddUserWord("孙红雷 yym");//添加孙红雷,作为演员名称 sResult = ICTCLAS_ParagraphProcess(sSentence,1); printf("%s\n", sResult); ICTCLAS_AddUserWord("小沈阳 yym");//添加小沈阳,作为演员名称 sResult = ICTCLAS_ParagraphProcess(sSentence,1); printf("%s\n", sResult); ICTCLAS_AddUserWord("闫妮 yym");//添加闫妮,作为演员名称 sResult = ICTCLAS_ParagraphProcess(sSentence,1); printf("%s\n", sResult); ICTCLAS_AddUserWord("三枪拍案惊奇 dym");//添加三枪拍案惊奇,作为电影名称 sResult = ICTCLAS_ParagraphProcess(sSentence,1); printf("%s\n", sResult); while(_stricmp(sSentence,"q")!=0) { sResult = ICTCLAS_ParagraphProcess(sSentence,0); printf("%s\nInput string now('q' to quit)!\n", sResult); scanf("%s",sSentence); } //导入用户词典前 printf("未导入用户词典:\n"); sResult = ICTCLAS_ParagraphProcess(sInput, 0); printf("%s\n", sResult); //导入用户词典后 printf("\n导入用户词典后:\n"); nCount = ICTCLAS_ImportUserDict("userdic.txt");//userdic.txt覆盖以前的用户词典 //保存用户词典 ICTCLAS_SaveTheUsrDic(); printf("导入%d个用户词。\n", nCount); sResult = ICTCLAS_ParagraphProcess(sInput, 1); printf("%s\n", sResult); //动态添加用户词 printf("\n动态添加用户词后:\n"); ICTCLAS_AddUserWord("计算机学院 xueyuan"); ICTCLAS_SaveTheUsrDic(); sResult = ICTCLAS_ParagraphProcess(sInput, 1); printf("%s\n", sResult); //对文件进行分词 ICTCLAS_FileProcess("testGBK.txt","testGBK_result.txt",1); //释放分词组件资源 ICTCLAS_Exit(); }