/*
 * find_without_case - look up how an English keyword is actually cased in
 * the source text.
 *
 * content:   text handed to get_exact_whatis() as the place to search.
 * pkeywords: keyword to look up.
 *
 * Returns whatever get_exact_whatis(content, pkeywords) returns when
 * is_english(pkeywords) is true, and NULL otherwise.
 *
 * NOTE(review): the original comment said a Chinese keyword is "output as
 * is", but this function returns NULL for anything is_english() rejects;
 * presumably the caller falls back to the keyword itself -- confirm.
 */
char *find_without_case(const char *content, const char *pkeywords)
{
    /* Only English keywords need their original casing resolved. */
    if (!is_english(pkeywords)) {
        return NULL;
    }

    return get_exact_whatis(content, pkeywords);
}   /* ----- end of function find_without_case ----- */
char *create_tidy_str(const char *fin_path, char **pbuf, int* plen) { FILE *fp_in, *fp_out; fp_in = fopen(fin_path,"rb"); if(fp_in == NULL) { return; } fseek(fp_in, 0L, SEEK_END); int size = ftell(fp_in); fseek(fp_in, 0L, SEEK_SET); char *buf,*buf2; buf = (char*)malloc(size+1); buf2 = (char*)malloc(size+1); int len = fread(buf,1,size,fp_in); buf[len] = '\0'; bzero(buf2,size+1); char *p = buf; char *p2 = buf2; char *start = buf; int n; // 汉字或汉字之后EN_LENGHT个英文之内保留 do { // TODO is_chinese -> !is_ascii if(is_chinese(p)) { write_chinese(&p, &p2); start = p; } else { //printf("end:0x%x,p:0x%x\n",buf+len,p); // 非英文不保留,英文则保留有限位数 // TODO 判断英文不正确 会有一个英文多出来 if(!is_english(p)) { start = buf; p = forward_a_char(p); } else if(get_word_n(start, p - start) <= EN_LENGHT) { n = get_word_n(p, 1); memcpy(p2, p, n); p += n; p2 += n; } else if(get_word_n(start, p - start) == EN_LENGHT + 1) { addsep(&p2); p = forward_a_char(p); } else { p = forward_a_char(p); } } }while(*p != '\0'); free(buf); fclose(fp_in); *pbuf = buf2; *plen = strlen(buf2); return *pbuf; }