// qsort()-style comparator for an array of C-string pointers.
// Each argument points at one array element (a `char *`); the two strings
// are ordered with stardict_strcmp().
static int compare_str(const void * a, const void * b)
{
	char *lhs = *(char **)a;
	char *rhs = *(char **)b;

	return stardict_strcmp(lhs, rhs);
}
// GCompareFunc for arrays of `struct _synworditem`.
//
// Primary key: the synonym word, ordered with stardict_strcmp().
// Tie-break:   the address stored in `definition`, so that entries sharing
//              the same synonym word are grouped in a stable, deterministic
//              order.
//
// Returns a negative value, zero or a positive value when `a` sorts before,
// equal to or after `b`.
gint comparefunc2(gconstpointer a,gconstpointer b)
{
	const struct _synworditem *lhs = (const struct _synworditem *)a;
	const struct _synworditem *rhs = (const struct _synworditem *)b;

	gint x = stardict_strcmp(lhs->synword, rhs->synword);
	if (x != 0)
		return x;

	// The elements are _synworditem, so `definition` must be read through
	// that type (the previous code went through struct _worditem).
	// Compare the pointers instead of subtracting them: a pointer difference
	// does not necessarily fit into gint and could flip sign when narrowed.
	if (lhs->definition < rhs->definition)
		return -1;
	if (lhs->definition > rhs->definition)
		return 1;
	return 0;
}
int cmp(const void *s1, const void *s2) { PAIR *a, *b; a= (PAIR *)s1; b= (PAIR *)s2; return stardict_strcmp(a->words, b->words); }
// GCompareFunc for arrays of `struct _worditem`: orders two items by their
// `word` member using stardict_strcmp().
gint comparefunc(gconstpointer a,gconstpointer b)
{
	const struct _worditem *lhs = (const struct _worditem *)a;
	const struct _worditem *rhs = (const struct _worditem *)b;

	return stardict_strcmp(lhs->word, rhs->word);
}
// Look up `str` in the paged index held by `pMe`.
//
// pMe         - index object; the lookup may replace the currently loaded
//               page (pMe->page) through IdxsynFile_LoadPage().
// str         - key to search for.
// idx         - out: global entry position of the match; when there is no
//               exact match, the position of the following entry, 0 when
//               `str` sorts before the first key, or INVALID_INDEX when it
//               sorts after the last key.
// idx_suggest - out: position of a suggested nearby entry (equal to *idx on
//               an exact match).
//
// Returns TRUE only when an entry equal to `str` (per stardict_strcmp) exists.
boolean IdxsynFile_Lookup(IdxsynFile * pMe, const char *str, long *idx, long *idx_suggest)
{
	long lo, hi, mid;
	long pageNo, wordPos, suggestPos, prevPos;
	int rc, best, back;
	uint32 entriesOnPage;
	boolean hit = FALSE;

	// Keys outside the [first, real_last] range cannot be in the index.
	if (stardict_strcmp(str, pMe->first.keystr) < 0) {
		*idx = 0;
		*idx_suggest = 0;
		return FALSE;
	}
	if (stardict_strcmp(str, pMe->real_last.keystr) > 0) {
		*idx = INVALID_INDEX;
		*idx_suggest = pMe->wordcount - 1;
		return FALSE;
	}

	// Stage 1: binary search over the first key of every page to find the
	// page that may contain the key.
	lo = 0;
	hi = pMe->npages - 2;
	mid = 0;
	while (!hit && lo <= hi) {
		mid = (lo + hi) / 2;
		rc = stardict_strcmp(str, IdxsynFile_GetFirstOnPageKey(pMe, mid));
		if (rc > 0)
			lo = mid + 1;
		else if (rc < 0)
			hi = mid - 1;
		else
			hit = TRUE;
	}

	if (hit) {
		// The key is the very first entry of page `mid`.
		wordPos = mid * ENTR_PER_PAGE;
		*idx = wordPos;
		*idx_suggest = wordPos;
		return TRUE;
	}

	// Stage 2: the key can only be on the page preceding the insertion
	// point, i.e. page `hi`.
	pageNo = hi;
	entriesOnPage = IdxsynFile_LoadPage(pMe, pageNo);
	lo = 1; // entry 0 was already compared in stage 1
	hi = entriesOnPage - 1;
	mid = 0;
	while (!hit && lo <= hi) {
		mid = (lo + hi) / 2;
		rc = stardict_strcmp(str, pMe->page.entries[mid].keystr);
		if (rc > 0)
			lo = mid + 1;
		else if (rc < 0)
			hi = mid - 1;
		else
			hit = TRUE;
	}

	wordPos = pageNo * ENTR_PER_PAGE;
	if (hit) {
		wordPos += mid;
		suggestPos = wordPos;
	} else {
		// No exact match: report the next entry and then walk backwards
		// while the preceding entry scores at least as well in
		// prefix_match() (presumably the length of the shared prefix).
		wordPos += lo;
		suggestPos = wordPos;
		best = prefix_match (str, pMe->page.entries[suggestPos % ENTR_PER_PAGE].keystr);
		for (prevPos = suggestPos - 1; prevPos >= 0; prevPos = suggestPos - 1) {
			// Stepping back from the first entry of a page requires the
			// previous page to be loaded.
			if (suggestPos % ENTR_PER_PAGE == 0)
				IdxsynFile_LoadPage(pMe, prevPos / ENTR_PER_PAGE);
			back = prefix_match (str, pMe->page.entries[prevPos % ENTR_PER_PAGE].keystr);
			if (!back || back < best)
				break;
			best = back;
			suggestPos = prevPos;
		}
	}

	*idx = wordPos;
	*idx_suggest = suggestPos;
	return hit;
}
void builddata(gchar *datafilename, glong &wordcount, glong &idxfilesize, glong &synwordcount, std::list<std::string> *TagList, std::list<std::string> *ElementList) { struct stat stats; if (stat (datafilename, &stats) == -1) { printf("File %s not exist!\n", datafilename); return; } FILE *datafile; datafile = fopen(datafilename,"r"); gchar *buffer = (gchar *)g_malloc (stats.st_size + 1); fread (buffer, 1, stats.st_size, datafile); fclose (datafile); buffer[stats.st_size] = '\0'; GArray *array = g_array_sized_new(FALSE,FALSE, sizeof(struct _worditem),20000); GArray *array2 = g_array_sized_new(FALSE,FALSE, sizeof(struct _synworditem),20000); gchar *p, *p1, *p2, *p3, *p4, *p5; p = buffer; struct _worditem worditem; while (1) { p1 = strstr(p, "<单词块>"); if (!p1) { g_print("over\n"); break; } p1 += strlen("<单词块>"); p2 = strstr(p1, "</单词块>"); if (!p2) { g_print("Error, no </单词块>\n"); return; } *p2='\0'; p2 += strlen("</单词块>"); p3 = strstr(p1, "<单词>"); p5 = p1; while (g_ascii_isspace(*p5)) p5++; if (p5!=p3) { g_print("Warning, not begin with <单词>.\n"); } if (!p3) { g_print("Error, no <单词>\n"); return; } p3 += strlen("<单词>"); p4 = strstr(p3, "</单词>"); if (!p4) { g_print("Error, no </单词>\n"); return; } *p4='\0'; p4 += strlen("</单词>"); worditem.word = get_cdata(p3); if (!worditem.word) { return; } if (!worditem.word[0]) { g_print("Bad word!\n"); p = p2; continue; } while (g_ascii_isspace(*p4)) { p4++; } worditem.definition = p4; g_strstrip(worditem.definition); if (!worditem.definition[0]) { g_print("Bad definition!\n"); return; } ParseUserData Data; Data.word = worditem.word; Data.definition = worditem.definition; std::list<std::string> WordList; Data.WordList = &WordList; Data.array = array2; Data.TagList = TagList; Data.ElementList = ElementList; parse_definition(worditem.definition, &Data); g_array_append_val(array, worditem); p = p2; } g_array_sort(array,comparefunc); g_array_sort(array2,comparefunc2); gchar *basefilename = g_strdup(datafilename); p = strchr(basefilename, 
'.'); if (p) *p='\0'; gchar idxfilename[256]; gchar dicfilename[256]; sprintf(idxfilename, "powerword2007_%s.idx", basefilename); sprintf(dicfilename, "powerword2007_%s.dict", basefilename); FILE *idxfile = fopen(idxfilename,"w"); FILE *dicfile = fopen(dicfilename,"w"); guint32 offset_old; guint32 tmpglong; struct _worditem *pworditem; gint definition_len; gulong i; for (i=0; i< array->len; i++) { offset_old = ftell(dicfile); pworditem = &g_array_index(array, struct _worditem, i); definition_len = strlen(pworditem->definition); fwrite(pworditem->definition, 1 ,definition_len,dicfile); fwrite(pworditem->word,sizeof(gchar),strlen(pworditem->word)+1,idxfile); tmpglong = g_htonl(offset_old); fwrite(&(tmpglong),sizeof(guint32),1,idxfile); tmpglong = g_htonl(definition_len); fwrite(&(tmpglong),sizeof(guint32),1,idxfile); } idxfilesize = ftell(idxfile); fclose(idxfile); fclose(dicfile); g_print("%s wordcount: %d\n", datafilename, array->len); wordcount = array->len; synwordcount = array2->len; if (array2->len) { gchar synfilename[256]; sprintf(synfilename, "powerword2007_%s.syn", basefilename); FILE *synfile = fopen(synfilename,"w"); struct _synworditem *psynworditem; gint iFrom, iTo, iThisIndex, cmpint; bool bFound; for (i=0; i< array2->len; i++) { psynworditem = &g_array_index(array2, struct _synworditem, i); fwrite(psynworditem->synword, 1, strlen(psynworditem->synword)+1, synfile); g_free(psynworditem->synword); bFound=false; iFrom=0; iTo=array->len-1; while (iFrom<=iTo) { iThisIndex=(iFrom+iTo)/2; pworditem = &g_array_index(array, struct _worditem, iThisIndex); cmpint = stardict_strcmp(psynworditem->origword, pworditem->word); if (cmpint>0) iFrom=iThisIndex+1; else if (cmpint<0) iTo=iThisIndex-1; else { bFound=true; break; } } if (!bFound) { g_print("Error, %s not find.\n", psynworditem->origword); return; } do { if (iThisIndex==0) break; pworditem = &g_array_index(array, struct _worditem, iThisIndex-1); if (strcmp(psynworditem->origword, pworditem->word)==0) 
iThisIndex--; else break; } while (true); bFound=false; do { pworditem = &g_array_index(array, struct _worditem, iThisIndex); if (strcmp(psynworditem->origword, pworditem->word)==0) { if (psynworditem->definition == pworditem->definition) { bFound=true; break; } else iThisIndex++; } else break; } while (true); if (!bFound) { g_print("Error, %s definition not find.\n", psynworditem->origword); return; } tmpglong = g_htonl(iThisIndex); fwrite(&(tmpglong),sizeof(guint32),1, synfile); } fclose(synfile); g_print("synwordcount: %d\n", array2->len); }
// Load and verify the synonym file (<basefilename>.syn) into `synindex`.
//
// The file is read completely into memory and parsed as a sequence of
// records: a NUL-terminated word followed by a 32-bit index in network byte
// order. Every word is checked (UTF-8 validity, invalid XML characters,
// length, leading/trailing spaces, forbidden key characters, emptiness,
// ordering relative to the previous word) and the index is checked against
// dict_info.get_wordcount(). When `fix_errors` is set, the in-memory copy is
// repaired where possible; entries that must be ignored get an empty word.
//
// Returns the combined severity of all problems found. If the synonym count
// in dict_info is 0 and no .syn file exists, returns VERIF_RESULT_OK without
// reading anything. If the load fails and `fix_errors` is set, the synonym
// data is dropped (synwordcount reset to 0, `synindex` cleared).
VerifResult binary_dict_parser_t::load_syn_file(void)
{
	synfilename = basefilename + ".syn";
	VerifResult result = VERIF_RESULT_OK;
	if (dict_info.get_synwordcount() == 0) {
		if (g_file_test(synfilename.c_str(), G_FILE_TEST_EXISTS)) {
			// A .syn file exists although the dictionary declares no synonyms.
			g_warning(syn_file_exist_msg);
			result = combine_result(result, VERIF_RESULT_WARNING);
			if(fix_errors) {
				g_message(fixed_process_syn_file_msg);
			} else
				return result;
		} else
			return result;
	}

	guint32 synfilesize;
	{
		stardict_stat_t stats;
		if (g_stat (synfilename.c_str(), &stats) == -1) {
			std::string error(g_strerror(errno));
			g_warning(syn_file_no_found_msg, synfilename.c_str(), error.c_str());
			result = VERIF_RESULT_CRITICAL;
			if(fix_errors) {
				dict_info.set_synwordcount(0);
				g_message(fixed_ignore_syn_file_msg);
				return result;
			} else
				return result;
		}
		synfilesize = stats.st_size;
	}

	g_message(loading_syn_file_msg, synfilename.c_str());
	synindex.clear();
	synindex.reserve(std::min(MAX_RESERVED_INDEX_SIZE, dict_info.get_synwordcount()));

	// One extra element so that &buf[0] is valid even for an empty file.
	std::vector<gchar> buf(synfilesize+1);
	gchar *buffer_begin = &buf[0];
	gchar *buffer_end = buffer_begin+synfilesize;
	{
		FILE *synfile = g_fopen(synfilename.c_str(),"rb");
		if(!synfile) {
			std::string error(g_strerror(errno));
			g_warning(open_read_file_err, synfilename.c_str(), error.c_str());
			result = VERIF_RESULT_CRITICAL;
			if(fix_errors) {
				dict_info.set_synwordcount(0);
				g_message(fixed_ignore_syn_file_msg);
				return result;
			} else
				return result;
		}
		if(synfilesize != fread (buffer_begin, 1, synfilesize, synfile)) {
			std::string error(g_strerror(errno));
			g_warning(open_read_file_err, synfilename.c_str(), error.c_str());
			result = VERIF_RESULT_CRITICAL;
			fclose (synfile);
			if(fix_errors) {
				dict_info.set_synwordcount(0);
				g_message(fixed_ignore_syn_file_msg);
				return result;
			} else
				return result;
		}
		fclose (synfile);
	}

	const char *p=buffer_begin;
	int wordlen;
	gint cmpvalue;
	guint wordcount=0;
	synitem_t synitem, presynitem;
	size_t size_remain; // to the end of the synonyms file

	while (p < buffer_end) {
		size_remain = buffer_end - p;
		const char* const word_end = reinterpret_cast<const char*>(memchr(p, '\0', size_remain));
		if(!word_end) {
			// No terminating NUL before the end of the file.
			g_warning(syn_file_truncated_err);
			result = combine_result(result, VERIF_RESULT_CRITICAL);
			if(fix_errors)
				g_message(fixed_ignore_file_tail_msg);
			break;
		}
		synitem.word = p;
		wordlen = synitem.word.length();
		if (!g_utf8_validate(synitem.word.c_str(), wordlen, NULL)) {
			g_warning(word_invalid_utf8_err, synitem.word.c_str());
			result = combine_result(result, VERIF_RESULT_CRITICAL);
			if(fix_errors) {
				synitem.word = fix_utf8_str(synitem.word);
				wordlen = synitem.word.length();
				g_message(fixed_utf8_drop_invalid_char_msg);
			}
		}
		{	// check for invalid chars
			typedef std::list<const char*> str_list_t;
			str_list_t invalid_chars;
			const char* const word = synitem.word.c_str();
			if(check_xml_string_chars(word, invalid_chars)) {
				result = combine_result(result, VERIF_RESULT_WARNING);
				g_message(word_invalid_char_value_err, word, print_char_codes(invalid_chars).c_str());
				if(fix_errors) {
					g_message(fixed_drop_invalid_char_msg);
					fix_xml_string_chars(word, synitem.word);
					wordlen = synitem.word.length();
				}
			}
		}
		if (wordlen > 0) {
			if (wordlen>=MAX_INDEX_KEY_SIZE) {
				g_warning(long_word_err, synitem.word.c_str(), MAX_INDEX_KEY_SIZE, wordlen);
				result = combine_result(result, VERIF_RESULT_CRITICAL);
				if(fix_errors) {
					wordlen = truncate_utf8_string(synitem.word.c_str(), wordlen, MAX_INDEX_KEY_SIZE-1);
					synitem.word.resize(wordlen);
					g_message(fixed_word_truncated_msg);
				}
			}
			bool have_spaces = false;
			if (g_ascii_isspace(synitem.word[0])) {
				g_message(word_begin_space_err, synitem.word.c_str());
				result = combine_result(result, VERIF_RESULT_NOTE);
				have_spaces = true;
			}
			if (g_ascii_isspace(synitem.word[wordlen-1])) {
				g_message(word_end_space_err, synitem.word.c_str());
				result = combine_result(result, VERIF_RESULT_NOTE);
				have_spaces = true;
			}
			if(have_spaces && fix_errors) {
				g_message(fixed_trim_spaces);
				const char* new_beg;
				size_t new_len;
				trim_spaces(synitem.word.c_str(), new_beg, new_len);
				if(new_len == 0)
					synitem.word.clear();
				else {
					std::string tmp(new_beg, new_len);
					synitem.word = tmp;
				}
				// Keep wordlen in sync with the trimmed word; otherwise a
				// word consisting only of spaces escapes the empty-word
				// check below.
				wordlen = synitem.word.length();
			}
		}
		if (check_stardict_key_chars(synitem.word.c_str())) {
			g_message(word_forbidden_chars_err, synitem.word.c_str());
			result = combine_result(result, VERIF_RESULT_NOTE);
			if(fix_errors) {
				g_message(fixed_drop_invalid_char_msg);
				std::string tmp;
				fix_stardict_key_chars(synitem.word.c_str(), tmp);
				synitem.word = tmp;
				wordlen = synitem.word.length();
			}
		}
		if (wordlen==0) {
			g_warning(empty_word_err);
			result = combine_result(result, VERIF_RESULT_WARNING);
			if(fix_errors)
				g_message(fixed_ignore_word_msg);
		}
		if (!presynitem.word.empty() && !synitem.word.empty()) {
			cmpvalue=stardict_strcmp(presynitem.word.c_str(), synitem.word.c_str());
			if (cmpvalue>0) {
				g_warning(wrong_word_order_err, presynitem.word.c_str(), synitem.word.c_str());
				result = combine_result(result, VERIF_RESULT_WARNING);
				if(fix_errors)
					g_message(fixed_words_reordered_msg);
			}
		}
		p = word_end +1;
		size_remain = buffer_end - p;
		if(size_remain < sizeof(guint32)) {
			g_warning(syn_file_truncated_err);
			result = combine_result(result, VERIF_RESULT_CRITICAL);
			if(fix_errors)
				g_message(fixed_ignore_file_tail_msg);
			break;
		}
		{
			// The index follows a variable-length string, so `p` is not
			// necessarily aligned for guint32: copy the bytes out instead
			// of dereferencing a casted pointer.
			guint32 raw_index;
			memcpy(&raw_index, p, sizeof(raw_index));
			synitem.index = g_ntohl(raw_index);
		}
		if (synitem.index>=dict_info.get_wordcount()) {
			g_warning(wrong_index_err, synitem.word.c_str(), synitem.index);
			result = combine_result(result, VERIF_RESULT_CRITICAL);
			if(fix_errors) {
				// An empty word marks the entry as ignored.
				synitem.word.clear();
				g_message(fixed_ignore_word_msg);
			}
		}
		p+=sizeof(guint32);
		presynitem = synitem;
		wordcount++;
		synindex.push_back(synitem);
	} // while

	g_assert(p <= buffer_end);

	if (wordcount != dict_info.get_synwordcount()) {
		g_warning(incorrect_syn_word_cnt_err, dict_info.get_synwordcount(), wordcount);
		result = combine_result(result, VERIF_RESULT_CRITICAL);
		if(fix_errors) {
			dict_info.set_synwordcount(wordcount);
			g_message(fixed_msg);
		}
	}

	// Report duplicate (word, index) pairs among runs of equal adjacent words.
	for(size_t i=0; i < synindex.size(); ++i) {
		// Ignored entries carry an empty word; skip them, as load_idx_file()
		// does, so they are not reported as duplicates of each other.
		if(synindex[i].word.empty())
			continue;
		for(size_t j=i+1; j < synindex.size() && synindex[i].word == synindex[j].word; ++j) {
			if(synindex[i].index == synindex[j].index) {
				g_warning(duplicate_syn_item_err, synindex[i].word.c_str(), synindex[i].index);
				result = combine_result(result, VERIF_RESULT_NOTE);
				break;
			}
		}
	}

	if((fix_errors ? VERIF_RESULT_FATAL : VERIF_RESULT_CRITICAL) <= result) {
		g_warning(load_syn_file_failed_err, synfilename.c_str());
		if(fix_errors) {
			// The synonym file is unusable: drop it entirely.
			dict_info.set_synwordcount(0);
			synindex.clear();
			g_message(fixed_ignore_syn_file_msg);
			result = VERIF_RESULT_CRITICAL;
		}
	}
	return result;
}
// Load and verify the index file (`idxfilename`) into `index`.
//
// After prepare_idx_file() succeeds, the file is read completely into memory
// and parsed as a sequence of records: a NUL-terminated word followed by two
// 32-bit values in network byte order (offset, size). Every word is checked
// (UTF-8 validity, invalid XML characters, length, leading/trailing spaces,
// forbidden key characters, emptiness, ordering relative to the previous
// word) and records with size 0 are reported. When `fix_errors` is set, the
// in-memory copy is repaired where possible; entries that must be ignored
// get an empty word.
//
// Returns the combined severity of all problems found; VERIF_RESULT_FATAL is
// combined in when the file cannot be stat'ed, opened or read.
VerifResult binary_dict_parser_t::load_idx_file(void)
{
	VerifResult result = VERIF_RESULT_OK;
	{
		VerifResult res = prepare_idx_file();
		result = combine_result(result, res);
		if((fix_errors ? VERIF_RESULT_FATAL : VERIF_RESULT_CRITICAL) <= res)
			return result;
	}

	guint32 idxfilesize;
	{
		stardict_stat_t stats;
		if (g_stat (idxfilename.c_str(), &stats) == -1) {
			std::string error(g_strerror(errno));
			g_critical(file_not_found_idx_err, idxfilename.c_str(), error.c_str());
			return combine_result(result, VERIF_RESULT_FATAL);
		}
		idxfilesize = (guint32)stats.st_size;
	}

	g_message(loading_idx_file_msg, idxfilename_orig.c_str());

	if (dict_info.get_index_file_size() != idxfilesize) {
		g_warning(incorrect_idx_file_size_err, dict_info.get_index_file_size(), idxfilesize);
		result = combine_result(result, VERIF_RESULT_CRITICAL);
		if(fix_errors) {
			dict_info.set_index_file_size(idxfilesize);
			g_message(fixed_msg);
		} else
			return result;
	}

	index.clear();
	index.reserve(std::min(MAX_RESERVED_INDEX_SIZE, dict_info.get_wordcount()));

	// One extra element so that &buf[0] is valid even for an empty file.
	std::vector<gchar> buf(idxfilesize+1);
	gchar * const buffer_beg = &buf[0];
	gchar * const buffer_end = buffer_beg+idxfilesize;
	{
		FILE *idxfile = g_fopen(idxfilename.c_str(),"rb");
		if(!idxfile) {
			std::string error(g_strerror(errno));
			g_critical(open_read_file_err, idxfilename.c_str(), error.c_str());
			return combine_result(result, VERIF_RESULT_FATAL);
		}
		if(idxfilesize != fread(buffer_beg, 1, idxfilesize, idxfile)) {
			std::string error(g_strerror(errno));
			g_critical(open_read_file_err, idxfilename.c_str(), error.c_str());
			fclose(idxfile);
			return combine_result(result, VERIF_RESULT_FATAL);
		}
		fclose(idxfile);
	}

	const char *p=buffer_beg;
	int wordlen;
	gint cmpvalue;
	guint wordcount=0;
	worditem_t worditem, preworditem;
	size_t size_remain; // to the end of the index file

	while (p < buffer_end) {
		size_remain = buffer_end - p;
		const char* const word_end = reinterpret_cast<const char*>(memchr(p, '\0', size_remain));
		if(!word_end) {
			// No terminating NUL before the end of the file.
			g_warning(index_file_truncated_err);
			result = combine_result(result, VERIF_RESULT_CRITICAL);
			if(fix_errors)
				g_message(fixed_ignore_file_tail_msg);
			break;
		}
		worditem.word = p;
		wordlen = worditem.word.length();
		if (!g_utf8_validate(worditem.word.c_str(), wordlen, NULL)) {
			g_warning(word_invalid_utf8_err, worditem.word.c_str());
			result = combine_result(result, VERIF_RESULT_CRITICAL);
			if(fix_errors) {
				worditem.word = fix_utf8_str(worditem.word, 0);
				wordlen = worditem.word.length();
				g_message(fixed_utf8_drop_invalid_char_msg);
			}
		}
		{	// check for invalid chars
			typedef std::list<const char*> str_list_t;
			str_list_t invalid_chars;
			const char* const word = worditem.word.c_str();
			if(check_xml_string_chars(word, invalid_chars)) {
				result = combine_result(result, VERIF_RESULT_WARNING);
				g_message(word_invalid_char_value_err, word, print_char_codes(invalid_chars).c_str());
				if(fix_errors) {
					g_message(fixed_drop_invalid_char_msg);
					fix_xml_string_chars(word, worditem.word);
					wordlen = worditem.word.length();
				}
			}
		}
		if (wordlen > 0) {
			if (wordlen>=MAX_INDEX_KEY_SIZE) {
				g_warning(long_word_err, worditem.word.c_str(), MAX_INDEX_KEY_SIZE, wordlen);
				result = combine_result(result, VERIF_RESULT_CRITICAL);
				if(fix_errors) {
					wordlen = truncate_utf8_string(worditem.word.c_str(), wordlen, MAX_INDEX_KEY_SIZE-1);
					worditem.word.resize(wordlen);
					g_message(fixed_word_truncated_msg);
				}
			}
			bool have_spaces = false;
			if (g_ascii_isspace(worditem.word[0])) {
				g_message(word_begin_space_err, worditem.word.c_str());
				result = combine_result(result, VERIF_RESULT_NOTE);
				have_spaces = true;
			}
			if (g_ascii_isspace(worditem.word[wordlen-1])) {
				g_message(word_end_space_err, worditem.word.c_str());
				result = combine_result(result, VERIF_RESULT_NOTE);
				have_spaces = true;
			}
			if(have_spaces && fix_errors) {
				g_message(fixed_trim_spaces);
				const char* new_beg;
				size_t new_len;
				trim_spaces(worditem.word.c_str(), new_beg, new_len);
				if(new_len == 0)
					worditem.word.clear();
				else {
					std::string tmp(new_beg, new_len);
					worditem.word = tmp;
				}
				// Keep wordlen in sync with the trimmed word; otherwise a
				// word consisting only of spaces escapes the empty-word
				// check below.
				wordlen = worditem.word.length();
			}
		}
		if(check_stardict_key_chars(worditem.word.c_str())) {
			g_message(word_forbidden_chars_err, worditem.word.c_str());
			result = combine_result(result, VERIF_RESULT_NOTE);
			if(fix_errors) {
				g_message(fixed_drop_invalid_char_msg);
				std::string tmp;
				fix_stardict_key_chars(worditem.word.c_str(), tmp);
				worditem.word = tmp;
				wordlen = worditem.word.length();
			}
		}
		if (wordlen==0) {
			g_warning(empty_word_err);
			result = combine_result(result, VERIF_RESULT_WARNING);
			if(fix_errors)
				g_message(fixed_ignore_word_msg);
		}
		if (!preworditem.word.empty() && !worditem.word.empty()) {
			cmpvalue=stardict_strcmp(preworditem.word.c_str(), worditem.word.c_str());
			if (cmpvalue>0) {
				g_warning(wrong_word_order_err, preworditem.word.c_str(), worditem.word.c_str());
				result = combine_result(result, VERIF_RESULT_WARNING);
				if(fix_errors)
					g_message(fixed_words_reordered_msg);
			}
		}
		p = word_end + 1;
		size_remain = buffer_end - p;
		if(size_remain < 2 * sizeof(guint32)) {
			g_warning(index_file_truncated_err);
			result = combine_result(result, VERIF_RESULT_CRITICAL);
			if(fix_errors)
				g_message(fixed_ignore_file_tail_msg);
			break;
		}
		{
			// The two fields follow a variable-length string, so `p` is not
			// necessarily aligned for guint32: copy the bytes out instead
			// of dereferencing a casted pointer.
			guint32 raw_value;
			memcpy(&raw_value, p, sizeof(raw_value));
			worditem.offset = g_ntohl(raw_value);
			p += sizeof(guint32);
			memcpy(&raw_value, p, sizeof(raw_value));
			worditem.size = g_ntohl(raw_value);
			p += sizeof(guint32);
		}
		if (worditem.size==0) {
			g_warning(empty_block_err, worditem.word.c_str());
			result = combine_result(result, VERIF_RESULT_WARNING);
			if(fix_errors) {
				// An empty word marks the entry as ignored.
				worditem.word.clear();
				g_message(fixed_ignore_word_msg);
			}
		}
		preworditem = worditem;
		wordcount++;
		index.push_back(worditem);
	} // while

	g_assert(p <= buffer_end);

	if (dict_info.get_wordcount() != wordcount) {
		g_warning(incorrect_word_cnt_err, dict_info.get_wordcount(), wordcount);
		result = combine_result(result, VERIF_RESULT_CRITICAL);
		if(fix_errors) {
			dict_info.set_wordcount(wordcount);
			g_message(fixed_msg);
		}
	}

	// Report duplicate (word, offset, size) triples among runs of equal
	// adjacent words; ignored entries (empty word) are skipped.
	for(size_t i=0; i < index.size(); ++i) {
		if(index[i].word.empty())
			continue;
		for(size_t j=i+1; j < index.size() && index[i].word == index[j].word; ++j) {
			if(index[i].offset == index[j].offset && index[i].size == index[j].size) {
				g_warning(duplicate_index_item_err, index[i].word.c_str(), index[i].offset, index[i].size);
				result = combine_result(result, VERIF_RESULT_NOTE);
				break;
			}
		}
	}
	return result;
}
void convert_babylonfile(const char *filename, print_info_t print_info, bool strip_html) { struct stat stats; if (g_stat (filename, &stats) == -1) { print_info("File not exist!\n"); return; } gchar *basefilename = g_path_get_basename(filename); gchar *ch = strrchr(basefilename, '.'); if (ch) *ch = '\0'; gchar *dirname = g_path_get_dirname(filename); FILE *tabfile; tabfile = g_fopen(filename,"r"); gchar *buffer = (gchar *)g_malloc (stats.st_size + 1); size_t readsize = fread (buffer, 1, stats.st_size, tabfile); fclose (tabfile); buffer[readsize] = '\0'; GArray *array = g_array_sized_new(FALSE,FALSE, sizeof(struct _worditem),20000); GArray *array2 = g_array_sized_new(FALSE,FALSE, sizeof(struct _synworditem),20000); gchar *p, *p1, *p2, *p3, *p4, *p5; p = buffer; if ((guchar)*p==0xEF && (guchar)*(p+1)==0xBB && (guchar)*(p+2)==0xBF) // UTF-8 order characters. p+=3; struct _worditem worditem; struct _synworditem synworditem; gint linenum=1; int stripmethod; if (strip_html) stripmethod = 0; else stripmethod = 1; std::string sametypesequence = "m"; std::string bookname; std::string author; std::string email; std::string website; std::string description; std::string date; bool print_sameword; if (*p == '\n') { print_sameword = false; p++; linenum++; while (1) { if (*p == '\n') { p++; linenum++; break; } p++; p1 = strchr(p, '\n'); if (!p1) { return; } *p1 = '\0'; p1++; linenum++; if (g_str_has_prefix(p, "stripmethod=")) { p += sizeof("stripmethod=") -1; if (strcmp(p, "striphtml")==0) stripmethod = 0; else if (strcmp(p, "stripnewline")==0) stripmethod = 1; else if (strcmp(p, "keep")==0) stripmethod = 2; } else if (g_str_has_prefix(p, "sametypesequence=")) { p += sizeof("sametypesequence=") -1; sametypesequence = p; } else if (g_str_has_prefix(p, "bookname=")) { p += sizeof("bookname=") -1; bookname = p; } else if (g_str_has_prefix(p, "author=")) { p += sizeof("author=") -1; author = p; } else if (g_str_has_prefix(p, "email=")) { p += sizeof("email=") -1; email = p; } else if 
(g_str_has_prefix(p, "website=")) { p += sizeof("website=") -1; website = p; } else if (g_str_has_prefix(p, "date=")) { p += sizeof("date=") -1; date = p; } else if (g_str_has_prefix(p, "description=")) { p += sizeof("description=") -1; description = p; } p = p1; } } else { print_sameword = true; } while (1) { if (*p == '\0') { print_info("Over\n"); break; } p1 = strchr(p,'\n'); if (!p1) { gchar *str = g_strdup_printf("Error, no end line 1: %d\n", linenum); print_info(str); g_free(str); return; } *p1 = '\0'; p1++; linenum++; p2 = strchr(p1,'\n'); if (!p2) { gchar *str = g_strdup_printf("Error, no end line 2: %d\n", linenum); print_info(str); g_free(str); return; } *p2 = '\0'; p2++; linenum++; p3=p2; if (*p3 != '\n') { gchar *str = g_strdup_printf("Error, not null line %d", linenum); print_info(str); g_free(str); return; } *p3='\0'; p3++; linenum++; if (stripmethod == 0) { html_strstrip(p1, linenum-2, print_info); } else if (stripmethod == 1) { newline_strstrip(p1, linenum-2, print_info); } else if (stripmethod == 2) { } g_strstrip(p1); if (!(*p1)) { gchar *str = g_strdup_printf("%s-%d, bad definition!!!\n", basefilename, linenum-1); print_info(str); g_free(str); p= p3; continue; } p4 = strchr(p, '|'); if (p4) { *p4 = '\0'; worditem.word = p; g_strstrip(worditem.word); if (!worditem.word[0]) { gchar *str = g_strdup_printf("%s-%d, bad word!!!\n", basefilename, linenum-2); print_info(str); g_free(str); p=p3; continue; } worditem.definition = p1; g_array_append_val(array, worditem); std::list <std::string> WordList; WordList.push_back(worditem.word); p4++; while (true) { p5 = strchr(p4, '|'); if (p5) { *p5 = '\0'; synworditem.synword = p4; g_strstrip(synworditem.synword); if (!synworditem.synword[0]) { gchar *str = g_strdup_printf("%s-%d, bad word!!!\n", basefilename, linenum-2); print_info(str); g_free(str); p4 = p5+1; continue; } bool find = false; for (std::list<std::string>::const_iterator it=WordList.begin(); it!=WordList.end(); ++it) { if (*it == 
synworditem.synword) { find= true; break; } } if (find) { if (print_sameword) { gchar *str = g_strdup_printf("Same word: %s\n", synworditem.synword); print_info(str); g_free(str); } p4 = p5+1; continue; } else { WordList.push_back(synworditem.synword); } synworditem.origword = worditem.word; synworditem.definition = worditem.definition; g_array_append_val(array2, synworditem); p4 = p5+1; } else { synworditem.synword = p4; g_strstrip(synworditem.synword); if (!synworditem.synword[0]) { gchar *str = g_strdup_printf("%s-%d, bad word!!!\n", basefilename, linenum-2); print_info(str); g_free(str); break; } bool find = false; for (std::list<std::string>::const_iterator it=WordList.begin(); it!=WordList.end(); ++it) { if (*it == synworditem.synword) { find= true; break; } } if (find) { if (print_sameword) { gchar *str = g_strdup_printf("Same word: %s\n", synworditem.synword); print_info(str); g_free(str); } break; } synworditem.origword = worditem.word; synworditem.definition = worditem.definition; g_array_append_val(array2, synworditem); break; } } } else { worditem.word = p; g_strstrip(worditem.word); if (!worditem.word[0]) { gchar *str = g_strdup_printf("%s-%d, bad word!!!\n", basefilename, linenum-2); print_info(str); g_free(str); p=p3; continue; } worditem.definition = p1; g_array_append_val(array, worditem); } p= p3; } g_array_sort(array,comparefunc); g_array_sort(array2,comparefunc2); gchar ifofilename[256]; gchar idxfilename[256]; gchar dicfilename[256]; sprintf(ifofilename, "%s" G_DIR_SEPARATOR_S "%s.ifo", dirname, basefilename); sprintf(idxfilename, "%s" G_DIR_SEPARATOR_S "%s.idx", dirname, basefilename); sprintf(dicfilename, "%s" G_DIR_SEPARATOR_S "%s.dict", dirname, basefilename); FILE *ifofile = g_fopen(ifofilename,"wb"); FILE *idxfile = g_fopen(idxfilename,"wb"); FILE *dicfile = g_fopen(dicfilename,"wb"); guint32 offset_old; guint32 tmpglong; struct _worditem *pworditem; gint definition_len; gulong i; for (i=0; i< array->len; i++) { offset_old = 
ftell(dicfile); pworditem = &g_array_index(array, struct _worditem, i); definition_len = strlen(pworditem->definition); fwrite(pworditem->definition, 1 ,definition_len,dicfile); fwrite(pworditem->word,sizeof(gchar),strlen(pworditem->word)+1,idxfile); tmpglong = g_htonl(offset_old); fwrite(&(tmpglong),sizeof(guint32),1,idxfile); tmpglong = g_htonl(definition_len); fwrite(&(tmpglong),sizeof(guint32),1,idxfile); } fclose(idxfile); fclose(dicfile); gchar *str = g_strdup_printf("%s wordcount: %d\n", basefilename, array->len); print_info(str); g_free(str); if (array2->len) { gchar synfilename[256]; sprintf(synfilename, "%s" G_DIR_SEPARATOR_S "%s.syn", dirname, basefilename); FILE *synfile = g_fopen(synfilename,"wb"); struct _synworditem *psynworditem; gint iFrom, iTo, iThisIndex, cmpint; bool bFound; for (i=0; i< array2->len; i++) { psynworditem = &g_array_index(array2, struct _synworditem, i); fwrite(psynworditem->synword, 1, strlen(psynworditem->synword)+1, synfile); bFound=false; iFrom=0; iTo=array->len-1; while (iFrom<=iTo) { iThisIndex=(iFrom+iTo)/2; pworditem = &g_array_index(array, struct _worditem, iThisIndex); cmpint = stardict_strcmp(psynworditem->origword, pworditem->word); if (cmpint>0) iFrom=iThisIndex+1; else if (cmpint<0) iTo=iThisIndex-1; else { bFound=true; break; } } if (!bFound) { gchar *str = g_strdup_printf("Error, %s not find.\n", psynworditem->origword); print_info(str); g_free(str); return; } do { if (iThisIndex==0) break; pworditem = &g_array_index(array, struct _worditem, iThisIndex-1); if (strcmp(psynworditem->origword, pworditem->word)==0) iThisIndex--; else break; } while (true); bFound=false; do { pworditem = &g_array_index(array, struct _worditem, iThisIndex); if (strcmp(psynworditem->origword, pworditem->word)==0) { if (psynworditem->definition == pworditem->definition) { bFound=true; break; } else iThisIndex++; } else break; } while (true); if (!bFound) { gchar *str = g_strdup_printf("Error, %s definition not find.\n", 
psynworditem->origword); print_info(str); g_free(str); return; } tmpglong = g_htonl(iThisIndex); fwrite(&(tmpglong),sizeof(guint32),1, synfile); } fclose(synfile); gchar *str = g_strdup_printf("%s synwordcount: %d\n", basefilename, array2->len); print_info(str); g_free(str); }