gint search4string(gint srchtype, GjitenDicfile *dicfile, gchar *srchstrg, guint32 *res_index, gint *hit_pos, gint *res_len, gchar *res_str) { gint search_result; gchar *linestart, *lineend; gint copySize = 1023; static gchar *linsrchptr; if (dicfile->status == DICFILE_NOT_INITIALIZED) { if (dicfile_init(dicfile) == FALSE) return SRCH_FAIL; } if (dicfile->status != DICFILE_OK) return SRCH_FAIL; if ((dicfile != conf.mmaped_dicfile) && (conf.mmaped_dicfile != NULL)) { //free mem of previously used dicfile munmap(conf.mmaped_dicfile->mem, conf.mmaped_dicfile->size); conf.mmaped_dicfile->mem = NULL; conf.mmaped_dicfile = NULL; } if (conf.mmaped_dicfile == NULL) { //mmap dicfile into memory conf.mmaped_dicfile = dicfile; dicfile->mem = (gchar *) mmap(NULL, dicfile->size, PROT_READ, MAP_SHARED, dicfile->file, 0); if (dicfile->mem == NULL) gjiten_abort_with_msg("mmap() failed\n"); conf.mmaped_dicfile = dicfile; } if (srchtype == SRCH_START) { linsrchptr = dicfile->mem; } bad_hit: search_result = SRCH_FAIL; // assume search fails linsrchptr = strstr(linsrchptr, srchstrg); if (linsrchptr != NULL) { // if we have a match linestart = linsrchptr; while ((*linestart != '\n') && (linestart != dicfile->mem)) linestart--; // find beginning of line if (linestart == dicfile->mem) { if ((isKanjiChar(g_utf8_get_char(linestart)) == FALSE) && (isKanaChar(g_utf8_get_char(linestart)) == FALSE)) { linsrchptr++; goto bad_hit; } } linestart++; lineend = linestart; *hit_pos = linsrchptr - linestart; while (*lineend != '\n') { // find end of line lineend++; if (lineend >= dicfile->mem + dicfile->size) { printf("weird.\n"); break; } } linsrchptr++; if ((lineend - linestart + 1) < 1023) copySize = lineend - linestart + 1; else copySize = 1023; strncpy(res_str, linestart, copySize); res_str[copySize] = 0; *res_index = (guint32)linestart; search_result = SRCH_OK; // search succeeded } return search_result; }
/*
 * load_radkfile - parse the radical/kanji file RADKFILE_NAME.
 *
 * File layout as handled by this parser:
 *   - lines starting with '#' are comments and are skipped;
 *   - a line starting with '$' introduces a radical: the first wide
 *     character after the '$' is the radical, the first ASCII digit after
 *     it starts the stroke count;
 *   - everything up to the next '$' is the list of kanji that contain the
 *     most recently read radical.
 *
 * pp_rad_info_hash    hash table receiving radical (UTF-8 string) -> RadInfo.
 * pp_kanji_info_hash  hash table receiving kanji (UTF-8 string) -> KanjiInfo.
 * rad_info_list       list to which every new RadInfo is prepended.
 *
 * Returns the new head of rad_info_list. Because entries are prepended, the
 * radicals appear in reverse file order.
 *
 * NOTE(review): the buffer returned by read_file() is not released here.
 * Nothing stored in the hashes or lists points into it (all strings are
 * copied), so it could be freed at the end once the allocator used by
 * read_file() is confirmed.
 */
GList* load_radkfile(GHashTable **pp_rad_info_hash, GHashTable **pp_kanji_info_hash, GList *rad_info_list)
{
  // g_unichar_to_utf8() may write up to 6 bytes and adds no terminator
  enum { UTF8_CHAR_BUF = 7 };

  GHashTable *rad_info_hash = *pp_rad_info_hash;
  GHashTable *kanji_info_hash = *pp_kanji_info_hash;
  RadInfo *rad_info = NULL;   // radical whose kanji list is being read
  gchar *radkfile;
  gchar *radkfile_ptr;
  gchar *radkfile_end;

  radkfile = read_file(RADKFILE_NAME);
  if (radkfile == NULL) {
    gjiten_abort_with_msg("failed to read radkfile %s\n", RADKFILE_NAME);
    return rad_info_list;   // only reached if the call above returns
  }

  radkfile_end = radkfile + strlen(radkfile); //FIXME: lseek
  radkfile_ptr = radkfile;

  // parse the content of the file
  while ((radkfile_ptr != NULL) && (radkfile_ptr < radkfile_end)) {

    // comment line: skip it
    if (*radkfile_ptr == '#') {
      radkfile_ptr = get_eof_line(radkfile_ptr, radkfile_end);
      continue;
    }

    if (*radkfile_ptr == '$') {
      // radical header line
      radkfile_ptr = g_utf8_next_char(radkfile_ptr);

      // move forward to the first wide character (the radical itself)
      while ((radkfile_ptr < radkfile_end)
             && (g_unichar_iswide(g_utf8_get_char(radkfile_ptr)) == FALSE)) {
        radkfile_ptr = g_utf8_next_char(radkfile_ptr);
      }
      if (radkfile_ptr >= radkfile_end)
        break;   // truncated header: no radical character present

      // new rad_info to be stored in the rad_info_hash and rad_info_list
      rad_info = g_new0(RadInfo, 1);
      rad_info->kanji_info_list = NULL;
      rad_info_list = g_list_prepend(rad_info_list, rad_info);

      // Store the radical as a NUL-terminated UTF-8 string. Two zeroed
      // gunichar cells give 8 bytes: room for the longest encoding plus
      // the terminator.
      gunichar radical_char = g_utf8_get_char(radkfile_ptr);
      gunichar *p_str_radical = g_new0(gunichar, 2);
      g_unichar_to_utf8(radical_char, (gchar *) p_str_radical);
      rad_info->radical = p_str_radical;

      // find the stroke number (first ASCII digit after the radical)
      while ((radkfile_ptr < radkfile_end)
             && (g_ascii_isdigit(*radkfile_ptr) == FALSE)) {
        radkfile_ptr = g_utf8_next_char(radkfile_ptr);
      }
      if (radkfile_ptr < radkfile_end)
        rad_info->strokes = (gint) strtol(radkfile_ptr, NULL, 10);

      // insert this radical as key and the info as value
      g_hash_table_insert(rad_info_hash, (gpointer) rad_info->radical, rad_info);

      if (radkfile_ptr >= radkfile_end)
        break;   // header was cut off before the stroke count

      // go to next line
      radkfile_ptr = get_eof_line(radkfile_ptr, radkfile_end);
      continue;
    }

    // kanji list: every character up to the next '$' belongs to the
    // radical read last
    while ((radkfile_ptr < radkfile_end) && (*radkfile_ptr != '$')) {
      if (*radkfile_ptr == '\n') {
        radkfile_ptr++;
        continue;
      }

      // Kanji that appear before any radical header cannot be attached to
      // a radical, so they are ignored instead of dereferencing NULL.
      if (rad_info != NULL) {
        gchar kanji[UTF8_CHAR_BUF];
        gint kanji_len = g_unichar_to_utf8(g_utf8_get_char(radkfile_ptr), kanji);
        kanji[kanji_len] = '\0';

        // look the kanji up; create and register it only when it is not
        // already present, so no key string is allocated needlessly
        KanjiInfo *kanji_info = g_hash_table_lookup(kanji_info_hash, kanji);
        if (kanji_info == NULL) {
          kanji_info = g_new0(KanjiInfo, 1);
          kanji_info->rad_info_list = NULL;
          kanji_info->kanji = g_strdup(kanji);
          // insert this kanji as a key and the kanji info as value
          g_hash_table_insert(kanji_info_hash, (gpointer) kanji_info->kanji, (gpointer) kanji_info);
        }

        // cross-link the kanji and the radical
        kanji_info->rad_info_list = g_list_prepend(kanji_info->rad_info_list, rad_info);
        rad_info->kanji_info_list = g_list_prepend(rad_info->kanji_info_list, kanji_info);
      }

      // navigate to next character
      radkfile_ptr = g_utf8_next_char(radkfile_ptr);
    }
  }

  return rad_info_list;
}