コード例 #1
0
ファイル: dicutil.c プロジェクト: 2ion/gjiten
/**
 * Finds the next dictionary line that contains srchstrg.
 *
 * The dictionary file is mmap()ed on demand; only one dictionary is kept
 * mapped at a time (tracked in conf.mmaped_dicfile). The position at which
 * the next search resumes is kept in a function-static pointer, so the
 * function is not reentrant.
 *
 * @param srchtype   SRCH_START restarts at the beginning of the file; any
 *                   other value resumes after the previous hit.
 * @param dicfile    dictionary to search; initialized here if necessary.
 * @param srchstrg   NUL-terminated byte string to look for.
 * @param res_index  out: address of the start of the matching line, cast to
 *                   guint32.
 *                   NOTE(review): this truncates pointers on 64-bit targets;
 *                   kept because the parameter type is part of the interface.
 * @param hit_pos    out: byte offset of the match inside the matching line.
 * @param res_len    not used by this function.
 * @param res_str    out: copy of the matching line (including its trailing
 *                   newline when present), NUL-terminated. Up to 1023 bytes
 *                   plus the terminator are written, so the buffer must hold
 *                   at least 1024 bytes.
 * @return SRCH_OK when a line was found, SRCH_FAIL otherwise.
 */
gint search4string(gint srchtype, GjitenDicfile *dicfile, gchar *srchstrg, guint32 *res_index, gint *hit_pos, gint *res_len, gchar *res_str) {
  enum { MAX_COPY = 1023 };
  static gchar *linsrchptr;   /* where the next search continues */
  gchar *linestart, *lineend, *mem_end;
  gint copySize;

  if (dicfile->status == DICFILE_NOT_INITIALIZED) {
    if (dicfile_init(dicfile) == FALSE) return SRCH_FAIL;
  }
  if (dicfile->status != DICFILE_OK) return SRCH_FAIL;

  if ((dicfile != conf.mmaped_dicfile) && (conf.mmaped_dicfile != NULL)) {
    /* release the mapping of the previously used dictionary */
    munmap(conf.mmaped_dicfile->mem, conf.mmaped_dicfile->size);
    conf.mmaped_dicfile->mem = NULL;
    conf.mmaped_dicfile = NULL;
    /* the resume pointer referred to the mapping that was just released */
    linsrchptr = NULL;
  }

  if (conf.mmaped_dicfile == NULL) {
    /* map the requested dictionary; mmap() reports failure with MAP_FAILED, not NULL */
    void *mapping = mmap(NULL, dicfile->size, PROT_READ, MAP_SHARED, dicfile->file, 0);
    if (mapping == MAP_FAILED) {
      dicfile->mem = NULL;
      gjiten_abort_with_msg("mmap() failed\n");
      return SRCH_FAIL;
    }
    dicfile->mem = (gchar *) mapping;
    conf.mmaped_dicfile = dicfile;
  }

  mem_end = dicfile->mem + dicfile->size;

  if (srchtype == SRCH_START) {
    linsrchptr = dicfile->mem;
  }
  /* nothing to continue from: the previous search failed or the file changed */
  if (linsrchptr == NULL) return SRCH_FAIL;

  for (;;) {
    /* bounded search: the mapping is not guaranteed to be NUL-terminated */
    linsrchptr = g_strstr_len(linsrchptr, mem_end - linsrchptr, srchstrg);
    if (linsrchptr == NULL) return SRCH_FAIL;

    /* walk back to the newline preceding the hit, or to the start of the file */
    linestart = linsrchptr;
    while ((*linestart != '\n') && (linestart != dicfile->mem)) linestart--;

    if (linestart != dicfile->mem) break;

    /* a hit on the very first line only counts when that line begins with a kanji or kana character */
    if ((isKanjiChar(g_utf8_get_char(linestart)) != FALSE) || (isKanaChar(g_utf8_get_char(linestart)) != FALSE)) break;

    linsrchptr++;   /* bad hit: retry one byte further */
  }

  /* step over the preceding newline; a line starting at byte 0 of the file has none */
  if (*linestart == '\n') linestart++;
  *hit_pos = linsrchptr - linestart;

  /* find the end of the line without reading beyond the mapping */
  lineend = linestart;
  while ((lineend < mem_end) && (*lineend != '\n')) lineend++;

  copySize = lineend - linestart;
  if (lineend < mem_end) {
    copySize++;   /* include the newline itself */
  } else {
    printf("weird.\n");   /* last line has no terminating newline */
  }
  if (copySize > MAX_COPY) copySize = MAX_COPY;

  linsrchptr++;   /* the next call resumes just after this hit */

  strncpy(res_str, linestart, copySize);
  res_str[copySize] = 0;
  *res_index = (guint32)linestart;
  return SRCH_OK;
}
コード例 #2
0
ファイル: kanjiutils.c プロジェクト: nicolas-raoul/gjitenkai
/**
 * Parses the radical/kanji file RADKFILE_NAME and fills the lookup tables.
 *
 * Lines starting with '#' are skipped. A line starting with '$' introduces a
 * radical: the first wide character after the '$' is the radical and the
 * first decimal number after it is its stroke count. Every other character
 * (newlines excepted) up to the next '$' is recorded as a kanji belonging to
 * the most recently read radical.
 *
 * @param pp_rad_info_hash    pointer to the hash table that receives
 *                            radical (UTF-8 string) -> RadInfo entries.
 * @param pp_kanji_info_hash  pointer to the hash table that receives
 *                            kanji (UTF-8 string) -> KanjiInfo entries.
 * @param rad_info_list       list to which every new RadInfo is prepended.
 * @return the updated head of rad_info_list; the list is returned unchanged
 *         when the file could not be read.
 *
 * NOTE(review): the buffer returned by read_file() is never released here;
 * its allocator is not visible from this function, so it is left as is.
 */
GList* load_radkfile(GHashTable **pp_rad_info_hash, 
                     GHashTable **pp_kanji_info_hash,
                     GList      *rad_info_list) {
  /* g_unichar_to_utf8() may write up to 6 bytes and adds no terminator */
  enum { UTF8_BUF_SIZE = 7 };

  GHashTable *rad_info_hash   = *pp_rad_info_hash;
  GHashTable *kanji_info_hash = *pp_kanji_info_hash;
  RadInfo *rad_info = NULL;   /* radical whose kanji are currently being read */

  gchar *radkfile = read_file(RADKFILE_NAME);
  if (radkfile == NULL) {
    gjiten_abort_with_msg("failed to read radkfile %s\n", RADKFILE_NAME);
    return rad_info_list;
  }

  gchar *radkfile_end = radkfile + strlen(radkfile); //FIXME: lseek
  gchar *radkfile_ptr = radkfile;

  //parse the content of the file
  while ((radkfile_ptr != NULL) && (radkfile_ptr < radkfile_end)) {

    //comment line: skip it
    if (*radkfile_ptr == '#') {
      radkfile_ptr = get_eof_line(radkfile_ptr, radkfile_end);
      continue;
    }

    //radical info line
    if (*radkfile_ptr == '$') {
      radkfile_ptr = g_utf8_next_char(radkfile_ptr);

      //advance to the radical itself (first wide character), staying inside the buffer
      while ((radkfile_ptr < radkfile_end) &&
             (g_unichar_iswide(g_utf8_get_char(radkfile_ptr)) == FALSE)) {
        radkfile_ptr = g_utf8_next_char(radkfile_ptr);
      }
      if (radkfile_ptr >= radkfile_end) break;   //truncated radical line

      //store the radical as a NUL-terminated UTF-8 string
      gpointer radical_utf8 = g_malloc0(UTF8_BUF_SIZE);
      g_unichar_to_utf8(g_utf8_get_char(radkfile_ptr), radical_utf8);

      //new rad_info to be stored in the rad_info_hash and rad_info_list
      rad_info = g_new0(RadInfo, 1);
      rad_info->kanji_info_list = NULL;
      rad_info->radical = radical_utf8;
      rad_info_list = g_list_prepend(rad_info_list, rad_info);

      //advance to the stroke count (first digit), staying inside the buffer
      while ((radkfile_ptr < radkfile_end) &&
             (g_ascii_isdigit(*radkfile_ptr) == FALSE)) {
        radkfile_ptr = g_utf8_next_char(radkfile_ptr);
      }
      rad_info->strokes = atoi(radkfile_ptr);

      //insert this radical as key and the info as value
      g_hash_table_insert(rad_info_hash, (gpointer)rad_info->radical, rad_info);

      if (radkfile_ptr >= radkfile_end) break;   //no stroke count before end of data

      //go to next line
      radkfile_ptr = get_eof_line(radkfile_ptr, radkfile_end);
    }
    else {
      //kanji located between two radical ($) lines belong to the current radical
      while ((radkfile_ptr < radkfile_end) && (*radkfile_ptr != '$')) {
        if (*radkfile_ptr == '\n') {
          radkfile_ptr++;
          continue;
        }

        //data before the first radical line has no radical to attach to
        if (rad_info == NULL) {
          radkfile_ptr = g_utf8_next_char(radkfile_ptr);
          continue;
        }

        //build the lookup key on the stack; it is duplicated only when stored
        gchar kanji_utf8[UTF8_BUF_SIZE] = {0};
        g_unichar_to_utf8(g_utf8_get_char(radkfile_ptr), kanji_utf8);

        //reuse the kanji info if this kanji is already known, otherwise create it
        KanjiInfo *kanji_info = g_hash_table_lookup(kanji_info_hash, kanji_utf8);
        if (kanji_info == NULL) {
          kanji_info = g_new0(KanjiInfo, 1);
          kanji_info->rad_info_list = NULL;
          kanji_info->kanji = g_strdup(kanji_utf8);

          //insert this kanji as a key and the kanji info as value
          g_hash_table_insert(kanji_info_hash, (gpointer) kanji_info->kanji, (gpointer) kanji_info);
        }

        //cross-link the kanji and the radical
        kanji_info->rad_info_list = g_list_prepend(kanji_info->rad_info_list, rad_info);
        rad_info->kanji_info_list = g_list_prepend(rad_info->kanji_info_list, kanji_info);

        //navigate to next character
        radkfile_ptr = g_utf8_next_char(radkfile_ptr);
      }
    }
  }

  return rad_info_list;
}