コード例 #1
0
ファイル: test-string.c プロジェクト: mooz/groonga
/*
 * Data-driven test: opens the "input" datum with grn_str_open() using the
 * normalize/checks/ctypes flags and compares the normalized bytes and their
 * byte length with the "expected" datum.  Both strings go through
 * convert_encoding() with the "encoding" datum before use.
 */
void
test_normalize(gconstpointer data)
{
  const int open_flags =
    GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;
  grn_encoding enc;
  const gchar *raw_input, *input_bytes;
  const gchar *raw_expected, *expected_bytes;
  grn_str *str;
  const gchar *actual;
  guint actual_blen;

  enc = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, enc);

  raw_input = gcut_data_get_string(data, "input");
  input_bytes = convert_encoding(raw_input, enc);
  str = grn_str_open(&context, input_bytes, strlen(input_bytes), open_flags);

  /* Copy the result out of the grn_str before it is closed. */
  actual = cut_take_strndup(str->norm, str->norm_blen);
  actual_blen = str->norm_blen;
  grn_test_assert(grn_str_close(&context, str));

  raw_expected = gcut_data_get_string(data, "expected");
  expected_bytes = convert_encoding(raw_expected, enc);
  cut_assert_equal_string(expected_bytes, actual);
  cut_assert_equal_int(strlen(expected_bytes), actual_blen);
}
コード例 #2
0
/*
 * Removes the first complete line from self->textbuf and returns it.
 *
 * The line's bytes (without the trailing "\n" or "\r\n") are handed to
 * convert_encoding() and its result is returned; the line and its terminator
 * are then erased from the front of the buffer.  Returns NULL, leaving the
 * buffer untouched, when the buffer contains no '\n' yet.
 */
static gchar *
get_next_line (GstSubParse * self)
{
  const char *start = self->textbuf->str;
  const char *newline = strchr (start, '\n');
  gboolean crlf;
  int content_len;
  char *converted;

  /* no end-of-line yet; the caller has to supply more data first */
  if (newline == NULL)
    return NULL;

  /* a '\r' directly before the '\n' belongs to the terminator, not the line */
  crlf = (newline != start && newline[-1] == '\r');
  content_len = (crlf ? newline - 1 : newline) - start;

  converted = convert_encoding (self, start, content_len);
  self->textbuf = g_string_erase (self->textbuf, 0,
      content_len + (crlf ? 2 : 1));
  return converted;
}
コード例 #3
0
ファイル: test-string.c プロジェクト: WEIC-DEV/groonga
/*
 * Data-driven test: feeds the "input" datum (converted with the
 * "input-encoding" datum, truncated/extended to "input-length" bytes when
 * that datum is non-negative) to grn_string_open() under the
 * "context-encoding" datum and expects an empty normalized result:
 * empty text, zero bytes, zero characters.
 */
void
test_normalize_broken(gconstpointer data)
{
  const int open_flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;
  grn_encoding ctx_enc, src_enc;
  const gchar *raw, *bytes;
  gint n_bytes;
  grn_obj *str;
  const gchar *result;
  guint result_blen, result_n_chars;

  ctx_enc = gcut_data_get_int(data, "context-encoding");
  GRN_CTX_SET_ENCODING(&context, ctx_enc);

  raw = gcut_data_get_string(data, "input");
  src_enc = gcut_data_get_int(data, "input-encoding");
  n_bytes = gcut_data_get_int(data, "input-length");
  bytes = convert_encoding(raw, src_enc);
  /* A negative "input-length" means: use the whole converted string. */
  if (n_bytes < 0) {
    n_bytes = strlen(bytes);
  }

  str = grn_string_open(&context, bytes, n_bytes, GRN_NORMALIZER_AUTO,
                        open_flags);
  grn_string_get_normalized(&context, str,
                            &result, &result_blen, &result_n_chars);
  /* Copy the text before the string object is unlinked. */
  result = cut_take_strndup(result, result_blen);
  grn_obj_unlink(&context, str);

  cut_assert_equal_string("", result);
  cut_assert_equal_int(0, result_blen);
  cut_assert_equal_int(0, result_n_chars);
}
コード例 #4
0
ファイル: gui.cpp プロジェクト: SL-RU/rpb
// Stores the label text and rewinds the position counter.
// The argument is passed through convert_encoding(str, "UTF-8", "CP1251");
// the converted string is stored in `text`, `curPos` is reset to 0 and the
// converted string is returned.
// NOTE(review): which of the two encoding names is the source and which the
// target depends on convert_encoding's signature - confirm there.
string GUILable::SetText(string str)
{
  const string converted = convert_encoding(str, "UTF-8", "CP1251");

  text = converted;
  curPos = 0;

  return converted;
}
コード例 #5
0
ファイル: test-string.c プロジェクト: WEIC-DEV/groonga
/*
 * Data-driven test: opens the "input" datum with grn_string_open() and
 * GRN_NORMALIZER_AUTO, then compares the normalized text, its byte length
 * and its character count with the "expected" datum.  Both strings go
 * through convert_encoding() with the "encoding" datum; the expected
 * character count is taken from the UTF-8 form of "expected".
 */
void
test_normalize(gconstpointer data)
{
  const int open_flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;
  grn_encoding enc;
  const gchar *raw_input, *input_bytes;
  const gchar *raw_expected, *expected_bytes;
  grn_obj *str;
  const gchar *actual;
  guint actual_blen;
  guint actual_n_chars;

  enc = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, enc);

  raw_input = gcut_data_get_string(data, "input");
  input_bytes = convert_encoding(raw_input, enc);
  str = grn_string_open(&context, input_bytes, strlen(input_bytes),
                        GRN_NORMALIZER_AUTO, open_flags);
  grn_string_get_normalized(&context, str,
                            &actual, &actual_blen, &actual_n_chars);
  /* Copy the text before the string object is unlinked. */
  actual = cut_take_strndup(actual, actual_blen);
  grn_obj_unlink(&context, str);

  raw_expected = gcut_data_get_string(data, "expected");
  expected_bytes = convert_encoding(raw_expected, enc);
  cut_assert_equal_string(expected_bytes, actual);
  cut_assert_equal_uint(strlen(expected_bytes), actual_blen);
  cut_assert_equal_uint(g_utf8_strlen(raw_expected, -1), actual_n_chars);
}
コード例 #6
0
ファイル: text_line.cpp プロジェクト: untgames/funner
// Builds a wide string from the UTF-32 text of a TextLine.
// When wchar_t is 4 bytes wide the UTF-32 buffer is reinterpreted directly;
// otherwise the text is converted from UTF-32LE to UTF-16LE into a buffer of
// TextLength () wchar_t elements.
// NOTE(review): the destination has one wchar_t per source code unit, so text
// needing surrogate pairs may not fit - confirm how convert_encoding handles
// a short destination.
stl::wstring towstring (const TextLine& line)
{
  if (sizeof (wchar_t) != 4)
  {
    stl::wstring utf16;

    utf16.fast_resize (line.TextLength ());

    const void* src      = line.TextUtf32 ();
    size_t      src_size = line.TextLength () * sizeof (unsigned int);
    void*       dst      = &utf16 [0];
    size_t      dst_size = utf16.size () * sizeof (wchar_t);

    convert_encoding (common::Encoding_UTF32LE, src, src_size, common::Encoding_UTF16LE, dst, dst_size);

    return utf16;
  }

  return stl::wstring ((const wchar_t*)line.TextUtf32 (), line.TextLength ());
}
コード例 #7
0
ファイル: test-string.c プロジェクト: mooz/groonga
/*
 * Data-driven test: expects grn_charlen() to return 0 for the "input" datum
 * (converted with the "encoding" datum).  The byte range handed to
 * grn_charlen() ends "input-length" bytes after the start, or at the end of
 * the converted string when that datum is negative.
 */
void
test_charlen_broken(gconstpointer data)
{
  grn_encoding enc;
  const gchar *raw, *bytes;
  gint n_bytes;

  enc = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, enc);

  raw = gcut_data_get_string(data, "input");
  n_bytes = gcut_data_get_int(data, "input-length");
  bytes = convert_encoding(raw, enc);
  /* A negative "input-length" means: use the whole converted string. */
  if (n_bytes < 0) {
    n_bytes = strlen(bytes);
  }

  cut_assert_equal_uint(0, grn_charlen(&context, bytes, bytes + n_bytes));
}
コード例 #8
0
ファイル: eplkup.c プロジェクト: Downfy/rikaisama
/*------------------------------------------------------------------------
-- Name: print_title_to_out_file
--
-- Description:
--   Print the title of the book to the output file.
--
-- Parameters:
--   None.
--
-- Returns:
--   None.
--
------------------------------------------------------------------------*/
static void print_title_to_out_file(void)
{
    EB_Error_Code error_code;
    char *status_conv = NULL;
    FILE *out_file = NULL;

    /* Get the title of the subbook */
    error_code = eb_subbook_title2(&book, subbook_index, title);

    if(error_code != EB_SUCCESS)
    {
        fprintf(stderr, "Error: Failed to get the title: %s\n", eb_error_message(error_code));
        die(1);
    }

    /* Convert title from EUC-JP to UTF-8 */
    status_conv = convert_encoding(conv_buf, MAXLEN_CONV, title, EB_MAX_TITLE_LENGTH, "UTF-8", "EUC-JP");

    if(status_conv == NULL)
    {
        fprintf(stderr, "Error: Something went wrong when trying to encode the title\n");
        die(1);
    }

    out_file = fopen(out_path, "w");

    if(out_file == NULL)
    {
        fprintf(stderr, "Error: Could not open output file, \"%s\"\n", out_path);
        die(1);
    }

    /* Output the text to file (in UTF-8) */
    fwrite(conv_buf, 1, strlen(conv_buf), out_file);

    fclose(out_file);

} /* print_title_to_out_file */
コード例 #9
0
ファイル: test-string.c プロジェクト: mooz/groonga
/*
 * Data-driven test: feeds the "input" datum (converted with the
 * "input-encoding" datum, using "input-length" bytes when that datum is
 * non-negative) to grn_str_open() under the "context-encoding" datum and
 * expects an empty normalized string with a byte length of zero.
 */
void
test_normalize_broken(gconstpointer data)
{
  const int open_flags =
    GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;
  grn_encoding ctx_enc, src_enc;
  const gchar *raw, *bytes;
  gint n_bytes;
  grn_str *str;

  ctx_enc = gcut_data_get_int(data, "context-encoding");
  GRN_CTX_SET_ENCODING(&context, ctx_enc);

  raw = gcut_data_get_string(data, "input");
  src_enc = gcut_data_get_int(data, "input-encoding");
  n_bytes = gcut_data_get_int(data, "input-length");
  bytes = convert_encoding(raw, src_enc);
  /* A negative "input-length" means: use the whole converted string. */
  if (n_bytes < 0) {
    n_bytes = strlen(bytes);
  }

  str = grn_str_open(&context, bytes, n_bytes, open_flags);
  cut_assert_equal_string("", str->norm);
  cut_assert_equal_int(0, str->norm_blen);
  grn_test_assert(grn_str_close(&context, str));
}
コード例 #10
0
ファイル: eplkup.c プロジェクト: Downfy/rikaisama
/*------------------------------------------------------------------------
-- Name: lookup_word
--
-- Description:
--   Lookup the input word and send the results to the output file.
--
-- Parameters:
--   None.
--
-- Returns:
--   None.
--
------------------------------------------------------------------------*/
static void lookup_word(void)
{
    char lookup_word_utf8[MAXLEN_LOOKUP_WORD + 1];
    char lookup_word_eucjp[MAXLEN_LOOKUP_WORD + 1];
    char *status_conv = NULL;
    EB_Error_Code error_code;
    EB_Hit hits[MAX_HITS];
    FILE *in_file = NULL;
    FILE *out_file = NULL;
    int hit_count;
    int heading_length;
    int text_length;
    int i;

    /* Get the word to lookup */
    in_file = fopen(in_path, "r");

    if(in_file == NULL)
    {
        fprintf(stderr, "Error: Could not open input file: \"%s\"", in_path);
        die(1);
    }

    if(fgets(lookup_word_utf8, MAXLEN_LOOKUP_WORD, in_file) == NULL)
    {
        fclose(in_file);
        fprintf(stderr, "Error: Could not read word from input file: \"%s\"", in_path);
        die(1);
    }

    fclose(in_file);

    /* Remove the final '\n' */
    if(lookup_word_utf8[strlen(lookup_word_utf8) - 1] == '\n')
    {
        lookup_word_utf8[strlen(lookup_word_utf8) - 1] = '\0';
    }

    /* Convert the lookup word from UTF-8 to EUC-JP */
    status_conv = convert_encoding(lookup_word_eucjp, MAXLEN_LOOKUP_WORD, lookup_word_utf8, strlen(lookup_word_utf8), "EUC-JP", "UTF-8");

    if(status_conv == NULL)
    {
        fprintf(stderr, "Error: Something went wront when trying to encode the lookup word\n");
        die(1);
    }

    /* Perform an exact search of the lookup word */
    error_code = eb_search_exactword(&book, lookup_word_eucjp);

    if(error_code != EB_SUCCESS)
    {
        fprintf(stderr, "Error: Failed to search for the word, %s: %s\n", eb_error_message(error_code), lookup_word_eucjp);
        die(1);
    }

    while(1)
    {
        /* Get the list of hits */
        error_code = eb_hit_list(&book, MAX_HITS, hits, &hit_count);

        if(error_code != EB_SUCCESS)
        {
            fprintf(stderr, "Error: Failed to get hit entries, %s\n", eb_error_message(error_code));
            die(1);
        }

        /* Are we done? */
        if(hit_count == 0)
        {
            break;
        }

        /* Create the output file */
        out_file = fopen(out_path, "w");

        if(out_file == NULL)
        {
            fprintf(stderr, "Error: Could not open output file, \"%s\"\n", out_path);
            die(1);
        }

        /* Output only the number of hits? */
        if(show_hit_count)
        {
            fprintf(out_file, "{HITS: %d}\n", hit_count);
        }

        /* Determine the max number of hits to output */
        hit_count = MIN(hit_count, max_hits_to_output);

        /* For each search hit, print the hit information to the output file */
        for(i = 0; i < hit_count; i++)
        {
            /* Did the user specify a particular hit index to output? */
            if(hit_to_output >= 0)
            {
                i = hit_to_output;
            }

            /* Output the hit number */
            if(print_hit_number && (hit_count > 1) && (hit_to_output == -1))
            {
                fprintf(out_file, "{ENTRY: %d}\n", i);
            }

            /* Print the heading of the hit to file */
            if(print_heading)
            {
                /* Seek to the heading */
                error_code = eb_seek_text(&book, &(hits[i].heading));

                if(error_code != EB_SUCCESS)
                {
                    fprintf(stderr, "Error: Failed to seek the subbook, %s\n", eb_error_message(error_code));
                    fclose(out_file);
                    die(1);
                }

                /* Read the heading */
                error_code = eb_read_heading(&book, NULL, &hookset, NULL, MAXLEN_HEADING, heading, &heading_length);

                if(error_code != EB_SUCCESS)
                {
                    fprintf(stderr, "Error: Failed to read the subbook, %s\n", eb_error_message(error_code));
                    fclose(out_file);
                    die(1);
                }

                /* Convert from EUC-JP to UTF-8 */
                status_conv = convert_encoding(conv_buf, MAXLEN_CONV, heading, heading_length, "UTF-8", "EUC-JP");

                if(status_conv == NULL)
                {
                    fprintf(stderr, "Error: Something went wrong when trying to encode the the heading\n");
                    fclose(out_file);
                    die(1);
                }

                /* Replace gaiji that have UTF-8 equivalents */
                replace_gaiji_with_utf8(final_buf, conv_buf);

                /* Output the header to file (in UTF-8) */
                fprintf(out_file, "%s\n", conv_buf);
            }

            /* Print the text of the hit to file */
            if(print_text)
            {
                /* Seek to the text */
                error_code = eb_seek_text(&book, &(hits[i].text));

                if(error_code != EB_SUCCESS)
                {
                    fprintf(stderr, "Error: Failed to seek the subbook, %s\n", eb_error_message(error_code));
                    fclose(out_file);
                    die(1);
                }

                /* Read the text*/
                error_code = eb_read_text(&book, NULL, &hookset, NULL, MAXLEN_TEXT, text, &text_length);

                if(error_code != EB_SUCCESS)
                {
                    fprintf(stderr, "Error: Failed to read the subbook, %s\n", eb_error_message(error_code));
                    fclose(out_file);
                    die(1);
                }
            }

            /* Convert from EUC-JP to UTF-8 */
            status_conv = convert_encoding(conv_buf, MAXLEN_CONV, text, text_length, "UTF-8", "EUC-JP");

            if(status_conv == NULL)
            {
                fprintf(stderr, "Error: Something went wrong when trying to encode the the text\n");
                fclose(out_file);
                die(1);
            }

            /* Replace gaiji that have UTF-8 equivalents */
            replace_gaiji_with_utf8(final_buf, conv_buf);

            /* Output the text to file (in UTF-8) */
            fwrite(final_buf, 1, strlen(final_buf), out_file);

            /* Since the user specified a hit index, don't display the other hits */
            if(hit_to_output >= 0)
            {
                break;
            }
        }

        fclose(out_file);
    }

} /* lookup_word */
コード例 #11
0
ファイル: eplkup.c プロジェクト: Downfy/rikaisama
/*------------------------------------------------------------------------
-- Name: lookup_link
--
-- Description:
--   Lookup the input link and send the results to the output file.
--
--   The first line of the input file (in_path) must hold two hexadecimal
--   numbers, the page and the offset of the link target. The text at that
--   position is read, converted from EUC-JP to UTF-8, passed through
--   replace_gaiji_with_utf8() and written to the output file (out_path).
--   Every failure is reported on stderr and followed by die(1).
--
-- Parameters:
--   None.
--
-- Returns:
--   None.
--
------------------------------------------------------------------------*/
static void lookup_link(void)
{
    EB_Error_Code error_code;
    EB_Position position;
    FILE *in_file = NULL;
    FILE *out_file = NULL;
    char link_text[MAXLEN_LOOKUP_WORD] = "";
    char *status_conv = NULL;
    int text_length;
    int parse_result;

    in_file = fopen(in_path, "r");

    if(in_file == NULL)
    {
        fprintf(stderr, "Error: Could not open input file: \"%s\"", in_path);
        die(1);
    }

    if(fgets(link_text, MAXLEN_LOOKUP_WORD, in_file) == NULL)
    {
        fclose(in_file);
        fprintf(stderr, "Error: Could not read word from input file: \"%s\"", in_path);
        die(1);
    }

    fclose(in_file);

    /* Parse the location of the link in the subbook */
    parse_result = sscanf(link_text, "%X %X", &position.page, &position.offset);

    /* If link was not parsed correctly (2 is the expected number of fields in the input file) */
    if(parse_result != 2)
    {
        fprintf(stderr, "Error: Could not parse link from input file, %d.\n", parse_result);
        die(1);
    }

    error_code = eb_seek_text(&book, &position);

    if(error_code != EB_SUCCESS)
    {
        fprintf(stderr, "Error: Failed to seek text, \"%s\"\n", eb_error_message(error_code));
        die(1);
    }

    error_code = eb_read_text(&book, NULL, &hookset, NULL, MAXLEN_TEXT, text, &text_length);

    if(error_code != EB_SUCCESS)
    {
        fprintf(stderr, "Error: Failed to read text, \"%s\"\n", eb_error_message(error_code));
        die(1);
    }

    /* Convert from EUC-JP to UTF-8 */
    status_conv = convert_encoding(conv_buf, MAXLEN_CONV, text, text_length, "UTF-8", "EUC-JP");

    if(status_conv == NULL)
    {
        fprintf(stderr, "Error: Something went wrong when trying to encode the the text\n");
        die(1);
    }

    /* Replace gaiji that have UTF-8 equivalents */
    replace_gaiji_with_utf8(final_buf, conv_buf);

    out_file = fopen(out_path, "w");

    /* fwrite()/fclose() on a NULL stream would crash, so bail out first */
    if(out_file == NULL)
    {
        fprintf(stderr, "Error: Could not open output file, \"%s\"\n", out_path);
        die(1);
    }

    /* Output the text to file (in UTF-8) */
    fwrite(final_buf, 1, strlen(final_buf), out_file);

    fclose(out_file);

} /* lookup_link */