/*
 * Normalizes the "input" entry of the test data, after converting it to the
 * "encoding" entry under test, and checks that both the normalized bytes and
 * their byte length match the "expected" entry converted to the same encoding.
 */
void
test_normalize(gconstpointer data)
{
  const int open_flags =
    GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;
  grn_encoding target_encoding;
  const gchar *source_text;
  grn_str *string;
  const gchar *normalized_text;
  guint normalized_text_len;
  const gchar *encoded_expected;

  target_encoding = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, target_encoding);

  source_text = convert_encoding(gcut_data_get_string(data, "input"),
                                 target_encoding);
  string = grn_str_open(&context, source_text, strlen(source_text),
                        open_flags);

  /* Copy the result out before the string is closed; the assertions below
   * only look at the copy. */
  normalized_text = cut_take_strndup(string->norm, string->norm_blen);
  normalized_text_len = string->norm_blen;
  grn_test_assert(grn_str_close(&context, string));

  encoded_expected = convert_encoding(gcut_data_get_string(data, "expected"),
                                      target_encoding);
  cut_assert_equal_string(encoded_expected, normalized_text);
  cut_assert_equal_int(strlen(encoded_expected), normalized_text_len);
}
/*
 * Pops the first complete line off self->textbuf.
 *
 * Returns the result of convert_encoding() for the line contents (without
 * the "\n" or "\r\n" terminator), or NULL when the buffer does not yet
 * contain a '\n'. On success the line and its terminator are erased from
 * the front of the buffer.
 */
static gchar *
get_next_line (GstSubParse * self)
{
  const char *buffer_start = self->textbuf->str;
  const char *terminator = strchr (buffer_start, '\n');
  int terminator_len = 1;
  int content_len;
  char *converted;

  /* no newline yet: the caller has to feed more data first */
  if (terminator == NULL)
    return NULL;

  /* treat a preceding '\r' as part of the line terminator */
  if (terminator != buffer_start && terminator[-1] == '\r') {
    terminator--;
    terminator_len = 2;
  }

  content_len = terminator - buffer_start;
  converted = convert_encoding (self, buffer_start, content_len);

  self->textbuf =
      g_string_erase (self->textbuf, 0, content_len + terminator_len);

  return converted;
}
/*
 * Feeds a broken input to the auto normalizer and expects an empty result.
 *
 * The test data supplies the context encoding, the input text, the encoding
 * the input is converted to, and the number of bytes to hand over (a negative
 * "input-length" means "use the whole converted string").
 */
void
test_normalize_broken(gconstpointer data)
{
  const int open_flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;
  grn_encoding context_encoding;
  grn_encoding input_encoding;
  const gchar *raw_input;
  const gchar *converted_input;
  gint byte_count;
  grn_obj *normalized;
  const gchar *normalized_text;
  guint normalized_text_length;
  guint normalized_text_n_characters;

  context_encoding = gcut_data_get_int(data, "context-encoding");
  GRN_CTX_SET_ENCODING(&context, context_encoding);

  raw_input = gcut_data_get_string(data, "input");
  input_encoding = gcut_data_get_int(data, "input-encoding");
  byte_count = gcut_data_get_int(data, "input-length");
  converted_input = convert_encoding(raw_input, input_encoding);
  if (byte_count < 0) {
    byte_count = strlen(converted_input);
  }

  normalized = grn_string_open(&context, converted_input, byte_count,
                               GRN_NORMALIZER_AUTO, open_flags);
  grn_string_get_normalized(&context, normalized,
                            &normalized_text,
                            &normalized_text_length,
                            &normalized_text_n_characters);
  /* Copy the text before the string object is unlinked. */
  normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
  grn_obj_unlink(&context, normalized);

  cut_assert_equal_string("", normalized_text);
  cut_assert_equal_int(0, normalized_text_length);
  cut_assert_equal_int(0, normalized_text_n_characters);
}
// Runs `str` through convert_encoding() with the "UTF-8" / "CP1251" pair,
// stores the converted string as the label text, resets the current position
// to 0 and returns the converted string.
// NOTE(review): which of the two encoding names is the source and which is
// the target depends on convert_encoding()'s parameter order -- confirm there.
string GUILable::SetText(string str)
{
    text = (str = convert_encoding(str, "UTF-8", "CP1251"));
    curPos = 0;

    return str;
}
/*
 * Normalizes the "input" entry of the test data with the auto normalizer,
 * after converting it to the "encoding" entry under test, and checks the
 * normalized bytes, their byte length and the character count against the
 * "expected" entry.
 */
void
test_normalize(gconstpointer data)
{
  const int open_flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;
  grn_encoding target_encoding;
  const gchar *source_text;
  grn_obj *normalized;
  const gchar *normalized_text;
  guint normalized_text_length;
  guint normalized_text_n_characters;
  const gchar *utf8_expected;
  const gchar *encoded_expected;

  target_encoding = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, target_encoding);

  source_text = convert_encoding(gcut_data_get_string(data, "input"),
                                 target_encoding);
  normalized = grn_string_open(&context, source_text, strlen(source_text),
                               GRN_NORMALIZER_AUTO, open_flags);
  grn_string_get_normalized(&context, normalized,
                            &normalized_text,
                            &normalized_text_length,
                            &normalized_text_n_characters);
  /* Copy the text before the string object is unlinked. */
  normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
  grn_obj_unlink(&context, normalized);

  utf8_expected = gcut_data_get_string(data, "expected");
  encoded_expected = convert_encoding(utf8_expected, target_encoding);

  cut_assert_equal_string(encoded_expected, normalized_text);
  /* byte length is compared in the target encoding ... */
  cut_assert_equal_uint(strlen(encoded_expected), normalized_text_length);
  /* ... while the character count is taken from the UTF-8 expectation */
  cut_assert_equal_uint(g_utf8_strlen(utf8_expected, -1),
                        normalized_text_n_characters);
}
// Builds a wide string from the UTF-32 text of a line.
//
// When wchar_t is 4 bytes wide the UTF-32 buffer is reinterpreted directly;
// otherwise the text is run through convert_encoding() from UTF-32LE to
// UTF-16LE into a buffer of TextLength() wchar_t elements.
// NOTE(review): the destination holds exactly one wchar_t per source code
// unit -- verify how convert_encoding() behaves for characters that need a
// UTF-16 surrogate pair.
stl::wstring towstring (const TextLine& line)
{
  if (sizeof (wchar_t) != 4)
  {
    stl::wstring converted;

    converted.fast_resize (line.TextLength ());

    const void* utf32_text  = line.TextUtf32 ();
    size_t      utf32_bytes = line.TextLength () * sizeof (unsigned int);
    void*       wide_text   = &converted [0];
    size_t      wide_bytes  = converted.size () * sizeof (wchar_t);

    convert_encoding (common::Encoding_UTF32LE, utf32_text, utf32_bytes,
                      common::Encoding_UTF16LE, wide_text, wide_bytes);

    return converted;
  }

  return stl::wstring ((const wchar_t*)line.TextUtf32 (), line.TextLength ());
}
/*
 * Expects grn_charlen() to report 0 for a broken input.
 *
 * The test data supplies the encoding, the input text and the number of
 * bytes of the converted input to expose (a negative "input-length" means
 * "use the whole converted string").
 */
void
test_charlen_broken(gconstpointer data)
{
  grn_encoding target_encoding;
  const gchar *raw_input;
  gint byte_count;
  const gchar *encoded_input;
  const gchar *encoded_input_end;

  target_encoding = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, target_encoding);

  raw_input = gcut_data_get_string(data, "input");
  byte_count = gcut_data_get_int(data, "input-length");
  encoded_input = convert_encoding(raw_input, target_encoding);
  if (byte_count < 0) {
    byte_count = strlen(encoded_input);
  }
  encoded_input_end = encoded_input + byte_count;

  cut_assert_equal_uint(0, grn_charlen(&context, encoded_input, encoded_input_end));
}
/*------------------------------------------------------------------------ -- Name: print_title_to_out_file -- -- Description: -- Print the title of the book to the output file. -- -- Parameters: -- None. -- -- Returns: -- None. -- ------------------------------------------------------------------------*/ static void print_title_to_out_file(void) { EB_Error_Code error_code; char *status_conv = NULL; FILE *out_file = NULL; /* Get the title of the subbook */ error_code = eb_subbook_title2(&book, subbook_index, title); if(error_code != EB_SUCCESS) { fprintf(stderr, "Error: Failed to get the title: %s\n", eb_error_message(error_code)); die(1); } /* Convert title from EUC-JP to UTF-8 */ status_conv = convert_encoding(conv_buf, MAXLEN_CONV, title, EB_MAX_TITLE_LENGTH, "UTF-8", "EUC-JP"); if(status_conv == NULL) { fprintf(stderr, "Error: Something went wrong when trying to encode the title\n"); die(1); } out_file = fopen(out_path, "w"); if(out_file == NULL) { fprintf(stderr, "Error: Could not open output file, \"%s\"\n", out_path); die(1); } /* Output the text to file (in UTF-8) */ fwrite(conv_buf, 1, strlen(conv_buf), out_file); fclose(out_file); } /* print_title_to_out_file */
/*
 * Opens a normalizing grn_str over a broken input and expects an empty
 * normalized result.
 *
 * The test data supplies the context encoding, the input text, the encoding
 * the input is converted to, and the number of bytes to hand over (a negative
 * "input-length" means "use the whole converted string").
 */
void
test_normalize_broken(gconstpointer data)
{
  const int open_flags =
    GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;
  grn_encoding context_encoding;
  grn_encoding input_encoding;
  const gchar *raw_input;
  const gchar *converted_input;
  gint byte_count;
  grn_str *string;

  context_encoding = gcut_data_get_int(data, "context-encoding");
  GRN_CTX_SET_ENCODING(&context, context_encoding);

  raw_input = gcut_data_get_string(data, "input");
  input_encoding = gcut_data_get_int(data, "input-encoding");
  byte_count = gcut_data_get_int(data, "input-length");
  converted_input = convert_encoding(raw_input, input_encoding);
  if (byte_count < 0) {
    byte_count = strlen(converted_input);
  }

  string = grn_str_open(&context, converted_input, byte_count, open_flags);

  cut_assert_equal_string("", string->norm);
  cut_assert_equal_int(0, string->norm_blen);
  grn_test_assert(grn_str_close(&context, string));
}
/*------------------------------------------------------------------------ -- Name: lookup_word -- -- Description: -- Lookup the input word and send the results to the output file. -- -- Parameters: -- None. -- -- Returns: -- None. -- ------------------------------------------------------------------------*/ static void lookup_word(void) { char lookup_word_utf8[MAXLEN_LOOKUP_WORD + 1]; char lookup_word_eucjp[MAXLEN_LOOKUP_WORD + 1]; char *status_conv = NULL; EB_Error_Code error_code; EB_Hit hits[MAX_HITS]; FILE *in_file = NULL; FILE *out_file = NULL; int hit_count; int heading_length; int text_length; int i; /* Get the word to lookup */ in_file = fopen(in_path, "r"); if(in_file == NULL) { fprintf(stderr, "Error: Could not open input file: \"%s\"", in_path); die(1); } if(fgets(lookup_word_utf8, MAXLEN_LOOKUP_WORD, in_file) == NULL) { fclose(in_file); fprintf(stderr, "Error: Could not read word from input file: \"%s\"", in_path); die(1); } fclose(in_file); /* Remove the final '\n' */ if(lookup_word_utf8[strlen(lookup_word_utf8) - 1] == '\n') { lookup_word_utf8[strlen(lookup_word_utf8) - 1] = '\0'; } /* Convert the lookup word from UTF-8 to EUC-JP */ status_conv = convert_encoding(lookup_word_eucjp, MAXLEN_LOOKUP_WORD, lookup_word_utf8, strlen(lookup_word_utf8), "EUC-JP", "UTF-8"); if(status_conv == NULL) { fprintf(stderr, "Error: Something went wront when trying to encode the lookup word\n"); die(1); } /* Perform an exact search of the lookup word */ error_code = eb_search_exactword(&book, lookup_word_eucjp); if(error_code != EB_SUCCESS) { fprintf(stderr, "Error: Failed to search for the word, %s: %s\n", eb_error_message(error_code), lookup_word_eucjp); die(1); } while(1) { /* Get the list of hits */ error_code = eb_hit_list(&book, MAX_HITS, hits, &hit_count); if(error_code != EB_SUCCESS) { fprintf(stderr, "Error: Failed to get hit entries, %s\n", eb_error_message(error_code)); die(1); } /* Are we done? 
*/ if(hit_count == 0) { break; } /* Create the output file */ out_file = fopen(out_path, "w"); if(out_file == NULL) { fprintf(stderr, "Error: Could not open output file, \"%s\"\n", out_path); die(1); } /* Output only the number of hits? */ if(show_hit_count) { fprintf(out_file, "{HITS: %d}\n", hit_count); } /* Determine the max number of hits to output */ hit_count = MIN(hit_count, max_hits_to_output); /* For each search hit, print the hit information to the output file */ for(i = 0; i < hit_count; i++) { /* Did the user specify a particular hit index to output? */ if(hit_to_output >= 0) { i = hit_to_output; } /* Output the hit number */ if(print_hit_number && (hit_count > 1) && (hit_to_output == -1)) { fprintf(out_file, "{ENTRY: %d}\n", i); } /* Print the heading of the hit to file */ if(print_heading) { /* Seek to the heading */ error_code = eb_seek_text(&book, &(hits[i].heading)); if(error_code != EB_SUCCESS) { fprintf(stderr, "Error: Failed to seek the subbook, %s\n", eb_error_message(error_code)); fclose(out_file); die(1); } /* Read the heading */ error_code = eb_read_heading(&book, NULL, &hookset, NULL, MAXLEN_HEADING, heading, &heading_length); if(error_code != EB_SUCCESS) { fprintf(stderr, "Error: Failed to read the subbook, %s\n", eb_error_message(error_code)); fclose(out_file); die(1); } /* Convert from EUC-JP to UTF-8 */ status_conv = convert_encoding(conv_buf, MAXLEN_CONV, heading, heading_length, "UTF-8", "EUC-JP"); if(status_conv == NULL) { fprintf(stderr, "Error: Something went wrong when trying to encode the the heading\n"); fclose(out_file); die(1); } /* Replace gaiji that have UTF-8 equivalents */ replace_gaiji_with_utf8(final_buf, conv_buf); /* Output the header to file (in UTF-8) */ fprintf(out_file, "%s\n", conv_buf); } /* Print the text of the hit to file */ if(print_text) { /* Seek to the text */ error_code = eb_seek_text(&book, &(hits[i].text)); if(error_code != EB_SUCCESS) { fprintf(stderr, "Error: Failed to seek the subbook, %s\n", 
eb_error_message(error_code)); fclose(out_file); die(1); } /* Read the text*/ error_code = eb_read_text(&book, NULL, &hookset, NULL, MAXLEN_TEXT, text, &text_length); if(error_code != EB_SUCCESS) { fprintf(stderr, "Error: Failed to read the subbook, %s\n", eb_error_message(error_code)); fclose(out_file); die(1); } } /* Convert from EUC-JP to UTF-8 */ status_conv = convert_encoding(conv_buf, MAXLEN_CONV, text, text_length, "UTF-8", "EUC-JP"); if(status_conv == NULL) { fprintf(stderr, "Error: Something went wrong when trying to encode the the text\n"); fclose(out_file); die(1); } /* Replace gaiji that have UTF-8 equivalents */ replace_gaiji_with_utf8(final_buf, conv_buf); /* Output the text to file (in UTF-8) */ fwrite(final_buf, 1, strlen(final_buf), out_file); /* Since the user specified a hit index, don't display the other hits */ if(hit_to_output >= 0) { break; } } fclose(out_file); } } /* lookup_word */
/*------------------------------------------------------------------------ -- Name: lookup_link -- -- Description: -- Lookup the input link and send the results to the output file. -- -- Parameters: -- None. -- -- Returns: -- None. -- ------------------------------------------------------------------------*/ static void lookup_link(void) { EB_Error_Code error_code; EB_Position position; FILE *in_file = NULL; FILE *out_file = NULL; char link_text[MAXLEN_LOOKUP_WORD] = ""; char *status_conv = NULL; int text_length; int parse_result; in_file = fopen(in_path, "r"); if(in_file == NULL) { fprintf(stderr, "Error: Could not open input file: \"%s\"", in_path); die(1); } if(fgets(link_text, MAXLEN_LOOKUP_WORD, in_file) == NULL) { fclose(in_file); fprintf(stderr, "Error: Could not read word from input file: \"%s\"", in_path); die(1); } fclose(in_file); /* Parse the location of the link in the subbook */ parse_result = sscanf(link_text, "%X %X", &position.page, &position.offset); /* If link was not parsed correctly (2 is the expected number of fields in the input file) */ if(parse_result != 2) { fprintf(stderr, "Error: Could not parse link from input file, %d.\n", parse_result); die(1); } error_code = eb_seek_text(&book, &position); if(error_code != EB_SUCCESS) { fprintf(stderr, "Error: Failed to seek text, \"%s\"\n", eb_error_message(error_code)); die(1); } error_code = eb_read_text(&book, NULL, &hookset, NULL, MAXLEN_TEXT, text, &text_length); if(error_code != EB_SUCCESS) { fprintf(stderr, "Error: Failed to read text, \"%s\"\n", eb_error_message(error_code)); die(1); } /* Convert from EUC-JP to UTF-8 */ status_conv = convert_encoding(conv_buf, MAXLEN_CONV, text, text_length, "UTF-8", "EUC-JP"); if(status_conv == NULL) { fprintf(stderr, "Error: Something went wrong when trying to encode the the text\n"); die(1); } /* Replace gaiji that have UTF-8 equivalents */ replace_gaiji_with_utf8(final_buf, conv_buf); out_file = fopen(out_path, "w"); /* Output the text to file (in UTF-8) 
*/ fwrite(final_buf, 1, strlen(final_buf), out_file); fclose(out_file); } /* lookup_link */