/*
 * Recode `file` in place from from_enc to options.target_enc with iconv.
 *
 * The current contents are first saved to a temporary file, then converted
 * from that copy back into `file` by iconv_one_step().
 *
 * Returns ERR_CANNOT when the conversion is refused up front or the
 * conversion descriptor cannot be opened, ERR_IOFAIL when preparing the
 * temporary copy or the output fails, otherwise the result of
 * iconv_one_step().
 */
int convert_iconv(File *file, EncaEncoding from_enc)
{
  static int ascii_cs = ENCA_CS_UNKNOWN;
  File *scratch = NULL;
  int status = ERR_IOFAIL;
  iconv_t icd;

  /* Resolve the charset id of ASCII once and keep it for later calls. */
  if (!enca_charset_is_known(ascii_cs)) {
    ascii_cs = enca_name_to_charset("ascii");
    assert(enca_charset_is_known(ascii_cs));
  }

  /* Refuse early: the source charset needs an iconv name... */
  if (!enca_charset_name(from_enc.charset, ENCA_NAME_STYLE_ICONV))
    return ERR_CANNOT;
  /* ...and so does the target charset, when it is a known one. */
  if (enca_charset_is_known(options.target_enc.charset)
      && !enca_charset_name(options.target_enc.charset,
                            ENCA_NAME_STYLE_ICONV))
    return ERR_CANNOT;
  /* Conversion to ASCII is never attempted here. */
  if (options.target_enc.charset == ascii_cs)
    return ERR_CANNOT;
  /* Both surfaces have to pass acceptable_surface(). */
  if (!acceptable_surface(from_enc))
    return ERR_CANNOT;
  if (!acceptable_surface(options.target_enc))
    return ERR_CANNOT;

  /* Is the conversion possible at all? */
  if (do_iconv_open(from_enc, options.target_enc, &icd) != 0)
    return ERR_CANNOT;

  /* Save what is already buffered, then the rest of the input, into a
   * temporary file. */
  scratch = file_temporary(file->buffer, 1);
  if (!scratch)
    goto finish;
  if (file_write(scratch) == -1)
    goto finish;
  if (copy_and_convert(file, scratch, NULL) != 0)
    goto finish;

  /* Rewind both ends; a named file is additionally emptied so it can
   * receive the converted data. */
  if (file->name && file_seek(file, 0, SEEK_SET) != 0)
    goto finish;
  if (file_seek(scratch, 0, SEEK_SET) != 0)
    goto finish;
  if (file->name && file_truncate(file, 0) != 0)
    goto finish;

  /* An unnamed file (stdin, per the original note) is closed and reopened
   * for writing instead. */
  if (!file->name
      && (file_close(file) != 0 || file_open(file, "wb") != 0))
    goto finish;

  /* Create the second buffer lazily; size 0 is passed to buffer_new()
   * (the original note says the system default suffices). */
  if (!buffer_iconv)
    buffer_iconv = buffer_new(0);
  scratch->buffer = buffer_iconv;

  status = iconv_one_step(scratch, file, icd);

finish:
  file_free(scratch);
  do_iconv_close(icd);
  return status;
}
/*
  Convert the column name cnm with copy_and_convert() into a buffer obtained
  from PlugSubAlloc() and return it NUL-terminated.

  The charset arguments passed are my_charset_latin1 on the destination side
  and my_charset_utf8_general_ci on the source side, i.e. presumably a
  UTF-8 -> Latin-1 conversion (confirm against copy_and_convert's prototype).

  g    work area handed to PlugSubAlloc()
  cnm  NUL-terminated name to convert
*/
char *PRXCOL::Decode(PGLOBAL g, const char *cnm)
{
  const size_t srclen= strlen(cnm);
  char  *buf= (char*)PlugSubAlloc(g, NULL, srclen + 1);
  uint   dummy_errors;
  // Offer only srclen bytes of the srclen + 1 allocated, so the returned
  // length can never reach the last byte, which is reserved for the
  // terminator written below.  A single-byte target charset cannot need
  // more bytes than the source has, so nothing is truncated by this.
  uint32 len= copy_and_convert(buf, srclen,
                               &my_charset_latin1,
                               cnm, srclen,
                               &my_charset_utf8_general_ci,
                               &dummy_errors);

  buf[len]= '\0';
  return buf;
} // end of Decode
/* convert file using GNU recode library returns 0 on success, nonzero error code otherwise */ int convert_recode(File *file, EncaEncoding from_enc) { RECODE_REQUEST request; RECODE_TASK task; File *tempfile = NULL; bool success; const char *encreq; /* Allocate librecode outer if we are called first time. */ if (outer == NULL) { if ((outer = recode_new_outer(false)) == NULL) { fprintf(stderr, "%s: recode library doesn't like us\n", program_name); return ERR_LIBCOM; } } /* Construct recode request string, try to mimic surfaceless converter now. */ { EncaEncoding enc; enc.charset = from_enc.charset; enc.surface = from_enc.surface | ENCA_SURFACE_REMOVE; encreq = format_request_string(enc, options.target_enc, ENCA_SURFACE_EOL_LF); } /* Create a recode request from it. */ request = get_recode_request(encreq); if (request == NULL) return ERR_CANNOT; /* Now we have to distinguish between file and stdin, namely because * in case of stdin, it's first part is already loaded in the buffer. */ if (file->name != NULL) { /* File is a regular file. Since recode doesn't recode files in place, we make a temporary file and copy contents of file fname to it. */ if (file_seek(file, 0, SEEK_SET) != 0) return ERR_IOFAIL; file->buffer->pos = 0; if ((tempfile = file_temporary(file->buffer, 1)) == NULL || copy_and_convert(file, tempfile, NULL) != 0 || file_seek(file, 0, SEEK_SET) != 0 || file_seek(tempfile, 0, SEEK_SET) != 0 || file_truncate(file, 0) != 0) { file_free(tempfile); return ERR_IOFAIL; } /* Create a task from the request. */ task = recode_new_task(request); task->fail_level = enca_recode_fail_level; task->abort_level = RECODE_SYSTEM_ERROR; task->input.name = NULL; task->input.file = tempfile->stream; task->output.name = NULL; task->output.file = file->stream; /* Now run conversion temporary file -> original. */ success = recode_perform_task(task); /* If conversion wasn't successfull, original file is probably damaged (damned librecode!) 
try to restore it from the temporary copy. */ if (!success) { if (task->error_so_far >= RECODE_SYSTEM_ERROR) { fprintf(stderr, "%s: librecode probably damaged file `%s'. " "Trying to recover... ", program_name, file->name); tempfile->buffer->pos = 0; if (file_seek(tempfile, 0, SEEK_SET) != -1 && file_seek(file, 0, SEEK_SET) != -1 && file_truncate(file, file->size) == 0 && copy_and_convert(tempfile, file, NULL) == 0) fprintf(stderr, "succeeded.\n"); else fprintf(stderr, "failed\n"); } else print_recode_warning(task->error_so_far, file->name); } recode_delete_task(task); file_free(tempfile); } else { /* File is stdin. First recode begining saved in io_buffer, then append rest of stdin. */ enum recode_error errmax = RECODE_NO_ERROR; /* Create a task from the request. * Set it up for buffer -> stdout conversion */ task = recode_new_task(request); task->fail_level = enca_recode_fail_level; task->abort_level = RECODE_SYSTEM_ERROR; task->input.name = NULL; task->input.file = NULL; task->input.buffer = (char*)file->buffer->data; task->input.cursor = (char*)file->buffer->data; task->input.limit = (char*)file->buffer->data + file->buffer->pos; task->output.name = NULL; task->output.file = stdout; success = recode_perform_task(task); if (!success) { if (task->error_so_far >= RECODE_SYSTEM_ERROR) { fprintf(stderr, "%s: librecode probably damaged `%s'. " "No way to recover in a pipe.\n", program_name, ffname_r(NULL)); recode_delete_task(task); return ERR_IOFAIL; } else errmax = task->error_so_far; } recode_delete_task(task); /* Create a task from the request. * Set it up for stdin -> stdout conversion */ task = recode_new_task(request); task->fail_level = enca_recode_fail_level; task->abort_level = RECODE_SYSTEM_ERROR; task->input.name = NULL; task->input.file = stdin; task->output.name = NULL; task->output.file = stdout; success = recode_perform_task(task); if (!success) { if (task->error_so_far >= RECODE_SYSTEM_ERROR) { fprintf(stderr, "%s: librecode probably damaged `%s'. 
" "No way to recover in a pipe.\n", program_name, ffname_r(NULL)); recode_delete_task(task); return ERR_IOFAIL; } else { if (errmax < task->error_so_far) errmax = task->error_so_far; } } if (errmax >= enca_recode_fail_level) print_recode_warning(errmax, ffname_r(NULL)); recode_delete_task(task); } /* return ERR_IOFAIL on failure since it means file-related problems */ return success ? ERR_OK : ERR_IOFAIL; }
/*
 * Convert everything readable from file_from through the conversion
 * descriptor icd and write the result to file_to.
 *
 * Returns ERR_OK on success, ERR_IOFAIL on read/write/recovery failure and
 * ERR_MALFORM when the input could not be converted (after the original
 * content was restored) or ends in an incomplete multibyte sequence.
 */
static int iconv_one_step(File *file_from, File *file_to, iconv_t icd)
{
  char *src, *dst;
  size_t src_left, dst_left, rc;
  int at_eof;

  do {
    /* Refill the input buffer. */
    if (file_read(file_from) == -1)
      return ERR_IOFAIL;

    src = (char*)file_from->buffer->data;
    src_left = file_from->buffer->pos;
    /* A read that leaves the buffer short of its size is taken as the end
     * of input. */
    at_eof = (ssize_t)file_from->buffer->size > file_from->buffer->pos;

    /* Convert the buffered input; each time the output buffer fills up
     * (E2BIG) flush it and continue where iconv stopped. */
    for (;;) {
      dst = (char*)file_to->buffer->data;
      dst_left = file_to->buffer->size;
      rc = iconv(icd, (ICONV_CONST char**)&src, &src_left, &dst, &dst_left);
      file_to->buffer->pos = file_to->buffer->size - dst_left;
      if (rc != (size_t)-1 || errno != E2BIG)
        break;

      if (file_write(file_to) == -1)
        return ERR_IOFAIL;
    }

    if (rc != (size_t)-1) {
      /* Everything consumed, start the next read at the beginning. */
      file_from->buffer->pos = 0;
    }
    else if (errno == EINVAL && !at_eof) {
      /* A multibyte sequence was split at the buffer end: keep its head at
       * the start of the buffer so the next read can complete it. */
      memmove(file_from->buffer->data, src, src_left);
      file_from->buffer->pos = src_left;
    }
    else {
      /* Any other error stops the conversion. */
      Buffer *saved;
      int copy_status;

      fprintf(stderr, "%s: Iconv conversion error on `%s': %s\n",
              program_name, ffname_r(file_from->name), strerror(errno));

      if (!file_from->name || !file_to->name) {
        fprintf(stderr, "No way to recover in a pipe.\n");
        return ERR_IOFAIL;
      }

      /* Both ends are named files: copy file_from back over file_to
       * unconverted. */
      fprintf(stderr, "Trying to recover... ");
      if (file_seek(file_from, 0, SEEK_SET) != 0
          || file_seek(file_to, 0, SEEK_SET) != 0
          || file_truncate(file_to, file_to->size) != 0) {
        fprintf(stderr, "failed\n");
        return ERR_IOFAIL;
      }

      /* Let both files share file_from's buffer for the duration of the
       * plain copy, then give file_to its own buffer back. */
      file_from->buffer->pos = 0;
      saved = file_to->buffer;
      file_to->buffer = file_from->buffer;
      copy_status = copy_and_convert(file_from, file_to, NULL);
      file_to->buffer = saved;

      if (copy_status != 0) {
        fprintf(stderr, "failed\n");
        return ERR_IOFAIL;
      }
      fprintf(stderr, "succeeded.\n");
      return ERR_MALFORM;
    }

    /* Write out what the last iconv call produced. */
    if (file_write(file_to) == -1)
      return ERR_IOFAIL;
  } while (!at_eof);

  /* Input bytes left unconverted at the very end mean an unfinished
   * multibyte sequence. */
  if (src_left > 0) {
    fprintf(stderr, "%s: File `%s' seems to be truncated, "
                    "the trailing incomplete multibyte sequence "
                    "has been lost\n",
            program_name, ffname_r(file_from->name));
    return ERR_MALFORM;
  }

  return ERR_OK;
}
/*
 * Process the file named fname; this is the `boss' function.
 *
 * Reads a sample of the file into a persistent buffer, lets the analyser
 * `an` guess its encoding and then either converts the file (when
 * options.output_type is OTYPE_CONVERT) or prints the result.
 *
 * Calling it with an == NULL performs cleanup only: the persistent buffer
 * is released and 0 is returned.
 *
 * Returns
 *   EXIT_TROUBLE  when the file cannot be opened or read, or conversion
 *                 ended with an error other than ERR_CANNOT;
 *   1             in convert mode when conversion reported ERR_CANNOT, or
 *                 the encoding is unknown and the analyser error is not
 *                 ENCA_EEMPTY;
 *   EXIT_FAILURE  in print mode when the encoding is unknown;
 *   EXIT_SUCCESS  otherwise.
 */
static int process_file(EncaAnalyser an, const char *fname)
{
  static int utf8 = ENCA_CS_UNKNOWN;
  static Buffer *buffer = NULL; /* persistent i/o buffer */
  int ot_is_convert = (options.output_type == OTYPE_CONVERT);

  EncaEncoding result; /* the guessed encoding */
  File *file; /* the processed file */

  if (!an) {
    buffer_free(buffer);
    /* Do not keep a dangling pointer: a later call must either allocate a
     * fresh buffer or free nothing. */
    buffer = NULL;
    return 0;
  }

  /* Initialize when we are called the first time. */
  if (buffer == NULL)
    buffer = buffer_new(buffer_size);

  if (!enca_charset_is_known(utf8)) {
    utf8 = enca_name_to_charset("utf8");
    assert(enca_charset_is_known(utf8));
  }

  /* Read sample.  The file is opened read-write when it is going to be
   * converted. */
  file = file_new(fname, buffer);
  if (file_open(file, ot_is_convert ? "r+b" : "rb") != 0) {
    file_free(file);
    return EXIT_TROUBLE;
  }
  if (file_read(file) == -1) {
    file_free(file);
    return EXIT_TROUBLE;
  }
  /* Only the sample is needed unless we convert. */
  if (!ot_is_convert)
    file_close(file);

  /* Guess encoding.  The const variant is used for conversion, where the
   * buffer content is still needed afterwards. */
  dwim_libenca_options(an, file);
  if (ot_is_convert)
    result = enca_analyse_const(an, buffer->data, buffer->pos);
  else
    result = enca_analyse(an, buffer->data, buffer->pos);

  /* Is conversion required? */
  if (ot_is_convert) {
    int err = 0;

    if (enca_charset_is_known(result.charset))
      err = convert(file, result);
    else {
      if (enca_errno(an) != ENCA_EEMPTY) {
        fprintf(stderr, "%s: Cannot convert `%s' from unknown encoding\n",
                program_name, ffname_r(file->name));
      }
      /* Copy stdin to stdout unchanged. */
      if (file->name == NULL)
        err = copy_and_convert(file, file, NULL);
    }

    file_free(file);

    if ((err == ERR_OK
         && !enca_charset_is_known(result.charset)
         && enca_errno(an) != ENCA_EEMPTY)
        || err == ERR_CANNOT)
      return 1;

    return (err == ERR_OK) ? EXIT_SUCCESS : EXIT_TROUBLE;
  }

  /* Print results. */
  print_results(file->name, an, result, enca_errno(an));
  if (result.charset == utf8)
    double_utf8_chk(an, buffer->data, buffer->pos);

  file_free(file);

  return enca_charset_is_known(result.charset) ? EXIT_SUCCESS : EXIT_FAILURE;
}
/*
 * Convert `file` by running the external convertor program
 * (extern_convertor) in a child process.
 *
 * The convertor is executed as
 *   extern_convertor FROM TARGET FILENAME          for a named file, or
 *   extern_convertor FROM TARGET TEMPFILE "-"      for stdin,
 * where stdin is first copied into a temporary file and the extra "-"
 * argument instructs the convertor to send the result to stdout.
 *
 * from_enc is the detected source encoding; the target comes from
 * options.target_enc (or options.target_enc_str when the charset is
 * unknown or the surface carries ENCA_SURFACE_UNKNOWN).
 *
 * Returns ERR_OK on success, ERR_CANNOT when no convertor is defined or it
 * exits with a nonzero status, ERR_EXEC when the child reports it could
 * not execute the convertor, ERR_IOFAIL when stdin cannot be saved to a
 * temporary file.  On critical failures (fork or waitpid failing, child
 * not exiting normally) the whole program exits with EXIT_TROUBLE.
 */
int convert_external(File *file, const EncaEncoding from_enc)
{
  /* special fourth parameter passed to external convertor to instruct it
     to send result to stdout */
  static const char *STDOUT_CONV = "-";

  pid_t pid;
  int status;
  File *tempfile = NULL;
  char *from_name, *target_name;

  if (*extern_convertor == '\0') {
    fprintf(stderr, "%s: No external convertor defined!\n", program_name);
    return ERR_CANNOT;
  }

  if (options.verbosity_level > 2)
    fprintf(stderr, " launching `%s' to convert `%s'\n",
            extern_convertor, ffname_r(file->name));

  /* Is conversion of stdin requested? */
  if (file->name == NULL) {
    /* Then we have to copy it to a temporary file. */
    tempfile = file_temporary(file->buffer, 0);
    if (tempfile == NULL)
      return ERR_IOFAIL;

    if (copy_and_convert(file, tempfile, NULL) != 0) {
      file_unlink(tempfile->name);
      file_free(tempfile);
      return ERR_IOFAIL;
    }
  }

  /* Construct the charset names before fork(): the child must do nothing
   * but exec. */
  from_name = enca_strconcat(enca_charset_name(from_enc.charset,
                                               ENCA_NAME_STYLE_ENCA),
                             enca_get_surface_name(from_enc.surface,
                                                   ENCA_NAME_STYLE_ENCA),
                             NULL);
  if (enca_charset_is_known(options.target_enc.charset)
      && (options.target_enc.surface & ENCA_SURFACE_UNKNOWN) == 0) {
    target_name
      = enca_strconcat(enca_charset_name(options.target_enc.charset,
                                         ENCA_NAME_STYLE_ENCA),
                       enca_get_surface_name(options.target_enc.surface,
                                             ENCA_NAME_STYLE_ENCA),
                       NULL);
  }
  else
    target_name = enca_strdup(options.target_enc_str);

  /* Fork. */
  pid = vfork();
  if (pid == 0) {
    /* Child. */
    if (tempfile)
      execlp(extern_convertor, extern_convertor,
             from_name, target_name, tempfile->name,
             STDOUT_CONV, NULL);
    else
      execlp(extern_convertor, extern_convertor,
             from_name, target_name, file->name, NULL);

    /* Only reached when exec failed.  A vfork() child may share memory
     * with its parent, so it must leave through _exit(): exit() would run
     * atexit handlers and flush/close stdio streams that belong to the
     * parent. */
    _exit(ERR_EXEC);
  }

  /* Parent. */
  if (pid == -1) {
    fprintf(stderr, "%s: Cannot fork() to execute convertor: %s\n",
            program_name, strerror(errno));
    exit(EXIT_TROUBLE);
  }

  /* Wait until the child returns. */
  if (waitpid(pid, &status, 0) == -1) {
    /* Error. */
    fprintf(stderr, "%s: wait_pid() error while waiting for convertor: %s\n",
            program_name, strerror(errno));
    exit(EXIT_TROUBLE);
  }

  if (!WIFEXITED(status)) {
    /* Child exited abnormally. */
    fprintf(stderr, "%s: Child convertor process has been murdered.\n",
            program_name);
    exit(EXIT_TROUBLE);
  }

  enca_free(from_name);
  enca_free(target_name);

  if (tempfile) {
    unlink(tempfile->name);
    file_free(tempfile);
  }

  /* Child exited normally, test exit status. */
  if (WEXITSTATUS(status) != EXIT_SUCCESS) {
    /* This means child was unable to execute convertor or convertor
       failed. */
    fprintf(stderr, "%s: External convertor failed (error code %d)\n",
            program_name, WEXITSTATUS(status));
    if (WEXITSTATUS(status) == ERR_EXEC)
      return ERR_EXEC;
    else
      return ERR_CANNOT;
  }

  /* Success!  Wow! */
  return ERR_OK;
}