/*
 * Recode a file in place using the UNIX98 iconv functions.
 *
 * The source encoding is from_enc, the destination is options.target_enc.
 * The current content of file is first saved to a temporary file; the
 * original is then rewound and truncated (or, when it is stdin, closed and
 * reopened for writing) and the converted data are written back into it.
 *
 * Returns ERR_CANNOT when the conversion is refused here or cannot be opened
 * by iconv, ERR_IOFAIL when preparing the files fails, otherwise the result
 * of iconv_one_step() (0 on success, nonzero error code otherwise).
 *
 * NOTE(review): the fallback through Unicode for non-transitive iconv
 * implementations (ICONV_TRANSITIVE not defined) is not visible in this
 * function; presumably it is handled by the helpers called from here.
 */
int
convert_iconv(File *file,
              EncaEncoding from_enc)
{
  static int ascii = ENCA_CS_UNKNOWN;
  File *tempfile = NULL;
  iconv_t icd;
  int err = ERR_IOFAIL;

  /* Resolve the ASCII charset id on the first call and remember it. */
  if (!enca_charset_is_known(ascii)) {
    ascii = enca_name_to_charset("ascii");
    assert(enca_charset_is_known(ascii));
  }

  /* iconv cannot convert from an encoding it has no name for... */
  if (!enca_charset_name(from_enc.charset, ENCA_NAME_STYLE_ICONV))
    return ERR_CANNOT;
  /* ...nor to a known target charset lacking an iconv name. */
  if (enca_charset_is_known(options.target_enc.charset)
      && !enca_charset_name(options.target_enc.charset,
                            ENCA_NAME_STYLE_ICONV))
    return ERR_CANNOT;
  /* Conversion to ASCII is never attempted: it can only damage the files
   * and upset users, nothing else. */
  if (options.target_enc.charset == ascii)
    return ERR_CANNOT;
  /* Fail early on really silly surfaces. */
  if (!acceptable_surface(from_enc))
    return ERR_CANNOT;
  if (!acceptable_surface(options.target_enc))
    return ERR_CANNOT;

  /* Is the conversion possible at all? */
  if (do_iconv_open(from_enc, options.target_enc, &icd) != 0)
    return ERR_CANNOT;

  /* iconv does not recode files in place, so the input goes to a temporary
   * file first: the already-read buffer content, then the rest of the file.
   * Every step below must succeed, in this order, before converting. */
  tempfile = file_temporary(file->buffer, 1);
  if (tempfile == NULL)
    goto cleanup;
  if (file_write(tempfile) == -1)
    goto cleanup;
  if (copy_and_convert(file, tempfile, NULL) != 0)
    goto cleanup;

  /* Rewind both files; only a named file can be rewound. */
  if (file->name != NULL && file_seek(file, 0, SEEK_SET) != 0)
    goto cleanup;
  if (file_seek(tempfile, 0, SEEK_SET) != 0)
    goto cleanup;

  /* Empty a named file; when the file is stdin, fake-reopen it to stdout. */
  if (file->name != NULL && file_truncate(file, 0) != 0)
    goto cleanup;
  if (file->name == NULL
      && (file_close(file) != 0 || file_open(file, "wb") != 0))
    goto cleanup;

  /* Create the second buffer when we don't have any yet, but don't make it
   * unnecessarily large; the system default suffices. */
  if (!buffer_iconv)
    buffer_iconv = buffer_new(0);
  tempfile->buffer = buffer_iconv;

  err = iconv_one_step(tempfile, file, icd);

cleanup:
  file_free(tempfile);
  do_iconv_close(icd);
  return err;
}
/*
 * Ask iconv for a conversion descriptor from from_enc to to_enc and store
 * it in *icd.
 *
 * When the target charset is not known to enca, the raw target encoding
 * string from the options is handed to iconv instead.
 *
 * Returns 0 on success and ERR_CANNOT when iconv reports EINVAL (the
 * conversion is not possible).  Any other failure is fatal: a message is
 * printed and the program exits with EXIT_TROUBLE.
 */
static int
do_iconv_open(EncaEncoding from_enc,
              EncaEncoding to_enc,
              iconv_t *icd)
{
  const char *to_name;
  const char *from_name;

  to_name = enca_charset_is_known(to_enc.charset)
            ? enca_charset_name(to_enc.charset, ENCA_NAME_STYLE_ICONV)
            : options.target_enc_str;
  from_name = enca_charset_name(from_enc.charset, ENCA_NAME_STYLE_ICONV);
  assert(from_name != NULL);
  assert(to_name != NULL);

  /* iconv_open() takes its parameters in the reverse order to ours. */
  *icd = iconv_open(to_name, from_name);
  if (*icd == (iconv_t)-1) {
    /* EINVAL just means this particular conversion is not available. */
    if (errno == EINVAL)
      return ERR_CANNOT;

    /* Anything else is deep trouble: we have run out of memory or file
     * descriptors. */
    fprintf(stderr, "%s: Aborting: %s\n", program_name, strerror(errno));
    exit(EXIT_TROUBLE);
  }

  return 0;
}
/*
 * Process the file named fname; this is the `boss' function.
 *
 * Reads a sample of the file into a persistent buffer, lets the analyser
 * guess its encoding and then either converts the file (when
 * options.output_type is OTYPE_CONVERT) or prints the result.
 *
 * Calling it with an == NULL only releases the persistent buffer and
 * returns 0; the buffer is created again on the next regular call.
 *
 * Returns EXIT_SUCCESS on success, 1 (EXIT_FAILURE when only printing) when
 * the encoding is unknown or the conversion cannot be done, and EXIT_TROUBLE
 * on I/O or other trouble.
 */
static int
process_file(EncaAnalyser an,
             const char *fname)
{
  static int utf8 = ENCA_CS_UNKNOWN;
  static Buffer *buffer = NULL; /* persistent i/o buffer */
  int ot_is_convert = (options.output_type == OTYPE_CONVERT);

  EncaEncoding result; /* the guessed encoding */
  File *file; /* the processed file */

  /* Cleanup request. */
  if (!an) {
    buffer_free(buffer);
    /* Forget the freed buffer so that a repeated cleanup call does not free
     * it twice and a later regular call allocates a fresh one. */
    buffer = NULL;
    return 0;
  }

  /* Initialize when we are called the first time. */
  if (buffer == NULL)
    buffer = buffer_new(buffer_size);

  if (!enca_charset_is_known(utf8)) {
    utf8 = enca_name_to_charset("utf8");
    assert(enca_charset_is_known(utf8));
  }

  /* Read sample.  Conversion rewrites the file, so it needs it opened for
   * update. */
  file = file_new(fname, buffer);
  if (file_open(file, ot_is_convert ? "r+b" : "rb") != 0) {
    file_free(file);
    return EXIT_TROUBLE;
  }
  if (file_read(file) == -1) {
    file_free(file);
    return EXIT_TROUBLE;
  }
  /* When only guessing, the sample is all we need from the file. */
  if (!ot_is_convert)
    file_close(file);

  /* Guess encoding.
   * NOTE(review): the _const variant is used when converting, presumably
   * because the sample must stay intact for the conversion -- confirm. */
  dwim_libenca_options(an, file);
  if (ot_is_convert)
    result = enca_analyse_const(an, buffer->data, buffer->pos);
  else
    result = enca_analyse(an, buffer->data, buffer->pos);

  /* Is conversion required? */
  if (ot_is_convert) {
    int err = 0;

    if (enca_charset_is_known(result.charset))
      err = convert(file, result);
    else {
      /* An empty input is not worth a complaint. */
      if (enca_errno(an) != ENCA_EEMPTY) {
        fprintf(stderr, "%s: Cannot convert `%s' from unknown encoding\n",
                        program_name,
                        ffname_r(file->name));
      }
      /* Copy stdin to stdout unchanged. */
      if (file->name == NULL)
        err = copy_and_convert(file, file, NULL);
    }

    file_free(file);
    /* Failure: either nothing went wrong technically but the encoding was
     * unknown (and the input was not merely empty), or the conversion is
     * impossible. */
    if ((err == ERR_OK && !enca_charset_is_known(result.charset)
         && enca_errno(an) != ENCA_EEMPTY)
        || err == ERR_CANNOT)
      return 1;

    return (err == ERR_OK) ? EXIT_SUCCESS : EXIT_TROUBLE;
  }

  /* Print results. */
  print_results(file->name, an, result, enca_errno(an));
  if (result.charset == utf8)
    double_utf8_chk(an, buffer->data, buffer->pos);

  file_free(file);

  return enca_charset_is_known(result.charset) ? EXIT_SUCCESS : EXIT_FAILURE;
}
/*
 * Convert a file by running the external convertor (extern_convertor) in a
 * child process.
 *
 * The convertor is invoked as
 *   extern_convertor FROM TARGET FILENAME [-]
 * where FROM and TARGET are encoding names built from from_enc and
 * options.target_enc (or the raw options.target_enc_str when the target is
 * not fully known).
 *
 * When file->name is NULL (stdin), the input is first copied to a temporary
 * file which is passed to the convertor together with the special fourth
 * argument "-" instructing it to send the result to stdout.  The convertor
 * is assumed to delete the temporary file itself; it is unlinked here
 * afterwards as well.
 *
 * Returns ERR_OK on success, ERR_CANNOT when no convertor is defined or the
 * convertor failed, ERR_EXEC when the convertor could not be executed and
 * ERR_IOFAIL when the temporary copy of stdin cannot be made.  On critical
 * failure (cannot fork, cannot wait, child killed) the program exits with
 * EXIT_TROUBLE.
 */
int
convert_external(File *file,
                 const EncaEncoding from_enc)
{
  /* special fourth parameter passed to external convertor to instruct it to
     send result to stdout */
  static const char *STDOUT_CONV = "-";

  pid_t pid;
  int status;
  File *tempfile = NULL;
  char *from_name, *target_name;

  if (*extern_convertor == '\0') {
    fprintf(stderr, "%s: No external convertor defined!\n", program_name);
    return ERR_CANNOT;
  }

  if (options.verbosity_level > 2)
    fprintf(stderr, "    launching `%s' to convert `%s'\n",
                    extern_convertor, ffname_r(file->name));

  /* Is conversion of stdin requested? */
  if (file->name == NULL) {
    /* Then we have to copy it to a temporary file. */
    tempfile = file_temporary(file->buffer, 0);
    if (tempfile == NULL)
      return ERR_IOFAIL;

    if (copy_and_convert(file, tempfile, NULL) != 0) {
      file_unlink(tempfile->name);
      file_free(tempfile);
      return ERR_IOFAIL;
    }
  }

  /* Construct the charset names before fork(): the vfork() child must not
   * allocate memory. */
  from_name = enca_strconcat(enca_charset_name(from_enc.charset,
                                               ENCA_NAME_STYLE_ENCA),
                             enca_get_surface_name(from_enc.surface,
                                                   ENCA_NAME_STYLE_ENCA),
                             NULL);
  if (enca_charset_is_known(options.target_enc.charset)
      && (options.target_enc.surface & ENCA_SURFACE_UNKNOWN) == 0) {
    target_name
      = enca_strconcat(enca_charset_name(options.target_enc.charset,
                                         ENCA_NAME_STYLE_ENCA),
                       enca_get_surface_name(options.target_enc.surface,
                                             ENCA_NAME_STYLE_ENCA),
                       NULL);
  }
  else
    target_name = enca_strdup(options.target_enc_str);

  /* Fork. */
  pid = vfork();
  if (pid == 0) {
    /* Child.  The argument list of a variadic exec call must be terminated
     * by a null *pointer*, hence the explicit cast. */
    if (tempfile)
      execlp(extern_convertor, extern_convertor,
             from_name, target_name, tempfile->name,
             STDOUT_CONV, (char *)NULL);
    else
      execlp(extern_convertor, extern_convertor,
             from_name, target_name, file->name, (char *)NULL);

    /* exec failed.  A vfork() child may only call _exit(): exit() would
     * flush stdio buffers and run atexit handlers belonging to the
     * parent. */
    _exit(ERR_EXEC);
  }

  /* Parent. */
  if (pid == -1) {
    fprintf(stderr, "%s: Cannot fork() to execute convertor: %s\n",
                    program_name,
                    strerror(errno));
    exit(EXIT_TROUBLE);
  }
  /* Wait until the child returns. */
  if (waitpid(pid, &status, 0) == -1) {
    /* Error. */
    fprintf(stderr, "%s: wait_pid() error while waiting for convertor: %s\n",
                    program_name,
                    strerror(errno));
    exit(EXIT_TROUBLE);
  }
  if (!WIFEXITED(status)) {
    /* Child exited abnormally. */
    fprintf(stderr, "%s: Child convertor process has been murdered.\n",
                    program_name);
    exit(EXIT_TROUBLE);
  }

  enca_free(from_name);
  enca_free(target_name);

  /* The convertor should have removed the temporary file already; make
   * sure it is gone in any case. */
  if (tempfile) {
    unlink(tempfile->name);
    file_free(tempfile);
  }

  /* Child exited normally, test exit status. */
  if (WEXITSTATUS(status) != EXIT_SUCCESS) {
    /* This means child was unable to execute convertor or convertor
     * failed. */
    fprintf(stderr, "%s: External convertor failed (error code %d)\n",
                    program_name,
                    WEXITSTATUS(status));
    if (WEXITSTATUS(status) == ERR_EXEC)
      return ERR_EXEC;
    else
      return ERR_CANNOT;
  }
  /* Success!  Wow! */
  return ERR_OK;
}