Exemplo n.º 1
0
/* Convert a file using the GNU recode library.
 *
 * file:     the input to convert.  When file->name is NULL the input is
 *           treated as stdin and the result is written to stdout; otherwise
 *           the file is rewritten in place with the help of a temporary copy.
 * from_enc: the source encoding; the target is taken from options.target_enc.
 *
 * Returns ERR_OK on success, ERR_LIBCOM when the librecode outer cannot be
 * allocated, ERR_CANNOT when no recode request can be obtained, and
 * ERR_IOFAIL on I/O failures or when the conversion does not succeed. */
int
convert_recode(File *file,
               EncaEncoding from_enc)
{
  RECODE_REQUEST request;
  RECODE_TASK task;
  File *tempfile = NULL;
  bool success;
  const char *encreq;

  /* Allocate librecode outer if we are called for the first time.
     `outer' is declared outside this function and is reused by later calls. */
  if (outer == NULL) {
    if ((outer = recode_new_outer(false)) == NULL) {
      fprintf(stderr, "%s: recode library doesn't like us\n",
                      program_name);
      return ERR_LIBCOM;
    }
  }

  /* Construct recode request string,
     try to mimic surfaceless converter now. */
  {
    EncaEncoding enc;

    enc.charset = from_enc.charset;
    /* Add ENCA_SURFACE_REMOVE to the detected surface bits. */
    enc.surface = from_enc.surface | ENCA_SURFACE_REMOVE;
    encreq = format_request_string(enc, options.target_enc,
                                   ENCA_SURFACE_EOL_LF);
  }
  /* Create a recode request from it. */
  request = get_recode_request(encreq);
  if (request == NULL)
    return ERR_CANNOT;

  /* Now we have to distinguish between file and stdin, namely because
   * in case of stdin, its first part is already loaded in the buffer. */
  if (file->name != NULL) {
    /* File is a regular file.
       Since recode doesn't recode files in place, we make a temporary file
       and copy contents of file fname to it. */
    if (file_seek(file, 0, SEEK_SET) != 0)
      return ERR_IOFAIL;
    file->buffer->pos = 0;

    /* Copy the original into the temporary file, rewind both and empty the
       original so that it can receive the converted output.
       NOTE(review): when file_temporary() fails, file_free() is called with
       NULL -- presumably file_free() tolerates that; confirm. */
    if ((tempfile = file_temporary(file->buffer, 1)) == NULL
        || copy_and_convert(file, tempfile, NULL) != 0
        || file_seek(file, 0, SEEK_SET) != 0
        || file_seek(tempfile, 0, SEEK_SET) != 0
        || file_truncate(file, 0) != 0) {
      file_free(tempfile);
      return ERR_IOFAIL;
    }

    /* Create a task from the request.
       NOTE(review): the result of recode_new_task() is dereferenced without
       a NULL check, and by now the original has been truncated -- confirm
       whether task creation can fail. */
    task = recode_new_task(request);
    task->fail_level = enca_recode_fail_level;
    task->abort_level = RECODE_SYSTEM_ERROR;
    task->input.name = NULL;
    task->input.file = tempfile->stream;
    task->output.name = NULL;
    task->output.file = file->stream;

    /* Now run conversion temporary file -> original. */
    success = recode_perform_task(task);

    /* If conversion wasn't successful, original file is probably damaged
       (damned librecode!) try to restore it from the temporary copy. */
    if (!success) {
      if (task->error_so_far >= RECODE_SYSTEM_ERROR) {
        fprintf(stderr, "%s: librecode probably damaged file `%s'. "
                        "Trying to recover... ",
                        program_name,
                        file->name);
        /* Rewind both files, truncate the original to file->size and copy
           the untouched temporary contents back without any conversion. */
        tempfile->buffer->pos = 0;
        if (file_seek(tempfile, 0, SEEK_SET) != -1
            && file_seek(file, 0, SEEK_SET) != -1
            && file_truncate(file, file->size) == 0
            && copy_and_convert(tempfile, file, NULL) == 0)
          fprintf(stderr, "succeeded.\n");
        else
          fprintf(stderr, "failed\n");
      }
      else
        /* Errors below RECODE_SYSTEM_ERROR are only reported; no restore is
           attempted, but the function still returns ERR_IOFAIL below. */
        print_recode_warning(task->error_so_far, file->name);
    }

    recode_delete_task(task);
    file_free(tempfile);
  }
  else {
    /* File is stdin.
       First recode beginning saved in io_buffer, then append rest of stdin. */
    /* Highest error level seen across both tasks; used only for the warning. */
    enum recode_error errmax = RECODE_NO_ERROR;

    /* Create a task from the request.
     * Set it up for buffer -> stdout conversion */
    task = recode_new_task(request);
    task->fail_level = enca_recode_fail_level;
    task->abort_level = RECODE_SYSTEM_ERROR;
    task->input.name = NULL;
    task->input.file = NULL;
    /* Input is the first file->buffer->pos bytes of the buffer. */
    task->input.buffer = (char*)file->buffer->data;
    task->input.cursor = (char*)file->buffer->data;
    task->input.limit = (char*)file->buffer->data + file->buffer->pos;
    task->output.name = NULL;
    task->output.file = stdout;

    success = recode_perform_task(task);
    if (!success) {
      if (task->error_so_far >= RECODE_SYSTEM_ERROR) {
        fprintf(stderr, "%s: librecode probably damaged `%s'. "
                        "No way to recover in a pipe.\n",
                        program_name,
                        ffname_r(NULL));
        recode_delete_task(task);
        return ERR_IOFAIL;
      }
      else
        errmax = task->error_so_far;
    }
    recode_delete_task(task);

    /* Create a task from the request.
     * Set it up for stdin -> stdout conversion */
    task = recode_new_task(request);
    task->fail_level = enca_recode_fail_level;
    task->abort_level = RECODE_SYSTEM_ERROR;
    task->input.name = NULL;
    task->input.file = stdin;
    task->output.name = NULL;
    task->output.file = stdout;

    /* NOTE(review): this overwrites `success' from the first task, so a
       non-system failure in the buffered part only influences the warning
       below, not the return value -- confirm this is intended. */
    success = recode_perform_task(task);
    if (!success) {
      if (task->error_so_far >= RECODE_SYSTEM_ERROR) {
        fprintf(stderr, "%s: librecode probably damaged `%s'. "
                        "No way to recover in a pipe.\n",
                        program_name,
                        ffname_r(NULL));
        recode_delete_task(task);
        return ERR_IOFAIL;
      }
      else {
        if (errmax < task->error_so_far)
          errmax = task->error_so_far;
      }
    }
    /* Warn once, using the worst error level of the two tasks. */
    if (errmax >= enca_recode_fail_level)
      print_recode_warning(errmax, ffname_r(NULL));

    recode_delete_task(task);
  }

  /* return ERR_IOFAIL on failure since it means file-related problems */
  return success ? ERR_OK : ERR_IOFAIL;
}
Exemplo n.º 2
0
/* Perform one conversion step using conversion descriptor icd,
 * reading from file_from and putting the result to file_to.
 *
 * file_from: source; its buffer is filled by file_read() and consumed here.
 * file_to:   destination; its buffer receives iconv() output and is flushed
 *            with file_write().
 * icd:       iconv conversion descriptor.
 *
 * Returns ERR_OK on success, ERR_IOFAIL when reading, writing or recovery
 * fails (or recovery is impossible because a pipe is involved), and
 * ERR_MALFORM when the input could not be converted but the output file was
 * restored from the source, or when trailing input bytes were left over. */
static int
iconv_one_step(File *file_from,
               File *file_to,
               iconv_t icd)
{
  size_t size_from, size_to, n;
  char *p_from, *p_to;
  int hit_eof;

  /* convert */
  do {
    /* read to io_buffer */
    if (file_read(file_from) == -1)
      return ERR_IOFAIL;

    p_from = (char*)file_from->buffer->data;
    size_from = file_from->buffer->pos;
    /* A buffer that is not completely filled is taken as end of input
       (presumably file_read() fills the buffer whenever more data exists
       -- confirm). */
    hit_eof = (ssize_t)file_from->buffer->size > file_from->buffer->pos;
    /* convert without reading more data until io_buffer is exhausted or some
       error occurs */
    do {
      /* Each pass starts with the whole output buffer available. */
      p_to = (char*)file_to->buffer->data;
      size_to = file_to->buffer->size;
      n = iconv(icd,
                (ICONV_CONST char**)&p_from, &size_from,
                &p_to, &size_to);
      /* Record how many output bytes iconv() produced. */
      file_to->buffer->pos = file_to->buffer->size - size_to;
      /* Only E2BIG (output buffer full) is handled here: flush and retry.
         errno is inspected right after iconv(), before any other call. */
      if (n != (size_t)-1 || errno != E2BIG)
        break;

      if (file_write(file_to) == -1)
        return ERR_IOFAIL;

    } while (1);

    if (n == (size_t)-1) {
      /* EINVAL means some multibyte sequence has been split---that's ok,
         move it to the beginning and go on */
      if (errno == EINVAL && !hit_eof) {
        memmove(file_from->buffer->data, p_from, size_from);
        /* Keep the unconverted tail; presumably the next file_read()
           appends after buffer->pos -- confirm. */
        file_from->buffer->pos = size_from;
      }
      else {
        /* but other errors are critical: report them and try to recover */
        fprintf(stderr, "%s: Iconv conversion error on `%s': %s\n",
                        program_name,
                        ffname_r(file_from->name),
                        strerror(errno));
        if (file_from->name && file_to->name) {
          Buffer *buf;
          int err;

          /* regular file */
          fprintf(stderr, "Trying to recover... ");
          if (file_seek(file_from, 0, SEEK_SET) != 0
              || file_seek(file_to, 0, SEEK_SET) != 0
              || file_truncate(file_to, file_to->size) != 0) {
            fprintf(stderr, "failed\n");
            return ERR_IOFAIL;
          }
          /* Copy file_from to file_to unconverted; file_to temporarily
             borrows file_from's buffer for the copy and gets its own
             buffer back afterwards. */
          file_from->buffer->pos = 0;
          buf = file_to->buffer;
          file_to->buffer = file_from->buffer;
          err = copy_and_convert(file_from, file_to, NULL);
          file_to->buffer = buf;

          if (err != 0) {
            fprintf(stderr, "failed\n");
            return ERR_IOFAIL;
          }
          fprintf(stderr, "succeeded.\n");
        }
        else {
          fprintf(stderr, "No way to recover in a pipe.\n");
          return ERR_IOFAIL;
        }

        /* Recovery succeeded, but the input was still not convertible. */
        return ERR_MALFORM;
      }
    }
    /* Everything was consumed: the next read starts with an empty buffer. */
    else file_from->buffer->pos = 0;

    /* write the remainder */
    if (file_write(file_to) == -1)
      return ERR_IOFAIL;

  } while (!hit_eof);

  /* file might end with an unfinished multibyte sequence
     NOTE(review): an EINVAL at end of input takes the error branch above and
     returns, so this check looks unreachable with size_from > 0 -- confirm. */
  if (size_from > 0) {
    fprintf(stderr, "%s: File `%s' seems to be truncated, "
                    "the trailing incomplete multibyte sequence "
                    "has been lost\n",
                    program_name,
                    ffname_r(file_from->name));
    return ERR_MALFORM;
  }

  return ERR_OK;
}
Exemplo n.º 3
0
/* Process the file named fname; this is the `boss' function.
 *
 * an:    the analyser used for guessing.  Passing NULL requests cleanup:
 *        the persistent i/o buffer is released and 0 is returned.
 * fname: name handed to file_new() (file->name == NULL is treated as stdin
 *        further below).
 *
 * Returns 0 (EXIT_SUCCESS) on success, 1 on failure (encoding not recognized
 * or conversion not possible) and EXIT_TROUBLE on troubles (the file cannot
 * be opened or read, or the conversion failed). */
static int
process_file(EncaAnalyser an,
             const char *fname)
{
  static int utf8 = ENCA_CS_UNKNOWN;
  static Buffer *buffer = NULL; /* persistent i/o buffer */
  int ot_is_convert = (options.output_type == OTYPE_CONVERT);

  EncaEncoding result; /* the guessed encoding */
  File *file; /* the processed file */

  if (!an) {
    /* Cleanup request.  Forget the pointer after freeing it, so that a
       repeated cleanup does not free it twice and a later regular call
       allocates a fresh buffer instead of using the freed one. */
    buffer_free(buffer);
    buffer = NULL;
    return 0;
  }

  /* Initialize when we are called the first time. */
  if (buffer == NULL)
    buffer = buffer_new(buffer_size);

  /* Look up the UTF-8 charset id once; it is cached in a static. */
  if (!enca_charset_is_known(utf8)) {
    utf8 = enca_name_to_charset("utf8");
    assert(enca_charset_is_known(utf8));
  }

  /* Read sample.  Conversion needs the file opened for update. */
  file = file_new(fname, buffer);
  if (file_open(file, ot_is_convert ? "r+b" : "rb") != 0) {
    file_free(file);
    return EXIT_TROUBLE;
  }
  if (file_read(file) == -1) {
    file_free(file);
    return EXIT_TROUBLE;
  }
  /* Only the sample is needed when we merely guess. */
  if (!ot_is_convert)
    file_close(file);

  /* Guess encoding.  The const variant is used when converting, since the
     buffer contents are still needed for the conversion itself. */
  dwim_libenca_options(an, file);
  if (ot_is_convert)
    result = enca_analyse_const(an, buffer->data, buffer->pos);
  else
    result = enca_analyse(an, buffer->data, buffer->pos);

  /* Is conversion required? */
  if (ot_is_convert) {
    int err = 0;

    if (enca_charset_is_known(result.charset))
      err = convert(file, result);
    else {
      /* Empty input is not worth a complaint. */
      if (enca_errno(an) != ENCA_EEMPTY) {
        fprintf(stderr, "%s: Cannot convert `%s' from unknown encoding\n",
                        program_name,
                        ffname_r(file->name));
      }
      /* Copy stdin to stdout unchanged. */
      if (file->name == NULL)
        err = copy_and_convert(file, file, NULL);
    }

    file_free(file);
    /* Failure: the encoding was not recognized (and the input was not just
       empty), or the converter reported that it cannot do the job. */
    if ((err == ERR_OK && !enca_charset_is_known(result.charset)
         && enca_errno(an) != ENCA_EEMPTY)
        || err == ERR_CANNOT)
      return 1;

    return (err == ERR_OK) ? EXIT_SUCCESS : EXIT_TROUBLE;
  }

  /* Print results. */
  print_results(file->name, an, result, enca_errno(an));
  if (result.charset == utf8)
    double_utf8_chk(an, buffer->data, buffer->pos);

  file_free(file);

  return enca_charset_is_known(result.charset) ? EXIT_SUCCESS : EXIT_FAILURE;
}
Exemplo n.º 4
0
/**
 * Prints the guessed encoding of one input in the format selected by
 * options.output_type.
 *
 * @fname: name passed to ffname_r() for the optional "name: " prefix.
 * @an: analyser, used only to turn @gerrno into a message.
 * @result: the guessed encoding (charset and surface).
 * @gerrno: analyser error code; when nonzero and the output type is
 *          OTYPE_DETAILS, a failure reason line is appended.
 *
 * Aborts on an output type it does not know.
 **/
static void
print_results(const char *fname,
              EncaAnalyser an,
              EncaEncoding result,
              int gerrno)
{
  /* Surface bits beyond those natural for the charset. */
  EncaSurface extra_surface
    = result.surface & ~enca_charset_natural_surface(result.charset);
  char *surface_text;
  const char *charset_text;

  if (options.prefix_filename)
    printf("%s: ", ffname_r(fname));

  switch (options.output_type) {
    case OTYPE_ALIASES:
      print_aliases(result.charset);
      break;

    case OTYPE_CANON:
      if (!extra_surface) {
        puts(enca_charset_name(result.charset, ENCA_NAME_STYLE_ENCA));
        break;
      }
      /* Charset name immediately followed by the surface name. */
      surface_text = enca_get_surface_name(extra_surface,
                                           ENCA_NAME_STYLE_ENCA);
      fputs(enca_charset_name(result.charset, ENCA_NAME_STYLE_ENCA), stdout);
      puts(surface_text);
      enca_free(surface_text);
      break;

    case OTYPE_HUMAN:
    case OTYPE_DETAILS:
      if (!extra_surface) {
        puts(enca_charset_name(result.charset, ENCA_NAME_STYLE_HUMAN));
        break;
      }
      /* Charset name on its own line, surfaces printed by indent_surface(). */
      surface_text = enca_get_surface_name(extra_surface,
                                           ENCA_NAME_STYLE_HUMAN);
      puts(enca_charset_name(result.charset, ENCA_NAME_STYLE_HUMAN));
      indent_surface(surface_text);
      enca_free(surface_text);
      break;

    case OTYPE_RFC1345:
      puts(enca_charset_name(result.charset, ENCA_NAME_STYLE_RFC1345));
      break;

    /* The remaining styles may lack a name for the charset; the name of the
       unknown charset in the same style is printed then. */
    case OTYPE_CS2CS:
      charset_text = enca_charset_name(result.charset,
                                       ENCA_NAME_STYLE_CSTOCS);
      if (charset_text == NULL)
        charset_text = enca_charset_name(ENCA_CS_UNKNOWN,
                                         ENCA_NAME_STYLE_CSTOCS);
      puts(charset_text);
      break;

    case OTYPE_ICONV:
      charset_text = enca_charset_name(result.charset,
                                       ENCA_NAME_STYLE_ICONV);
      if (charset_text == NULL)
        charset_text = enca_charset_name(ENCA_CS_UNKNOWN,
                                         ENCA_NAME_STYLE_ICONV);
      puts(charset_text);
      break;

    case OTYPE_MIME:
      charset_text = enca_charset_name(result.charset,
                                       ENCA_NAME_STYLE_MIME);
      if (charset_text == NULL)
        charset_text = enca_charset_name(ENCA_CS_UNKNOWN,
                                         ENCA_NAME_STYLE_MIME);
      puts(charset_text);
      break;

    default:
      abort();
      break;
  }

  if (options.output_type == OTYPE_DETAILS && gerrno) {
    printf("  Failure reason: %s.\n", enca_strerror(an, gerrno));
  }
}
Exemplo n.º 5
0
/* Fork and let the child execute the external convertor on the file.
 *
 * file:     the input to convert.  When file->name is NULL (stdin), its
 *           contents are first copied to a temporary file and the convertor
 *           gets the special extra argument "-" telling it to send the result
 *           to stdout (it is assumed to delete the temporary file itself; the
 *           parent removes it afterwards anyway).
 * from_enc: the source encoding; the target comes from options.target_enc,
 *           or from options.target_enc_str when the target is not fully
 *           known.  Encoding names are passed to the convertor in the form
 *           produced by the ENCA name style.
 *
 * Returns ERR_OK on success, ERR_CANNOT when no convertor is defined or the
 * convertor failed, ERR_IOFAIL when the temporary copy of stdin cannot be
 * made and ERR_EXEC when the convertor could not be executed.
 * On critical failure (we cannot fork(), waitpid() fails, the child is
 * killed) the whole program exits with EXIT_TROUBLE. */
int
convert_external(File *file,
                 const EncaEncoding from_enc)
{
  /* special fourth parameter passed to external convertor to instruct it to
  send result to stdout */
  static const char *STDOUT_CONV = "-";

  pid_t pid;
  int status;
  File *tempfile = NULL;
  char *from_name, *target_name;
  char *surface_name; /* temporary owner of enca_get_surface_name() result */

  if (*extern_convertor == '\0') {
    fprintf(stderr, "%s: No external convertor defined!\n", program_name);
    return ERR_CANNOT;
  }

  if (options.verbosity_level > 2)
    fprintf(stderr, "    launching `%s' to convert `%s'\n",
                    extern_convertor, ffname_r(file->name));

  /* Is conversion of stdin requested? */
  if (file->name == NULL) {
    /* Then we have to copy it to a temporary file. */
    tempfile = file_temporary(file->buffer, 0);
    if (tempfile == NULL)
      return ERR_IOFAIL;

    if (copy_and_convert(file, tempfile, NULL) != 0) {
      file_unlink(tempfile->name);
      file_free(tempfile);
      return ERR_IOFAIL;
    }
  }

  /* Construct the charset names before fork().
     enca_get_surface_name() returns a string the caller has to free, so it
     is kept in a variable instead of being passed straight through. */
  surface_name = enca_get_surface_name(from_enc.surface,
                                       ENCA_NAME_STYLE_ENCA);
  from_name = enca_strconcat(enca_charset_name(from_enc.charset,
                                               ENCA_NAME_STYLE_ENCA),
                             surface_name,
                             NULL);
  enca_free(surface_name);

  if (enca_charset_is_known(options.target_enc.charset)
      && (options.target_enc.surface & ENCA_SURFACE_UNKNOWN) == 0) {
    surface_name = enca_get_surface_name(options.target_enc.surface,
                                         ENCA_NAME_STYLE_ENCA);
    target_name
      = enca_strconcat(enca_charset_name(options.target_enc.charset,
                                         ENCA_NAME_STYLE_ENCA),
                       surface_name,
                       NULL);
    enca_free(surface_name);
  }
  else
    target_name = enca_strdup(options.target_enc_str);

  /* Fork. */
  pid = vfork();
  if (pid == 0) {
    /* Child. */
    if (tempfile)
      execlp(extern_convertor, extern_convertor,
             from_name, target_name, tempfile->name,
             STDOUT_CONV, NULL);
    else
      execlp(extern_convertor, extern_convertor,
             from_name, target_name, file->name, NULL);

    /* exec failed.  After vfork() the child must leave with _exit():
       exit() would run the parent's atexit handlers and flush its stdio
       buffers. */
    _exit(ERR_EXEC);
  }

  /* Parent. */
  if (pid == -1) {
    fprintf(stderr, "%s: Cannot fork() to execute convertor: %s\n",
                    program_name,
                    strerror(errno));
    exit(EXIT_TROUBLE);
  }
  /* Wait until the child returns. */
  if (waitpid(pid, &status, 0) == -1) {
    /* Error. */
    fprintf(stderr, "%s: wait_pid() error while waiting for convertor: %s\n",
                    program_name,
                    strerror(errno));
    exit(EXIT_TROUBLE);
  }

  /* The child has terminated one way or another; release everything that
     was prepared for it, so the temporary file does not stay behind even
     when the child was killed. */
  enca_free(from_name);
  enca_free(target_name);

  if (tempfile) {
    unlink(tempfile->name);
    file_free(tempfile);
  }

  if (!WIFEXITED(status)) {
    /* Child exited abnormally. */
    fprintf(stderr, "%s: Child convertor process has been murdered.\n",
                    program_name);
    exit(EXIT_TROUBLE);
  }

  /* Child exited normally, test exit status. */
  if (WEXITSTATUS(status) != EXIT_SUCCESS) {
    /* This means child was unable to execute convertor or convertor failed. */
    fprintf(stderr, "%s: External convertor failed (error code %d)\n",
                    program_name,
                    WEXITSTATUS(status));
    if (WEXITSTATUS(status) == ERR_EXEC)
      return ERR_EXEC;
    else
      return ERR_CANNOT;
  }
  /* Success!  Wow! */
  return ERR_OK;
}