Example #1
0
/* Convert file in place using the UNIX98 iconv functions.

   file      the file to convert (file->name == NULL means stdin, whose
             output is redirected by reopening the File for writing)
   from_enc  the detected source encoding; the target is
             options.target_enc

   Returns ERR_CANNOT when the conversion is refused or iconv cannot open
   it, ERR_IOFAIL when preparing the temporary copy fails, and otherwise
   whatever iconv_one_step() returns.

   When the iconv implementation is not transitive (ICONV_TRANSITIVE is not
   defined), it may help to perform the conversion via Unicode, so that is
   tried too (probably UCS-2/ISO-10646, but maybe UTF-8---whatever has been
   detected at configure time). */
int
convert_iconv(File *file,
              EncaEncoding from_enc)
{
  static int ascii = ENCA_CS_UNKNOWN;
  File *tempfile = NULL;
  int err = ERR_IOFAIL;
  iconv_t icd;

  /* Resolve the ASCII charset id once and keep it for later calls. */
  if (!enca_charset_is_known(ascii)) {
    ascii = enca_name_to_charset("ascii");
    assert(enca_charset_is_known(ascii));
  }

  /* iconv can't convert from an encoding it has no name for... */
  if (!enca_charset_name(from_enc.charset, ENCA_NAME_STYLE_ICONV))
    return ERR_CANNOT;

  /* ...nor to one (an unknown target charset is let through here). */
  if (enca_charset_is_known(options.target_enc.charset)
      && !enca_charset_name(options.target_enc.charset,
                            ENCA_NAME_STYLE_ICONV))
    return ERR_CANNOT;

  /* Conversion to ASCII is never attempted: it can only damage the files
     and upset users, nothing else. */
  if (options.target_enc.charset == ascii)
    return ERR_CANNOT;

  /* Fail early on really silly surfaces. */
  if (!acceptable_surface(from_enc))
    return ERR_CANNOT;
  if (!acceptable_surface(options.target_enc))
    return ERR_CANNOT;

  /* Is the conversion possible at all? */
  if (do_iconv_open(from_enc, options.target_enc, &icd) != 0)
    return ERR_CANNOT;

  /* iconv doesn't recode files in place, so the input is first copied to a
     temporary file: the already buffered beginning goes first, the rest of
     the file follows. */
  tempfile = file_temporary(file->buffer, 1);
  if (!tempfile)
    goto finish;
  if (file_write(tempfile) == -1)
    goto finish;
  if (copy_and_convert(file, tempfile, NULL) != 0)
    goto finish;

  /* Rewind both ends; stdin (no name) is not seekable and is skipped. */
  if (file->name && file_seek(file, 0, SEEK_SET) != 0)
    goto finish;
  if (file_seek(tempfile, 0, SEEK_SET) != 0)
    goto finish;

  if (file->name) {
    /* Regular file: empty it so that the converted text can replace it. */
    if (file_truncate(file, 0) != 0)
      goto finish;
  }
  else {
    /* stdin: fake-reopen the File for writing. */
    if (file_close(file) != 0)
      goto finish;
    if (file_open(file, "wb") != 0)
      goto finish;
  }

  /* Create the second buffer when we don't have any yet, but don't make it
     unnecessarily large, the system default suffices. */
  if (!buffer_iconv)
    buffer_iconv = buffer_new(0);
  tempfile->buffer = buffer_iconv;

  /* Temporary copy -> original, through the conversion descriptor. */
  err = iconv_one_step(tempfile, file, icd);

finish:
  file_free(tempfile);
  do_iconv_close(icd);
  return err;
}
Example #2
0
// Convert the name cnm from my_charset_utf8_general_ci to my_charset_latin1.
// The result is a NUL-terminated string in a buffer sub-allocated from g
// via PlugSubAlloc (strlen(cnm) + 1 bytes).
char *PRXCOL::Decode(PGLOBAL g, const char *cnm)
  {
  const size_t srclen= strlen(cnm);      // source length without the NUL
  const size_t dstsize= srclen + 1;      // room for the terminator
  char  *decoded= (char*)PlugSubAlloc(g, NULL, dstsize);
  uint   ignored_errors;
  uint32 outlen= copy_and_convert(decoded, dstsize,
                                  &my_charset_latin1,
                                  cnm, srclen,
                                  &my_charset_utf8_general_ci,
                                  &ignored_errors);

  // copy_and_convert reports the number of bytes written; terminate there
  decoded[outlen]= '\0';
  return decoded;
  } // end of Decode
Example #3
0
/* convert file using GNU recode library
   returns 0 on success, nonzero error code otherwise */
int
convert_recode(File *file,
               EncaEncoding from_enc)
{
  RECODE_REQUEST request;
  RECODE_TASK task;
  File *tempfile = NULL;
  bool success;
  const char *encreq;

  /* Allocate librecode outer if we are called first time. */
  if (outer == NULL) {
    if ((outer = recode_new_outer(false)) == NULL) {
      fprintf(stderr, "%s: recode library doesn't like us\n",
                      program_name);
      return ERR_LIBCOM;
    }
  }

  /* Construct recode request string,
     try to mimic surfaceless converter now. */
  {
    EncaEncoding enc;

    enc.charset = from_enc.charset;
    enc.surface = from_enc.surface | ENCA_SURFACE_REMOVE;
    encreq = format_request_string(enc, options.target_enc,
                                   ENCA_SURFACE_EOL_LF);
  }
  /* Create a recode request from it. */
  request = get_recode_request(encreq);
  if (request == NULL)
    return ERR_CANNOT;

  /* Now we have to distinguish between file and stdin, namely because
   * in case of stdin, it's first part is already loaded in the buffer. */
  if (file->name != NULL) {
    /* File is a regular file.
       Since recode doesn't recode files in place, we make a temporary file
       and copy contents of file fname to it. */
    if (file_seek(file, 0, SEEK_SET) != 0)
      return ERR_IOFAIL;
    file->buffer->pos = 0;

    if ((tempfile = file_temporary(file->buffer, 1)) == NULL
        || copy_and_convert(file, tempfile, NULL) != 0
        || file_seek(file, 0, SEEK_SET) != 0
        || file_seek(tempfile, 0, SEEK_SET) != 0
        || file_truncate(file, 0) != 0) {
      file_free(tempfile);
      return ERR_IOFAIL;
    }

    /* Create a task from the request. */
    task = recode_new_task(request);
    task->fail_level = enca_recode_fail_level;
    task->abort_level = RECODE_SYSTEM_ERROR;
    task->input.name = NULL;
    task->input.file = tempfile->stream;
    task->output.name = NULL;
    task->output.file = file->stream;

    /* Now run conversion temporary file -> original. */
    success = recode_perform_task(task);

    /* If conversion wasn't successfull, original file is probably damaged
       (damned librecode!) try to restore it from the temporary copy. */
    if (!success) {
      if (task->error_so_far >= RECODE_SYSTEM_ERROR) {
        fprintf(stderr, "%s: librecode probably damaged file `%s'. "
                        "Trying to recover... ",
                        program_name,
                        file->name);
        tempfile->buffer->pos = 0;
        if (file_seek(tempfile, 0, SEEK_SET) != -1
            && file_seek(file, 0, SEEK_SET) != -1
            && file_truncate(file, file->size) == 0
            && copy_and_convert(tempfile, file, NULL) == 0)
          fprintf(stderr, "succeeded.\n");
        else
          fprintf(stderr, "failed\n");
      }
      else
        print_recode_warning(task->error_so_far, file->name);
    }

    recode_delete_task(task);
    file_free(tempfile);
  }
  else {
    /* File is stdin.
       First recode begining saved in io_buffer, then append rest of stdin. */
    enum recode_error errmax = RECODE_NO_ERROR;

    /* Create a task from the request.
     * Set it up for buffer -> stdout conversion */
    task = recode_new_task(request);
    task->fail_level = enca_recode_fail_level;
    task->abort_level = RECODE_SYSTEM_ERROR;
    task->input.name = NULL;
    task->input.file = NULL;
    task->input.buffer = (char*)file->buffer->data;
    task->input.cursor = (char*)file->buffer->data;
    task->input.limit = (char*)file->buffer->data + file->buffer->pos;
    task->output.name = NULL;
    task->output.file = stdout;

    success = recode_perform_task(task);
    if (!success) {
      if (task->error_so_far >= RECODE_SYSTEM_ERROR) {
        fprintf(stderr, "%s: librecode probably damaged `%s'. "
                        "No way to recover in a pipe.\n",
                        program_name,
                        ffname_r(NULL));
        recode_delete_task(task);
        return ERR_IOFAIL;
      }
      else
        errmax = task->error_so_far;
    }
    recode_delete_task(task);

    /* Create a task from the request.
     * Set it up for stdin -> stdout conversion */
    task = recode_new_task(request);
    task->fail_level = enca_recode_fail_level;
    task->abort_level = RECODE_SYSTEM_ERROR;
    task->input.name = NULL;
    task->input.file = stdin;
    task->output.name = NULL;
    task->output.file = stdout;

    success = recode_perform_task(task);
    if (!success) {
      if (task->error_so_far >= RECODE_SYSTEM_ERROR) {
        fprintf(stderr, "%s: librecode probably damaged `%s'. "
                        "No way to recover in a pipe.\n",
                        program_name,
                        ffname_r(NULL));
        recode_delete_task(task);
        return ERR_IOFAIL;
      }
      else {
        if (errmax < task->error_so_far)
          errmax = task->error_so_far;
      }
    }
    if (errmax >= enca_recode_fail_level)
      print_recode_warning(errmax, ffname_r(NULL));

    recode_delete_task(task);
  }

  /* return ERR_IOFAIL on failure since it means file-related problems */
  return success ? ERR_OK : ERR_IOFAIL;
}
Example #4
0
/* Perform one conversion step using conversion descriptor icd,
   reading from file_from and putting the result to file_to.

   Input is read into file_from->buffer chunk by chunk, converted into
   file_to->buffer and written out until the input is exhausted.

   Returns ERR_OK on success,
           ERR_IOFAIL when reading or writing fails, or when recovery from
             a conversion error is impossible or fails,
           ERR_MALFORM when a conversion error occurred and the original
             content was restored, or when the input ends with an
             incomplete multibyte sequence. */
static int
iconv_one_step(File *file_from,
               File *file_to,
               iconv_t icd)
{
  size_t size_from, size_to, n;
  char *p_from, *p_to;
  int hit_eof;

  /* convert */
  do {
    /* read to io_buffer */
    if (file_read(file_from) == -1)
      return ERR_IOFAIL;

    p_from = (char*)file_from->buffer->data;
    size_from = file_from->buffer->pos;
    /* a read that left the buffer not completely full is taken as the
       last one (presumably file_read() fills the buffer unless it hits
       end of file -- confirm against file_read()) */
    hit_eof = (ssize_t)file_from->buffer->size > file_from->buffer->pos;
    /* convert without reading more data until io_buffer is exhausted or some
       error occurs */
    do {
      /* always start at the beginning of the output buffer; it has been
         either never used or just flushed by file_write() below */
      p_to = (char*)file_to->buffer->data;
      size_to = file_to->buffer->size;
      n = iconv(icd,
                (ICONV_CONST char**)&p_from, &size_from,
                &p_to, &size_to);
      /* size_to is now the unused space left, so this is the number of
         bytes produced */
      file_to->buffer->pos = file_to->buffer->size - size_to;
      /* only E2BIG (output buffer full) is handled here: flush the output
         and go on with the rest of the input */
      if (n != (size_t)-1 || errno != E2BIG)
        break;

      if (file_write(file_to) == -1)
        return ERR_IOFAIL;

    } while (1);

    if (n == (size_t)-1) {
      /* EINVAL means some multibyte sequence has been split---that's ok,
         move it to the beginning and go on */
      if (errno == EINVAL && !hit_eof) {
        memmove(file_from->buffer->data, p_from, size_from);
        file_from->buffer->pos = size_from;
      }
      else {
        /* but other errors are critical, stop the conversion and try to
           recover */
        fprintf(stderr, "%s: Iconv conversion error on `%s': %s\n",
                        program_name,
                        ffname_r(file_from->name),
                        strerror(errno));
        if (file_from->name && file_to->name) {
          Buffer *buf;
          int err;

          /* regular file */
          fprintf(stderr, "Trying to recover... ");
          if (file_seek(file_from, 0, SEEK_SET) != 0
              || file_seek(file_to, 0, SEEK_SET) != 0
              || file_truncate(file_to, file_to->size) != 0) {
            fprintf(stderr, "failed\n");
            return ERR_IOFAIL;
          }
          /* copy file_from back over file_to unconverted; file_to
             temporarily borrows the input buffer for the copy and gets
             its own buffer back afterwards */
          file_from->buffer->pos = 0;
          buf = file_to->buffer;
          file_to->buffer = file_from->buffer;
          err = copy_and_convert(file_from, file_to, NULL);
          file_to->buffer = buf;

          if (err != 0) {
            fprintf(stderr, "failed\n");
            return ERR_IOFAIL;
          }
          fprintf(stderr, "succeeded.\n");
        }
        else {
          fprintf(stderr, "No way to recover in a pipe.\n");
          return ERR_IOFAIL;
        }

        /* the content was restored, but the conversion did not happen */
        return ERR_MALFORM;
      }
    }
    /* everything was converted, the next read starts with an empty buffer */
    else file_from->buffer->pos = 0;

    /* write the remainder */
    if (file_write(file_to) == -1)
      return ERR_IOFAIL;

  } while (!hit_eof);

  /* file might end with an unfinished multibyte sequence */
  if (size_from > 0) {
    fprintf(stderr, "%s: File `%s' seems to be truncated, "
                    "the trailing incomplete multibyte sequence "
                    "has been lost\n",
                    program_name,
                    ffname_r(file_from->name));
    return ERR_MALFORM;
  }

  return ERR_OK;
}
Example #5
0
/* Process file named fname; this is the `boss' function.

   an     the analyser to guess the encoding with; when NULL, the call only
          releases the persistent i/o buffer and returns 0
   fname  name of the file to process (passed to file_new())

   Depending on options.output_type the file is either converted, or its
   guessed encoding is printed.

   Returns EXIT_SUCCESS on success, 1 (EXIT_FAILURE when only printing) on
   failure, i.e. when the encoding is unknown or cannot be converted, and
   EXIT_TROUBLE on troubles such as I/O errors. */
static int
process_file(EncaAnalyser an,
             const char *fname)
{
  static int utf8 = ENCA_CS_UNKNOWN;
  static Buffer *buffer = NULL; /* persistent i/o buffer */
  int ot_is_convert = (options.output_type == OTYPE_CONVERT);

  EncaEncoding result; /* the guessed encoding */
  File *file; /* the processed file */

  /* Cleanup request. */
  if (!an) {
    buffer_free(buffer);
    /* Forget the freed buffer, otherwise a later call would skip the
       allocation below and use the dangling pointer. */
    buffer = NULL;
    return 0;
  }

  /* Initialize when we are called the first time. */
  if (buffer == NULL)
    buffer = buffer_new(buffer_size);

  if (!enca_charset_is_known(utf8)) {
    utf8 = enca_name_to_charset("utf8");
    assert(enca_charset_is_known(utf8));
  }

  /* Read sample.
     The file is opened for update only when it is to be converted. */
  file = file_new(fname, buffer);
  if (file_open(file, ot_is_convert ? "r+b" : "rb") != 0) {
    file_free(file);
    return EXIT_TROUBLE;
  }
  if (file_read(file) == -1) {
    file_free(file);
    return EXIT_TROUBLE;
  }
  /* The sample is all that is needed when no conversion follows. */
  if (!ot_is_convert)
    file_close(file);

  /* Guess encoding.
     The const variant is used when converting, presumably because the
     sample must stay intact to be converted afterwards. */
  dwim_libenca_options(an, file);
  if (ot_is_convert)
    result = enca_analyse_const(an, buffer->data, buffer->pos);
  else
    result = enca_analyse(an, buffer->data, buffer->pos);

  /* Is conversion required? */
  if (ot_is_convert) {
    int err = 0;

    if (enca_charset_is_known(result.charset))
      err = convert(file, result);
    else {
      /* An empty sample is not worth a complaint. */
      if (enca_errno(an) != ENCA_EEMPTY) {
        fprintf(stderr, "%s: Cannot convert `%s' from unknown encoding\n",
                        program_name,
                        ffname_r(file->name));
      }
      /* Copy stdin to stdout unchanged. */
      if (file->name == NULL)
        err = copy_and_convert(file, file, NULL);
    }

    file_free(file);
    /* Failure: the encoding was not recognized (and the input was not
       just empty), or no converter was able to do the conversion. */
    if ((err == ERR_OK && !enca_charset_is_known(result.charset)
         && enca_errno(an) != ENCA_EEMPTY)
        || err == ERR_CANNOT)
      return 1;

    return (err == ERR_OK) ? EXIT_SUCCESS : EXIT_TROUBLE;
  }

  /* Print results. */
  print_results(file->name, an, result, enca_errno(an));
  if (result.charset == utf8)
    double_utf8_chk(an, buffer->data, buffer->pos);

  file_free(file);

  return enca_charset_is_known(result.charset) ? EXIT_SUCCESS : EXIT_FAILURE;
}
/* Fork and let the child execute the external convertor
   (extern_convertor) on file.

   When file is stdin (file->name == NULL), its content is first copied to
   a temporary file, which is converted instead; the special option
   STDOUT_CONV is then passed to the convertor to make it send the result
   to stdout (the convertor is assumed to delete the temporary file itself,
   nevertheless removing it is attempted here too).

   from_enc is the detected source encoding; the encoding names are passed
   to the convertor in the enca naming style, the target comes from
   options.target_enc (or options.target_enc_str when the target is not
   fully known).

   Returns ERR_OK on success, ERR_CANNOT when no convertor is defined or
   the convertor fails, ERR_EXEC when the convertor cannot be executed,
   ERR_IOFAIL when stdin cannot be saved to a temporary file;
   on critical failure (like we cannot fork()) it simply exits with
   EXIT_TROUBLE. */
int
convert_external(File *file,
                 const EncaEncoding from_enc)
{
  /* special fourth parameter passed to external convertor to instruct it to
  send result to stdout */
  static const char *STDOUT_CONV = "-";

  pid_t pid;
  int status;
  File *tempfile = NULL;
  char *from_name, *target_name;

  if (*extern_convertor == '\0') {
    fprintf(stderr, "%s: No external convertor defined!\n", program_name);
    return ERR_CANNOT;
  }

  if (options.verbosity_level > 2)
    fprintf(stderr, "    launching `%s' to convert `%s'\n",
                    extern_convertor, ffname_r(file->name));

  /* Is conversion of stdin requested? */
  if (file->name == NULL) {
    /* Then we have to copy it to a temporary file. */
    tempfile = file_temporary(file->buffer, 0);
    if (tempfile == NULL)
      return ERR_IOFAIL;

    if (copy_and_convert(file, tempfile, NULL) != 0) {
      file_unlink(tempfile->name);
      file_free(tempfile);
      return ERR_IOFAIL;
    }
  }

  /* Construct the charset names before fork(): the vfork()ed child must
     not do anything but exec or _exit. */
  from_name = enca_strconcat(enca_charset_name(from_enc.charset,
                                               ENCA_NAME_STYLE_ENCA),
                             enca_get_surface_name(from_enc.surface,
                                                   ENCA_NAME_STYLE_ENCA),
                             NULL);
  if (enca_charset_is_known(options.target_enc.charset)
      && (options.target_enc.surface & ENCA_SURFACE_UNKNOWN) == 0) {
    target_name
      = enca_strconcat(enca_charset_name(options.target_enc.charset,
                                         ENCA_NAME_STYLE_ENCA),
                       enca_get_surface_name(options.target_enc.surface,
                                             ENCA_NAME_STYLE_ENCA),
                       NULL);
  }
  else
    target_name = enca_strdup(options.target_enc_str);

  /* Fork. */
  pid = vfork();
  if (pid == 0) {
    /* Child.
       The argument list of a variadic function has to be terminated with
       a real null pointer, a bare NULL may be a plain integer 0. */
    if (tempfile)
      execlp(extern_convertor, extern_convertor,
             from_name, target_name, tempfile->name,
             STDOUT_CONV, (char *)NULL);
    else
      execlp(extern_convertor, extern_convertor,
             from_name, target_name, file->name, (char *)NULL);

    /* exec failed.  After vfork() the child must leave with _exit():
       exit() would run atexit handlers and flush stdio buffers that still
       belong to the parent. */
    _exit(ERR_EXEC);
  }

  /* Parent. */
  if (pid == -1) {
    fprintf(stderr, "%s: Cannot fork() to execute convertor: %s\n",
                    program_name,
                    strerror(errno));
    exit(EXIT_TROUBLE);
  }
  /* Wait until the child returns. */
  if (waitpid(pid, &status, 0) == -1) {
    /* Error. */
    fprintf(stderr, "%s: wait_pid() error while waiting for convertor: %s\n",
                    program_name,
                    strerror(errno));
    exit(EXIT_TROUBLE);
  }
  if (!WIFEXITED(status)) {
    /* Child exited abnormally. */
    fprintf(stderr, "%s: Child convertor process has been murdered.\n",
                    program_name);
    exit(EXIT_TROUBLE);
  }

  enca_free(from_name);
  enca_free(target_name);

  if (tempfile) {
    /* The result of unlink() is deliberately ignored, the convertor may
       have removed the file already. */
    unlink(tempfile->name);
    file_free(tempfile);
  }

  /* Child exited normally, test exit status. */
  if (WEXITSTATUS(status) != EXIT_SUCCESS) {
    /* This means child was unable to execute convertor or convertor failed. */
    fprintf(stderr, "%s: External convertor failed (error code %d)\n",
                    program_name,
                    WEXITSTATUS(status));
    if (WEXITSTATUS(status) == ERR_EXEC)
      return ERR_EXEC;
    else
      return ERR_CANNOT;
  }
  /* Success!  Wow! */
  return ERR_OK;
}