Esempio n. 1
0
static int determine_outfp(void *data, GtError *err)
{
  GtOutputFileInfo *ofi = (GtOutputFileInfo*) data;
  GtFileMode file_mode;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(ofi);
  if (!gt_str_length(ofi->output_filename))
    *ofi->outfp = NULL; /* no output file given -> use stdout */
  else { /* outputfile given -> create generic file pointer */
    gt_assert(!(ofi->gzip && ofi->bzip2));
    if (ofi->gzip)
      file_mode = GT_FILE_MODE_GZIP;
    else if (ofi->bzip2)
      file_mode = GT_FILE_MODE_BZIP2;
    else
      file_mode = GT_FILE_MODE_UNCOMPRESSED;
    if (file_mode != GT_FILE_MODE_UNCOMPRESSED &&
        strcmp(gt_str_get(ofi->output_filename) +
               gt_str_length(ofi->output_filename) -
               strlen(gt_file_mode_suffix(file_mode)),
               gt_file_mode_suffix(file_mode))) {
      gt_warning("output file '%s' doesn't have correct suffix '%s', appending "
                 "it", gt_str_get(ofi->output_filename),
                 gt_file_mode_suffix(file_mode));
      gt_str_append_cstr(ofi->output_filename, gt_file_mode_suffix(file_mode));
    }
    if (!ofi->force && gt_file_exists(gt_str_get(ofi->output_filename))) {
        gt_error_set(err, "file \"%s\" exists already, use option -%s to "
                     "overwrite", gt_str_get(ofi->output_filename),
                     GT_FORCE_OPT_CSTR);
        had_err = -1;
    }
    if (!had_err) {
      *ofi->outfp = gt_file_xopen_file_mode(file_mode,
                                            gt_str_get(ofi->output_filename),
                                            "w");
      gt_assert(*ofi->outfp);
    }
  }
  return had_err;
}
Esempio n. 2
0
static int store_in_subset_file(void *data, GthSA *sa,
                                const char *outputfilename, GtError *err)
{
  Store_in_subset_file_data *store_in_subset_file_data =
    (Store_in_subset_file_data*) data;
  double split_determing_percentage = 0.0;
  unsigned long filenum;
  char filenamesuffix[4];
  int had_err = 0;

  gt_error_check(err);

  /* filter before we do any further processing */
  if (gth_sa_filter_filter_sa(store_in_subset_file_data->sa_filter, sa)) {
    /* and free it afterwards */
    gth_sa_delete(sa);
    /* discard */
    return 0;
  }

  /* check whether we got a new output file to process */
  if (!store_in_subset_file_data->current_outputfilename) {
    store_in_subset_file_data->current_outputfilename =
      gt_cstr_dup(outputfilename);
  }
  else if (strcmp(store_in_subset_file_data->current_outputfilename,
                  outputfilename)) {
    /* close current output files */
    close_output_files(store_in_subset_file_data);
    gt_free(store_in_subset_file_data->current_outputfilename);
 }

  /* determine in which file the current sa needs to be put */
  switch (store_in_subset_file_data->gthsplitinfo->splitmode) {
    case ALIGNMENTSCORE_SPLIT:
      split_determing_percentage = gth_sa_score(sa);
      strcpy(filenamesuffix, "scr");
      break;
    case COVERAGE_SPLIT:
      split_determing_percentage = gth_sa_coverage(sa);
      strcpy(filenamesuffix, "cov");
      break;
    default: gt_assert(0);
  }
  gt_assert(split_determing_percentage >= 0.0);
  /* XXX: change into an assertion when coverage problem is fixed */
  if (split_determing_percentage > 1.0)
    split_determing_percentage = 1.0;

  if (split_determing_percentage == 1.0)
    filenum = store_in_subset_file_data->num_of_subset_files - 1;
  else {
    filenum =  floor(split_determing_percentage * 100.0 /
                           store_in_subset_file_data->gthsplitinfo->range);
  }
  gt_assert(filenum < store_in_subset_file_data->num_of_subset_files);

  /* make sure the file exists and is open */
  if (!store_in_subset_file_data->subset_files[filenum]) {
    gt_assert(store_in_subset_file_data->subset_filenames[filenum] == NULL);
    store_in_subset_file_data->subset_filenames[filenum] = gt_str_new();
    gt_str_append_cstr_nt(store_in_subset_file_data->subset_filenames[filenum],
                          outputfilename,
                          gt_file_basename_length(outputfilename));
    gt_str_append_char(store_in_subset_file_data->subset_filenames[filenum],
                       '.');
    gt_str_append_cstr(store_in_subset_file_data->subset_filenames[filenum],
                       filenamesuffix);
    gt_str_append_ulong(store_in_subset_file_data->subset_filenames[filenum],
                        filenum *
                        store_in_subset_file_data->gthsplitinfo->range);
    gt_str_append_char(store_in_subset_file_data->subset_filenames[filenum],
                       '-');
    gt_str_append_ulong(store_in_subset_file_data->subset_filenames[filenum],
                     (filenum + 1) *
                     store_in_subset_file_data->gthsplitinfo->range);
    gt_str_append_cstr(store_in_subset_file_data->subset_filenames[filenum],
                       gt_file_mode_suffix(store_in_subset_file_data
                                           ->gthsplitinfo->file_mode));

    /* if not disabled by -force, check if file already exists */
    if (!store_in_subset_file_data->gthsplitinfo->force) {
      store_in_subset_file_data->subset_files[filenum] =
        gt_file_open(store_in_subset_file_data->gthsplitinfo->file_mode,
                     gt_str_get(store_in_subset_file_data
                                ->subset_filenames[filenum]), "r", NULL);
      if (store_in_subset_file_data->subset_files[filenum]) {
        gt_error_set(err, "file \"%s\" exists already. use option -%s to "
                     "overwrite", gt_str_get(store_in_subset_file_data
                                             ->subset_filenames[filenum]),
                     GT_FORCE_OPT_CSTR);
        had_err = -1;
      }
    }
    if (!had_err) {
      /* open split file for writing */
      store_in_subset_file_data->subset_files[filenum] =
          gt_file_xopen_file_mode(store_in_subset_file_data->gthsplitinfo
                                  ->file_mode,
                                  gt_str_get(store_in_subset_file_data
                                             ->subset_filenames[filenum]), "w");
      /* store XML header in file */
      gth_xml_show_leader(true,
                          store_in_subset_file_data->subset_files[filenum]);
    }
  }

  /* put it there */
  if (!had_err) {
    gth_xml_inter_sa_visitor_set_outfp(store_in_subset_file_data->sa_visitor,
                                       store_in_subset_file_data
                                       ->subset_files[filenum]);
    gth_sa_visitor_visit_sa(store_in_subset_file_data->sa_visitor, sa);
  }

  /* adjust counter */
  if (!had_err)
    store_in_subset_file_data->subset_file_sa_counter[filenum]++;

  /* and free it afterwards */
  gth_sa_delete(sa);

  return had_err;
}
Esempio n. 3
0
static int split_fasta_file(const char *filename, unsigned long max_filesize,
                            bool force, GtError *err)
{
  GtFile *srcfp = NULL, *destfp = NULL;
  GtStr *destfilename = NULL;
  unsigned long filenum = 0, bytecount = 0, separator_pos;
  int read_bytes, had_err = 0;
  char buf[BUFSIZ];

  gt_error_check(err);
  gt_assert(filename && max_filesize);

  /* open source file */
  srcfp = gt_file_xopen(filename, "r");
  gt_assert(srcfp);

  /* read start characters */
  if ((read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) == 0) {
    gt_error_set(err, "file \"%s\" is empty", filename);
    had_err = -1;
  }
  bytecount += read_bytes;

  /* make sure the file is in fasta format */
  if (!had_err && buf[0] != '>') {
    gt_error_set(err, "file is not in FASTA format");
    had_err = -1;
  }

  if (!had_err) {
    /* open destination file */
    destfilename = gt_str_new();
    gt_str_append_cstr_nt(destfilename, filename,
                          gt_file_basename_length(filename));
    gt_str_append_char(destfilename, '.');
    gt_str_append_ulong(destfilename, ++filenum);
    gt_str_append_cstr(destfilename,
                       gt_file_mode_suffix(gt_file_mode(srcfp)));
    if (!(destfp = gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w",
                                                  force, err))) {
      had_err = -1;
    }
    if (!had_err)
      gt_file_xwrite(destfp, buf, read_bytes);

    while (!had_err &&
           (read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) != 0) {
      if (bytecount + read_bytes > max_filesize) {
        int offset = bytecount < max_filesize ? max_filesize - bytecount : 0;
        if ((separator_pos = buf_contains_separator(buf, offset, read_bytes))) {
          separator_pos--;
          gt_assert(separator_pos < read_bytes);
          if (separator_pos)
            gt_file_xwrite(destfp, buf, separator_pos);
          /* close current file */
          gt_file_delete(destfp);
          /* open new file */
          gt_str_reset(destfilename);
          gt_str_append_cstr_nt(destfilename, filename,
                                gt_file_basename_length(filename));
          gt_str_append_char(destfilename, '.');
          gt_str_append_ulong(destfilename, ++filenum);
          gt_str_append_cstr(destfilename,
                             gt_file_mode_suffix(gt_file_mode(srcfp)));
          if (!(destfp =
                  gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w",
                                                 force, err))) {
            had_err = -1;
            break;
          }
          bytecount = read_bytes - separator_pos; /* reset */
          gt_assert(buf[separator_pos] == '>');
          gt_file_xwrite(destfp, buf + separator_pos,
                         read_bytes - separator_pos);
          continue;
        }
      }
      bytecount += read_bytes;
      gt_file_xwrite(destfp, buf, read_bytes);
    }
  }

  /* free */
  gt_str_delete(destfilename);

  /* close current file */
  gt_file_delete(destfp);

  /* close source file */
  gt_file_delete(srcfp);

  return had_err;
}