Beispiel #1
0
/* Runner of the splitfasta tool. The input file name is taken from
   <argv[parsed_args]>. If <arguments->splitdesc> is non-empty the work is
   delegated to split_description(); otherwise a maximum file size is
   determined and the work is delegated to split_fasta_file(). The maximum
   file size is either the estimated input size divided by the requested
   number of files (if that number is non-zero) or <max_filesize_in_MB>
   shifted left by 20 bits, i.e. multiplied by 2^20.
   Returns whatever the function delegated to returns. */
static int gt_splitfasta_runner(GT_UNUSED int argc, const char **argv,
                                int parsed_args, void *tool_arguments,
                                GtError *err)
{
  SplitfastaArguments *arguments = tool_arguments;
  const char *input_path;
  unsigned int requested_parts;
  unsigned long size_limit;
  off_t estimated_size;

  gt_error_check(err);
  gt_assert(arguments);

  requested_parts = arguments->num_files;
  input_path = argv[parsed_args];

  /* splitting by description takes precedence over splitting by size */
  if (gt_str_length(arguments->splitdesc)) {
    return split_description(input_path, arguments->splitdesc,
                             arguments->width, arguments->force, err);
  }

  if (requested_parts) {
    /* derive the size limit from the requested number of files */
    estimated_size = gt_file_estimate_size(input_path);
    size_limit = estimated_size / requested_parts;
  }
  else {
    size_limit = arguments->max_filesize_in_MB << 20;
  }

  return split_fasta_file(input_path, size_limit, arguments->force, err);
}
Beispiel #2
0
/* Returns the sum of gt_file_estimate_size() over all entries stored in
   <filenames>; the result is 0 if the array has no entries. */
off_t gt_files_estimate_total_size(const GtStrArray *filenames)
{
  off_t sum = 0;
  GtUword idx = 0;

  while (idx < gt_str_array_size(filenames)) {
    sum += gt_file_estimate_size(gt_str_array_get(filenames, idx));
    idx++;
  }

  return sum;
}
Beispiel #3
0
/* Runs the encoding steps of <hcr_enc> for the output name <name>.
   If <timer> is not NULL, the progress message "write encoding" is shown on
   stdout. If <hcr_enc> has a description encoder, the descriptions are
   encoded first using a fasta header iterator over <hcr_enc->files>. If that
   step reported no error, hcr_write_seq_qual_data() is called. When no error
   occurred and logging is enabled, an overview (sampling technique, number
   of reads, number of nucleotides, bits per nucleotide) is logged.
   Returns 0 if no step reported an error, otherwise the error value of the
   failing step. */
int gt_hcr_encoder_encode(GtHcrEncoder *hcr_enc, const char *name,
                          GtTimer *timer, GtError *err)
{
  int had_err = 0;
  GtStr *hcr_path;
  double bits_per_nucleotide;

  gt_error_check(err);

  if (timer != NULL)
    gt_timer_show_progress(timer, "write encoding", stdout);

  if (hcr_enc->encdesc_encoder != NULL) {
    GtCstrIterator *headers = gt_fasta_header_iterator_new(hcr_enc->files,
                                                           err);
    had_err = gt_encdesc_encoder_encode(hcr_enc->encdesc_encoder, headers,
                                        name, err);
    gt_cstr_iterator_delete(headers);
  }

  if (!had_err)
    had_err = hcr_write_seq_qual_data(name, hcr_enc, timer, err);

  /* the overview below is only produced on success with logging enabled */
  if (had_err || !gt_log_enabled())
    return had_err;

  /* name of the file whose estimated size enters the bits/nucleotide value */
  hcr_path = gt_str_new_cstr(name);
  gt_str_append_cstr(hcr_path, HCRFILESUFFIX);

  gt_log_log("sequences with qualities encoding overview:");
  gt_log_log("**>");

  if (hcr_enc->page_sampling) {
    gt_log_log("applied sampling technique: sampling every " GT_WU
               "th page",
               hcr_enc->sampling_rate);
  }
  else if (hcr_enc->regular_sampling) {
    gt_log_log("applied sampling technique: sampling every " GT_WU
               "th read",
               hcr_enc->sampling_rate);
  }
  else {
    gt_log_log("applied sampling technique: none");
  }

  gt_log_log("total number of encoded nucleotide sequences with qualities: "
             GT_WU, hcr_enc->num_of_reads);
  gt_log_log("total number of encoded nucleotides: " GT_LLU,
             hcr_enc->seq_encoder->total_num_of_symbols);

  /* estimated file size in bits divided by the number of encoded symbols */
  bits_per_nucleotide =
    (gt_file_estimate_size(gt_str_get(hcr_path)) * 8.0) /
      hcr_enc->seq_encoder->total_num_of_symbols;
  gt_log_log("bits per nucleotide encoding: %f", bits_per_nucleotide);

  gt_log_log("<**");
  gt_str_delete(hcr_path);

  return had_err;
}