Beispiel #1
0
/* Opens <path> with <mode> using the opener that matches <genfilemode>
   (plain, gzip or bzip2) and, on success, registers the resulting handle in
   <fa>->file_pointer together with the call site (<filename>, <line>).
   If <x> is true the gt_x*open() variant is used (called without <err>);
   otherwise the gt_e*open() variant is used and receives <err>.
   Returns the opened handle, or NULL if no handle was obtained. */
static void* fileopen_generic(FA *fa, const char *path, const char *mode,
                              GtFileMode genfilemode, bool x,
                              const char *filename, int line, GtError *err)
{
  FAFileInfo *info;
  void *handle = NULL;

  gt_error_check(err);
  gt_assert(fa && path && mode);

  /* remember where the open request came from */
  info = gt_malloc(sizeof (FAFileInfo));
  info->filename = filename;
  info->line = line;

  if (genfilemode == GFM_UNCOMPRESSED) {
    if (x)
      handle = gt_xfopen(path, mode);
    else
      handle = gt_efopen(path, mode, err);
  }
  else if (genfilemode == GFM_GZIP) {
    if (x)
      handle = gt_xgzopen(path, mode);
    else
      handle = gt_egzopen(path, mode, err);
  }
  else if (genfilemode == GFM_BZIP2) {
    if (x)
      handle = gt_xbzopen(path, mode);
    else
      handle = gt_ebzopen(path, mode, err);
  }
  else {
    /* unknown file mode */
    gt_assert(0);
  }

  if (!handle) {
    /* nothing was opened, so the bookkeeping record is not needed */
    gt_free(info);
    return NULL;
  }

  /* the handle itself is the key under which the call site is stored */
  gt_hashmap_add(fa->file_pointer, handle, info);
  return handle;
}
Beispiel #2
0
/* Loads a BSSM parameter set stored in the old binary format from the file
   at <path>. <filename> is only used in error messages.
   The file layout read here is: one unsigned char version number (must be
   2), three bool flags telling which models are present (GT donor, GC donor,
   AG acceptor), followed by the models whose flag is set, in that order.
   Returns the newly allocated parameter set, or NULL if the version number
   is not 2, no model flag is set, or bssm_model_read() reports an error
   (<err> is passed on to it). */
static GthBSSMParam* load_old_binary_format(GtStr *path, const char *filename,
        GtError *err)
{
    GthBSSMParam *bssm_param;
    int had_err = 0;
    FILE *file;
    gt_error_check(err);
    gt_assert(path && filename);

    /* the file contains raw binary data, so it has to be opened in binary
       mode; text mode may translate bytes on some platforms */
    file = gt_xfopen(gt_str_get(path), "rb");

    /* read version number and check if equals version number 2 */
    bssm_param = gt_malloc(sizeof *bssm_param);
    gt_xfread(&bssm_param->version_num,  sizeof (unsigned char), 1, file);
    if (bssm_param->version_num != (unsigned char) 2) {
        /* explicit cast so that the argument matches the %u conversion */
        gt_error_set(err, "BSSM file %s has unrecognized version number %u",
                     filename, (unsigned int) bssm_param->version_num);
        had_err = -1;
    }

    if (!had_err) {
        /* read in model variables */
        gt_xfread(&bssm_param->gt_donor_model_set,  sizeof (bool), 1, file);
        gt_xfread(&bssm_param->gc_donor_model_set,  sizeof (bool), 1, file);
        gt_xfread(&bssm_param->ag_acceptor_model_set,  sizeof (bool), 1, file);

        /* check if at least one model is set in file */
        if (!bssm_param->gt_donor_model_set &&
                !bssm_param->gc_donor_model_set &&
                !bssm_param->ag_acceptor_model_set) {
            gt_error_set(err, "BSSM file %s apparently contains no model", filename);
            had_err = -1;
        }
    }

    /* The model flags are only valid if no error occurred so far; the
       !had_err test in front of each flag guards against reading them
       uninitialized. */

    /* read GT donor site model */
    if (!had_err && bssm_param->gt_donor_model_set)
        had_err = bssm_model_read(&bssm_param->gt_donor_model, file, err);

    /* read GC donor site model */
    if (!had_err && bssm_param->gc_donor_model_set)
        had_err = bssm_model_read(&bssm_param->gc_donor_model, file, err);

    /* read AG acceptor site model */
    if (!had_err && bssm_param->ag_acceptor_model_set)
        had_err = bssm_model_read(&bssm_param->ag_acceptor_model, file, err);

    gt_xfclose(file);

    if (had_err) {
        gth_bssm_param_delete(bssm_param);
        return NULL;
    }
    return bssm_param;
}
/* Processes the hit table in <table_filename> line by line. Lines starting
   with '#' are skipped. Every other line is split at blanks and must consist
   of exactly 23 non-empty columns: column 0 is parsed as a number (the
   target), column 3 is taken as the query name.
   For each hit, gt_condenseq_each_redundant_seq() is called on <ces> with
   the target number and hmmsearch_process_seq, collecting into a tree of
   sequences. Whenever the counter of newly seen query names equals
   <arguments>->max_queries, the sequences collected so far are handed to
   hmmsearch_create_fine_fas() and hmmsearch_call_fine_search(), then the
   tree and the counter are reset. After the last line, one more fine search
   is run on whatever is left in the tree.
   If <arguments>->outtable_filename is non-empty, a running file number is
   appended to it for each fine search and removed again afterwards.
   Returns 0 on success and a non-zero value on error. Malformed table lines
   (wrong number of columns, unparsable target number) set <err>. */
static int hmmsearch_process_coarse_hits(
                                       char *table_filename,
                                       GtCondenseq *ces,
                                       GtCondenseqHmmsearchArguments *arguments,
                                       GtLogger *logger,
                                       GtError *err) {
  int had_err = 0;
  GtStr *line = gt_str_new();
  FILE *table = NULL;
  GtSplitter *splitter = gt_splitter_new();
  GtStr *query = gt_str_new(),
        *fine_fasta_filename = gt_str_new_cstr("condenseq");
  GtRBTree *sequences = NULL;
  GtUword filecount = (GtUword) 1;
  unsigned int querycount = 0;
  /* lengths of the unmodified names, used to strip appended suffixes again */
  const GtUword fine_fasta_name_length = gt_str_length(fine_fasta_filename);
  const GtUword table_name_length = gt_str_length(arguments->outtable_filename);
  const GtUword expected_columns = (GtUword) 23,
        target_column = 0,
        query_column = (GtUword) 3;

  table = gt_xfopen(table_filename, "r");

  sequences = gt_rbtree_new(hmmsearch_cmp_seqnum,
                            hmmsearch_tree_free_node, NULL);

  while (!had_err && gt_str_read_next_line(line, table) == 0) {
    char *c_line = gt_str_get(line);

    if (c_line[0] != '#') {
      GtUword uid = 0;

      gt_splitter_split_non_empty(splitter, c_line, gt_str_length(line), ' ');

      /* The table is produced by an external program, so its shape is
         validated with a proper error instead of an assertion. */
      if (gt_splitter_size(splitter) != expected_columns) {
        gt_error_set(err, "unexpected number of columns in hit table: "
                     GT_WU " (expected " GT_WU ")",
                     gt_splitter_size(splitter), expected_columns);
        had_err = -1;
      }

      if (!had_err &&
          sscanf(gt_splitter_get_token(splitter, target_column),
                 GT_WU, &uid) != 1) {
        gt_error_set(err, "couldn't parse target number: %s",
                     gt_splitter_get_token(splitter, target_column));
        had_err = -1;
      }

      /* a query name differing from the previous one starts a new query */
      if (!had_err &&
          (gt_str_length(query) == 0 ||
           strcmp(gt_str_get(query),
                  gt_splitter_get_token(splitter, query_column)) != 0)) {
        gt_str_set(query, gt_splitter_get_token(splitter, query_column));
        gt_logger_log(logger, "new query: %s", gt_str_get(query));
        querycount++;
      }

      /* query limit reached: run the fine search on what was collected
         before the current hit, then start a fresh batch */
      if (!had_err && querycount == arguments->max_queries) {
        hmmsearch_create_fine_fas(fine_fasta_filename, sequences, ces);
        if (table_name_length != 0)
          gt_str_append_uword(arguments->outtable_filename, filecount++);
        had_err =
          hmmsearch_call_fine_search(table_name_length != 0 ?
                                       arguments->outtable_filename :
                                       NULL,
                                     gt_str_get(fine_fasta_filename),
                                     gt_str_get(arguments->hmmsearch_path),
                                     gt_str_get(arguments->hmm),
                                     logger, err);
        gt_rbtree_clear(sequences);
        /* cut the names back to their original length */
        gt_str_set_length(fine_fasta_filename, fine_fasta_name_length);
        if (table_name_length != 0)
          gt_str_set_length(arguments->outtable_filename, table_name_length);
        querycount = 0;
      }

      if (!had_err) {
        /* a return value of 0 is treated as failure here */
        if (gt_condenseq_each_redundant_seq(ces, uid,
                                            hmmsearch_process_seq,
                                            sequences, err) == 0) {
          had_err = -1;
        }
      }
      gt_splitter_reset(splitter);
    }
    gt_str_reset(line);
  }
  gt_splitter_delete(splitter);
  gt_str_delete(line);
  gt_str_delete(query);
  gt_xfclose(table);

  /* final batch: whatever has been collected since the last fine search */
  if (!had_err) {
    hmmsearch_create_fine_fas(fine_fasta_filename, sequences, ces);
    if (table_name_length != 0)
      gt_str_append_uword(arguments->outtable_filename, filecount++);
    had_err =
      hmmsearch_call_fine_search(table_name_length != 0 ?
                                 arguments->outtable_filename :
                                 NULL,
                                 gt_str_get(fine_fasta_filename),
                                 gt_str_get(arguments->hmmsearch_path),
                                 gt_str_get(arguments->hmm),
                                 logger, err);
  }
  gt_log_log("created " GT_WU " files", filecount);
  gt_rbtree_delete(sequences);
  gt_str_delete(fine_fasta_filename);
  return had_err;
}
/* Runner of the compressed bitsequence tool.
   The input bit vector is either read from <arguments>->filename (if that
   option is set) or generated in memory with <arguments>->size words.
   The file layout read here is an unsigned long long bit count followed by
   the corresponding number of GtBitsequence words.
   The bit vector is then compressed, written to a temporary file, read back
   from that file and, if <arguments>->check_consistency is set, every bit of
   both compressed representations is compared against the original.
   Returns 0 on success and -1 on error; short reads of the input file are
   reported through <err>. */
static int gt_compressedbits_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtCompressdbitsArguments *arguments = tool_arguments;
  int had_err = 0;
  unsigned long idx;
  unsigned long long num_of_bits = 0ULL;
  GtBitsequence *bits = NULL;
  GtCompressedBitsequence *cbs = NULL, *read_cbs = NULL;
  GtStr *filename = gt_str_new();
  FILE *fp = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  gt_assert(argc == parsed_args);

  if (gt_option_is_set(arguments->filename_op)) {
    FILE *file = NULL;
    gt_assert(arguments->filename != NULL);

    /* raw binary data is read below, so open the file in binary mode */
    file = gt_xfopen(gt_str_get(arguments->filename), "rb");
    if ((size_t) 1 != gt_xfread(&num_of_bits,
                                sizeof (num_of_bits), (size_t) 1, file)) {
      /* make sure a failure is never returned without an error message */
      gt_error_set(err, "could not read number of bits from file \"%s\"",
                   gt_str_get(arguments->filename));
      had_err = -1;
    }
    if (!had_err) {
      gt_log_log("bits to read: %llu", num_of_bits);
      /* the number of words follows from the bit count stored in the file */
      arguments->size = (unsigned long) GT_NUMOFINTSFORBITS(num_of_bits);
      bits = gt_malloc(sizeof (*bits) * arguments->size);
      if ((size_t) arguments->size !=
          gt_xfread(bits, sizeof (*bits),
                    (size_t) arguments->size, file)) {
        gt_error_set(err, "could not read %lu words of bit data from file "
                     "\"%s\"", arguments->size,
                     gt_str_get(arguments->filename));
        had_err = -1;
      }
    }
    gt_xfclose(file);
  }
  else {
    bits = gt_calloc((size_t) arguments->size, sizeof (*bits));
    num_of_bits = (unsigned long long) (GT_INTWORDSIZE * arguments->size);

    if (arguments->fill_random) {
      /* alternating bit pattern XORed with random values */
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] =
          (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ gt_rand_max(ULONG_MAX));
      }
    }
    else {
      /* alternating bit pattern XORed with the word index */
      for (idx = 0; idx < arguments->size; idx++)
        bits[idx] = (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ idx);
    }
  }

  if (!had_err) {
    /* only the name of the temporary file is needed, so the file pointer is
       closed right away */
    fp = gt_xtmpfp(filename);
    gt_fa_xfclose(fp);
    fp = NULL;

    gt_log_log("filename: %s", gt_str_get(filename));
    gt_log_log("size in words: %lu", arguments->size);
    cbs = gt_compressed_bitsequence_new(
                            bits, arguments->samplerate,
                            (unsigned long) num_of_bits);
    gt_log_log("original size in MB: %2.3f",
               (sizeof (*bits) * arguments->size) / (1024.0 * 1024.0));
    gt_log_log("compressed size in MB: %2.3f",
               gt_compressed_bitsequence_size(cbs) / (1024.0 * 1024.0));
    gt_log_log("popcount table size thereof in MB: %2.3f",
               gt_popcount_tab_calculate_size(15U) / (1024.0 * 1024.0));
    had_err = gt_compressed_bitsequence_write(cbs, gt_str_get(filename), err);
  }
  if (!had_err)
  {
    read_cbs =
      gt_compressed_bitsequence_new_from_file(gt_str_get(filename), err);
    if (read_cbs == NULL)
      had_err = -1;
  }
  if (!had_err && bits != NULL && arguments->check_consistency) {
    /* compare the in-memory and the re-read compressed sequence with the
       original bits; mismatches trigger an assertion */
    for (idx = 0; (unsigned long long) idx < num_of_bits; ++idx) {
      int GT_UNUSED bit = gt_compressed_bitsequence_access(read_cbs, idx);
      int GT_UNUSED original = GT_ISIBITSET(bits, idx) ? 1 : 0;
      gt_assert(gt_compressed_bitsequence_access(cbs, idx) == bit);
      gt_assert(original == bit);
    }
  }
  gt_compressed_bitsequence_delete(cbs);
  gt_compressed_bitsequence_delete(read_cbs);
  gt_free(bits);
  gt_str_delete(filename);
  return had_err;
}