示例#1
0
/*
 * Fill <bssm_model> with one model record read from <file>.
 *
 * The record is consumed in this order: hypothesis number, left window size,
 * right window size, and finally the hypothesis table whose layout is chosen
 * by the hypothesis number.
 *
 * Returns 0 on success. If the hypothesis number is neither HYPOTHESIS2 nor
 * HYPOTHESIS7, <err> is set and -1 is returned without reading any further.
 */
static int bssm_model_read(GthBSSMModel *bssm_model, FILE *file, GtError *err)
{
    gt_error_check(err);

    gt_xfread(&bssm_model->hypothesis_num, sizeof (GtUword), 1, file);

    /* reject anything but the two known table layouts before going on */
    if (bssm_model->hypothesis_num != HYPOTHESIS2 &&
            bssm_model->hypothesis_num != HYPOTHESIS7) {
        gt_error_set(err, "BSSM model contains unknown hypothesis number");
        return -1;
    }

    gt_xfread(&bssm_model->window_size_left, sizeof (GtUword), 1, file);
    gt_xfread(&bssm_model->window_size_right, sizeof (GtUword), 1, file);

    if (bssm_model->hypothesis_num == HYPOTHESIS2) {
        gt_xfread(&bssm_model->hypotables.hypo2table, sizeof (Hypo2table), 1,
                  file);
    }
    else if (bssm_model->hypothesis_num == HYPOTHESIS7) {
        gt_xfread(&bssm_model->hypotables.hypo7table, sizeof (Hypo7table), 1,
                  file);
    }
    else {
        /* excluded by the validation above */
        gt_assert(0);
    }

    return 0;
}
示例#2
0
/*
 * Load a BSSM parameter set stored in the old binary format (version 2).
 *
 * <path> is the file that gets opened; <filename> is used only in error
 * messages. The file layout read here is: one version byte, three bool flags
 * telling which models are present (GT donor, GC donor, AG acceptor), then
 * the present models in that same order.
 *
 * Returns a newly allocated GthBSSMParam on success. On failure the
 * partially filled object is passed to gth_bssm_param_delete() and NULL is
 * returned; <err> is set for a wrong version number, for a file without any
 * model, and by bssm_model_read() for an unknown hypothesis number.
 */
static GthBSSMParam* load_old_binary_format(GtStr *path, const char *filename,
        GtError *err)
{
    GthBSSMParam *bssm_param;
    int had_err = 0;
    FILE *file;
    gt_error_check(err);
    gt_assert(path && filename);

    /* the file holds raw binary data, so it must not be opened in text mode
       (text mode may translate bytes on platforms that distinguish both) */
    file = gt_xfopen(gt_str_get(path), "rb");

    /* read version number and check if equals version number 2 */
    bssm_param = gt_malloc(sizeof *bssm_param);
    gt_xfread(&bssm_param->version_num,  sizeof (unsigned char), 1, file);
    if (bssm_param->version_num != (unsigned char) 2) {
        gt_error_set(err, "BSSM file %s has unrecognized version number %u",
                     filename, bssm_param->version_num);
        had_err = -1;
    }

    if (!had_err) {
        /* read in model variables */
        gt_xfread(&bssm_param->gt_donor_model_set,  sizeof (bool), 1, file);
        gt_xfread(&bssm_param->gc_donor_model_set,  sizeof (bool), 1, file);
        gt_xfread(&bssm_param->ag_acceptor_model_set,  sizeof (bool), 1, file);

        /* check if at least one model is set in file */
        if (!bssm_param->gt_donor_model_set &&
                !bssm_param->gc_donor_model_set &&
                !bssm_param->ag_acceptor_model_set) {
            gt_error_set(err, "BSSM file %s apparently contains no model", filename);
            had_err = -1;
        }
    }

    /* read GT donor site model */
    if (!had_err && bssm_param->gt_donor_model_set)
        had_err = bssm_model_read(&bssm_param->gt_donor_model, file, err);

    /* read GC donor site model */
    if (!had_err && bssm_param->gc_donor_model_set)
        had_err = bssm_model_read(&bssm_param->gc_donor_model, file, err);

    /* read AG acceptor site model */
    if (!had_err && bssm_param->ag_acceptor_model_set)
        had_err = bssm_model_read(&bssm_param->ag_acceptor_model, file, err);

    /* the stream is closed on the success and on the error path alike */
    gt_xfclose(file);

    if (had_err) {
        gth_bssm_param_delete(bssm_param);
        return NULL;
    }
    return bssm_param;
}
示例#3
0
/* FIXME: convert to platform-independent variant */
/*
 * Build a sequence range list from the data stored in stream <fp>.
 *
 * The stream is expected to hold the number of ranges followed by that many
 * struct seqRange records; both are read in their raw in-memory form
 * (NOTE(review): presumably this is what the FIXME above refers to).
 *
 * <alphabet> is stored in the new list and determines the number of bits
 * reserved per symbol and hence the maximal range length.
 * If <features> contains SRL_PARTIAL_SYMBOL_SUMS, the per-symbol partial sums
 * are not read but recomputed: row i holds, for every symbol, the summed
 * length of all ranges before range i that carry this symbol.
 * <err> is only asserted to be non-NULL, it is not written here.
 *
 * Returns the newly allocated list.
 */
struct seqRangeList *
SRLReadFromStream(FILE *fp, const MRAEnc *alphabet,
                  enum SRLFeatures features, GT_UNUSED GtError *err)
{
  struct seqRangeList *newRangeList;
  size_t numRanges;
  gt_assert(fp && err);
  newRangeList = gt_malloc(sizeof (struct seqRangeList));
  newRangeList->alphabet = alphabet;
  newRangeList->symBits = requiredSymbolBits(MRAEncGetSize(alphabet) - 1);
  if (newRangeList->symBits)
    newRangeList->maxRangeLen =
      (((Seqpos)1) << (symLenStrBits - newRangeList->symBits)) - 1;
  else
    newRangeList->maxRangeLen = ~(Seqpos)0;
  gt_xfread(&(newRangeList->numRanges), sizeof (newRangeList->numRanges), 1,
            fp);
  numRanges = newRangeList->numRanges;
  newRangeList->partialSymSums = NULL;
  /* capacity equals the number of ranges read */
  newRangeList->numRangesStorable = numRanges;
  newRangeList->ranges = gt_malloc(sizeof (struct seqRange) * numRanges);
  gt_xfread(newRangeList->ranges, sizeof (struct seqRange), numRanges, fp);
  if (features & SRL_PARTIAL_SYMBOL_SUMS)
  {
    Seqpos *partialSymSums;
    size_t numSyms = MRAEncGetSize(alphabet), i;
    newRangeList->partialSymSums = partialSymSums =
      gt_malloc(sizeof (Seqpos) * numSyms * numRanges);
    /* Row 0 is all zeros since no range precedes the first one. With an
       empty list the table has no rows at all, so clearing row 0 would
       write past the end of the allocation. */
    if (numRanges > 0)
      memset(partialSymSums, 0, sizeof (Seqpos) * numSyms);
    for (i = 1; i < numRanges; ++i)
    {
      struct seqRange *lastRange = newRangeList->ranges + i - 1;
      Symbol lastSym = seqRangeSym(lastRange, newRangeList->symBits);
      /* row i = row i - 1 plus the length of range i - 1 for its symbol */
      memcpy(partialSymSums + i * numSyms, partialSymSums + (i - 1) * numSyms,
             sizeof (Seqpos) * numSyms);
      partialSymSums[i * numSyms + lastSym] +=
        seqRangeLen(lastRange, newRangeList->symBits);
    }
  }
  return newRangeList;
}
示例#4
0
/*
 * Read up to <nbytes> bytes into <buf> from <file>, dispatching on the mode
 * of <file> to the matching reader (plain, gzip or bzip2).
 * If <file> is NULL, the data is read from stdin instead.
 *
 * Returns whatever the selected reader returns, converted to int. For a mode
 * not handled here gt_assert(0) is hit; if that does not terminate the
 * program, -1 is returned.
 */
int gt_file_xread(GtFile *file, void *buf, size_t nbytes)
{
  /* no file object given: fall back to standard input */
  if (!file)
    return gt_xfread(buf, 1, nbytes, stdin);

  switch (file->mode) {
    case GT_FILE_MODE_UNCOMPRESSED:
      return gt_xfread(buf, 1, nbytes, file->fileptr.file);
    case GT_FILE_MODE_GZIP:
      return gt_xgzread(file->fileptr.gzfile, buf, nbytes);
    case GT_FILE_MODE_BZIP2:
      return gt_xbzread(file->fileptr.bzfile, buf, nbytes);
    default:
      gt_assert(0);
  }
  return -1;
}
示例#5
0
/*
 * Runner of the compressedbits tool.
 *
 * Obtains a bit vector, builds a compressed bitsequence from it, writes that
 * to a temporary file and reads it back again:
 *  - if the filename option is set, the vector is read from that file, which
 *    is expected to hold the number of bits (unsigned long long) followed by
 *    the bit words; arguments->size is overwritten with the number of words;
 *  - otherwise arguments->size words are generated, either from random
 *    values or from the word index, each XORed with an alternating bit
 *    pattern.
 * If arguments->check_consistency is set, every bit of the original vector
 * is compared via gt_assert() against both compressed representations.
 *
 * <tool_arguments> must point to a GtCompressdbitsArguments object.
 * Returns 0 on success and -1 on failure; a short read of the input file is
 * reported through <err>.
 */
static int gt_compressedbits_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtCompressdbitsArguments *arguments = tool_arguments;
  int had_err = 0;
  unsigned long idx;
  unsigned long long num_of_bits = 0ULL;
  GtBitsequence *bits = NULL;
  GtCompressedBitsequence *cbs = NULL, *read_cbs = NULL;
  GtStr *filename = gt_str_new();
  FILE *fp = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  gt_assert(argc == parsed_args);

  if (gt_option_is_set(arguments->filename_op)) {
    FILE *file = NULL;
    gt_assert(arguments->filename != NULL);

    /* raw binary words are read below, hence binary mode */
    file = gt_xfopen(gt_str_get(arguments->filename), "rb");
    if ((size_t) 1 != gt_xfread(&num_of_bits,
                                sizeof (num_of_bits), (size_t) 1, file)) {
      /* a failure must never be returned without a message in err */
      gt_error_set(err, "could not read number of bits from file \"%s\"",
                   gt_str_get(arguments->filename));
      had_err = -1;
    }
    if (!had_err) {
      gt_log_log("bits to read: %llu", num_of_bits);
      arguments->size = (unsigned long) GT_NUMOFINTSFORBITS(num_of_bits);
      bits = gt_malloc(sizeof (*bits) * arguments->size);
      if ((size_t) arguments->size !=
          gt_xfread(bits, sizeof (*bits),
                    (size_t) arguments->size, file)) {
        gt_error_set(err, "could not read %lu words of bits from file \"%s\"",
                     (unsigned long) arguments->size,
                     gt_str_get(arguments->filename));
        had_err = -1;
      }
    }
    gt_xfclose(file);
  }
  else {
    bits = gt_calloc(sizeof (*bits), (size_t) arguments->size);
    num_of_bits = (unsigned long long) (GT_INTWORDSIZE * arguments->size);

    if (arguments->fill_random) {
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] =
          (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ gt_rand_max(ULONG_MAX));
      }
    }
    else {
      for (idx = 0; idx < arguments->size; idx++)
        bits[idx] = (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ idx);
    }
  }

  if (!had_err) {
    /* the temporary stream is only needed to obtain a file name, so it is
       closed right away */
    fp = gt_xtmpfp(filename);
    gt_fa_xfclose(fp);
    fp = NULL;

    gt_log_log("filename: %s", gt_str_get(filename));
    gt_log_log("size in words: %lu", arguments->size);
    cbs = gt_compressed_bitsequence_new(
                            bits, arguments->samplerate,
                            (unsigned long) num_of_bits);
    gt_log_log("original size in MB: %2.3f",
               (sizeof (*bits) * arguments->size) / (1024.0 * 1024.0));
    gt_log_log("compressed size in MB: %2.3f",
               gt_compressed_bitsequence_size(cbs) / (1024.0 * 1024.0));
    gt_log_log("popcount table size thereof in MB: %2.3f",
               gt_popcount_tab_calculate_size(15U) / (1024.0 * 1024.0));
    had_err = gt_compressed_bitsequence_write(cbs, gt_str_get(filename), err);
  }
  if (!had_err)
  {
    read_cbs =
      gt_compressed_bitsequence_new_from_file(gt_str_get(filename), err);
    if (read_cbs == NULL)
      had_err = -1;
  }
  if (!had_err && bits != NULL && arguments->check_consistency) {
    /* compare the original bits with the in-memory and the re-read
       compressed representation */
    for (idx = 0; (unsigned long long) idx < num_of_bits; ++idx) {
      int GT_UNUSED bit = gt_compressed_bitsequence_access(read_cbs, idx);
      int GT_UNUSED original = GT_ISIBITSET(bits, idx) ? 1 : 0;
      gt_assert(gt_compressed_bitsequence_access(cbs, idx) == bit);
      gt_assert(original == bit);
    }
  }
  gt_compressed_bitsequence_delete(cbs);
  gt_compressed_bitsequence_delete(read_cbs);
  gt_free(bits);
  gt_str_delete(filename);
  return had_err;
}