Exemplo n.º 1
0
/*
  Read a condenseq data structure from the index named <indexname>.

  The encoded sequence of the unique parts is loaded with a GtEncseqLoader
  (autosupport disabled, md5 support dropped, ssp tab required). The
  remaining data is read by condenseq_io() from the file <indexname> with
  suffix GT_CONDENSEQ_FILE_SUFFIX. Afterwards every unique entry receives an
  array holding the indices of all link entries that refer to it.

  <logger> is used to report how sequence IDs are accessed.
  Returns the new GtCondenseq object, or NULL if an error occurred; in that
  case the partially built object is released with gt_condenseq_delete().
*/
GtCondenseq *gt_condenseq_new_from_file(const char *indexname,
                                        GtLogger *logger, GtError *err)
{
  int had_err = 0;
  FILE *fp;
  GtEncseqLoader *esl;
  GtEncseq *unique_es;
  GtCondenseq *condenseq = NULL;

  /* load unique_es */
  esl = gt_encseq_loader_new();
  gt_encseq_loader_disable_autosupport(esl);
  gt_encseq_loader_drop_md5_support(esl);
  gt_encseq_loader_require_ssp_tab(esl);
  unique_es = gt_encseq_loader_load(esl, indexname, err);
  /* the loader is not needed after loading; release it on the success AND
     on the failure path so that it does not leak when loading fails */
  gt_encseq_loader_delete(esl);
  if (!unique_es)
    had_err = -1;

  if (!had_err) {
    condenseq = condenseq_new_empty(gt_encseq_alphabet(unique_es));
    condenseq->filename = gt_cstr_dup(indexname);
    condenseq->unique_es = unique_es;
    fp = gt_fa_fopen_with_suffix(indexname, GT_CONDENSEQ_FILE_SUFFIX,
                                 "rb", err);
    if (fp == NULL) {
      had_err = -1;
    }
    else {
      had_err = condenseq_io(condenseq, fp, gt_io_error_fread, err);
      /* close the file whether or not reading succeeded, otherwise the
         handle leaks on an I/O error */
      gt_fa_fclose(fp);
    }
  }

  if (!had_err) {
    GtUword i;
    gt_assert(condenseq->uniques);
    gt_assert(condenseq->links);
    /* create link array for each unique entry; done before the overflow
       check so that the arrays are initialized on every path that leads to
       gt_condenseq_delete() below */
    for (i = 0; i < condenseq->udb_nelems; i++) {
      GT_INITARRAY(&(condenseq->uniques[i].links),uint32_t);
    }
    /* link indices are stored as uint32_t, so the number of links must not
       exceed the largest value of that type */
    if (condenseq->ldb_nelems > (GtUword) UINT32_MAX) {
      gt_error_set(err, "Overflow, to many link-elements. Can't be stored");
      had_err = -1;
    }
    /* iterate through link entries and store their indices in the array of
       the corresponding unique entry */
    for (i = 0; !had_err && i < condenseq->ldb_nelems; i++) {
      GtUword uid = condenseq->links[i].unique_id;
      gt_assert(uid < condenseq->udb_nelems);
      GT_STOREINARRAY(&(condenseq->uniques[uid].links),
                      uint32_t,
                      10,
                      (uint32_t) i);
    }
  }

  if (!had_err) {
    gt_assert(condenseq != NULL);
    if (condenseq->id_len != GT_UNDEF_UWORD)
      gt_logger_log(logger, "IDs const len: " GT_WU, condenseq->id_len);
    else
      gt_logger_log(logger, "using sdstab to access IDs");
  }
  if (had_err) {
    gt_condenseq_delete(condenseq);
    condenseq = NULL;
  }
  return (condenseq);
}
Exemplo n.º 2
0
/*
  Fill <suffixarray> with the parts of the index <indexname> selected by the
  bit set <demand> (SARR_DESTAB, SARR_SDSTAB, SARR_SSPTAB, SARR_SUFTAB,
  SARR_LCPTAB, SARR_BWTTAB, SARR_BCKTAB).

  The encoded sequence is always loaded. For the suffix, lcp, large-lcp and
  bwt tables, <map> chooses the access method: if true the tables are
  obtained with gt_fa_mmap_check_size_with_suffix(), otherwise a buffered
  stream is set up with INITBufferedfile. The bucket table is always obtained
  with gt_bcktab_map().

  Returns 0 on success and -1 on failure. On failure <suffixarray> is
  released with gt_freesuffixarray().
*/
static int inputsuffixarray(bool map,
                            Suffixarray *suffixarray,
                            unsigned int demand,
                            const char *indexname,
                            GtLogger *logger,
                            GtError *err)
{
  /* NOTE(review): INITBufferedfile is a macro defined outside this block; it
     presumably reads/writes the locals <haserr> and <err>, so these names
     must stay as they are - confirm against the macro definition */
  bool haserr = false;
  GtUword total_len = 0;
  GtEncseqLoader *loader;

  gt_error_check(err);
  initsuffixarray(suffixarray);

  /* configure the loader according to the demanded optional tables */
  loader = gt_encseq_loader_new();
  if (demand & SARR_DESTAB) {
    gt_encseq_loader_require_des_tab(loader);
  } else {
    gt_encseq_loader_do_not_require_des_tab(loader);
  }
  if (demand & SARR_SDSTAB) {
    gt_encseq_loader_require_sds_tab(loader);
  } else {
    gt_encseq_loader_do_not_require_sds_tab(loader);
  }
  if (demand & SARR_SSPTAB) {
    gt_encseq_loader_require_ssp_tab(loader);
  } else {
    gt_encseq_loader_do_not_require_ssp_tab(loader);
  }
  gt_encseq_loader_set_logger(loader, logger);
  suffixarray->encseq = gt_encseq_loader_load(loader, indexname, err);
  gt_encseq_loader_delete(loader);

  /* the project file is only scanned if the encoded sequence was loaded */
  if (suffixarray->encseq == NULL) {
    haserr = true;
  } else {
    haserr = scanprjfileuintkeys(suffixarray,indexname,logger,err);
  }

  /* mirror the encoded sequence if the index says so and it is not yet */
  if (!haserr
        && suffixarray->mirroredencseq
        && !gt_encseq_is_mirrored(suffixarray->encseq)
        && gt_encseq_mirror(suffixarray->encseq, err) != 0) {
    haserr = true;
  }
  if (!haserr) {
    total_len = gt_encseq_total_length(suffixarray->encseq);
  }

  /* ---- suffix table ---- */
  if (!haserr && (demand & SARR_SUFTAB)) {
    if (map) {
      /* nothing is mapped for an index without sorted suffixes */
      if (suffixarray->numberofallsortedsuffixes > 0) {
        suffixarray->suftab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                       GT_SUFTABSUFFIX,
                                       suffixarray->numberofallsortedsuffixes,
                                       sizeof (*suffixarray->suftab),
                                       err);
        if (suffixarray->suftab == NULL) {
          haserr = true;
        }
      }
    } else {
#if defined (_LP64) || defined (_WIN64)
      /* the file size decides whether entries are read as uint32_t or as
         GtUword: exactly 4 bytes per suffix selects the uint32_t stream,
         any other size selects the GtUword stream */
      off_t filesize = gt_file_size_with_suffix(indexname,GT_SUFTABSUFFIX);

      if (filesize == (off_t) sizeof (uint32_t) *
                              suffixarray->numberofallsortedsuffixes) {
        gt_logger_log(logger,"read suftab in units of 4 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_uint32_t,uint32_t,
                         GT_SUFTABSUFFIX);
      } else {
        gt_logger_log(logger,"read suftab in units of 8 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                         GT_SUFTABSUFFIX);
      }
#else
      gt_logger_log(logger,"read suftab in units of 4 bytes");
      INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                       GT_SUFTABSUFFIX);
#endif
    }
    if (!haserr && !suffixarray->longest.defined) {
      gt_error_set(err,"longest not defined");
      haserr = true;
    }
  }

  /* ---- lcp table and table of large lcp values ---- */
  if (!haserr && (demand & SARR_LCPTAB)) {
    if (map) {
      if (suffixarray->numberofallsortedsuffixes > 0) {
        suffixarray->lcptab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                         GT_LCPTABSUFFIX,
                                         suffixarray->numberofallsortedsuffixes,
                                         sizeof (*suffixarray->lcptab),
                                         err);
        if (suffixarray->lcptab == NULL) {
          haserr = true;
        }
      }
    } else {
      INITBufferedfile(indexname,&suffixarray->lcptabstream,GtUchar,
                       GT_LCPTABSUFFIX);
      /* position the stream just past the first GtUchar of the file */
      if (!haserr &&
          fseek(suffixarray->lcptabstream.fp,
                (GtWord) sizeof (GtUchar),SEEK_SET)) {
        gt_error_set(err,"fseek(esastream) failed: %s",strerror(errno));
        haserr = true;
      }
    }
    if (!haserr && !suffixarray->numoflargelcpvalues.defined) {
      gt_error_set(err,"numoflargelcpvalues not defined");
      haserr = true;
    }
    /* the large-lcp table is only accessed if it has at least one entry */
    if (!haserr && suffixarray->numoflargelcpvalues.valueunsignedlong > 0) {
      if (map) {
        suffixarray->llvtab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                           GT_LARGELCPTABSUFFIX,
                                           (GtUword)
                                           suffixarray->numoflargelcpvalues.
                                           valueunsignedlong,
                                           sizeof (*suffixarray->llvtab),
                                           err);
        if (suffixarray->llvtab == NULL) {
          haserr = true;
        }
      } else {
        INITBufferedfile(indexname,&suffixarray->llvtabstream,Largelcpvalue,
                         GT_LARGELCPTABSUFFIX);
      }
    }
  }

  /* ---- bwt table: total length + 1 entries ---- */
  if (!haserr && (demand & SARR_BWTTAB)) {
    if (map) {
      suffixarray->bwttab
        = gt_fa_mmap_check_size_with_suffix(indexname,
                                         GT_BWTTABSUFFIX,
                                         total_len+1,
                                         sizeof (*suffixarray->bwttab),
                                         err);
      if (suffixarray->bwttab == NULL) {
        haserr = true;
      }
    } else {
      INITBufferedfile(indexname,&suffixarray->bwttabstream,GtUchar,
                       GT_BWTTABSUFFIX);
    }
  }

  /* ---- bucket table ---- */
  if (!haserr && (demand & SARR_BCKTAB)) {
    suffixarray->bcktab
      = gt_bcktab_map(indexname,
                      gt_encseq_alphabetnumofchars(suffixarray->encseq),
                      suffixarray->prefixlength,
                      total_len+1,
                      true,
                      err);
    if (suffixarray->bcktab == NULL) {
      haserr = true;
    }
  }

  /* release everything acquired so far if any step failed */
  if (haserr) {
    gt_freesuffixarray(suffixarray);
    return -1;
  }
  return 0;
}