Ejemplo n.º 1
0
int gt_extractkeysfromfastaindex(const char *indexname,
                                 const GtStr *fileofkeystoextract,
                                 unsigned long linewidth,GtError *err)
{
  GtEncseq *encseq = NULL;
  GtEncseqLoader *el = NULL;
  bool haserr = false;
  unsigned long numofdbsequences = 0, keysize = 0;

  el = gt_encseq_loader_new();
  encseq = gt_encseq_loader_load(el, indexname, err);
  gt_encseq_loader_delete(el);
  if (encseq == NULL)
  {
    haserr = true;
  }
  if (!haserr)
  {
    int retval;

    numofdbsequences = gt_encseq_num_of_sequences(encseq);
    retval = readkeysize(indexname,err);
    if (retval < 0)
    {
      haserr = true;
    }
    keysize = (unsigned long) retval;
  }
  if (!haserr)
  {
    char *keytab;
    unsigned long keytablength;

    keytablength = 1UL + numofdbsequences * (keysize+1);
    keytab = gt_fa_mmap_check_size_with_suffix(indexname,
                                               GT_KEYSTABFILESUFFIX,
                                               keytablength,
                                               sizeof (GtUchar),
                                               err);
    if (keytab == NULL)
    {
      haserr = true;
    } else
    {
      if (itersearchoverallkeys(encseq,keytab,numofdbsequences,
                                keysize,fileofkeystoextract,
                                linewidth,err) != 0)
      {
        haserr = true;
      }
    }
    gt_fa_xmunmap(keytab);
  }
  if (encseq != NULL)
  {
    gt_encseq_delete(encseq);
    encseq = NULL;
  }
  return haserr ? -1 : 0;
}
Ejemplo n.º 2
0
static int inputsuffixarray(bool map,
                            Suffixarray *suffixarray,
                            unsigned int demand,
                            const char *indexname,
                            GtLogger *logger,
                            GtError *err)
{
  bool haserr = false;
  GtEncseqLoader *el;
  GtUword totallength = 0;

  gt_error_check(err);
  initsuffixarray(suffixarray);
  el = gt_encseq_loader_new();
  if (!(demand & SARR_DESTAB))
    gt_encseq_loader_do_not_require_des_tab(el);
  else
    gt_encseq_loader_require_des_tab(el);
  if (!(demand & SARR_SDSTAB))
    gt_encseq_loader_do_not_require_sds_tab(el);
  else
    gt_encseq_loader_require_sds_tab(el);
  if (!(demand & SARR_SSPTAB))
    gt_encseq_loader_do_not_require_ssp_tab(el);
  else
    gt_encseq_loader_require_ssp_tab(el);
  gt_encseq_loader_set_logger(el, logger);
  suffixarray->encseq = gt_encseq_loader_load(el, indexname, err);
  gt_encseq_loader_delete(el);
  if (suffixarray->encseq == NULL)
  {
    haserr = true;
  }
  if (!haserr)
  {
    haserr = scanprjfileuintkeys(suffixarray,indexname,logger,err);
  }
  if (!haserr
        && suffixarray->mirroredencseq
        && !gt_encseq_is_mirrored(suffixarray->encseq))
  {
    if (gt_encseq_mirror(suffixarray->encseq, err) != 0)
      haserr = true;
  }
  if (!haserr)
  {
    totallength = gt_encseq_total_length(suffixarray->encseq);
  }
  if (!haserr && (demand & SARR_SUFTAB))
  {
    if (map)
    {
      if (suffixarray->numberofallsortedsuffixes > 0)
      {
        suffixarray->suftab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                       GT_SUFTABSUFFIX,
                                       suffixarray->numberofallsortedsuffixes,
                                       sizeof (*suffixarray->suftab),
                                       err);
        if (suffixarray->suftab == NULL)
        {
          haserr = true;
        }
      }
    } else
    {
#if defined (_LP64) || defined (_WIN64)
      off_t filesize = gt_file_size_with_suffix(indexname,GT_SUFTABSUFFIX);

      if (filesize == (off_t) sizeof (uint32_t) *
                              suffixarray->numberofallsortedsuffixes)
      {
        gt_logger_log(logger,"read suftab in units of 4 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_uint32_t,uint32_t,
                         GT_SUFTABSUFFIX);
      } else
      {
        gt_logger_log(logger,"read suftab in units of 8 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                         GT_SUFTABSUFFIX);
      }
#else
      gt_logger_log(logger,"read suftab in units of 4 bytes");
      INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                       GT_SUFTABSUFFIX);
#endif
    }
    if (!haserr && !suffixarray->longest.defined)
    {
      gt_error_set(err,"longest not defined");
      haserr = true;
    }
  }
  if (!haserr && (demand & SARR_LCPTAB))
  {
    if (map)
    {
      if (suffixarray->numberofallsortedsuffixes > 0)
      {
        suffixarray->lcptab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                         GT_LCPTABSUFFIX,
                                         suffixarray->numberofallsortedsuffixes,
                                         sizeof (*suffixarray->lcptab),
                                         err);
        if (suffixarray->lcptab == NULL)
        {
          haserr = true;
        }
      }
    } else
    {
      INITBufferedfile(indexname,&suffixarray->lcptabstream,GtUchar,
                       GT_LCPTABSUFFIX);
      if (!haserr &&
          fseek(suffixarray->lcptabstream.fp,
                (GtWord) sizeof (GtUchar),SEEK_SET))
      {
        gt_error_set(err,"fseek(esastream) failed: %s",strerror(errno));
        haserr = true;
      }
    }
    if (!haserr && !suffixarray->numoflargelcpvalues.defined)
    {
      gt_error_set(err,"numoflargelcpvalues not defined");
      haserr = true;
    }
    if (!haserr && suffixarray->numoflargelcpvalues.valueunsignedlong > 0)
    {
      if (map)
      {
        suffixarray->llvtab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                           GT_LARGELCPTABSUFFIX,
                                           (GtUword)
                                           suffixarray->numoflargelcpvalues.
                                           valueunsignedlong,
                                           sizeof (*suffixarray->llvtab),
                                           err);
        if (suffixarray->llvtab == NULL)
        {
          haserr = true;
        }
      } else
      {
        INITBufferedfile(indexname,&suffixarray->llvtabstream,Largelcpvalue,
                         GT_LARGELCPTABSUFFIX);
      }
    }
  }
  if (!haserr && (demand & SARR_BWTTAB))
  {
    if (map)
    {
      suffixarray->bwttab
        = gt_fa_mmap_check_size_with_suffix(indexname,
                                         GT_BWTTABSUFFIX,
                                         totallength+1,
                                         sizeof (*suffixarray->bwttab),
                                         err);
      if (suffixarray->bwttab == NULL)
      {
        haserr = true;
      }
    } else
    {
      INITBufferedfile(indexname,&suffixarray->bwttabstream,GtUchar,
                       GT_BWTTABSUFFIX);
    }
  }
  if (!haserr && (demand & SARR_BCKTAB))
  {
    suffixarray->bcktab
      = gt_bcktab_map(indexname,
                      gt_encseq_alphabetnumofchars(suffixarray->encseq),
                      suffixarray->prefixlength,
                      totallength+1,
                      true,
                      err);
    if (suffixarray->bcktab == NULL)
    {
      haserr = true;
    }
  }
  if (haserr)
  {
    gt_freesuffixarray(suffixarray);
  }
  return haserr ? -1 : 0;
}