Beispiel #1
0
/*
 * Lua binding: pushes one boolean onto the Lua stack telling whether the
 * encoded sequence given as argument 1 is mirrored.
 * Returns 1, the number of Lua results.
 */
static int encseq_lua_is_mirrored(lua_State *L)
{
  GtEncseq **encseq = check_encseq(L, 1);
  int mirrored;

  gt_assert(*encseq);
  mirrored = gt_encseq_is_mirrored(*encseq) ? 1 : 0;
  lua_pushboolean(L, mirrored);
  return 1;
}
Beispiel #2
0
/*
 * Lua binding: removes the mirroring from the encoded sequence given as
 * argument 1. Fails the argument check with "is not mirrored" when the
 * sequence is not currently mirrored.
 * Returns 0 (no Lua results).
 */
static int encseq_lua_unmirror(lua_State *L)
{
  GtEncseq **encseq = check_encseq(L, 1);
  GtEncseq *target;

  gt_assert(*encseq);
  target = *encseq;
  luaL_argcheck(L, gt_encseq_is_mirrored(target), 1, "is not mirrored");
  gt_encseq_unmirror(target);
  return 0;
}
Beispiel #3
0
/*
 * Lua binding: mirrors the encoded sequence given as argument 1.
 * Fails the argument check with "is already mirrored" when the sequence is
 * mirrored already; a failure of gt_encseq_mirror() is handed to
 * gt_lua_error().
 * Returns 0 (no Lua results).
 */
static int encseq_lua_mirror(lua_State *L)
{
  GtEncseq **encseq;
  GtError *err;
  encseq = check_encseq(L, 1);
  gt_assert(*encseq);
  luaL_argcheck(L, !gt_encseq_is_mirrored(*encseq), 1, "is already mirrored");
  /* Create the error object only after argument validation: a failing
     luaL_argcheck() raises a Lua error and does not return here, so an
     object allocated earlier would never be released. */
  err = gt_error_new();
  /* NOTE(review): gt_lua_error() is presumably a non-returning call that
     raises a Lua error and takes care of err -- confirm. */
  if (gt_encseq_mirror(*encseq, err) != 0)
    gt_lua_error(L, err);
  gt_error_delete(err);
  return 0;
}
/*
 * Writes the project information as "key=value" lines to outprj.
 *
 * The lines describe the encoded sequence (total length, the values printed
 * by the PRJSPECIALOUT entries, sequence counts, mirroring flag), the
 * suffix-sorting statistics handed in by the caller, and platform properties
 * (word size in bits, endianness). The "longest" line is only written when
 * longest->defined is set.
 */
static void showprjinfo(FILE *outprj,
                        GtReadmode readmode,
                        const GtEncseq *encseq,
                        GtUword numberofallsortedsuffixes,
                        unsigned int prefixlength,
                        GtUword numoflargelcpvalues,
                        double averagelcp,
                        GtUword maxbranchdepth,
                        const Definedunsignedlong *longest)
{
  const GtUword totallength = gt_encseq_total_length(encseq);
  const unsigned int wordbits = (unsigned int) (sizeof (GtUword) * CHAR_BIT);
  GtUword numofsequences;
  char flag;

  fprintf(outprj,"totallength="GT_WU"\n",totallength);

  /* special character statistics */
  PRJSPECIALOUT(specialcharacters);
  PRJSPECIALOUT(specialranges);
  PRJSPECIALOUT(realspecialranges);
  PRJSPECIALOUT(lengthofspecialprefix);
  PRJSPECIALOUT(lengthofspecialsuffix);

  /* wildcard statistics */
  PRJSPECIALOUT(wildcards);
  PRJSPECIALOUT(wildcardranges);
  PRJSPECIALOUT(realwildcardranges);
  PRJSPECIALOUT(lengthofwildcardprefix);
  PRJSPECIALOUT(lengthofwildcardsuffix);

  /* the same count is reported as total and as database sequences;
     the number of query sequences is always written as 0 */
  numofsequences = gt_encseq_num_of_sequences(encseq);
  fprintf(outprj,"numofsequences="GT_WU"\n",numofsequences);
  fprintf(outprj,"numofdbsequences="GT_WU"\n",numofsequences);
  fprintf(outprj,"numofquerysequences=0\n");

  fprintf(outprj,"numberofallsortedsuffixes="GT_WU"\n",
          numberofallsortedsuffixes);
  if (longest->defined)
    fprintf(outprj,"longest="GT_WU"\n",longest->valueunsignedlong);
  fprintf(outprj,"prefixlength=%u\n",prefixlength);
  fprintf(outprj,"largelcpvalues="GT_WU"\n",numoflargelcpvalues);
  fprintf(outprj,"averagelcp=%.2f\n",averagelcp);
  fprintf(outprj,"maxbranchdepth="GT_WU"\n",maxbranchdepth);
  fprintf(outprj,"integersize=%u\n",wordbits);

  if (gt_is_little_endian())
    flag = '1';
  else
    flag = '0';
  fprintf(outprj,"littleendian=%c\n",flag);

  fprintf(outprj,"readmode=%u\n",(unsigned int) readmode);

  if (gt_encseq_is_mirrored(encseq))
    flag = '1';
  else
    flag = '0';
  fprintf(outprj,"mirrored=%c\n", flag);
}
/*
 * Reads every unit of the exported two-bit encoding of encseq once, in
 * ascending order, and logs the sum of the units. For a mirrored encseq only
 * the units covering (totallength - 1)/2 positions are visited.
 */
static void gt_readjoiner_assembly_pump_encseq_through_cache(
    const GtEncseq *encseq)
{
  const GtTwobitencoding *twobitencoding
    = gt_encseq_twobitencoding_export(encseq);
  const GtUword totallength = gt_encseq_total_length(encseq);
  GtUword numofunits, idx = 0;
  /* compute the sum, so that the compiler does not remove the code
     accessing twobitencoding during optimization */
  uint64_t sum = 0;

  if (gt_encseq_is_mirrored(encseq))
  {
    numofunits = gt_unitsoftwobitencoding((totallength - 1)/2);
  } else
  {
    numofunits = gt_unitsoftwobitencoding(totallength);
  }
  while (idx < numofunits)
  {
    sum += twobitencoding[idx];
    idx++;
  }
  gt_assert(sum > 0);
#ifndef S_SPLINT_S
  gt_log_log("encseq codes-sum: %"PRIu64, sum);
#endif
}
Beispiel #6
0
/*
 * Loads the encoded sequence for indexname and the index tables selected by
 * the demand bit mask into suffixarray.
 *
 * map        if true, suftab/lcptab/llvtab/bwttab are memory-mapped;
 *            otherwise they are opened as buffered file streams.
 * demand     bitwise OR of SARR_* flags selecting which tables to load.
 * indexname  base name of the index files.
 * logger     passed to the encseq loader and used for progress messages.
 * err        receives the error description on failure.
 *
 * Returns 0 on success and -1 on failure. On failure
 * gt_freesuffixarray(suffixarray) is called before returning.
 */
static int inputsuffixarray(bool map,
                            Suffixarray *suffixarray,
                            unsigned int demand,
                            const char *indexname,
                            GtLogger *logger,
                            GtError *err)
{
  bool haserr = false;
  GtEncseqLoader *el;
  GtUword totallength = 0;

  gt_error_check(err);
  initsuffixarray(suffixarray);
  /* configure the loader according to the des/sds/ssp bits of demand */
  el = gt_encseq_loader_new();
  if (!(demand & SARR_DESTAB))
    gt_encseq_loader_do_not_require_des_tab(el);
  else
    gt_encseq_loader_require_des_tab(el);
  if (!(demand & SARR_SDSTAB))
    gt_encseq_loader_do_not_require_sds_tab(el);
  else
    gt_encseq_loader_require_sds_tab(el);
  if (!(demand & SARR_SSPTAB))
    gt_encseq_loader_do_not_require_ssp_tab(el);
  else
    gt_encseq_loader_require_ssp_tab(el);
  gt_encseq_loader_set_logger(el, logger);
  suffixarray->encseq = gt_encseq_loader_load(el, indexname, err);
  gt_encseq_loader_delete(el);
  if (suffixarray->encseq == NULL)
  {
    haserr = true;
  }
  /* NOTE(review): scanprjfileuintkeys() presumably fills the project values
     used below (numberofallsortedsuffixes, longest, numoflargelcpvalues,
     prefixlength, mirroredencseq) -- confirm against its definition */
  if (!haserr)
  {
    haserr = scanprjfileuintkeys(suffixarray,indexname,logger,err);
  }
  /* mirror the loaded sequence if the suffix array expects a mirrored one
     and the sequence is not mirrored yet */
  if (!haserr
        && suffixarray->mirroredencseq
        && !gt_encseq_is_mirrored(suffixarray->encseq))
  {
    if (gt_encseq_mirror(suffixarray->encseq, err) != 0)
      haserr = true;
  }
  /* total length is taken after the optional mirroring step */
  if (!haserr)
  {
    totallength = gt_encseq_total_length(suffixarray->encseq);
  }
  /* suffix table */
  if (!haserr && (demand & SARR_SUFTAB))
  {
    if (map)
    {
      /* nothing is mapped when there are no sorted suffixes */
      if (suffixarray->numberofallsortedsuffixes > 0)
      {
        suffixarray->suftab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                       GT_SUFTABSUFFIX,
                                       suffixarray->numberofallsortedsuffixes,
                                       sizeof (*suffixarray->suftab),
                                       err);
        if (suffixarray->suftab == NULL)
        {
          haserr = true;
        }
      }
    } else
    {
      /* NOTE(review): INITBufferedfile is a macro defined elsewhere; the
         !haserr test that follows one of its uses further down suggests it
         may set haserr on failure -- confirm */
#if defined (_LP64) || defined (_WIN64)
      off_t filesize = gt_file_size_with_suffix(indexname,GT_SUFTABSUFFIX);

      /* on 64-bit builds the file size decides whether the table is read
         through the uint32_t stream or the GtUword stream */
      if (filesize == (off_t) sizeof (uint32_t) *
                              suffixarray->numberofallsortedsuffixes)
      {
        gt_logger_log(logger,"read suftab in units of 4 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_uint32_t,uint32_t,
                         GT_SUFTABSUFFIX);
      } else
      {
        gt_logger_log(logger,"read suftab in units of 8 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                         GT_SUFTABSUFFIX);
      }
#else
      /* NOTE(review): the GtUword stream is used together with a "4 bytes"
         log message; presumably GtUword is 4 bytes wide on these builds --
         confirm */
      gt_logger_log(logger,"read suftab in units of 4 bytes");
      INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                       GT_SUFTABSUFFIX);
#endif
    }
    /* a suffix table is only accepted if the project defines "longest" */
    if (!haserr && !suffixarray->longest.defined)
    {
      gt_error_set(err,"longest not defined");
      haserr = true;
    }
  }
  /* lcp table and, if present, the table of large lcp values */
  if (!haserr && (demand & SARR_LCPTAB))
  {
    if (map)
    {
      if (suffixarray->numberofallsortedsuffixes > 0)
      {
        suffixarray->lcptab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                         GT_LCPTABSUFFIX,
                                         suffixarray->numberofallsortedsuffixes,
                                         sizeof (*suffixarray->lcptab),
                                         err);
        if (suffixarray->lcptab == NULL)
        {
          haserr = true;
        }
      }
    } else
    {
      INITBufferedfile(indexname,&suffixarray->lcptabstream,GtUchar,
                       GT_LCPTABSUFFIX);
      /* position the stream sizeof (GtUchar) bytes after the file start,
         i.e. behind the first table entry */
      if (!haserr &&
          fseek(suffixarray->lcptabstream.fp,
                (GtWord) sizeof (GtUchar),SEEK_SET))
      {
        gt_error_set(err,"fseek(esastream) failed: %s",strerror(errno));
        haserr = true;
      }
    }
    if (!haserr && !suffixarray->numoflargelcpvalues.defined)
    {
      gt_error_set(err,"numoflargelcpvalues not defined");
      haserr = true;
    }
    /* the large-lcp-value table is only opened when it has entries */
    if (!haserr && suffixarray->numoflargelcpvalues.valueunsignedlong > 0)
    {
      if (map)
      {
        suffixarray->llvtab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                           GT_LARGELCPTABSUFFIX,
                                           (GtUword)
                                           suffixarray->numoflargelcpvalues.
                                           valueunsignedlong,
                                           sizeof (*suffixarray->llvtab),
                                           err);
        if (suffixarray->llvtab == NULL)
        {
          haserr = true;
        }
      } else
      {
        INITBufferedfile(indexname,&suffixarray->llvtabstream,Largelcpvalue,
                         GT_LARGELCPTABSUFFIX);
      }
    }
  }
  /* bwt table: mapped with totallength+1 entries, or streamed */
  if (!haserr && (demand & SARR_BWTTAB))
  {
    if (map)
    {
      suffixarray->bwttab
        = gt_fa_mmap_check_size_with_suffix(indexname,
                                         GT_BWTTABSUFFIX,
                                         totallength+1,
                                         sizeof (*suffixarray->bwttab),
                                         err);
      if (suffixarray->bwttab == NULL)
      {
        haserr = true;
      }
    } else
    {
      INITBufferedfile(indexname,&suffixarray->bwttabstream,GtUchar,
                       GT_BWTTABSUFFIX);
    }
  }
  /* bucket table: loaded via gt_bcktab_map() regardless of the map flag */
  if (!haserr && (demand & SARR_BCKTAB))
  {
    suffixarray->bcktab
      = gt_bcktab_map(indexname,
                      gt_encseq_alphabetnumofchars(suffixarray->encseq),
                      suffixarray->prefixlength,
                      totallength+1,
                      true,
                      err);
    if (suffixarray->bcktab == NULL)
    {
      haserr = true;
    }
  }
  /* on any failure release whatever has been set up so far */
  if (haserr)
  {
    gt_freesuffixarray(suffixarray);
  }
  return haserr ? -1 : 0;
}
Beispiel #7
0
/*
 * Determines per-sequence GC information for a DNA encoded sequence.
 *
 * encseq        encoded sequence; its alphabet must be DNA (asserted).
 * with_special  forwarded unchanged to calculate_gc().
 * calculate     if true, the entry of each sequence is filled in by
 *               calculate_gc() from the sequence's G/C and A/T counts;
 *               if false, the entry is the raw number of G/C characters.
 * err           unused.
 *
 * Returns an array allocated with gt_calloc() holding one double per
 * sequence of encseq (for a mirrored encseq: twice half the number of
 * sequences). The array is handed to the caller and not freed here.
 */
double *gt_encseq_get_gc(const GtEncseq *encseq,
                         bool with_special,
                         bool calculate,
                         GT_UNUSED GtError *err)
{
  GtEncseqReader *reader;
  GtAlphabet *alphabet;
  double *gc_content;
  unsigned long char_idx, totallength, max_unit,
                seq_idx = 0,
                nextsep = 0,
                at_count = 0,
                gc_count = 0;
  bool is_mirrored_encseq;
  GtUchar acgt[8], current_c;

  alphabet = gt_encseq_alphabet(encseq);
  gt_assert(gt_alphabet_is_dna(alphabet));
  /* acgt[0..3] hold the codes of a/A/t/T, acgt[4..7] those of c/C/g/G */
  gt_alphabet_encode_seq(alphabet, acgt,
                         "aAtTcCgG", 8UL);
  totallength = gt_encseq_total_length(encseq);
  reader = gt_encseq_create_reader_with_readmode(encseq,
                                                 GT_READMODE_FORWARD,
                                                 0);
  is_mirrored_encseq = gt_encseq_is_mirrored(encseq);
  if (is_mirrored_encseq)
  {
    /* max_unit counts only one half of the sequences of a mirrored encseq,
       the result array still covers both halves */
    max_unit = GT_DIV2(gt_encseq_num_of_sequences(encseq));
    gc_content = gt_calloc((size_t) GT_MULT2(max_unit), sizeof (double));
  }
  else
  {
    max_unit = gt_encseq_num_of_sequences(encseq);
    gc_content = gt_calloc((size_t) max_unit, sizeof (double));
  }

  /* position directly behind the current sequence */
  nextsep = gt_encseq_seqstartpos(encseq, seq_idx) +
            gt_encseq_seqlength(encseq, seq_idx);

  for (char_idx = 0; char_idx < totallength; char_idx++)
  {
    if (nextsep == char_idx)
    {
      /* end of the current sequence: store its result ... */
      if (calculate)
      {
        calculate_gc(encseq,
                     gc_content,
                     with_special,
                     seq_idx,
                     gc_count,
                     at_count);
      }
      else
      {
        gc_content[seq_idx] = (double) gc_count;
      }

      /* ... and continue with the next sequence; the position char_idx
         itself is not read, the reader restarts right behind it */
      seq_idx++;

      nextsep = gt_encseq_seqstartpos(encseq, seq_idx) +
                gt_encseq_seqlength(encseq, seq_idx);

      gt_encseq_reader_reinit_with_readmode(reader,
                                            encseq,
                                            GT_READMODE_FORWARD,
                                            char_idx + 1UL);
      gc_count = at_count = 0UL;
      continue;
    }
    current_c = gt_encseq_reader_next_encoded_char(reader);
    if (current_c == acgt[0] ||
        current_c == acgt[1] ||
        current_c == acgt[2] ||
        current_c == acgt[3])
    {
       at_count++;
    }
    else if (current_c == acgt[4] ||
             current_c == acgt[5] ||
             current_c == acgt[6] ||
             current_c == acgt[7])
    {
       gc_count++;
    }
    /* any other character contributes to neither count */
  }
  /* store the result of the last sequence */
  if (calculate)
  {
    calculate_gc(encseq,
                 gc_content,
                 with_special,
                 seq_idx,
                 gc_count,
                 at_count);
  }
  else
  {
    gc_content[seq_idx] = (double) gc_count;
  }
  gt_encseq_reader_delete(reader);
  if (is_mirrored_encseq)
  {
    /* copy the entries of the first half to the second half in reverse
       order */
    unsigned long double_max_unit = GT_MULT2(max_unit);
    for (seq_idx = 0; seq_idx < max_unit; seq_idx++)
    {
      gc_content[double_max_unit - seq_idx - 1] =
        gc_content[seq_idx];
    }
  }
  return gc_content;
}