示例#1
0
/*
 * Determine the length of the sequence denoted by <seqid> and store it in
 * <*length>.
 *
 * Which source is consulted depends on the configuration of <rm>, checked in
 * this order: raw sequence (<userawseq>), MD5 sequence IDs, description
 * mapping (<usedesc>), description matching (<matchdesc>), sequence numbers
 * of the form 'seqX' in an encoded sequence (<useseqno>) and, as a fallback,
 * the first sequence of the first file in the sequence collection.
 *
 * Returns 0 on success and a non-zero value on failure. The <useseqno>
 * branch sets <err> and returns -1 for malformed or out-of-range sequence
 * numbers; the other branches propagate the return value of the helper they
 * call. <*length> is only written on success.
 */
int gt_region_mapping_get_sequence_length(GtRegionMapping *rm,
                                          unsigned long *length, GtStr *seqid,
                                          GtError *err)
{
  unsigned long filenum, seqnum;
  GT_UNUSED GtRange range;
  int had_err;
  gt_error_check(err);
  gt_assert(rm && seqid && length);
  if (rm->userawseq) {
    /* the raw length is a result, not a status: it has to go to the output
       parameter, otherwise callers would interpret it as an error code */
    *length = rm->rawlength;
    return 0;
  }
  had_err = update_seq_col_if_necessary(rm, seqid, err);
  if (!had_err) {
    if (gt_md5_seqid_has_prefix(gt_str_get(seqid))) {
      had_err = gt_seq_col_md5_to_sequence_length(rm->seq_col, length, seqid,
                                                  err);
    }
    else if (rm->usedesc) {
      gt_assert(rm->seqid2seqnum_mapping);
      had_err = gt_seqid2seqnum_mapping_map(rm->seqid2seqnum_mapping,
                                            gt_str_get(seqid), &range, &seqnum,
                                            &filenum, NULL, err);
      if (!had_err)
        *length = gt_seq_col_get_sequence_length(rm->seq_col, filenum, seqnum);
    }
    else if (rm->matchdesc) {
      had_err = gt_seq_col_grep_desc_sequence_length(rm->seq_col, length,
                                                     seqid, err);
    }
    else if (rm->useseqno) {
      unsigned long seqno = GT_UNDEF_ULONG;
      gt_assert(rm->encseq);
      /* the sequence ID must literally be 'seq' followed by a number */
      if (1 != sscanf(gt_str_get(seqid), "seq%lu", &seqno)) {
        gt_error_set(err, "seqid '%s' does not have the form 'seqX' "
                          "where X is a sequence number in the encoded "
                          "sequence", gt_str_get(seqid));
        had_err = -1;
      }
      gt_assert(had_err || seqno != GT_UNDEF_ULONG);
      if (!had_err && seqno >= gt_encseq_num_of_sequences(rm->encseq)) {
          gt_error_set(err, "trying to access sequence %lu, but encoded "
                            "sequence contains only %lu sequences",
                            seqno, gt_encseq_num_of_sequences(rm->encseq));
          had_err = -1;
      }
      if (!had_err) {
        *length = gt_encseq_seqlength(rm->encseq, seqno);
      }
    }
    else
      *length = gt_seq_col_get_sequence_length(rm->seq_col, 0, 0);
  }
  return had_err;
}
示例#2
0
/* Store the sequence numbers 0..n-1 of <encseq> in <seqnums> and sort them
   via gt_qsort_r(), handing <encseq> and <cmpfunc> through to it. <seqnums>
   must provide room for one entry per sequence of <encseq>. */
static void gt_seqorder_get_hdrsorted_seqnums(const GtEncseq *encseq,
                                              GtUword *seqnums,
                                              GtCompareWithData cmpfunc)
{
  GtUword idx, nofseqs;

  gt_assert(encseq != NULL);
  nofseqs = gt_encseq_num_of_sequences(encseq);

  /* start from the identity permutation */
  idx = 0UL;
  while (idx < nofseqs) {
    seqnums[idx] = idx;
    idx++;
  }
  (void) gt_qsort_r(seqnums, nofseqs, sizeof (GtUword), (void*) encseq,
                    cmpfunc);
}
示例#3
0
/* Enter the start position of every sequence of <encseq> into
   <suffixsortspace> and sort these suffixes with the default strategy in
   forward read mode. */
static void gt_seqorder_sort(GtSuffixsortspace *suffixsortspace,
    GtEncseq *encseq)
{
  Sfxstrategy strategy;
  unsigned long seqnum, nofseqs;

  defaultsfxstrategy(&strategy, false);
  nofseqs = gt_encseq_num_of_sequences(encseq);

  /* slot <seqnum> of the sort space holds the start of sequence <seqnum> */
  seqnum = 0;
  while (seqnum < nofseqs) {
    gt_suffixsortspace_setdirect(suffixsortspace, seqnum,
        gt_encseq_seqstartpos(encseq, seqnum));
    seqnum++;
  }
  gt_sortallsuffixesfromstart(suffixsortspace, nofseqs, encseq,
      GT_READMODE_FORWARD, NULL, 0, &strategy, NULL, NULL, NULL);
}
示例#4
0
/* Create a new GtBioseq for <sequence_file>. The file name "-" selects
   standard input; any other name must refer to an existing file. <recreate>
   is passed on to bioseq_fill(). Returns NULL (after deleting the partially
   built object) if the file is missing or bioseq_fill() fails. */
static GtBioseq* bioseq_new_with_recreate_and_type(GtStr *sequence_file,
                                                   bool recreate, GtError *err)
{
  const char *path;
  GtBioseq *bioseq;
  int rval;
  gt_error_check(err);
  bioseq = gt_calloc(1, sizeof *bioseq);
  path = gt_str_get(sequence_file);
  if (strcmp(path, "-") == 0)
    bioseq->use_stdin = true;
  if (!bioseq->use_stdin && !gt_file_exists(path)) {
    gt_error_set(err, "sequence file \"%s\" does not exist or is not readable",
                 path);
    rval = -1;
  }
  else {
    bioseq->sequence_file = gt_str_ref(sequence_file);
    rval = bioseq_fill(bioseq, recreate, err);
  }
  if (rval != 0) {
    gt_bioseq_delete(bioseq);
    return NULL;
  }
  gt_assert(bioseq->encseq);
  /* one (initially NULL) description slot per sequence */
  bioseq->descriptions = gt_calloc(gt_encseq_num_of_sequences(bioseq->encseq),
                                   sizeof (char*));
  return bioseq;
}
示例#5
0
文件: giextract.c 项目: 9beckert/TIR
/* Load the encoded sequence <indexname>, map its key table and run
   itersearchoverallkeys() for the keys listed in <fileofkeystoextract>,
   passing <linewidth> along. Returns 0 on success and -1 if loading the
   index, reading the key size, mapping the key table or the search fails. */
int gt_extractkeysfromfastaindex(const char *indexname,
                                 const GtStr *fileofkeystoextract,
                                 unsigned long linewidth,GtError *err)
{
  GtEncseqLoader *loader;
  GtEncseq *encseq;
  unsigned long nofseqs;
  int keysize_rval, rval = -1;

  loader = gt_encseq_loader_new();
  encseq = gt_encseq_loader_load(loader, indexname, err);
  gt_encseq_loader_delete(loader);
  if (encseq == NULL)
  {
    return -1;
  }

  nofseqs = gt_encseq_num_of_sequences(encseq);
  keysize_rval = readkeysize(indexname,err);
  if (keysize_rval >= 0)
  {
    const unsigned long keysize = (unsigned long) keysize_rval;
    /* size the key table is expected to have: (keysize+1) bytes per sequence
       plus one extra byte */
    const unsigned long keytablength = 1UL + nofseqs * (keysize+1);
    char *keytab = gt_fa_mmap_check_size_with_suffix(indexname,
                                                     GT_KEYSTABFILESUFFIX,
                                                     keytablength,
                                                     sizeof (GtUchar),
                                                     err);

    if (keytab != NULL &&
        itersearchoverallkeys(encseq,keytab,nofseqs,
                              keysize,fileofkeystoextract,
                              linewidth,err) == 0)
    {
      rval = 0;
    }
    gt_fa_xmunmap(keytab);
  }
  gt_encseq_delete(encseq);
  return rval;
}
示例#6
0
/* Lua binding: pushes the number of sequences of the encoded sequence given
   as argument 1 and reports one result value. */
static int encseq_lua_num_of_sequences(lua_State *L)
{
  GtEncseq **es_ptr = check_encseq(L, 1);

  lua_pushnumber(L, gt_encseq_num_of_sequences(*es_ptr));
  return 1;
}
示例#7
0
/* Initialize <storematch> for <encseq>: remember the encoded sequence and
   set up the <hasmatch> bit table via GT_INITBITTAB, sized by the number of
   sequences in <encseq>. */
void gt_initstorematch(Storematchinfo *storematch,
                    const GtEncseq *encseq)
{
  unsigned long nofseqs;

  storematch->encseq = encseq;
  nofseqs = gt_encseq_num_of_sequences(encseq);
  GT_INITBITTAB(storematch->hasmatch,nofseqs);
}
示例#8
0
/* Map the suffix array <indexname> and match every sequence of its encoded
   sequence whose length is at least <userdefinedleastlength> against the
   index itself, using <queryreadmode> (which must not be forward) for the
   query side. Matches are reported through <processquerymatch>. Returns 0
   on success and -1 if mapping the index or any matching step fails; the
   first failing sequence stops the enumeration. */
int gt_callenumselfmatches(const char *indexname,
                           GtReadmode queryreadmode,
                           unsigned int userdefinedleastlength,
                           GtProcessquerymatch processquerymatch,
                           void *processquerymatchinfo,
                           GtLogger *logger,
                           GtError *err)
{
  Suffixarray suffixarray;
  int rval = 0;

  gt_assert(queryreadmode != GT_READMODE_FORWARD);
  if (gt_mapsuffixarray(&suffixarray,
                        SARR_ESQTAB | SARR_SUFTAB | SARR_SSPTAB,
                        indexname,
                        logger,
                        err) != 0)
  {
    rval = -1;
  } else
  {
    const unsigned long minlength = (unsigned long) userdefinedleastlength;
    unsigned long seqidx, nofseqs;
    GtQuerymatch *matchspace = gt_querymatch_new();
    GtQueryrep queryrep;

    nofseqs = gt_encseq_num_of_sequences(suffixarray.encseq);
    /* the query is a region of the indexed sequence itself */
    queryrep.sequence = NULL;
    queryrep.reversecopy = false;
    queryrep.encseq = suffixarray.encseq;
    queryrep.readmode = queryreadmode;
    for (seqidx = 0; rval == 0 && seqidx < nofseqs; seqidx++)
    {
      const unsigned long startpos
        = gt_encseq_seqstartpos(suffixarray.encseq, seqidx);
      const unsigned long length
        = gt_encseq_seqlength(suffixarray.encseq, seqidx);

      if (length < minlength)
      {
        continue; /* too short to contain a match of the required length */
      }
      queryrep.startpos = startpos;
      queryrep.length = length;
      if (gt_querysubstringmatch(true,
                                 &suffixarray,
                                 (uint64_t) seqidx,
                                 &queryrep,
                                 minlength,
                                 processquerymatch,
                                 processquerymatchinfo,
                                 matchspace,
                                 err) != 0)
      {
        rval = -1;
      }
    }
    gt_querymatch_delete(matchspace);
  }
  gt_freesuffixarray(&suffixarray);
  return rval;
}
示例#9
0
/* Return the encoded character at offset <position> within sequence <index>
   of <bs>, read in forward direction. <index> must be a valid sequence
   number. */
GtUchar gt_bioseq_get_encoded_char(const GtBioseq *bs, GtUword index,
                                   GtUword position)
{
  GtUword abspos;
  gt_assert(bs);
  gt_assert(index < gt_encseq_num_of_sequences(bs->encseq));
  /* translate the sequence-relative offset into an encseq position */
  abspos = gt_encseq_seqstartpos(bs->encseq, index) + position;
  return gt_encseq_get_encoded_char(bs->encseq, abspos, GT_READMODE_FORWARD);
}
示例#10
0
/* Build a new GtSeq for sequence <idx> of <bs> from its decoded sequence,
   its length and the alphabet of the underlying encoded sequence, and
   attach the description of that sequence. <idx> must be a valid sequence
   number. */
GtSeq* gt_bioseq_get_seq(GtBioseq *bs, GtUword idx)
{
  GtSeq *result;
  gt_assert(bs);
  gt_assert(idx < gt_encseq_num_of_sequences(bs->encseq));
  result = gt_seq_new_own(gt_bioseq_get_sequence(bs, idx),
                          gt_bioseq_get_sequence_length(bs, idx),
                          gt_encseq_alphabet(bs->encseq));
  gt_seq_set_description(result, gt_bioseq_get_description(bs, idx));
  return result;
}
示例#11
0
/* Return the number of sequences belonging to file <filenum> of the encoded
   sequence behind <sc>. The count is derived from the first sequence number
   of the file and either the first sequence number of the following file
   or, for the last file, the total number of sequences. */
static GtUword gt_encseq_col_num_of_seqs(const GtSeqCol *sc,
                                               GtUword filenum)
{
  GtEncseqCol *esc = gt_encseq_col_cast(sc);
  GtUword numfiles, this_first, next_first;

  gt_assert(esc && filenum < gt_encseq_num_of_files(esc->encseq));
  numfiles = gt_encseq_num_of_files(esc->encseq);

  /* single file: all sequences belong to it */
  if (numfiles == 1 && filenum == 0)
    return gt_encseq_num_of_sequences(esc->encseq);

  /* last file: runs up to the end of the encoded sequence */
  if (filenum == numfiles - 1) {
    return (gt_encseq_num_of_sequences(esc->encseq)
              - gt_encseq_filenum_first_seqnum(esc->encseq, filenum));
  }

  /* inner file: runs up to the first sequence of the following file */
  gt_assert(filenum < numfiles - 1);
  this_first = gt_encseq_filenum_first_seqnum(esc->encseq, filenum);
  next_first = gt_encseq_filenum_first_seqnum(esc->encseq, filenum + 1);
  return next_first - this_first;
}
示例#12
0
/* Extract the complete encoded sequence <idx> of <bs> into <out>. <idx>
   must be a valid sequence number; <out> has to be large enough for the
   whole sequence. */
void gt_bioseq_get_encoded_sequence(const GtBioseq *bs, GtUchar *out,
                                    GtUword idx)
{
  GtUword first, last;
  gt_assert(bs);
  gt_assert(idx < gt_encseq_num_of_sequences(bs->encseq));
  first = gt_encseq_seqstartpos(bs->encseq, idx);
  /* inclusive end position of the sequence */
  last = first + gt_encseq_seqlength(bs->encseq, idx) - 1;
  gt_encseq_extract_encoded(bs->encseq, out, first, last);
}
示例#13
0
/* Extract the encoded characters <start>..<end> (both relative to the
   beginning of sequence <idx>) of <bs> into <out>. <idx> must be a valid
   sequence number and <end> must not be smaller than <start>. */
void gt_bioseq_get_encoded_sequence_range(const GtBioseq *bs, GtUchar *out,
                                          GtUword idx,
                                          GtUword start,
                                          GtUword end)
{
  GtUword offset, from, to;
  gt_assert(bs);
  gt_assert(idx < gt_encseq_num_of_sequences(bs->encseq) && end >= start);
  offset = gt_encseq_seqstartpos(bs->encseq, idx);
  from = offset + start;
  to = offset + end;
  gt_encseq_extract_encoded(bs->encseq, out, from, to);
}
示例#14
0
/* Lua binding: pushes the start position of the sequence whose number is
   given as argument 2 within the encoded sequence given as argument 1.
   Raises a Lua argument error if the number is not smaller than the number
   of sequences. */
static int encseq_lua_seqstartpos(lua_State *L)
{
  GtEncseq **es_ptr = check_encseq(L, 1);
  GtUword seqnum = luaL_checknumber(L, 2);

  luaL_argcheck(L, seqnum < gt_encseq_num_of_sequences(*es_ptr), 2,
                "cannot exceed number of sequences");
  lua_pushnumber(L, gt_encseq_seqstartpos(*es_ptr, seqnum));
  return 1;
}
示例#15
0
/* Initialize <suffixsortspace> with the sequence start positions of
   <encseq> and sort these suffixes with the default strategy in forward
   read mode. */
static void gt_seqorder_sort(GtSuffixsortspace *suffixsortspace,
                             const GtEncseq *encseq)
{
  Sfxstrategy strategy;
  GtUword nofseqs;

  defaultsfxstrategy(&strategy, false);
  gt_suffixsortspace_init_seqstartpos(suffixsortspace,encseq);
  nofseqs = gt_encseq_num_of_sequences(encseq);
  gt_sortallsuffixesfromstart(suffixsortspace, nofseqs, encseq,
                              GT_READMODE_FORWARD, NULL, 0,
                              &strategy, NULL, NULL, NULL);
}
示例#16
0
/* Create a new iterator over substring matches between the database
   <dbencseq> (with suffix table part <suftabpart>) and a set of queries.
   The queries come either from <query_files> (then <query_encseq> must be
   NULL and a sequence iterator is created) or, if <query_files> is NULL or
   empty, from <query_encseq> (which then must not be NULL). Returns NULL if
   the sequence iterator for <query_files> cannot be created. */
GtQuerysubstringmatchiterator *gt_querysubstringmatchiterator_new(
                                     const GtEncseq *dbencseq,
                                     GtUword totallength,
                                     const ESASuffixptr *suftabpart,
                                     GtReadmode db_readmode,
                                     GtUword numberofsuffixes,
                                     const GtStrArray *query_files,
                                     const GtEncseq *query_encseq,
                                     GtReadmode query_readmode,
                                     unsigned int userdefinedleastlength,
                                     GtError *err)
{
  GtQuerysubstringmatchiterator *iter = gt_malloc(sizeof *iter);

  /* database side */
  iter->dbencseq = dbencseq;
  iter->suftabpart = suftabpart;
  iter->db_readmode = db_readmode;
  iter->numberofsuffixes = numberofsuffixes;
  iter->totallength = totallength;
  iter->userdefinedleastlength = (GtUword) userdefinedleastlength;

  /* query side */
  iter->queryunitnum = 0;
  iter->desc = NULL;
  iter->query_for_seqit = NULL;
  iter->query_seqlen = 0;
  iter->queryrep.sequence = NULL;
  iter->queryrep.encseq = query_encseq;
  iter->queryrep.readmode = query_readmode;
  iter->queryrep.startpos = 0;
  iter->querysubstring.queryrep = &iter->queryrep;

  /* match state */
  iter->dbstart = 0;
  iter->matchlength = 0;
  iter->mmsi = gt_mmsearchiterator_new_empty();
  iter->mmsi_defined = false;

  if (query_files != NULL && gt_str_array_size(query_files) != 0)
  {
    /* queries are read from files
       NOTE(review): query_encseq_numofsequences is left unset here */
    gt_assert(query_encseq == NULL);
    iter->seqit = gt_seq_iterator_sequence_buffer_new(query_files, err);
    if (iter->seqit == NULL)
    {
      gt_querysubstringmatchiterator_delete(iter);
      return NULL;
    }
    gt_seq_iterator_set_symbolmap(iter->seqit,
                        gt_alphabet_symbolmap(gt_encseq_alphabet(dbencseq)));
  } else
  {
    /* queries are the sequences of an encoded sequence */
    gt_assert(query_encseq != NULL);
    iter->seqit = NULL;
    iter->query_encseq_numofsequences
      = (uint64_t) gt_encseq_num_of_sequences(query_encseq);
  }
  return iter;
}
示例#17
0
/* Return a newly allocated buffer holding the decoded characters
   <start>..<end> (both relative to the beginning of sequence <idx>) of
   <bs>. The buffer has exactly <end> - <start> + 1 bytes; this function
   does not append a terminating '\0'. <idx> must be a valid sequence number
   and <end> must not be smaller than <start>. */
char* gt_bioseq_get_sequence_range(const GtBioseq *bs, GtUword idx,
                                   GtUword start, GtUword end)
{
  GtUword offset;
  char *buf;
  gt_assert(bs);
  gt_assert(idx < gt_encseq_num_of_sequences(bs->encseq) && end >= start);
  buf = gt_malloc((end - start + 1) * sizeof (char));
  offset = gt_encseq_seqstartpos(bs->encseq, idx);
  gt_encseq_extract_decoded(bs->encseq, buf, offset + start, offset + end);
  return buf;
}
示例#18
0
/* Lua binding: pushes the description of the sequence whose number is given
   as argument 2 within the encoded sequence given as argument 1. Raises a
   Lua argument error if the number is not smaller than the number of
   sequences. */
static int encseq_lua_description(lua_State *L)
{
  GtEncseq **es_ptr = check_encseq(L, 1);
  GtUword seqnum = luaL_checknumber(L, 2);
  GtUword length;
  const char *desc;

  luaL_argcheck(L, seqnum < gt_encseq_num_of_sequences(*es_ptr), 2,
                "cannot exceed number of sequences");
  desc = gt_encseq_description(*es_ptr, &length, seqnum);
  /* pushed with explicit length: <desc> is not used as a C string here */
  lua_pushlstring(L, desc, length);
  return 1;
}
/* Create a new GtCondenseq for the original encoded sequence <orig_es>:
   takes over its alphabet, number of sequences and total length, fills the
   <ssptab> via condenseq_fill_tab() and processes the descriptions,
   passing <logger> to condenseq_process_descriptions(). */
GtCondenseq *gt_condenseq_new(const GtEncseq *orig_es, GtLogger *logger)
{
  GtCondenseq *ces = condenseq_new_empty(gt_encseq_alphabet(orig_es));

  ces->orig_num_seq = gt_encseq_num_of_sequences(orig_es);
  ces->ssptab = condenseq_fill_tab(ces, orig_es);
  ces->orig_length = gt_encseq_total_length(orig_es);
  condenseq_process_descriptions(ces, orig_es, logger);

  return ces;
}
示例#20
0
/* Return a newly allocated buffer holding the complete decoded sequence
   <idx> of <bs>. The buffer has exactly as many bytes as the sequence is
   long and is filled completely, so it carries no terminating '\0'. <idx>
   must be a valid sequence number. */
char* gt_bioseq_get_sequence(const GtBioseq *bs, GtUword idx)
{
  GtUword seqlen, first;
  char *buf;
  gt_assert(bs);
  gt_assert(idx < gt_encseq_num_of_sequences(bs->encseq));
  seqlen = gt_encseq_seqlength(bs->encseq, idx);
  buf = gt_calloc(seqlen, sizeof (char));
  first = gt_encseq_seqstartpos(bs->encseq, idx);
  gt_encseq_extract_decoded(bs->encseq, buf, first, first + seqlen - 1);
  return buf;
}
示例#21
0
/* Run the bottom-up suffix array traversal for containment detection on the
   index read by <ssar>, recording results in the bit table <contained>.
   A <read_length> of 0 selects the variable-length variant (rdjcv), any
   other value the equal-length variant (rdjce). A progress bar over the
   total length is shown if <show_progressbar> is set. Returns the value of
   the state's counter after the traversal. */
unsigned long gt_contfind_bottomup(Sequentialsuffixarrayreader *ssar,
                     bool show_progressbar, GtBitsequence *contained,
                     unsigned long firstrevcompl,
                     unsigned long read_length /* 0 = variable */)
{
  const bool variable_length = (read_length == 0);
  ContfindBUstate state;
  unsigned long totallength;
  GT_UNUSED int rval;

  gt_assert(ssar != NULL);
  gt_assert(contained != NULL);

  state.contained = contained;
  state.encseq = gt_encseqSequentialsuffixarrayreader(ssar);
  totallength = gt_encseq_total_length(state.encseq);
  state.nofsequences = gt_encseq_num_of_sequences(state.encseq);

  if (variable_length)
  {
    prepare_sspbittab_and_shortest(totallength, &state);
  }
  else
  {
    /* all reads have the same length */
    state.shortest = read_length;
    state.spacing = read_length + 1;
  }

  state.show_progressbar = show_progressbar;
  state.csize            = 0;
  state.cmin             = 0;
  state.firstrevcompl    = firstrevcompl;
  state.counter          = 0;

  if (show_progressbar)
  {
    state.progress = 0;
    gt_progressbar_start(&(state.progress),
        (unsigned long long)totallength);
  }

  if (variable_length)
  {
    rval = gt_esa_bottomup_rdjcv(ssar, &state, NULL);
  }
  else
  {
    rval = gt_esa_bottomup_rdjce(ssar, &state, NULL);
  }
  gt_assert(rval == 0);

  if (show_progressbar)
  {
    gt_progressbar_stop();
  }
  if (variable_length)
  {
    gt_free(state.sspbittab);
  }
  return state.counter;
}
示例#22
0
/* Build a new GtSeq from the characters <start>..<end> (both relative to
   the beginning of sequence <idx>, inclusive) of <bs>, using the alphabet
   of the underlying encoded sequence and the description of sequence
   <idx>. <idx> must be a valid sequence number and the range must lie
   completely inside that sequence. */
GtSeq* gt_bioseq_get_seq_range(GtBioseq *bs, GtUword idx,
                               GtUword start, GtUword end)
{
  GtSeq *seq;
  gt_assert(bs);
  gt_assert(idx < gt_encseq_num_of_sequences(bs->encseq));
  gt_assert(end >= start);
  /* the range is extracted at seqstartpos + start .. seqstartpos + end, so
     it stays inside sequence <idx> only if <end> is a valid offset; together
     with end >= start this also bounds the range length by the sequence
     length */
  gt_assert(end < gt_encseq_seqlength(bs->encseq, idx));
  seq = gt_seq_new_own(gt_bioseq_get_sequence_range(bs, idx, start, end),
                       end - start + 1,
                       gt_encseq_alphabet(bs->encseq));
  gt_seq_set_description(seq, gt_bioseq_get_description(bs, idx));
  return seq;
}
示例#23
0
/* If the alphabet of <bs> is a DNA alphabet, concatenate all decoded
   sequences of <bs> and show the GC-content of the result on <outfp>.
   Nothing is printed for other alphabets. */
void gt_bioseq_show_gc_content(GtBioseq *bs, GtFile *outfp)
{
  gt_assert(bs);
  if (gt_alphabet_is_dna(gt_encseq_alphabet(bs->encseq))) {
    GtUword i, num_of_seqs, GT_UNUSED purecharlen;
    GtStr *str = gt_str_new();
    num_of_seqs = gt_encseq_num_of_sequences(bs->encseq);
    /* expected length of the concatenation; presumably the total length
       counts one separator between neighbouring sequences */
    purecharlen = gt_encseq_total_length(bs->encseq) - num_of_seqs + 1;
    for (i = 0; i < num_of_seqs; i++) {
      char *tmp;
      tmp = gt_bioseq_get_sequence(bs, i);
      /* the returned buffer holds exactly the sequence characters and is not
         '\0'-terminated, so it must be appended with an explicit length */
      gt_str_append_cstr_nt(str, tmp, gt_encseq_seqlength(bs->encseq, i));
      gt_free(tmp);
    }
    gt_assert(gt_str_length(str) == purecharlen);
    gt_file_xprintf(outfp, "showing GC-content for sequence file \"%s\"\n",
                    gt_str_get(bs->sequence_file));
    gt_gc_content_show(gt_str_get(str),
                       gt_str_length(str),
                       gt_encseq_alphabet(bs->encseq),
                       outfp);
    gt_str_delete(str);
  }
}
示例#24
0
/* Return a newly allocated copy (made with gt_cstr_dup_nt()) of the
   description of sequence <seqnum> within file <filenum> of the encoded
   sequence behind <sc>. <filenum> must be a valid file number and the
   addressed sequence must exist and have a non-empty description. */
static char* gt_encseq_col_get_description(const GtSeqCol *sc,
                                           GtUword filenum,
                                           GtUword seqnum)
{
  GtEncseqCol *esc = gt_encseq_col_cast(sc);
  GtUword global_seqnum, length;
  const char *raw;

  gt_assert(esc && filenum < gt_encseq_num_of_files(esc->encseq));
  /* translate the file-relative number into an encseq-wide one */
  global_seqnum = gt_encseq_filenum_first_seqnum(esc->encseq, filenum)
                    + seqnum;
  gt_assert(global_seqnum < gt_encseq_num_of_sequences(esc->encseq));
  raw = gt_encseq_description(esc->encseq, &length, global_seqnum);
  gt_assert(raw && length > 0);
  return gt_cstr_dup_nt(raw, length);
}
示例#25
0
/* Return the description of sequence <idx> of <bs> as a '\0'-terminated
   string. The string is created on first access and stored in
   <bs->descriptions>, so subsequent calls return the same pointer. <idx>
   must be a valid sequence number. */
const char* gt_bioseq_get_description(GtBioseq *bs, GtUword idx)
{
  char *cached;
  gt_assert(bs && bs->encseq);
  gt_assert(idx < gt_encseq_num_of_sequences(bs->encseq));
  cached = bs->descriptions[idx];
  if (cached == NULL) {
    GtUword length;
    const char *raw = gt_encseq_description(bs->encseq, &length, idx);
    /* zero-filled buffer with one extra byte supplies the terminator */
    cached = gt_calloc(length + 1, sizeof (char));
    strncpy(cached, raw, length);
    bs->descriptions[idx] = cached;
  }
  return (const char*) cached;
}
示例#26
0
/* Free <bs> together with its sequence file name, MD5 table, cached
   descriptions and encoded sequence. Does nothing if <bs> is NULL. */
void gt_bioseq_delete(GtBioseq *bs)
{
  if (bs == NULL)
    return;
  gt_str_delete(bs->sequence_file);
  gt_md5_tab_delete(bs->md5_tab);
  if (bs->descriptions != NULL) {
    GtUword seqnum, nofseqs = gt_encseq_num_of_sequences(bs->encseq);
    /* the table has one slot per sequence */
    for (seqnum = 0; seqnum < nofseqs; seqnum++)
      gt_free(bs->descriptions[seqnum]);
    gt_free(bs->descriptions);
  }
  gt_encseq_delete(bs->encseq);
  gt_free(bs);
}
示例#27
0
/* Find the first sequence of <esc> whose description is matched by the
   pattern <seqid> (via gt_grep()) and store its file number in <*filenum>
   and its file-relative sequence number in <*seqnum>. Results are kept in
   the grep cache of <esc>, which is created on demand and consulted first.
   Returns 0 on success; a non-zero value if gt_grep() fails, and -1 with
   <err> set if no description matches. */
static int gt_encseq_col_do_grep_desc(GtEncseqCol *esc, GtUword *filenum,
                                      GtUword *seqnum, GtStr *seqid,
                                      GtError *err)
{
  const GtSeqInfo *cached;
  GtSeqInfo found;
  GtUword seqidx, nofseqs;
  bool match = false;
  int had_err = 0;
  gt_error_check(err);

  gt_assert(esc && filenum && seqnum && seqid);
  if (esc->grep_cache == NULL)
    esc->grep_cache = gt_seq_info_cache_new();

  /* fast path: the pattern has been resolved before */
  cached = gt_seq_info_cache_get(esc->grep_cache, gt_str_get(seqid));
  if (cached != NULL) {
    *filenum = cached->filenum;
    *seqnum = cached->seqnum;
    return 0;
  }

  nofseqs = gt_encseq_num_of_sequences(esc->encseq);
  for (seqidx = 0; seqidx < nofseqs; seqidx++) {
    GtUword desc_len;
    const char *desc = gt_encseq_description(esc->encseq, &desc_len, seqidx);
    /* gt_grep() gets a '\0'-terminated copy of the description */
    char *buf = gt_calloc(desc_len + 1, sizeof (char));

    memcpy(buf, desc, desc_len * sizeof (char));
    had_err = gt_grep(&match, gt_str_get(seqid), buf, err);
    gt_free(buf);
    if (had_err)
      break;
    if (match) {
      found.filenum = gt_encseq_filenum(esc->encseq,
                                        gt_encseq_seqstartpos(esc->encseq,
                                                              seqidx));
      *filenum = found.filenum;
      found.seqnum = seqidx - gt_encseq_filenum_first_seqnum(esc->encseq,
                                                             *filenum);
      *seqnum = found.seqnum;
      gt_seq_info_cache_add(esc->grep_cache, gt_str_get(seqid), &found);
      break;
    }
  }
  if (!had_err && !match) {
    gt_error_set(err, "no description matched sequence ID '%s'",
                 gt_str_get(seqid));
    had_err = -1;
  }
  return had_err;
}
示例#28
0
/* Print statistics about <bs> to <outfp>: the sequence file name, the
   number of sequences, the total length (total encseq length minus the
   number of sequences plus one) and the length of every single sequence,
   numbered from 1. */
void gt_bioseq_show_stat(GtBioseq *bs, GtFile *outfp)
{
  GtUword seqnum, nofseqs, purelength;
  gt_assert(bs);
  nofseqs = gt_bioseq_number_of_sequences(bs);
  gt_file_xprintf(outfp, "showing statistics for sequence file \"%s\"\n",
                  gt_str_get(bs->sequence_file));
  gt_file_xprintf(outfp, "number of sequences: "GT_WU"\n", nofseqs);
  purelength = gt_encseq_total_length(bs->encseq)
                 - gt_encseq_num_of_sequences(bs->encseq) + 1;
  gt_file_xprintf(outfp, "total length: "GT_WU"\n", purelength);
  seqnum = 0;
  while (seqnum < nofseqs) {
    gt_file_xprintf(outfp, "sequence #"GT_WU" length: "GT_WU"\n", seqnum+1,
                    gt_bioseq_get_sequence_length(bs, seqnum));
    seqnum++;
  }
}
示例#29
0
/* Write the project information for <encseq> and the given suffix sorting
   results to <outprj> as a sequence of 'key=value' lines. The order of the
   lines is fixed by the statement order below. The 'longest' line is only
   written if <longest> is marked as defined. */
static void showprjinfo(FILE *outprj,
                        GtReadmode readmode,
                        const GtEncseq *encseq,
                        GtUword numberofallsortedsuffixes,
                        unsigned int prefixlength,
                        GtUword numoflargelcpvalues,
                        double averagelcp,
                        GtUword maxbranchdepth,
                        const Definedunsignedlong *longest)
{
  GtUword totallength;
  GtUword numofsequences;

  totallength = gt_encseq_total_length(encseq);
  fprintf(outprj,"totallength="GT_WU"\n",totallength);
  /* PRJSPECIALOUT is defined outside this block; presumably each use prints
     one 'name=value' line for the named statistic of <encseq> - TODO confirm
     against the macro definition */
  PRJSPECIALOUT(specialcharacters);
  PRJSPECIALOUT(specialranges);
  PRJSPECIALOUT(realspecialranges);
  PRJSPECIALOUT(lengthofspecialprefix);
  PRJSPECIALOUT(lengthofspecialsuffix);
  PRJSPECIALOUT(wildcards);
  PRJSPECIALOUT(wildcardranges);
  PRJSPECIALOUT(realwildcardranges);
  PRJSPECIALOUT(lengthofwildcardprefix);
  PRJSPECIALOUT(lengthofwildcardsuffix);
  numofsequences = gt_encseq_num_of_sequences(encseq);
  /* the same count is written under two keys; the number of query sequences
     is always written as 0 */
  fprintf(outprj,"numofsequences="GT_WU"\n",numofsequences);
  fprintf(outprj,"numofdbsequences="GT_WU"\n",numofsequences);
  fprintf(outprj,"numofquerysequences=0\n");
  fprintf(outprj,"numberofallsortedsuffixes="GT_WU"\n",
          numberofallsortedsuffixes);
  if (longest->defined)
  {
    fprintf(outprj,"longest="GT_WU"\n",longest->valueunsignedlong);
  }
  fprintf(outprj,"prefixlength=%u\n",prefixlength);
  fprintf(outprj,"largelcpvalues="GT_WU"\n",numoflargelcpvalues);
  fprintf(outprj,"averagelcp=%.2f\n",averagelcp);
  fprintf(outprj,"maxbranchdepth="GT_WU"\n",maxbranchdepth);
  /* width of GtUword in bits on the machine writing the file */
  fprintf(outprj,"integersize=%u\n",
                  (unsigned int) (sizeof (GtUword) * CHAR_BIT));
  fprintf(outprj,"littleendian=%c\n",gt_is_little_endian() ? '1' : '0');
  fprintf(outprj,"readmode=%u\n",(unsigned int) readmode);
  fprintf(outprj,"mirrored=%c\n", gt_encseq_is_mirrored(encseq) ? '1' : '0');
}
示例#30
0
/* Store the GC-content of sequence <seq_idx> in <gc_content[seq_idx]>.
   With <with_special> the GC count is related to the full sequence length
   taken from <encseq>; otherwise it is related to the sum of <gc_count>
   and <at_count> only. */
static inline void calculate_gc(const GtEncseq *encseq,
                                double *gc_content,
                                bool with_special,
                                unsigned long seq_idx,
                                unsigned long gc_count,
                                unsigned long at_count)
{
  double denominator;

  if (with_special)
  {
    gt_assert(seq_idx < gt_encseq_num_of_sequences(encseq));
    denominator = (double) gt_encseq_seqlength(encseq, seq_idx);
  }
  else
  {
    denominator = (double) (gc_count + at_count);
  }
  gc_content[seq_idx] = (double) gc_count / denominator;
}