コード例 #1
0
/*
 * Emit one mer to the mer index file and, optionally, one count byte to the
 * counts file.
 *
 * The <mersize> symbols of <encseq> starting at <position> are encoded into
 * <bytebuffer> by gt_encseq_sequence2bytecode(), and <sizeofbuffer> bytes of
 * that buffer are written to <merindexfpout>.
 *
 * If <countsfilefpout> is not NULL, exactly one byte is written to it:
 *  - <countocc> itself, if it does not exceed MAXSMALLMERCOUNT;
 *  - 0 otherwise, in which case the pair (<countoutputmers>, <countocc>) is
 *    additionally appended to <largecounts>.
 *
 * The function always returns 0; <err> is not used.
 */
static int outputsortedstring2indexviafileptr(const GtEncseq *encseq,
                                              GtUword mersize,
                                              GtUchar *bytebuffer,
                                              GtUword sizeofbuffer,
                                              FILE *merindexfpout,
                                              FILE *countsfilefpout,
                                              GtUword position,
                                              GtUword countocc,
                                              GtArrayLargecount *largecounts,
                                              GtUword countoutputmers,
                                              GT_UNUSED GtError *err)
{
  /* stays 0 when the count does not fit into the small representation */
  GtUchar countbyte = 0;

  gt_encseq_sequence2bytecode(bytebuffer,encseq,position,mersize);
  gt_xfwrite(bytebuffer, sizeof (*bytebuffer), (size_t) sizeofbuffer,
             merindexfpout);
  if (countsfilefpout == NULL)
  {
    /* no counts requested: only the mer itself is output */
    return 0;
  }
  if (countocc <= MAXSMALLMERCOUNT)
  {
    countbyte = (GtUchar) countocc;
  } else
  {
    Largecount *overflow;

    /* remember the full count together with the index of this mer */
    GT_GETNEXTFREEINARRAY(overflow,largecounts,Largecount,32);
    overflow->idx = countoutputmers;
    overflow->value = countocc;
  }
  gt_xfwrite(&countbyte, sizeof (countbyte),(size_t) 1,countsfilefpout);
  return 0;
}
コード例 #2
0
/*
 * Append a new item to the match table of <wlismatches> and set its weight.
 *
 * The item stores the start/end positions of the match, with index 0
 * holding <s_start>/<s_end> and index 1 holding <q_start>/<q_end>.
 * Depending on <store_querymatch>, the member oi_di holds either the
 * position of the item in the table (its index at the time of insertion) or
 * the given <distance>.
 *
 * The weight is the square of
 *   (aligned_len - 2 * distance) / aligned_len,
 * where aligned_len is the value delivered by gt_wlis_filter_aligned_len()
 * for the new item.
 *
 * NOTE(review): the subtraction is done on unsigned values, so this
 * presumably relies on 2 * distance <= aligned_len and aligned_len > 0;
 * confirm against the callers.
 */
void gt_wlis_filter_matches_add(GtWLisFilterMatches *wlismatches,
                                GtUword s_start, GtUword s_end,
                                GtUword q_start, GtUword q_end,
                                GtUword distance,
                                bool store_querymatch)
{
  GtWlisItem *item;
  GtUword total_len;
  float identity;

  gt_assert(wlismatches != NULL);
  /* grow the table by 20% of its current allocation plus 256 when full */
  GT_GETNEXTFREEINARRAY(item,&wlismatches->items,GtWlisItem,
                        wlismatches->items.allocatedGtWlisItem * 0.2 + 256);
  item->startpos[0] = s_start;
  item->endpos[0] = s_end;
  item->startpos[1] = q_start;
  item->endpos[1] = q_end;

  if (!store_querymatch)
  {
    item->oi_di.distance = distance;
  } else
  {
    /* the item was just appended, so its index is nextfree - 1 */
    gt_assert(wlismatches->items.nextfreeGtWlisItem > 0);
    item->oi_di.original_index = wlismatches->items.nextfreeGtWlisItem - 1;
  }
  total_len = gt_wlis_filter_aligned_len(item);
  identity = (float) (total_len - 2 * distance)/total_len;
  item->weight = identity * identity;
}
コード例 #3
0
/*
 * Create a GtBareEncseq for the already encoded <sequence> of length <len>
 * over an alphabet with <numofchars> characters.
 *
 * One pass over the sequence
 *  - counts the occurrences of each non-special character in charcount,
 *  - counts the special characters (as decided by ISSPECIAL), and
 *  - records every maximal run of consecutive special characters as a
 *    (start, length) entry in specialranges.
 *
 * The returned structure stores the pointer <sequence> itself; the sequence
 * is not copied.
 */
GtBareEncseq *gt_bare_encseq_new(GtUchar *sequence,GtUword len,
                                 GtUword numofchars)
{
  GtBareEncseq *result = gt_malloc(sizeof *result);
  GtBareSpecialrange *openrange = NULL; /* range of the current special run */
  GtUword runlength = 0,                /* length of the current special run */
          pos;

  result->sequence = sequence;
  result->totallength = len;
  result->numofchars = numofchars;
  result->specialcharacters = 0;
  result->charcount = gt_calloc((size_t) result->numofchars,
                                sizeof *result->charcount);
  GT_INITARRAY(&result->specialranges,GtBareSpecialrange);
  for (pos = 0; pos < len; pos++)
  {
    const GtUchar cc = sequence[pos];

    if (!ISSPECIAL(cc))
    {
      gt_assert((GtUword) cc < result->numofchars);
      result->charcount[(int) cc]++;
      if (runlength > 0)
      {
        /* a normal character terminates the current special run */
        gt_assert(openrange != NULL);
        openrange->length = runlength;
        runlength = 0;
      }
    } else
    {
      if (runlength == 0)
      {
        /* first special character of a new run: open a new range */
        GT_GETNEXTFREEINARRAY(openrange,&result->specialranges,
                              GtBareSpecialrange,128UL);
        openrange->start = pos;
      }
      runlength++;
      result->specialcharacters++;
    }
  }
  if (runlength > 0)
  {
    /* the sequence ends inside a special run: close it */
    gt_assert(openrange != NULL);
    openrange->length = runlength;
  }
  return result;
}
コード例 #4
0
/*
 * Parse the <numofbytes> bytes in <filecontents> line by line and encode
 * the sequence data in place, i.e. the encoded sequence overwrites the
 * front of <filecontents>.
 *
 * - A line whose first byte is '>' is a header line and is skipped. For
 *   every such line except the first one encountered, a SEPARATOR is
 *   appended to the encoded sequence and counted as a special character.
 * - In all other lines, bytes for which isspace() holds are skipped; every
 *   other byte is mapped via the symbol map of <alphabet>. A byte mapped to
 *   UNDEFCHAR is an error: <err> is set, the structure is released with
 *   gt_bare_encseq_delete() and NULL is returned. Note that
 *   <filecontents> may already have been partially overwritten then.
 * - Maximal runs of consecutive special characters (including separators)
 *   are recorded as (start, length) entries in specialranges; non-special
 *   characters are counted in charcount.
 *
 * On success the returned structure refers to <filecontents> as its
 * sequence (no copy is made) and totallength is the number of encoded
 * symbols written.
 */
GtBareEncseq *gt_bare_encseq_parse_new(GtUchar *filecontents,size_t numofbytes,
                                       const GtAlphabet *alphabet,
                                       GtError *err)
{
  GtUchar *writeptr = filecontents, *readptr = filecontents;
  const GtUchar *endptr = filecontents + numofbytes;
  bool firstline = true, haserr = false;
  GtUword lastspecialrange_length = 0;
  GtBareSpecialrange *srptr = NULL;
  GtBareEncseq *bare_encseq = gt_malloc(sizeof *bare_encseq);
  const GtUchar *smap = gt_alphabet_symbolmap(alphabet);

  bare_encseq->specialcharacters = 0;
  bare_encseq->numofchars = (GtUword) gt_alphabet_num_of_chars(alphabet);
  bare_encseq->charcount = gt_calloc((size_t) bare_encseq->numofchars,
                                     sizeof *bare_encseq->charcount);
  GT_INITARRAY(&bare_encseq->specialranges,GtBareSpecialrange);
  while (!haserr && readptr < endptr)
  {
    if (*readptr == '>')
    {
      if (!firstline)
      {
        /* a header after the first one separates two sequences */
        if (lastspecialrange_length == 0)
        {
          GT_GETNEXTFREEINARRAY(srptr,&bare_encseq->specialranges,
                                GtBareSpecialrange,128UL);
          srptr->start = (GtUword) (writeptr - filecontents);
        }
        lastspecialrange_length++;
        *writeptr++ = SEPARATOR;
        bare_encseq->specialcharacters++;
      } else
      {
        firstline = false;
      }
      /* skip the rest of the header line */
      while (readptr < endptr && *readptr != '\n')
      {
        readptr++;
      }
      /* Step over the newline only if there is one: for input that does not
         end with '\n', readptr already equals endptr here and must not be
         moved beyond one past the end of the buffer. */
      if (readptr < endptr)
      {
        readptr++;
      }
    } else
    {
      while (readptr < endptr && *readptr != '\n')
      {
        if (!isspace(*readptr))
        {
          GtUchar cc = smap[*readptr];
          if (cc == UNDEFCHAR)
          {
            gt_error_set(err,"illegal input characters %c\n",*readptr);
            haserr = true;
            break;
          }
          if (ISSPECIAL(cc))
          {
            if (lastspecialrange_length == 0)
            {
              /* first special character of a new run: open a new range */
              GT_GETNEXTFREEINARRAY(srptr,&bare_encseq->specialranges,
                                    GtBareSpecialrange,128UL);
              srptr->start = (GtUword) (writeptr - filecontents);
            }
            lastspecialrange_length++;
            bare_encseq->specialcharacters++;
          } else
          {
            gt_assert((GtUword) cc < bare_encseq->numofchars);
            bare_encseq->charcount[(int) cc]++;
            if (lastspecialrange_length > 0)
            {
              /* a normal character terminates the current special run */
              gt_assert(srptr != NULL);
              srptr->length = lastspecialrange_length;
            }
            lastspecialrange_length = 0;
          }
          *writeptr++ = cc;
        }
        readptr++;
      }
      /* see above: never advance beyond endptr */
      if (readptr < endptr)
      {
        readptr++;
      }
    }
  }
  if (lastspecialrange_length > 0)
  {
    /* the input ends inside a special run: close it */
    gt_assert(srptr != NULL);
    srptr->length = lastspecialrange_length;
  }
  bare_encseq->sequence = filecontents;
  bare_encseq->totallength = (GtUword) (writeptr - filecontents);
  if (haserr)
  {
    gt_bare_encseq_delete(bare_encseq);
    return NULL;
  }
  return bare_encseq;
}