Exemplo n.º 1
0
/*
  Convert the <mersize> symbols of <encseq> starting at <position> into
  <bytebuffer> (via gt_encseq_sequence2bytecode) and write <sizeofbuffer>
  bytes of that buffer to <merindexfpout>.
  If <countsfilefpout> is not NULL, exactly one byte is written to it:
  <countocc> itself when it does not exceed MAXSMALLMERCOUNT, otherwise 0.
  In the latter case a Largecount slot is taken from <largecounts> and
  filled with the pair (<countoutputmers>, <countocc>).
  Always returns 0; <err> is not used.
*/
static int outputsortedstring2indexviafileptr(const GtEncseq *encseq,
                                              GtUword mersize,
                                              GtUchar *bytebuffer,
                                              GtUword sizeofbuffer,
                                              FILE *merindexfpout,
                                              FILE *countsfilefpout,
                                              GtUword position,
                                              GtUword countocc,
                                              GtArrayLargecount *largecounts,
                                              GtUword countoutputmers,
                                              GT_UNUSED GtError *err)
{
  GtUchar countbyte = 0; /* 0 marks "look up the count in largecounts" */

  gt_encseq_sequence2bytecode(bytebuffer,encseq,position,mersize);
  gt_xfwrite(bytebuffer, sizeof (bytebuffer[0]), (size_t) sizeofbuffer,
             merindexfpout);
  if (countsfilefpout == NULL)
  {
    /* no counts file requested: nothing more to do */
    return 0;
  }
  if (countocc > MAXSMALLMERCOUNT)
  {
    Largecount *entry;

    /* count does not fit into the one-byte table, store it separately */
    GT_GETNEXTFREEINARRAY(entry,largecounts,Largecount,32);
    entry->idx = countoutputmers;
    entry->value = countocc;
  } else
  {
    countbyte = (GtUchar) countocc;
  }
  gt_xfwrite(&countbyte, sizeof (countbyte),(size_t) 1,countsfilefpout);
  return 0;
}
Exemplo n.º 2
0
/*
  Write <cntlist> to <file> in the following layout:
    1. the byte GT_CNTLIST_BIT_HEADER,
    2. one byte holding sizeof (GtUword),
    3. <nofreads> as a raw GtUword,
    4. GT_NUMOFINTSFORBITS(nofreads) raw GtBitsequence words of <cntlist>.
  <file> must not be NULL.
*/
static inline void gt_cntlist_show_bit(GtBitsequence *cntlist,
    GtUword nofreads, FILE *file)
{
  const char wordsize = (char)sizeof(GtUword);

  gt_assert(file != NULL);
  /* fixed-size header */
  gt_xfputc(GT_CNTLIST_BIT_HEADER, file);
  gt_xfputc(wordsize, file);
  gt_xfwrite(&nofreads, sizeof nofreads, (size_t)1, file);
  /* payload: the bit vector itself */
  gt_xfwrite(cntlist, sizeof (cntlist[0]), GT_NUMOFINTSFORBITS(nofreads),
      file);
}
Exemplo n.º 3
0
/* FIXME: convert to platform-independent variant */
/*
  Dump <rangeList> to <fp> as raw memory: first the numRanges field, then
  numRanges records of type struct seqRange taken from rangeList->ranges.
  Both arguments must be non-NULL. Always returns 1.
*/
int
SRLSaveToStream(struct seqRangeList *rangeList, FILE *fp)
{
  gt_assert(rangeList && fp);
  /* element count first, so that a reader knows how many records follow */
  gt_xfwrite(&rangeList->numRanges, sizeof rangeList->numRanges, 1, fp);
  gt_xfwrite(rangeList->ranges, sizeof (struct seqRange),
             (size_t) rangeList->numRanges, fp);
  return 1;
}
Exemplo n.º 4
0
/*
  Print sequence number <seqnum> of <encseq> to stdout as one FASTA record:
  GT_FASTA_SEPARATOR, the description (only if <encseq> has description
  support), a newline, all decoded characters of the sequence on a single
  line, and a final newline.
*/
static void gt_seqorder_output(unsigned long seqnum, GtEncseq *encseq)
{
  GtEncseqReader *reader;
  unsigned long seqstart, remaining;

  seqstart = gt_encseq_seqstartpos(encseq, seqnum);
  remaining = gt_encseq_seqlength(encseq, seqnum);

  /* header line */
  gt_xfputc(GT_FASTA_SEPARATOR, stdout);
  if (gt_encseq_has_description_support(encseq))
  {
    unsigned long desclen = 0;
    const char *desc = gt_encseq_description(encseq, &desclen, seqnum);

    gt_xfwrite(desc, (size_t)1, (size_t)desclen, stdout);
  }
  gt_xfputc('\n', stdout);

  /* sequence line: decode sequentially, one character at a time */
  reader = gt_encseq_create_reader_with_readmode(encseq, GT_READMODE_FORWARD,
      seqstart);
  while (remaining > 0)
  {
    gt_xfputc(gt_encseq_reader_next_decoded_char(reader), stdout);
    remaining--;
  }
  gt_encseq_reader_delete(reader);
  gt_xfputc('\n', stdout);
}
Exemplo n.º 5
0
/*
  Write a three-part header to <file>: the byte GT_CNTLIST_BIN_HEADER, one
  byte holding sizeof (GtUword), and <nofreads> as a raw GtUword.
  <file> must not be NULL.
*/
void gt_cntlist_write_bin_header(GtUword nofreads, FILE *file)
{
  const char wordsize = (char)sizeof(GtUword);

  gt_assert(file != NULL);
  gt_xfputc(GT_CNTLIST_BIN_HEADER, file);
  gt_xfputc(wordsize, file);
  gt_xfwrite(&nofreads, sizeof nofreads, (size_t)1, file);
}
Exemplo n.º 6
0
/*
  Write <many> zero bytes to <outfplcptab>, using a zero-filled block of
  GT_LCPBUF_NUMBEROFZEROS bytes: first all complete blocks, then one final
  write for the remainder (which may have length 0). Returns <many>.
*/
static GtUword outmany0lcpvalues(GtUword many,
                                       FILE *outfplcptab)
{
#define GT_LCPBUF_NUMBEROFZEROS 1024
  uint8_t zeroblock[GT_LCPBUF_NUMBEROFZEROS] = {0};
  GtUword blocksleft = many/GT_LCPBUF_NUMBEROFZEROS;
  /* the cast applies to <many> only, the modulo is taken afterwards */
  const size_t remainder = ((size_t) many) % GT_LCPBUF_NUMBEROFZEROS;

  while (blocksleft > 0)
  {
    gt_xfwrite(zeroblock,sizeof (zeroblock[0]),
               (size_t) GT_LCPBUF_NUMBEROFZEROS,outfplcptab);
    blocksleft--;
  }
  gt_xfwrite(zeroblock,sizeof (zeroblock[0]),remainder,outfplcptab);
  return many;
}
Exemplo n.º 7
0
/*
  Write the first <nextfree> elements of the buffer's spaceuint32_t array to
  the buffer's file pointer, add that number to <totalwrite> and mark the
  buffer as empty by resetting <nextfree> to 0.
*/
void gt_leftborderbuffer_flush(GtLeftborderOutbuffer *leftborderbuffer)
{
  GtLeftborderOutbuffer *lbb = leftborderbuffer;

  gt_xfwrite(lbb->spaceuint32_t,
             sizeof (lbb->spaceuint32_t[0]),
             (size_t) lbb->nextfree,
             lbb->fp);
  /* bookkeeping: account for what was written, then empty the buffer */
  lbb->totalwrite += lbb->nextfree;
  lbb->nextfree = 0;
}
Exemplo n.º 8
0
/*
  Store <pckbuckettable> in the index file with suffix PCKBUCKETTABLE
  belonging to <indexname>: first maxdepth converted to a Seqpos, then
  maxnumofvalues records of type Mbtab starting at mbtab[0].
  Returns 0 on success and -1 if the file cannot be opened.
*/
int pckbucket2file(const GtStr *indexname,const Pckbuckettable *pckbuckettable,
                   GtError *err)
{
  FILE *outfp;

  gt_error_check(err);
  outfp = opensfxfile(indexname,PCKBUCKETTABLE,"wb",err);
  if (outfp != NULL)
  {
    Seqpos maxdepth = (Seqpos) pckbuckettable->maxdepth;

    gt_xfwrite(&maxdepth,sizeof (maxdepth),(size_t) 1,outfp);
    gt_xfwrite(pckbuckettable->mbtab[0],sizeof (Mbtab),
               (size_t) pckbuckettable->maxnumofvalues,outfp);
    gt_fa_fclose(outfp);
    return 0;
  }
  return -1;
}
Exemplo n.º 9
0
/*
  Write <nbytes> bytes from <buf> to <file>. A NULL <file> means stdout.
  Otherwise the write is dispatched on file->mode to the plain, gzip or
  bzip2 writer; any other mode triggers an assertion failure.
*/
void gt_file_xwrite(GtFile *file, void *buf, size_t nbytes)
{
  if (file == NULL) {
    /* no file object given: fall back to standard output */
    gt_xfwrite(buf, 1, nbytes, stdout);
  }
  else if (file->mode == GT_FILE_MODE_UNCOMPRESSED) {
    gt_xfwrite(buf, 1, nbytes, file->fileptr.file);
  }
  else if (file->mode == GT_FILE_MODE_GZIP) {
    gt_xgzwrite(file->fileptr.gzfile, buf, nbytes);
  }
  else if (file->mode == GT_FILE_MODE_BZIP2) {
    gt_xbzwrite(file->fileptr.bzfile, buf, nbytes);
  }
  else {
    gt_assert(0);
  }
}
Exemplo n.º 10
0
/*
  Move the table referenced through <usedptrptr> out of memory into a
  temporary file: a temporary file is created (its name is kept in
  sfxmappedrange->filename), numofunits units of sizeofunit bytes are
  written to it, the in-memory table is freed and the caller's pointer is
  set to NULL. <type> selects which member of <usedptrptr> is used; the
  address of the caller's pointer is remembered in
  sfxmappedrange->usedptrptr. For any other <type> nothing is written.
*/
static void gt_Sfxmappedrange_storetmp(GtSfxmappedrange *sfxmappedrange,
                                       GtSfxStoretype usedptrptr,
                                       GtSfxmappedrangetype type,
                                       bool writable)
{
  FILE *tmpfp;

  gt_assert(sfxmappedrange != NULL);
  sfxmappedrange->ptr = NULL;
  sfxmappedrange->filename = gt_str_new();
  sfxmappedrange->writable = writable;
  tmpfp = gt_xtmpfp(sfxmappedrange->filename);
  gt_assert(tmpfp != NULL);
  gt_log_log("write %s to file %s ("GT_WU" units of "GT_WU" bytes)",
             gt_str_get(sfxmappedrange->tablename),
             gt_str_get(sfxmappedrange->filename),
             (GtUword) sfxmappedrange->numofunits,
             (GtUword) sfxmappedrange->sizeofunit);
  /* the three branches differ only in the union member they access */
  if (type == GtSfxGtBitsequence)
  {
    gt_xfwrite(*(usedptrptr.bs),sfxmappedrange->sizeofunit,
               sfxmappedrange->numofunits,tmpfp);
    sfxmappedrange->usedptrptr = (void**) usedptrptr.bs;
    gt_free(*(usedptrptr.bs));
    *(usedptrptr.bs) = NULL;
  } else if (type == GtSfxunsignedlong)
  {
    gt_xfwrite(*(usedptrptr.ulong),sfxmappedrange->sizeofunit,
               sfxmappedrange->numofunits,tmpfp);
    sfxmappedrange->usedptrptr = (void**) usedptrptr.ulong;
    gt_free(*(usedptrptr.ulong));
    *(usedptrptr.ulong) = NULL;
  } else if (type == GtSfxuint32_t)
  {
    gt_xfwrite(*(usedptrptr.uint32),sfxmappedrange->sizeofunit,
               sfxmappedrange->numofunits,tmpfp);
    sfxmappedrange->usedptrptr = (void**) usedptrptr.uint32;
    gt_free(*(usedptrptr.uint32));
    *(usedptrptr.uint32) = NULL;
  }
  gt_fa_fclose(tmpfp);
}
Exemplo n.º 11
0
/*
  Write the string form of <alphabet>, as produced by gt_alphabet_to_str,
  to <fpout>. Both arguments must be non-NULL.
*/
void gt_alphabet_output(const GtAlphabet *alphabet, FILE *fpout)
{
  GtStr *serialized;

  gt_assert(alphabet && fpout);
  serialized = gt_str_new();
  gt_alphabet_to_str(alphabet, serialized);
  gt_xfwrite(gt_str_get(serialized),
             sizeof (char),
             (size_t) gt_str_length(serialized),
             fpout);
  gt_str_delete(serialized);
}
Exemplo n.º 12
0
/*
  Write the first <numoflcps> entries of lcp2file->smalllcpvalues to
  lcp2file->outfplcptab and add <numoflcps> to the running counter
  countoutputlcpvalues. <lcp2file> and its output stream must be non-NULL.
*/
static void outsmalllcpvalues(Lcpoutput2file *lcp2file,
                              GtUword numoflcps)
{
  FILE *outfp;

  gt_assert (lcp2file != NULL);
  lcp2file->countoutputlcpvalues += numoflcps;
  outfp = lcp2file->outfplcptab;
  gt_assert(outfp != NULL);
  gt_xfwrite(lcp2file->smalllcpvalues,
             sizeof (lcp2file->smalllcpvalues[0]),
             (size_t) numoflcps,
             outfp);
}
Exemplo n.º 13
0
/*
  Store <pckbuckettable> in the file <indexname> + PCKBUCKETTABLE: first
  maxdepth as an unsigned long, then maxnumofvalues records of type Mbtab
  starting at mbtab[0].
  Returns 0 on success and -1 if the file cannot be opened.
*/
int gt_pckbuckettable_2file(const char *indexname,
                            const Pckbuckettable *pckbuckettable,
                            GtError *err)
{
  FILE *outfp;

  gt_error_check(err);
  outfp = gt_fa_fopen_with_suffix(indexname,PCKBUCKETTABLE,"wb",err);
  if (outfp != NULL)
  {
    unsigned long maxdepth = (unsigned long) pckbuckettable->maxdepth;

    gt_xfwrite(&maxdepth,sizeof (maxdepth),(size_t) 1,outfp);
    gt_xfwrite(pckbuckettable->mbtab[0],sizeof (Mbtab),
               (size_t) pckbuckettable->maxnumofvalues,outfp);
    gt_fa_fclose(outfp);
    return 0;
  }
  return -1;
}
Exemplo n.º 14
0
/*
  Write <numberofsuffixes> entries of the suffix table held by <sssp> to
  <outfpsuftab>. The entries are taken from sssp->ulongtab if that pointer
  is not NULL, otherwise from sssp->uinttab; the element size follows the
  table that is used.
*/
void gt_suffixsortspace_to_file (FILE *outfpsuftab,
                                const GtSuffixsortspace *sssp,
                                unsigned long numberofsuffixes)
{
  const void *table;
  size_t unitsize;

  if (sssp->ulongtab != NULL)
  {
    table = sssp->ulongtab;
    unitsize = sizeof (sssp->ulongtab[0]);
  } else
  {
    table = sssp->uinttab;
    unitsize = sizeof (sssp->uinttab[0]);
  }
  gt_xfwrite(table, unitsize, (size_t) numberofsuffixes, outfpsuftab);
}
Exemplo n.º 15
0
/*
  Append the <bits2write> bits of <code> to <bitstream>.
  NOTE(review): this snippet is truncated; only the branch that handles a
  code not fitting into the current buffer word is visible here.
*/
void gt_bitoutstream_append(GtBitOutStream *bitstream,
                            GtBitsequence code,
                            unsigned long bits2write)
{
  /* more bits requested than there is room left in the buffer word */
  if (bitstream->bits_left < bits2write) {
    unsigned int overhang = 0;
    /* number of bits of <code> that do not fit into the current word */
    overhang = bits2write - bitstream->bits_left;
    /* put the upper part of <code> into the buffer word ... */
    bitstream->bitseqbuffer |= code >> overhang;
    /* ... and write that word to the file */
    gt_xfwrite(&bitstream->bitseqbuffer,
               sizeof (GtBitsequence),
               1, bitstream->fp);
    /* start a new word; <overhang> bits of it are accounted as used.
       NOTE(review): the lower <overhang> bits of <code> are not stored in
       the visible part; presumably the truncated remainder does so. */
    bitstream->bitseqbuffer = 0;
    bitstream->bits_left = GT_INTWORDSIZE - overhang;
  }
Exemplo n.º 16
0
/*
  If <byteoffset> is not a multiple of GT_WORDSIZE_INBYTES, write as many
  zero bytes to <fp> as are needed to reach the next multiple. The number
  of bytes written (0 if no padding was necessary) is stored in
  *<bytes_written>. Always returns 0; <err> is not used.
*/
int gt_mapspec_pad(FILE *fp, GtUword *bytes_written,
                   GtUword byteoffset, GT_UNUSED GtError *err)
{
  const GtUword misalign = byteoffset % (GtUword) GT_WORDSIZE_INBYTES;

  if (misalign == 0)
  {
    /* already aligned */
    *bytes_written = 0;
    return 0;
  }
  {
    /* at most GT_WORDSIZE_INBYTES-1 pad bytes are ever needed */
    GtUchar zeros[GT_WORDSIZE_INBYTES-1] = {0};
    const size_t padunits = GT_WORDSIZE_INBYTES - misalign;

    gt_xfwrite(zeros,sizeof (GtUchar),padunits,fp);
    *bytes_written = (GtUword) padunits;
  }
  return 0;
}
Exemplo n.º 17
0
/*
  Write sequence number <seqnum> of <encseq> to <fpout> in FASTA format.

  The header line consists of '>', followed by the description of the
  sequence. If <fastakeyquery> is not NULL and not COMPLETE, the header is
  prefixed with "<fastakey> <frompos> <topos> " and only the part of the
  sequence described by the query is output: it starts at offset
  frompos-1 within the sequence and has length topos-frompos+1. Otherwise
  the whole sequence is output.

  The sequence itself is written by gt_encseq2symbolstring with the given
  <linewidth>.

  Always returns 0; <err> is not used.
*/
static int giextract_encodedseq2fasta(FILE *fpout,
                                      const GtEncseq *encseq,
                                      unsigned long seqnum,
                                      const Fastakeyquery *fastakeyquery,
                                      unsigned long linewidth,
                                      GT_UNUSED GtError *err)
{
  const char *desc;
  unsigned long desclen, offset, outlen, seqstartpos, seqlength;
  const bool partial = fastakeyquery != NULL && !COMPLETE(fastakeyquery);

  desc = gt_encseq_description(encseq, &desclen, seqnum);
  gt_xfputc('>',fpout);
  if (partial)
  {
    /* the key/range prefix belongs to the header line and therefore has to
       go to <fpout> like the rest of the record, not to stdout */
    fprintf(fpout,"%s %lu %lu ",fastakeyquery->fastakey,
                                fastakeyquery->frompos,
                                fastakeyquery->topos);
  }
  gt_xfwrite(desc,sizeof *desc,(size_t) desclen,fpout);
  gt_xfputc('\n',fpout);
  seqstartpos = gt_encseq_seqstartpos(encseq, seqnum);
  seqlength = gt_encseq_seqlength(encseq, seqnum);
  if (partial)
  {
    /* frompos is used as a 1-based position within the sequence */
    offset = fastakeyquery->frompos-1;
    outlen = fastakeyquery->topos - fastakeyquery->frompos + 1;
  } else
  {
    offset = 0;
    outlen = seqlength;
  }
  gt_encseq2symbolstring(fpout,
                         encseq,
                         GT_READMODE_FORWARD,
                         seqstartpos + offset,
                         outlen,
                         linewidth);
  return 0;
}
Exemplo n.º 18
0
/*
  Build a textual representation of <bssm_param> in a string and write it
  to <outfp>. The text starts with "BSSM = {", contains one write_model
  section for each model whose *_set flag is true (gt_donor_model,
  gc_donor_model, ag_acceptor_model, in this order) and ends with "}".
  Both arguments must be non-NULL.
*/
static void bssm_param_plain_write(const GthBSSMParam *bssm_param, FILE *outfp)
{
    GtStr *text;

    gt_assert(bssm_param && outfp);
    text = gt_str_new();

    /* opening line */
    gt_str_append_cstr(text, "BSSM = {\n");

    /* the two donor models are each followed by a comma ... */
    if (bssm_param->gt_donor_model_set) {
        write_model(text, "gt_donor_model", &bssm_param->gt_donor_model);
        gt_str_append_cstr(text, ",\n");
    }
    if (bssm_param->gc_donor_model_set) {
        write_model(text, "gc_donor_model", &bssm_param->gc_donor_model);
        gt_str_append_cstr(text, ",\n");
    }
    /* ... the acceptor model only by a newline */
    if (bssm_param->ag_acceptor_model_set) {
        write_model(text, "ag_acceptor_model", &bssm_param->ag_acceptor_model);
        gt_str_append_char(text, '\n');
    }

    /* closing line, then emit everything with a single write */
    gt_str_append_cstr(text, "}\n");
    gt_xfwrite(gt_str_get(text), sizeof (char), gt_str_length(text), outfp);
    gt_str_delete(text);
}
Exemplo n.º 19
0
/*
  Write the string form of <alphabet>, as produced by gt_alphabet_to_str,
  to the file <indexname> + GT_ALPHABETFILESUFFIX.
  Returns 0 on success and -1 if the file cannot be opened.
*/
int gt_alphabet_to_file(const GtAlphabet *alphabet, const char *indexname,
                        GtError *err)
{
  FILE *outfp;
  GtStr *serialized;

  gt_error_check(err);
  outfp = gt_fa_fopen_with_suffix(indexname,GT_ALPHABETFILESUFFIX,"wb",err);
  if (outfp == NULL)
  {
    return -1;
  }
  serialized = gt_str_new();
  gt_alphabet_to_str(alphabet, serialized);
  gt_xfwrite(gt_str_get(serialized), sizeof (char),
             (size_t) gt_str_length(serialized), outfp);
  gt_fa_xfclose(outfp);
  gt_str_delete(serialized);
  return 0;
}
Exemplo n.º 20
0
/*
  Decode <encseq> to stdout as requested by <args>.

  If args->mode is "fasta", a set of sequences is written as FASTA records
  (separator, description, newline, sequence on one line, newline). The set
  is the single sequence args->seq if that is defined, otherwise the
  inclusive range args->seqrng if both of its bounds are defined, otherwise
  all sequences. If <encseq> has no description support, a warning
  mentioning <filename> is issued and "sequence <number>" is used as the
  description.

  If args->mode is "concat", the decoded characters of the positions
  from..to (both inclusive) are written followed by a newline; characters
  equal to SEPARATOR are replaced by the first character of args->sepchar.
  The range defaults to the whole encoded sequence and is taken from
  args->rng if both of its bounds are defined.

  args->rm is the read mode used for decoding, and args->singlechars
  selects per-position access instead of a sequential reader.

  Returns 0 on success. On an out-of-range request <err> is set and -1 is
  returned.
*/
static int output_sequence(GtEncseq *encseq, GtEncseqDecodeArguments *args,
                           const char *filename, GtError *err)
{
  GtUword i, j, sfrom, sto;
  int had_err = 0;
  bool has_desc;
  GtEncseqReader *esr;
  gt_assert(encseq);

  /* only warn here; a placeholder description is generated further down */
  if (!(has_desc = gt_encseq_has_description_support(encseq)))
    gt_warning("Missing description support for file %s", filename);

  if (strcmp(gt_str_get(args->mode), "fasta") == 0) {
    /* determine the half-open interval [sfrom, sto) of sequence numbers */
    /* specify a single sequence to extract */
    if (args->seq != GT_UNDEF_UWORD) {
      if (args->seq >= gt_encseq_num_of_sequences(encseq)) {
        gt_error_set(err,
                     "requested sequence "GT_WU" exceeds number of sequences "
                     "("GT_WU")", args->seq,
                     gt_encseq_num_of_sequences(encseq));
        return -1;
      }
      sfrom = args->seq;
      sto = args->seq + 1;
    } else if (args->seqrng.start != GT_UNDEF_UWORD
                 && args->seqrng.end != GT_UNDEF_UWORD) {
      /* specify a sequence range to extract */
      if (args->seqrng.start >= gt_encseq_num_of_sequences(encseq)
            || args->seqrng.end >= gt_encseq_num_of_sequences(encseq)) {
        gt_error_set(err,
                     "range "GT_WU"-"GT_WU" includes a sequence number "
                     "exceeding the total number of sequences ("GT_WU")",
                     args->seqrng.start,
                     args->seqrng.end,
                     gt_encseq_num_of_sequences(encseq));
        return -1;
      }
      sfrom = args->seqrng.start;
      sto = args->seqrng.end + 1;
    } else {
      /* extract all sequences */
      sfrom = 0;
      sto = gt_encseq_num_of_sequences(encseq);
    }
    for (i = sfrom; i < sto; i++) {
      GtUword desclen, startpos, len;
      char buf[BUFSIZ];
      const char *desc = NULL;
      /* XXX: maybe make this distinction in the functions via readmode? */
      if (!GT_ISDIRREVERSE(args->rm)) {
        /* non-reverse read mode: use the coordinates of sequence i as is */
        startpos = gt_encseq_seqstartpos(encseq, i);
        len = gt_encseq_seqlength(encseq, i);
        if (has_desc) {
          desc = gt_encseq_description(encseq, &desclen, i);
        } else {
          (void) snprintf(buf, BUFSIZ, "sequence "GT_WU"", i);
          desclen = strlen(buf);
          desc = buf;
        }
      } else {
        /* reverse read mode: length, start position and description are
           taken from the mirrored sequence number (numofsequences-1-i) */
        /* NOTE(review): this value of startpos is overwritten below before
           it is ever read */
        startpos = gt_encseq_seqstartpos(encseq, i);
        len = gt_encseq_seqlength(encseq,
                                  gt_encseq_num_of_sequences(encseq)-1-i);
        /* total length minus the end position of the mirrored sequence */
        startpos = gt_encseq_total_length(encseq)
                     - (gt_encseq_seqstartpos(encseq,
                                              gt_encseq_num_of_sequences(
                                                encseq)-1-i) + len);
        if (has_desc) {
          desc = gt_encseq_description(encseq,
                                       &desclen,
                                       gt_encseq_num_of_sequences(encseq)-1-i);
        } else {
          (void) snprintf(buf, BUFSIZ, "sequence "GT_WU"", i);
          desclen = strlen(buf);
          desc = buf;
        }
      }
      gt_assert(desc);
      /* output description */
      gt_xfputc(GT_FASTA_SEPARATOR, stdout);
      gt_xfwrite(desc, 1, desclen, stdout);
      gt_xfputc('\n', stdout);
      /* XXX: make this more efficient by writing in a buffer first and then
         showing the result */
      if (args->singlechars) {
        /* decode each position individually */
        for (j = 0; j < len; j++) {
           gt_xfputc(gt_encseq_get_decoded_char(encseq,
                                                startpos + j,
                                                args->rm),
                     stdout);
        }
      } else {
        /* decode sequentially with a reader positioned at startpos */
        esr = gt_encseq_create_reader_with_readmode(encseq, args->rm, startpos);
        for (j = 0; j < len; j++) {
           gt_xfputc(gt_encseq_reader_next_decoded_char(esr), stdout);
        }
        gt_encseq_reader_delete(esr);
      }
      gt_xfputc('\n', stdout);
    }
  }

  if (strcmp(gt_str_get(args->mode), "concat") == 0) {
    /* default range: all positions; <to> is inclusive.
       NOTE(review): if the total length can be 0, the subtraction below
       presumably wraps around (GtUword looks unsigned) -- confirm */
    GtUword from = 0,
                  to = gt_encseq_total_length(encseq) - 1;
    /* a user-supplied range is only honoured if both bounds are defined */
    if (args->rng.start != GT_UNDEF_UWORD && args->rng.end != GT_UNDEF_UWORD) {
      /* NOTE(review): only the end is validated here; start <= end is not
         checked in this function */
      if (args->rng.end > to) {
        had_err = -1;
        gt_error_set(err,
                     "end of range ("GT_WU") exceeds encoded sequence length "
                     "("GT_WU")", args->rng.end, to);
      }
      if (!had_err) {
        from = args->rng.start;
        to = args->rng.end;
      }
    }
    if (!had_err) {
      if (args->singlechars) {
        for (j = from; j <= to; j++) {
          char cc = gt_encseq_get_decoded_char(encseq, j, args->rm);
          /* show separator positions using the user-chosen character */
          if (cc == (char) SEPARATOR)
            cc = gt_str_get(args->sepchar)[0];
          gt_xfputc(cc, stdout);
        }
      } else {
        esr = gt_encseq_create_reader_with_readmode(encseq, args->rm, from);
        if (esr) {
          for (j = from; j <= to; j++) {
            char cc = gt_encseq_reader_next_decoded_char(esr);
            if (cc == (char) SEPARATOR)
              cc = gt_str_get(args->sepchar)[0];
            gt_xfputc(cc, stdout);
          }
          gt_encseq_reader_delete(esr);
        }
      }
      gt_xfputc('\n', stdout);
    }
  }
  return had_err;
}
Exemplo n.º 21
0
/*
  Traverse the index read through <ssar> with gt_depthfirstesa, using the
  tyr_* callbacks and a freshly allocated TyrDfsstate.

  If <storeindex> is the empty string, nothing is written to disk:
  adddistpos2distribution is installed as the occurrence-count callback and
  showfinalstatistics is called after the traversal (with <inputindex>).
  Otherwise outputsortedstring2index is installed and the file
  <storeindex> + MERSUFFIX is opened for writing; if <storecounts> is true,
  <storeindex> + COUNTSSUFFIX is opened as well. After a successful
  traversal the collected Largecount entries are appended to the counts
  file, and <mersize> and the alphabet size are appended to the mer index
  file via outputbytewiseUlongvalue.

  <minocc>, <maxocc> and <performtest> are only stored in the state for use
  by the callbacks.

  Returns 0 on success and -1 on error (mersize larger than the total
  length, a file that cannot be opened, or a failing traversal).
*/
static int enumeratelcpintervals(const char *inputindex,
                                 Sequentialsuffixarrayreader *ssar,
                                 const char *storeindex,
                                 bool storecounts,
                                 GtUword mersize,
                                 GtUword minocc,
                                 GtUword maxocc,
                                 bool performtest,
                                 GtLogger *logger,
                                 GtError *err)
{
  TyrDfsstate *state;
  bool haserr = false;
  unsigned int alphasize;

  gt_error_check(err);
  /* set up the traversal state; all resources allocated here are released
     unconditionally at the end of the function */
  state = gt_malloc(sizeof (*state));
  GT_INITARRAY(&state->occdistribution,Countwithpositions);
  state->esrspace = gt_encseq_create_reader_with_readmode(
                                   gt_encseqSequentialsuffixarrayreader(ssar),
                                   gt_readmodeSequentialsuffixarrayreader(ssar),
                                   0);
  state->mersize = (GtUword) mersize;
  state->encseq = gt_encseqSequentialsuffixarrayreader(ssar);
  alphasize = gt_alphabet_num_of_chars(gt_encseq_alphabet(state->encseq));
  state->readmode = gt_readmodeSequentialsuffixarrayreader(ssar);
  state->storecounts = storecounts;
  state->minocc = minocc;
  state->maxocc = maxocc;
  state->totallength = gt_encseq_total_length(state->encseq);
  state->performtest = performtest;
  state->countoutputmers = 0;
  /* both file pointers stay NULL unless they are opened further down */
  state->merindexfpout = NULL;
  state->countsfilefpout = NULL;
  GT_INITARRAY(&state->largecounts,Largecount);
  /* the byte buffer for encoded mers is only needed when an index is
     written */
  if (strlen(storeindex) == 0)
  {
    state->sizeofbuffer = 0;
    state->bytebuffer = NULL;
  } else
  {
    state->sizeofbuffer = MERBYTES(mersize);
    state->bytebuffer = gt_malloc(sizeof *state->bytebuffer
                                  * state->sizeofbuffer);
  }
  /* test mode additionally needs a buffer for one mer and the suffix
     table */
  if (performtest)
  {
    state->currentmer = gt_malloc(sizeof *state->currentmer
                                  * state->mersize);
    state->suftab = gt_suftabSequentialsuffixarrayreader(ssar);
  } else
  {
    state->currentmer = NULL;
    state->suftab = NULL;
  }
  if (state->mersize > state->totallength)
  {
    gt_error_set(err,"mersize "GT_WU" > "GT_WU" = totallength not allowed",
                 state->mersize,
                 state->totallength);
    haserr = true;
  } else
  {
    /* choose the callback that receives the occurrence counts and open
       the output files if an index is to be stored */
    if (strlen(storeindex) == 0)
    {
      state->processoccurrencecount = adddistpos2distribution;
    } else
    {
      state->merindexfpout = gt_fa_fopen_with_suffix(storeindex,MERSUFFIX,
                                                    "wb",err);
      if (state->merindexfpout == NULL)
      {
        haserr = true;
      } else
      {
        if (state->storecounts)
        {
          state->countsfilefpout
            = gt_fa_fopen_with_suffix(storeindex,COUNTSSUFFIX,"wb",err);
          if (state->countsfilefpout == NULL)
          {
            haserr = true;
          }
        }
      }
      state->processoccurrencecount = outputsortedstring2index;
    }
    if (!haserr)
    {
      if (gt_depthfirstesa(ssar,
                          tyr_allocateDfsinfo,
                          tyr_freeDfsinfo,
                          tyr_processleafedge,
                          NULL,
                          tyr_processcompletenode,
                          tyr_assignleftmostleaf,
                          tyr_assignrightmostleaf,
                          (Dfsstate*) state,
                          logger,
                          err) != 0)
      {
        haserr = true;
      }
      /* NOTE(review): the statistics are shown even if the traversal above
         failed -- confirm that this is intended */
      if (strlen(storeindex) == 0)
      {
        showfinalstatistics(state,inputindex,logger);
      }
    }
    if (!haserr)
    {
      /* append the counts that were stored in largecounts to the counts
         file */
      if (state->countsfilefpout != NULL)
      {
        gt_logger_log(logger,"write "GT_WU" mercounts > "GT_WU
                      " to file \"%s%s\"",
                      state->largecounts.nextfreeLargecount,
                      (GtUword) MAXSMALLMERCOUNT,
                      storeindex,
                      COUNTSSUFFIX);
        gt_xfwrite(state->largecounts.spaceLargecount, sizeof (Largecount),
                  (size_t) state->largecounts.nextfreeLargecount,
                  state->countsfilefpout);
      }
    }
    if (!haserr)
    {
      gt_logger_log(logger,"number of "GT_WU"-mers in index: "GT_WU"",
                  mersize,
                  state->countoutputmers);
      gt_logger_log(logger,"index size: %.2f megabytes\n",
                  GT_MEGABYTES(state->countoutputmers * state->sizeofbuffer +
                               sizeof (GtUword) * EXTRAINTEGERS));
    }
  }
  /* now out EXTRAINTEGERS integer values */
  if (!haserr && state->merindexfpout != NULL)
  {
    outputbytewiseUlongvalue(state->merindexfpout,
                             (GtUword) state->mersize);
    outputbytewiseUlongvalue(state->merindexfpout,(GtUword) alphasize);
  }
  /* cleanup, executed on success and on error alike; the file pointers may
     still be NULL here.
     NOTE(review): relies on gt_fa_xfclose accepting NULL -- confirm */
  gt_fa_xfclose(state->merindexfpout);
  gt_fa_xfclose(state->countsfilefpout);
  GT_FREEARRAY(&state->occdistribution,Countwithpositions);
  gt_free(state->currentmer);
  gt_free(state->bytebuffer);
  GT_FREEARRAY(&state->largecounts,Largecount);
  gt_encseq_reader_delete(state->esrspace);
  gt_free(state);
  return haserr ? -1 : 0;
}
Exemplo n.º 22
0
/*
  Output the first <width> lcp values of lcpsubtab->tableoflcpvalues.

  Every value smaller than LCPOVERFLOW is stored as one byte in the
  smalllcpvalues buffer. For every other value the byte LCPOVERFLOW is
  stored instead, and a Largelcpvalue record consisting of the position
  (<posoffset> + index) and the value is collected. The bytes are written
  via outsmalllcpvalues; the collected records, if there are any, are
  written to outfpllvtab.

  Along the way maxbranchdepth, lcptabsum, totalnumoflargelcpvalues and,
  if present, the distribution distlcpvalues are updated.
*/
static void outlcpvalues(Lcpsubtab *lcpsubtab,
                         GtUword width,
                         GtUword posoffset)
{
  Lcpoutput2file *out;
  GtUword pos;

  gt_assert(lcpsubtab != NULL && lcpsubtab->lcp2file != NULL);
  out = lcpsubtab->lcp2file;

  /* start with an empty list of large values and make sure that it can
     hold all large values of this table */
  out->largelcpvalues.nextfreeLargelcpvalue = 0;
  if (lcpsubtab->tableoflcpvalues.numoflargelcpvalues > 0 &&
      lcpsubtab->tableoflcpvalues.numoflargelcpvalues >=
      out->largelcpvalues.allocatedLargelcpvalue)
  {
    out->largelcpvalues.spaceLargelcpvalue
      = gt_realloc(out->largelcpvalues.spaceLargelcpvalue,
                   sizeof (*out->largelcpvalues.spaceLargelcpvalue) *
                   lcpsubtab->tableoflcpvalues.numoflargelcpvalues);
    out->largelcpvalues.allocatedLargelcpvalue
      = lcpsubtab->tableoflcpvalues.numoflargelcpvalues;
  }

  for (pos = 0; pos < width; pos++)
  {
    const GtUword current
      = gt_lcptab_getvalue(&lcpsubtab->tableoflcpvalues,0,pos);

    if (out->maxbranchdepth < current)
    {
      out->maxbranchdepth = current;
    }
    if (current >= (GtUword) LCPOVERFLOW)
    {
      Largelcpvalue *large;

      /* does not fit into one byte: keep position and value separately
         and mark the byte entry with LCPOVERFLOW */
      gt_assert(out->largelcpvalues.nextfreeLargelcpvalue
                < out->largelcpvalues.allocatedLargelcpvalue);
      large = out->largelcpvalues.spaceLargelcpvalue +
              out->largelcpvalues.nextfreeLargelcpvalue++;
      large->position = posoffset + pos;
      large->value = current;
      out->smalllcpvalues[pos] = LCPOVERFLOW;
    } else
    {
      out->smalllcpvalues[pos] = (uint8_t) current;
    }
    lcpsubtab->lcptabsum += (double) current;
    if (lcpsubtab->distlcpvalues != NULL)
    {
      gt_disc_distri_add(lcpsubtab->distlcpvalues, current);
    }
  }

  outsmalllcpvalues(out,width);
  if (out->largelcpvalues.nextfreeLargelcpvalue == 0)
  {
    /* no large values in this table */
    return;
  }
  out->totalnumoflargelcpvalues
    += out->largelcpvalues.nextfreeLargelcpvalue;
  gt_assert(out->outfpllvtab != NULL);
  gt_xfwrite(out->largelcpvalues.spaceLargelcpvalue,
             sizeof (*out->largelcpvalues.spaceLargelcpvalue),
             (size_t) out->largelcpvalues.nextfreeLargelcpvalue,
             out->outfpllvtab);
}
Exemplo n.º 23
0
/*
  Read the file <indexname> + GT_DESTABFILESUFFIX line by line and extract
  a key from each line with desc2key. All keys must have the same non-zero
  length, which must not exceed CHAR_MAX.

  If <sortkeys> is false, the keys have to appear in strictly increasing
  lexicographic order. They are written to the file
  <indexname> + GT_KEYSTABFILESUFFIX: one byte holding the key length,
  followed by each key terminated by a '\0' byte.

  If <sortkeys> is true, the key length must also not exceed
  MAXFIXEDKEYSIZE. The encoded sequence <indexname> is loaded, the keys are
  collected together with their line number, sorted with compareFixedkeys,
  and the sequences are written to stdout in FASTA format (line width 60)
  in the order of the sorted keys.

  Returns 0 on success and -1 on error, in which case <err> is set.
*/
int gt_extractkeysfromdesfile(const char *indexname,
                              bool sortkeys,
                              GtLogger *logger,
                              GtError *err)
{
  FILE *fpin, *fpout = NULL;
  GtStr *line = NULL;
  const char *keyptr;
  unsigned long keylen, constantkeylen = 0, linenum;/* incorrectorder = 0;*/
  bool haserr = false, firstdesc = true;
  char *previouskey = NULL;
  Fixedsizekey *keytab = NULL, *keytabptr = NULL;
  GtEncseq *encseq = NULL;
  unsigned long numofentries = 0;
  const unsigned long linewidth = 60UL;

  fpin = gt_fa_fopen_with_suffix(indexname,GT_DESTABFILESUFFIX,"rb",err);
  if (fpin == NULL)
  {
    return -1;
  }
  /* the key table file is only written when the keys are not sorted */
  if (!sortkeys)
  {
    fpout = gt_fa_fopen_with_suffix(indexname,GT_KEYSTABFILESUFFIX,"wb",err);
    if (fpout == NULL)
    {
      haserr = true;
    }
  }
  if (!haserr)
  {
    line = gt_str_new();
  }
  /* one description per line; the line number serves as sequence number.
     If an error occurred above, <line> is NULL, but then the loop
     condition fails before <line> is used */
  for (linenum = 0; !haserr && gt_str_read_next_line(line, fpin) != EOF;
       linenum++)
  {
    keyptr = desc2key(&keylen,gt_str_get(line),err);
    if (keyptr == NULL)
    {
      haserr = true;
      break;
    }
    if (keylen == 0)
    {
      gt_error_set(err,"key of length 0 in \"%s\" not expected",
                   gt_str_get(line));
      haserr = true;
      break;
    }
    if (firstdesc)
    {
      /* the first key determines the length all other keys must have; the
         length is later written as a single char, hence the limit */
      if (keylen > (unsigned long) CHAR_MAX)
      {
        gt_error_set(err,"key \"%*.*s\" of length %lu not allowed; "
                         "no key must be larger than %d",
                          (int) keylen,(int) keylen,keyptr,keylen,CHAR_MAX);
        haserr = true;
        break;
      }
      constantkeylen = keylen;
      previouskey = gt_malloc(sizeof (char) * (constantkeylen+1));
      firstdesc = false;
      if (!sortkeys)
      {
        /* file header: the common key length */
        gt_xfputc((char) constantkeylen,fpout);
      } else
      {
        GtEncseqLoader *el;
        if (constantkeylen > (unsigned long) MAXFIXEDKEYSIZE)
        {
          gt_error_set(err,"key \"%*.*s\" of length %lu not allowed; "
                           "no key must be larger than %d",
                            (int) keylen,(int) keylen,keyptr,keylen,
                            MAXFIXEDKEYSIZE);
          haserr = true;
          break;
        }
        /* load the encoded sequence; it provides the number of keys to
           expect and is needed for the FASTA output at the end */
        el = gt_encseq_loader_new();
        gt_encseq_loader_set_logger(el, logger);
        encseq = gt_encseq_loader_load(el, indexname, err);
        gt_encseq_loader_delete(el);
        if (encseq == NULL)
        {
          haserr = true;
          break;
        }
        numofentries = gt_encseq_num_of_sequences(encseq);
        gt_assert(numofentries > 0);
        keytab = gt_malloc(sizeof (*keytab) * numofentries);
        keytabptr = keytab;
      }
    } else
    {
      if (constantkeylen != keylen)
      {
        gt_error_set(err,"key \"%*.*s\" of length %lu: all keys must be of "
                         "the same length which for all previously seen "
                         "headers is %lu",
                         (int) keylen,(int) keylen,keyptr,keylen,
                         constantkeylen);
        haserr = true;
        break;
      }
      gt_assert(previouskey != NULL);
      /* without sorting, the input itself has to be strictly increasing */
      if (!sortkeys && strncmp(previouskey,keyptr,(size_t) constantkeylen) >= 0)
      {
        gt_error_set(err,"previous key \"%s\" is not lexicographically smaller "
                         "than current key \"%*.*s\"",
                         previouskey,(int) keylen,(int) keylen,keyptr);
        haserr = true;
        break;
        /*
        printf("previous key \"%s\" (no %lu) is lexicographically larger "
               "than current key \"%*.*s\"\n",
               previouskey,linenum,(int) keylen,(int) keylen,keyptr);
        incorrectorder++;
        */
      }
    }
    if (!sortkeys)
    {
      /* key followed by a terminating '\0' byte */
      gt_xfwrite(keyptr,sizeof *keyptr,(size_t) keylen,fpout);
      gt_xfputc('\0',fpout);
    } else
    {
      /* NOTE(review): nothing in this loop checks that keytabptr stays
         below keytab + numofentries; this presumably relies on the
         description file having exactly one line per sequence -- confirm */
      gt_assert(keytabptr != NULL);
      strncpy(keytabptr->key,keyptr,(size_t) constantkeylen);
      keytabptr->key[constantkeylen] = '\0';
      keytabptr->seqnum = linenum;
      keytabptr++;
    }
    /* remember the key for the order check of the next line; strncpy does
       not terminate here, so the '\0' is added explicitly */
    strncpy(previouskey,keyptr,(size_t) constantkeylen);
    previouskey[constantkeylen] = '\0';
    gt_str_reset(line);
  }
  if (!haserr)
  {
    gt_logger_log(logger,"number of keys of length %lu = %lu",
                constantkeylen,linenum);
    /*
    gt_logger_log(logger,"number of incorrectly ordered keys = %lu",
                incorrectorder);
    */
  }
  /* <line> and <fpout> may be NULL at this point.
     NOTE(review): relies on gt_str_delete and gt_fa_fclose accepting
     NULL -- confirm */
  gt_str_delete(line);
  gt_fa_fclose(fpin);
  gt_fa_fclose(fpout);
  gt_free(previouskey);
  if (!haserr && sortkeys)
  {
    /* NOTE(review): if the description file contains no line at all,
       keytabptr is still NULL here and the assertion fails */
    gt_assert(keytabptr != NULL);
    gt_assert(numofentries > 0);
    gt_assert(keytabptr == keytab + numofentries);
    qsort(keytab,(size_t) numofentries,sizeof (*keytab),compareFixedkeys);
    gt_assert(keytabptr != NULL);
    /* output the sequences in the order of their sorted keys */
    for (keytabptr = keytab; !haserr && keytabptr < keytab + numofentries;
         keytabptr++)
    {
      if (giextract_encodedseq2fasta(stdout,
                                     encseq,
                                     keytabptr->seqnum,
                                     NULL,
                                     linewidth,
                                     err) != 0)
      {
        haserr = true;
        break;
      }
    }
  }
  if (encseq != NULL)
  {
    gt_encseq_delete(encseq);
    encseq = NULL;
  }
  gt_free(keytab);
  return haserr ? -1 : 0;
}