Example #1
0
void gt_Outlcpinfo_delete(GtOutlcpinfo *outlcpinfo)
{
  if (outlcpinfo == NULL)
  {
    return;
  }
  gt_turningwheel_delete(outlcpinfo->turnwheel);
  if (outlcpinfo->lcpsubtab.lcp2file != NULL)
  {
    if (!outlcpinfo->swallow_tail_lcpvalues &&
        outlcpinfo->lcpsubtab.lcp2file->countoutputlcpvalues <
        outlcpinfo->numsuffixes2output)
    {
      outlcpinfo->lcpsubtab.lcp2file->countoutputlcpvalues
        += outmany0lcpvalues(outlcpinfo->numsuffixes2output -
                             outlcpinfo->lcpsubtab.lcp2file
                                                  ->countoutputlcpvalues,
                             outlcpinfo->lcpsubtab.lcp2file->outfplcptab);
    }
    gt_assert(outlcpinfo->swallow_tail_lcpvalues ||
              outlcpinfo->lcpsubtab.lcp2file->countoutputlcpvalues ==
              outlcpinfo->numsuffixes2output);
    GT_FREEARRAY(&outlcpinfo->lcpsubtab.lcp2file->largelcpvalues,
                 Largelcpvalue);
    gt_fa_fclose(outlcpinfo->lcpsubtab.lcp2file->outfplcptab);
    gt_fa_fclose(outlcpinfo->lcpsubtab.lcp2file->outfpllvtab);
    gt_free(outlcpinfo->lcpsubtab.lcp2file->reservoir);
    outlcpinfo->lcpsubtab.lcp2file->smalllcpvalues = NULL;
    outlcpinfo->lcpsubtab.lcp2file->reservoir = NULL;
    outlcpinfo->lcpsubtab.lcp2file->sizereservoir = 0;
    gt_free(outlcpinfo->lcpsubtab.lcp2file);
  } else
  {
    gt_free(outlcpinfo->lcpsubtab.tableoflcpvalues.bucketoflcpvalues);
#ifndef NDEBUG
    gt_free(outlcpinfo->lcpsubtab.tableoflcpvalues.isset);
#endif
  }
  gt_free(outlcpinfo->lcpsubtab.lcpprocess);
  outlcpinfo->lcpsubtab.tableoflcpvalues.bucketoflcpvalues = NULL;
#ifndef NDEBUG
  outlcpinfo->lcpsubtab.tableoflcpvalues.isset = NULL;
#endif
  outlcpinfo->lcpsubtab.tableoflcpvalues.numofentries = 0;
  if (outlcpinfo->lcpsubtab.distlcpvalues != NULL)
  {
    gt_disc_distri_show(outlcpinfo->lcpsubtab.distlcpvalues,NULL);
    gt_disc_distri_delete(outlcpinfo->lcpsubtab.distlcpvalues);
  }
  gt_free(outlcpinfo);
}
Example #2
0
static GtHcrSeqDecoder *hcr_seq_decoder_new(GtAlphabet *alpha, const char *name,
        GtError *err)
{
    GtHcrSeqDecoder *seq_dec = gt_malloc(sizeof (GtHcrSeqDecoder));
    GtBaseQualDistr *bqd = NULL;
    GtWord end_enc_start_sampling = 0;
    FILE *fp = NULL;
    GT_UNUSED size_t read,
              one = (size_t) 1;

    seq_dec->alpha = alpha;
    seq_dec->alphabet_size = gt_alphabet_size(alpha);
    seq_dec->cur_read = 0;
    seq_dec->data_iter = NULL;
    seq_dec->file_info_rbt = NULL;
    seq_dec->fileinfos = NULL;
    seq_dec->filename = gt_str_new_cstr(name);
    seq_dec->huff_dec = NULL;
    seq_dec->huffman = NULL;
    seq_dec->sampling = NULL;
    seq_dec->symbols = NULL;
    gt_str_append_cstr(seq_dec->filename, HCRFILESUFFIX);

    fp = gt_fa_fopen_with_suffix(name, HCRFILESUFFIX, "rb", err);
    if (gt_error_is_set(err)) {
        hcr_seq_decoder_delete(seq_dec);
        seq_dec = NULL;
    }
    else {
        hcr_read_file_info(seq_dec, fp);

        bqd = hcr_base_qual_distr_new_from_file(fp, seq_dec->alpha);
        seq_dec->qual_offset = bqd->qual_offset;

        read = gt_xfread_one(&end_enc_start_sampling, fp);
        gt_assert(read == one);

        seq_dec->start_of_encoding = decoder_calc_start_of_encoded_data(fp);

        seq_decoder_init_huffman(seq_dec, end_enc_start_sampling, bqd, err);
        if (gt_error_is_set(err)) {
            hcr_seq_decoder_delete(seq_dec);
            seq_dec = NULL;
        }
    }

    if (seq_dec != NULL) {
        gt_xfseek(fp, end_enc_start_sampling, SEEK_SET);
        seq_dec->sampling = gt_sampling_read(fp);

        seq_dec->file_info_rbt = seq_decoder_init_file_info(seq_dec->fileinfos,
                                 seq_dec->num_of_files);
    }

    hcr_base_qual_distr_delete(bqd);
    gt_fa_fclose(fp);
    return seq_dec;
}
Example #3
0
void gtr_delete(GtR *gtr)
{
  if (!gtr) return;
  gt_fa_fclose(gtr->logfp);
  gt_str_delete(gtr->testspacepeak);
  gt_str_delete(gtr->debugfp);
  gt_str_delete(gtr->test_only);
  gt_str_delete(gtr->manoutdir);
  gt_toolbox_delete(gtr->tools);
  gt_hashmap_delete(gtr->unit_tests);
  if (gtr->L) lua_close(gtr->L);
#ifndef WITHOUT_CAIRO
  gt_style_delete_without_state(gtr->style);
#endif
  gt_free(gtr);
}
int gt_cntlist_parse(const char *filename, bool alloc_cntlist,
    GtBitsequence **cntlist, GtUword *nofreads, GtError *err)
{
  int c, retval = 0;
  FILE *infp;

  gt_log_log("parse contained reads list file: %s", filename);
  infp = gt_fa_fopen(filename, "rb", err);

  if (infp == NULL)
    return -1;

  c = gt_xfgetc(infp);
  switch (c)
  {
    case EOF:
      gt_error_set(err, "%s: unexpected end of file", filename);
      retval = 1;
      break;
    case GT_CNTLIST_BIN_HEADER:
      gt_log_log("contained reads list format: BIN");
      retval = gt_cntlist_parse_bin(infp, alloc_cntlist, cntlist, nofreads,
          err);
      break;
    case GT_CNTLIST_BIT_HEADER:
      gt_log_log("contained reads list format: BIT");
      retval = gt_cntlist_parse_bit(infp, alloc_cntlist, cntlist, nofreads,
          err);
      break;
    case GT_CNTLIST_ASCII_HEADER:
      gt_xungetc(c, infp);
      gt_log_log("contained reads list format: ASCII");
      retval = gt_cntlist_parse_ascii(infp, alloc_cntlist, cntlist, nofreads,
          err);
      break;
    default:
      gt_error_set(err, "%s: unrecognized format", filename);
      retval = 1;
      break;
  }
  gt_fa_fclose(infp);

  return retval;
}
Example #5
0
static void gt_Sfxmappedrange_storetmp(GtSfxmappedrange *sfxmappedrange,
                                       GtSfxStoretype usedptrptr,
                                       GtSfxmappedrangetype type,
                                       bool writable)
{
  FILE *outfp;

  gt_assert(sfxmappedrange != NULL);
  sfxmappedrange->ptr = NULL;
  sfxmappedrange->filename = gt_str_new();
  sfxmappedrange->writable = writable;
  outfp = gt_xtmpfp(sfxmappedrange->filename);
  gt_assert(outfp != NULL);
  gt_log_log("write %s to file %s ("GT_WU" units of "GT_WU" bytes)",
             gt_str_get(sfxmappedrange->tablename),
             gt_str_get(sfxmappedrange->filename),
             (GtUword) sfxmappedrange->numofunits,
             (GtUword) sfxmappedrange->sizeofunit);
  switch (type) {
    case GtSfxGtBitsequence:
      gt_xfwrite(*(usedptrptr.bs),sfxmappedrange->sizeofunit,
                 sfxmappedrange->numofunits,outfp);
      sfxmappedrange->usedptrptr = (void**) usedptrptr.bs;
      gt_free(*(usedptrptr.bs));
      *(usedptrptr.bs) = NULL;
      break;
    case GtSfxunsignedlong:
      gt_xfwrite(*(usedptrptr.ulong),sfxmappedrange->sizeofunit,
                 sfxmappedrange->numofunits,outfp);
      sfxmappedrange->usedptrptr = (void**) usedptrptr.ulong;
      gt_free(*(usedptrptr.ulong));
      *(usedptrptr.ulong) = NULL;
      break;
    case GtSfxuint32_t:
      gt_xfwrite(*(usedptrptr.uint32),sfxmappedrange->sizeofunit,
                 sfxmappedrange->numofunits,outfp);
      sfxmappedrange->usedptrptr = (void**) usedptrptr.uint32;
      gt_free(*(usedptrptr.uint32));
      *(usedptrptr.uint32) = NULL;
      break;
   }
   gt_fa_fclose(outfp);
}
Example #6
0
int pckbucket2file(const GtStr *indexname,const Pckbuckettable *pckbuckettable,
                   GtError *err)
{
  FILE *fp;
  Seqpos seqposmaxdepth;

  gt_error_check(err);
  fp = opensfxfile(indexname,PCKBUCKETTABLE,"wb",err);
  if (fp == NULL)
  {
    return -1;
  }
  seqposmaxdepth = (Seqpos) pckbuckettable->maxdepth;
  gt_xfwrite(&seqposmaxdepth,sizeof (Seqpos),(size_t) 1,fp);
  gt_xfwrite(pckbuckettable->mbtab[0],sizeof (Mbtab),
             (size_t) pckbuckettable->maxnumofvalues,fp);
  gt_fa_fclose(fp);
  return 0;
}
int gt_cntlist_show(GtBitsequence *cntlist, GtUword nofreads,
    const char *path, bool binary, GtError *err)
{
  FILE *file;
  gt_assert(cntlist != NULL);
  if (path == NULL)
    file = stdout;
  else
  {
    file = gt_fa_fopen(path, binary ? "wb" : "w", err);
    if (file == NULL)
      return -1;
  }
  gt_assert(file != NULL);
  (binary ? gt_cntlist_show_bit : gt_cntlist_show_ascii)
    (cntlist, nofreads, file);
  if (path != NULL)
    gt_fa_fclose(file);
  return 0;
}
Example #8
0
int gt_pckbuckettable_2file(const char *indexname,
                            const Pckbuckettable *pckbuckettable,
                            GtError *err)
{
  FILE *fp;
  unsigned long seqposmaxdepth;

  gt_error_check(err);
  fp = gt_fa_fopen_with_suffix(indexname,PCKBUCKETTABLE,"wb",err);
  if (fp == NULL)
  {
    return -1;
  }
  seqposmaxdepth = (unsigned long) pckbuckettable->maxdepth;
  gt_xfwrite(&seqposmaxdepth,sizeof (unsigned long),(size_t) 1,fp);
  gt_xfwrite(pckbuckettable->mbtab[0],sizeof (Mbtab),
             (size_t) pckbuckettable->maxnumofvalues,fp);
  gt_fa_fclose(fp);
  return 0;
}
Example #9
0
void gt_file_delete(GtFile *file)
{
  if (!file) return;
  if (file->reference_count) {
    file->reference_count--;
    return;
  }
  switch (file->mode) {
    case GT_FILE_MODE_UNCOMPRESSED:
        if (!file->is_stdin)
          gt_fa_fclose(file->fileptr.file);
      break;
    case GT_FILE_MODE_GZIP:
        gt_fa_gzclose(file->fileptr.gzfile);
      break;
    case GT_FILE_MODE_BZIP2:
        gt_fa_bzclose(file->fileptr.bzfile);
      break;
    default: gt_assert(0);
  }
  gt_file_delete_without_handle(file);
}
Example #10
0
GtStr *gt_leftborderbuffer_delete(GtLeftborderOutbuffer *lbbuf,
                                  GtFirstcodesspacelog *fcsl,
                                  GT_UNUSED unsigned long expectedwritten)
{
  GtStr *outfilename;

  gt_assert(lbbuf != NULL);
  gt_leftborderbuffer_flush(lbbuf);
  gt_fa_fclose(lbbuf->fp);
  lbbuf->fp = NULL;
  gt_log_log("write %s to file %s (%lu units of size %u)",
             gt_str_get(lbbuf->name),
             gt_str_get(lbbuf->outfilename),
             lbbuf->totalwrite,(unsigned int) sizeof (*lbbuf->spaceuint32_t));
  gt_assert(lbbuf->spaceuint32_t != NULL);
  gt_free(lbbuf->spaceuint32_t);
  GT_FCI_SUBTRACTWORKSPACE(fcsl,gt_str_get(lbbuf->name));
  gt_assert(lbbuf->totalwrite == expectedwritten);
  outfilename = lbbuf->outfilename;
  gt_str_delete(lbbuf->name);
  gt_free(lbbuf);
  return outfilename;
}
Example #11
0
/*call function with linear gap costs for all given sequences */
static int gt_all_against_all_alignment_check(bool affine,
                                        GtAlignment *align,
                                        const GtLinspaceArguments *arguments,
                                        GtLinspaceManagement *spacemanager,
                                        const GtScoreHandler *scorehandler,
                                        const GtUchar *characters,
                                        GtUchar wildcardshow,
                                        const GtSequenceTable *sequence_table1,
                                        const GtSequenceTable *sequence_table2,
                                        GtWord left_dist,
                                        GtWord right_dist,
                                        GtTimer *linspacetimer,
                                        GtError *err)
{
  int had_err = 0;
  const GtUchar *useq, *vseq;
  GtUword i, j, ulen, vlen;

  gt_error_check(err);
  if (linspacetimer != NULL)
  {
    gt_timer_start(linspacetimer);
  }
  for (i = 0; !had_err && i < sequence_table1->size; i++)
  {
    ulen = gt_str_length(sequence_table1->seqarray[i]);
    useq = (const GtUchar*) gt_str_get(sequence_table1->seqarray[i]);
    for (j = 0; j< sequence_table2->size; j++)
    {
      vlen = gt_str_length(sequence_table2->seqarray[j]);
      vseq = (const GtUchar*) gt_str_get(sequence_table2->seqarray[j]);
      gt_alignment_reset(align);
      if (arguments->global)
      {
        if (arguments->diagonal)
        {
          if (gt_str_array_size(arguments->diagonalbonds) == 0)
          {
            left_dist = LEFT_DIAGONAL_SHIFT(arguments->similarity, ulen, vlen);
            right_dist = RIGHT_DIAGONAL_SHIFT(arguments->similarity, ulen,
                                              vlen);
          }
          if ((left_dist > MIN(0, (GtWord)vlen-(GtWord)ulen))||
              (right_dist < MAX(0, (GtWord)vlen-(GtWord)ulen)))
          {
            gt_error_set(err, "ERROR: invalid diagonalband for global "
                              "alignment (ulen: "GT_WU", vlen: "GT_WU")\n"
                              "left_dist <= MIN(0, vlen-ulen) and "
                              "right_dist >= MAX(0, vlen-ulen)", ulen, vlen);
            had_err = 1;
          }
          if (!had_err)
          {
            (affine ? gt_diagonalbandalign_affinegapcost_compute_generic
                    : gt_diagonalbandalign_compute_generic)
                       (spacemanager, scorehandler, align,
                        useq, 0, ulen, vseq, 0, vlen,
                        left_dist, right_dist);
          }
        } else
        {
          (affine ? gt_linearalign_affinegapcost_compute_generic
                  : gt_linearalign_compute_generic)
                             (spacemanager, scorehandler, align,
                              useq, 0, ulen, vseq, 0, vlen);
        }
      }
      else if (arguments->local)
      {
        (affine ? gt_linearalign_affinegapcost_compute_local_generic
                : gt_linearalign_compute_local_generic)
                    (spacemanager, scorehandler, align,
                     useq, 0, ulen, vseq, 0, vlen);
      }
      /* show alignment*/
      if (!had_err)
      {
        gt_assert(align != NULL);
        if (!strcmp(gt_str_get(arguments->outputfile),"stdout"))
        {
          alignment_show_with_sequences(useq, ulen, vseq, vlen, align,
                                        characters,
                                        wildcardshow, arguments->showscore,
                                        !arguments->scoreonly,
                                        arguments->showsequences,
                                        arguments->global,
                                        scorehandler, stdout);
        } else
        {
          FILE *fp = gt_fa_fopen_func(gt_str_get(arguments->outputfile),
                                                 "a", __FILE__,__LINE__,err);
          if (fp == NULL)
          {
            had_err = -1;
          } else
          {
            alignment_show_with_sequences(useq, ulen, vseq, vlen, align,
                                          characters, wildcardshow,
                                          arguments->showscore,
                                          !arguments->scoreonly,
                                          arguments->showsequences,
                                          arguments->global, scorehandler,fp);
            gt_fa_fclose(fp);
          }
        }
      }
    }
  }
  if (linspacetimer != NULL)
  {
    gt_timer_stop(linspacetimer);
  }
  if (!had_err && arguments->wildcardshow)
  {
    printf("# wildcards are represented by %c\n", wildcardshow);
  }
  return had_err;
}
Example #12
0
static int itersearchoverallkeys(const GtEncseq *encseq,
                                 const char *keytab,
                                 unsigned long numofkeys,
                                 unsigned long keysize,
                                 const GtStr *fileofkeystoextract,
                                 unsigned long linewidth,
                                 GtError *err)
{
  FILE *fp;
  GtStr *currentline;
  uint64_t linenum;
  unsigned long seqnum, countmissing = 0;
  bool haserr = false;
  Fastakeyquery fastakeyquery;

  if (linewidth == 0)
  {
    gt_error_set(err,"use option width to specify line width for formatting");
    return -1;
  }
  fp = gt_fa_fopen(gt_str_get(fileofkeystoextract),"r",err);
  if (fp == NULL)
  {
    return -1;
  }
  currentline = gt_str_new();
  fastakeyquery.fastakey = gt_malloc(sizeof (char) * (keysize+1));
  for (linenum = 0; gt_str_read_next_line(currentline, fp) != EOF; linenum++)
  {
    if (extractkeyfromcurrentline(&fastakeyquery,
                                  keysize,
                                  currentline,
                                  linenum,
                                  fileofkeystoextract,
                                  err) != 0)
    {
      haserr = true;
      break;
    }
    seqnum = searchfastaqueryindes(fastakeyquery.fastakey,keytab,numofkeys,
                                   keysize);
    if (seqnum < numofkeys)
    {
      if (giextract_encodedseq2fasta(stdout,
                                     encseq,
                                     seqnum,
                                     &fastakeyquery,
                                     linewidth,
                                     err) != 0)
      {
        haserr = true;
        break;
      }
    } else
    {
      countmissing++;
    }
    gt_str_reset(currentline);
  }
  if (!haserr && countmissing > 0)
  {
    printf("# number of unsatified fastakey-queries: %lu\n",countmissing);
  }
  gt_str_delete(currentline);
  gt_fa_fclose(fp);
  gt_free(fastakeyquery.fastakey);
  return haserr ? - 1 : 0;
}
Example #13
0
int gt_extractkeysfromdesfile(const char *indexname,
                              bool sortkeys,
                              GtLogger *logger,
                              GtError *err)
{
  FILE *fpin, *fpout = NULL;
  GtStr *line = NULL;
  const char *keyptr;
  unsigned long keylen, constantkeylen = 0, linenum;/* incorrectorder = 0;*/
  bool haserr = false, firstdesc = true;
  char *previouskey = NULL;
  Fixedsizekey *keytab = NULL, *keytabptr = NULL;
  GtEncseq *encseq = NULL;
  unsigned long numofentries = 0;
  const unsigned long linewidth = 60UL;

  fpin = gt_fa_fopen_with_suffix(indexname,GT_DESTABFILESUFFIX,"rb",err);
  if (fpin == NULL)
  {
    return -1;
  }
  if (!sortkeys)
  {
    fpout = gt_fa_fopen_with_suffix(indexname,GT_KEYSTABFILESUFFIX,"wb",err);
    if (fpout == NULL)
    {
      haserr = true;
    }
  }
  if (!haserr)
  {
    line = gt_str_new();
  }
  for (linenum = 0; !haserr && gt_str_read_next_line(line, fpin) != EOF;
       linenum++)
  {
    keyptr = desc2key(&keylen,gt_str_get(line),err);
    if (keyptr == NULL)
    {
      haserr = true;
      break;
    }
    if (keylen == 0)
    {
      gt_error_set(err,"key of length 0 in \"%s\" not expected",
                   gt_str_get(line));
      haserr = true;
      break;
    }
    if (firstdesc)
    {
      if (keylen > (unsigned long) CHAR_MAX)
      {
        gt_error_set(err,"key \"%*.*s\" of length %lu not allowed; "
                         "no key must be larger than %d",
                          (int) keylen,(int) keylen,keyptr,keylen,CHAR_MAX);
        haserr = true;
        break;
      }
      constantkeylen = keylen;
      previouskey = gt_malloc(sizeof (char) * (constantkeylen+1));
      firstdesc = false;
      if (!sortkeys)
      {
        gt_xfputc((char) constantkeylen,fpout);
      } else
      {
        GtEncseqLoader *el;
        if (constantkeylen > (unsigned long) MAXFIXEDKEYSIZE)
        {
          gt_error_set(err,"key \"%*.*s\" of length %lu not allowed; "
                           "no key must be larger than %d",
                            (int) keylen,(int) keylen,keyptr,keylen,
                            MAXFIXEDKEYSIZE);
          haserr = true;
          break;
        }
        el = gt_encseq_loader_new();
        gt_encseq_loader_set_logger(el, logger);
        encseq = gt_encseq_loader_load(el, indexname, err);
        gt_encseq_loader_delete(el);
        if (encseq == NULL)
        {
          haserr = true;
          break;
        }
        numofentries = gt_encseq_num_of_sequences(encseq);
        gt_assert(numofentries > 0);
        keytab = gt_malloc(sizeof (*keytab) * numofentries);
        keytabptr = keytab;
      }
    } else
    {
      if (constantkeylen != keylen)
      {
        gt_error_set(err,"key \"%*.*s\" of length %lu: all keys must be of "
                         "the same length which for all previously seen "
                         "headers is %lu",
                         (int) keylen,(int) keylen,keyptr,keylen,
                         constantkeylen);
        haserr = true;
        break;
      }
      gt_assert(previouskey != NULL);
      if (!sortkeys && strncmp(previouskey,keyptr,(size_t) constantkeylen) >= 0)
      {
        gt_error_set(err,"previous key \"%s\" is not lexicographically smaller "
                         "than current key \"%*.*s\"",
                         previouskey,(int) keylen,(int) keylen,keyptr);
        haserr = true;
        break;
        /*
        printf("previous key \"%s\" (no %lu) is lexicographically larger "
               "than current key \"%*.*s\"\n",
               previouskey,linenum,(int) keylen,(int) keylen,keyptr);
        incorrectorder++;
        */
      }
    }
    if (!sortkeys)
    {
      gt_xfwrite(keyptr,sizeof *keyptr,(size_t) keylen,fpout);
      gt_xfputc('\0',fpout);
    } else
    {
      gt_assert(keytabptr != NULL);
      strncpy(keytabptr->key,keyptr,(size_t) constantkeylen);
      keytabptr->key[constantkeylen] = '\0';
      keytabptr->seqnum = linenum;
      keytabptr++;
    }
    strncpy(previouskey,keyptr,(size_t) constantkeylen);
    previouskey[constantkeylen] = '\0';
    gt_str_reset(line);
  }
  if (!haserr)
  {
    gt_logger_log(logger,"number of keys of length %lu = %lu",
                constantkeylen,linenum);
    /*
    gt_logger_log(logger,"number of incorrectly ordered keys = %lu",
                incorrectorder);
    */
  }
  gt_str_delete(line);
  gt_fa_fclose(fpin);
  gt_fa_fclose(fpout);
  gt_free(previouskey);
  if (!haserr && sortkeys)
  {
    gt_assert(keytabptr != NULL);
    gt_assert(numofentries > 0);
    gt_assert(keytabptr == keytab + numofentries);
    qsort(keytab,(size_t) numofentries,sizeof (*keytab),compareFixedkeys);
    gt_assert(keytabptr != NULL);
    for (keytabptr = keytab; !haserr && keytabptr < keytab + numofentries;
         keytabptr++)
    {
      if (giextract_encodedseq2fasta(stdout,
                                     encseq,
                                     keytabptr->seqnum,
                                     NULL,
                                     linewidth,
                                     err) != 0)
      {
        haserr = true;
        break;
      }
    }
  }
  if (encseq != NULL)
  {
    gt_encseq_delete(encseq);
    encseq = NULL;
  }
  gt_free(keytab);
  return haserr ? -1 : 0;
}
Example #14
0
static Fastakeyquery *readfileofkeystoextract(bool verbose,
                                              unsigned long *numofqueries,
                                              const GtStr *fileofkeystoextract,
                                              GtError *err)
{
  FILE *fp;
  GtStr *currentline;
  bool haserr = false;
  uint64_t linenum;
  Fastakeyquery *fastakeyqueries;
#undef SKDEBUG
#ifdef SKDEBUG
  unsigned long i;
#endif

  gt_error_check(err);
  *numofqueries = gt_file_number_of_lines(gt_str_get(fileofkeystoextract));
  if (*numofqueries == 0)
  {
    gt_error_set(err,"empty file \"%s\" not allowed",
                 gt_str_get(fileofkeystoextract));
    return NULL;
  }
  fp = gt_fa_fopen(gt_str_get(fileofkeystoextract),"r",err);
  if (fp == NULL)
  {
    return NULL;
  }
  if (verbose)
  {
    printf("# opened keyfile \"%s\"\n",gt_str_get(fileofkeystoextract));
  }
  fastakeyqueries = gt_malloc(sizeof (*fastakeyqueries) * (*numofqueries));
  currentline = gt_str_new();
  for (linenum = 0; gt_str_read_next_line(currentline, fp) != EOF; linenum++)
  {
    if (extractkeyfromcurrentline(fastakeyqueries + linenum,
                                  0,
                                  currentline,
                                  linenum,
                                  fileofkeystoextract,
                                  err) != 0)
    {
      haserr = true;
      break;
    }
    gt_str_reset(currentline);
  }
  gt_str_delete(currentline);
  gt_fa_fclose(fp);
  if (haserr)
  {
    fastakeyqueries_delete(fastakeyqueries,*numofqueries);
    return NULL;
  }
  qsort(fastakeyqueries,(size_t) *numofqueries,sizeof (*fastakeyqueries),
        comparefastakeys);
  if (verbose)
  {
    printf("# %lu fastakey-queries successfully parsed and sorted\n",
            *numofqueries);
  }
  *numofqueries = remdupsfastakeyqueries(fastakeyqueries,*numofqueries,verbose);
#ifdef SKDEBUG
  for (i=0; i<*numofqueries; i++)
  {
    printf("%lu %s\n",i,fastakeyqueries[i].fastakey);
  }
#endif
  return fastakeyqueries;
}
static int gt_kmer_database_runner(GT_UNUSED int argc, const char **argv,
                                   int parsed_args, void *tool_arguments,
                                   GtError *err)
{
  GtKmerDatabaseArguments *arguments = tool_arguments;
  int had_err = 0;
  GtEncseq       *es;
  GtUword        es_length,
                 nu_kmer_codes = 0;
  GtKmerDatabase *compare_db = NULL,
                 *db = NULL;
  GtLogger *logger;
  FILE *fp = NULL;
  GtHashmap *kmer_hash = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->use_hash)
    kmer_hash = gt_hashmap_new(GT_HASH_DIRECT, NULL,
                               (GtFree) gt_kmer_database_delete_hash_value);
  if (arguments->bench)
    timer = gt_timer_new_with_progress_description("loading encoded sequence");

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  if (arguments->verbose && gt_str_length(arguments->print_filename) > 0UL) {
    fp = gt_fa_fopen(gt_str_get(arguments->print_filename), "w", err);
    gt_logger_set_target(logger, fp);
  }

  if (!had_err) {
    GtEncseqLoader *es_l;
    if (arguments->bench)
      gt_timer_start(timer);
    es_l = gt_encseq_loader_new();
    es = gt_encseq_loader_load(es_l, argv[parsed_args], err);
    if (arguments->bench)
      gt_timer_show_progress(timer, "saving kmers (+iterating over file)",
                             stdout);
    if (es == NULL) {
      had_err = -1;
    }
    gt_encseq_loader_delete(es_l);
  }
  if (!had_err) {
    es_length = gt_encseq_total_length(es);
    if (es_length < (GtUword) arguments->kmersize) {
      gt_error_set(err, "Input is too short for used kmersize. File length: "
                   GT_WU " kmersize: %u", es_length, arguments->kmersize);
      had_err = -1;
    }
  }
  if (!had_err) {
    GtAlphabet *alphabet;
    alphabet = gt_encseq_alphabet(es);
    if (arguments->bench)
    nu_kmer_codes = gt_power_for_small_exponents(
                                            gt_alphabet_num_of_chars(alphabet),
                                            arguments->kmersize);
    if (!arguments->merge_only && !arguments->use_hash && !arguments->bench) {
      compare_db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                arguments->kmersize, arguments->sb_size, es);
    }
    if (!arguments->use_hash) {
      db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                arguments->kmersize,
                                arguments->sb_size, es);
      if (arguments->cutoff) {
        if (arguments->mean_cutoff)
          gt_kmer_database_use_mean_cutoff(db, (GtUword) 2,
                                           arguments->cutoff_value);
        else
          gt_kmer_database_set_cutoff(db, arguments->cutoff_value);
        if (!arguments->prune)
          gt_kmer_database_set_prune(db);
      }
    }
  }

  if (!had_err) {
    GtUword startpos = 0,
            endpos;
    GtKmercodeiterator *iter;
    const GtKmercode *kmercode = NULL;
    iter = gt_kmercodeiterator_encseq_new(es, GT_READMODE_FORWARD,
                                          arguments->kmersize, 0);
    while (!had_err && startpos < es_length - (arguments->kmersize - 1)) {
      GtUword startpos_add_kmer = startpos;
      if (arguments->merge_only) {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max((arguments->sb_size - 1) * 2));
        if (endpos > es_length)
          endpos = es_length;
      }
      else {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max(arguments->sb_size - 1));
      }
      gt_kmercodeiterator_reset(iter, GT_READMODE_FORWARD, startpos);
      while ((kmercode = gt_kmercodeiterator_encseq_next(iter)) != NULL &&
             startpos_add_kmer <= endpos - (arguments->kmersize - 1)) {
        if (!arguments->merge_only && !arguments->use_hash &&
            !kmercode->definedspecialposition && !arguments->bench) {
          gt_kmer_database_add_kmer(compare_db, kmercode->code,
                                    startpos_add_kmer);
        }
        if (arguments->use_hash && !kmercode->definedspecialposition) {
          gt_kmer_database_add_to_hash(kmer_hash, kmercode->code,
                                       startpos_add_kmer);
        }
        startpos_add_kmer++;
      }
      if (!arguments->use_hash) {
        gt_kmer_database_add_interval(db, startpos, endpos);
        gt_kmer_database_print_buffer(db, logger);
        if (!arguments->bench)
          had_err = gt_kmer_database_check_consistency(db, err);
      }
      startpos = endpos + 1;
    }
    if (!arguments->use_hash) {
      gt_kmer_database_flush(db);
      gt_kmer_database_print_buffer(db, logger);
      if (!had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(db, err);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(compare_db, err);
      if (!arguments->merge_only && !arguments->bench)
        gt_kmer_database_print(compare_db, logger, true);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_compare(compare_db, db, err);
      gt_kmer_database_print(db, logger, true);
    }
    gt_kmercodeiterator_delete(iter);
  }

  if (arguments->bench) {
    GtKmerStartpos pos;
    GtArrayGtUword *pos_hash;
    GtUword rand_access = (GtUword) 50000000,
            rand_code,
            i,
            sum = 0;
    gt_timer_show_progress(timer, "random access", stdout);
    for (i = 0; i < rand_access; i++) {
      rand_code = gt_rand_max(nu_kmer_codes - 1);
      if (arguments->use_hash) {
        pos_hash = gt_hashmap_get(kmer_hash, (const void *) rand_code);
        if (pos_hash != NULL)
          sum += pos_hash->spaceGtUword[pos_hash->nextfreeGtUword - 1];
      }
      else {
        pos = gt_kmer_database_get_startpos(db, rand_code);
        if (pos.no_positions > 0)
          sum += pos.startpos[pos.no_positions - 1];
      }
    }
    printf("sum: " GT_WU "\n", sum);

    gt_timer_show_progress(timer, "", stdout);
    gt_timer_stop(timer);
    gt_timer_delete(timer);
  }
  if (arguments->use_hash)
    gt_hashmap_delete(kmer_hash);
  gt_encseq_delete(es);
  if (!arguments->use_hash)
    gt_kmer_database_delete(db);
  if (!arguments->merge_only && !arguments->bench)
    gt_kmer_database_delete(compare_db);
  gt_logger_delete(logger);
  gt_fa_fclose(fp);

  return had_err;
}
/*read condenseq data structure from file*/
GtCondenseq *gt_condenseq_new_from_file(const char *indexname,
                                        GtLogger *logger, GtError *err)
{
  int had_err = 0;
  FILE* fp;
  GtEncseqLoader *esl;
  GtEncseq *unique_es;
  GtCondenseq *condenseq = NULL;
  /*load unique_es*/
  esl = gt_encseq_loader_new();
  unique_es = gt_encseq_loader_load(esl, indexname, err);
  if (!unique_es)
    had_err = -1;
  if (!had_err) {
    gt_encseq_loader_delete(esl);
    condenseq = condenseq_new_empty(gt_encseq_alphabet(unique_es));
    condenseq->filename = gt_cstr_dup(indexname);
    condenseq->unique_es = unique_es;
    fp = gt_fa_fopen_with_suffix(indexname, GT_CONDENSEQ_FILE_SUFFIX,
                                 "rb", err);
    if (fp == NULL) {
      had_err = -1;
    }
    else {
      had_err = condenseq_io(condenseq, fp, gt_io_error_fread, err);
      if (!had_err) {
        GtUword i;
        gt_assert(condenseq->uniques);
        gt_assert(condenseq->links);
        gt_fa_fclose(fp);
        /*create link array for each unique entry*/
        for (i = 0; i < condenseq->udb_nelems; i++) {
          GT_INITARRAY(&(condenseq->uniques[i].links),uint32_t);
        }
        /* check for overflows */
        if (condenseq->ldb_nelems > (GtUword) ((uint32_t) 0 - (uint32_t) 1)) {
          gt_error_set(err, "Overflow, to many link-elements. Can't be stored");
          had_err = -1;
        }
        /* iterate through link entrys and store ids in corresponding unique
          entry array */
        for (i = 0; !had_err && (GtUword) i < condenseq->ldb_nelems; i++) {
          GtUword uid = condenseq->links[i].unique_id;
          gt_assert(uid < condenseq->udb_nelems);
          GT_STOREINARRAY(&(condenseq->uniques[uid].links),
                          uint32_t,
                          10,
                          (uint32_t) i);
        }
      }
    }
  }
  if (!had_err) {
    gt_assert(condenseq != NULL);
    if (condenseq->id_len != GT_UNDEF_UWORD)
      gt_logger_log(logger, "IDs const len: " GT_WU, condenseq->id_len);
    else
      gt_logger_log(logger, "using sdstab to access IDs");
  }
  if (had_err) {
    gt_condenseq_delete(condenseq);
    condenseq = NULL;
  }
  return (condenseq);
}
static int gt_condenseq_compress_runner(GT_UNUSED int argc, const char **argv,
                                        int parsed_args, void *tool_arguments,
                                        GtError *err)
{
  GtCondenseqCompressArguments *arguments = tool_arguments;
  GtLogger *logger,
           *kdb_logger;
  FILE *kmer_fp = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);
  kdb_logger = gt_logger_new(arguments->kdb, GT_LOGGER_DEFLT_PREFIX, stderr);
  if (arguments->kdb) {
    kmer_fp = gt_fa_fopen("kmer_db.out", "w", err);
    gt_logger_set_target(kdb_logger, kmer_fp);
  }

  if (gt_str_length(arguments->indexname) == 0UL) {
    char *basenameptr;
    basenameptr = gt_basename(argv[parsed_args]);
    gt_str_set(arguments->indexname, basenameptr);
    gt_free(basenameptr);
  }

  if (!had_err) {
    GtEncseqLoader *es_l = gt_encseq_loader_new();
    arguments->input_es = gt_encseq_loader_load(es_l, argv[parsed_args], err);
    if (arguments->input_es == NULL)
      had_err = -1;
    gt_encseq_loader_delete(es_l);
  }

  if (!had_err) {
    if (arguments->minalignlength == GT_UNDEF_UWORD)
      arguments->minalignlength = arguments->initsize != GT_UNDEF_UWORD ?
                                  arguments->initsize / (GtUword) 3UL :
                                  GT_UNDEF_UWORD;
    if (arguments->windowsize == GT_UNDEF_UINT)
      arguments->windowsize = arguments->minalignlength != GT_UNDEF_UWORD ?
                              (unsigned int) (arguments->minalignlength / 5U) :
                              GT_UNDEF_UINT;
    if (arguments->windowsize < 4U)
      arguments->windowsize = 4U;
    if (arguments->kmersize == GT_UNDEF_UINT) {
      unsigned int size =
        gt_alphabet_num_of_chars(gt_encseq_alphabet(arguments->input_es));
      /* size^k ~= 100000 */
      gt_safe_assign(arguments->kmersize,
                     gt_round_to_long(gt_log_base(100000.0, (double) size)));
      gt_logger_log(logger, "|A|: %u, k: %u",
                    size, arguments->kmersize);
    }

    if (arguments->windowsize == GT_UNDEF_UINT) {
      arguments->windowsize = 5U * arguments->kmersize;
    }
    if (arguments->minalignlength == GT_UNDEF_UWORD) {
      arguments->minalignlength = (GtUword) (3UL * arguments->windowsize);
    }
    if (arguments->initsize == GT_UNDEF_UWORD) {
      arguments->initsize = (GtUword) (3UL * arguments->minalignlength);
    }
  }
  if (!had_err &&
      arguments->windowsize <= arguments->kmersize) {
    gt_error_set(err, "-windowsize (%u) must be larger -kmersize (%u)!",
                 arguments->windowsize, arguments->kmersize);
    had_err = -1;
  }
  if (!had_err &&
      arguments->minalignlength < (GtUword) arguments->windowsize) {
    gt_error_set(err, "-alignlength (" GT_WU ") must be at least "
                 "-windowsize (%u)!", arguments->minalignlength,
                 arguments->windowsize);
    had_err = -1;
  }
  if (!had_err && (arguments->initsize < arguments->minalignlength)) {
    gt_error_set(err, "-initsize (" GT_WU ") must be at least "
                 "-alignlength (" GT_WU ")!", arguments->initsize,
                 arguments->minalignlength);
    had_err = -1;
  }

  if (!had_err) {
    GtCondenseqCreator *ces_c;

    if (!had_err) {
      ces_c = gt_condenseq_creator_new(arguments->initsize,
                                       arguments->minalignlength,
                                       arguments->xdrop,
                                       &(arguments->scores),
                                       arguments->kmersize,
                                       arguments->windowsize,
                                       logger,
                                       err);
      if (ces_c == NULL)
        had_err = -1;
    }
    if (!had_err) {
      if (arguments->cutoff_value == GT_UNDEF_UWORD)
        gt_condenseq_creator_use_mean_cutoff(ces_c);
      else if (arguments->cutoff_value == 0)
        gt_condenseq_creator_disable_cutoff(ces_c);
      else
        gt_condenseq_creator_set_cutoff(ces_c, arguments->cutoff_value);
      gt_condenseq_creator_set_mean_fraction(ces_c, arguments->fraction);
      if (arguments->prune)
        gt_condenseq_creator_disable_prune(ces_c);
      if (arguments->brute)
        gt_condenseq_creator_enable_brute_force(ces_c);
      if (!arguments->diags)
        gt_condenseq_creator_disable_diagonals(ces_c);
      if (arguments->full_diags)
        gt_condenseq_creator_enable_full_diagonals(ces_c);
      if (arguments->clean_percent != GT_UNDEF_UINT)
        gt_condenseq_creator_set_diags_clean_limit(ces_c,
                                                   arguments->clean_percent);

      had_err = gt_condenseq_creator_create(ces_c,
                                            arguments->indexname,
                                            arguments->input_es,
                                            logger, kdb_logger, err);

      gt_condenseq_creator_delete(ces_c);
    }
  }

  gt_logger_delete(logger);
  gt_logger_delete(kdb_logger);
  if (arguments->kdb)
    gt_fa_fclose(kmer_fp);
  return had_err;
}