コード例 #1
0
/* Loads the encoded sequence stored under <seqfile>, optionally mirrors it,
   and passes it on to output_sequence().
   - Lossless support is requested from the loader when the encseq options in
     <args> ask for it.
   - Mirroring is performed only when requested and only for DNA alphabets;
     for any other alphabet an error is set in <err>.
   Returns 0 on success and a non-zero value if any step failed. */
static int decode_sequence_file(const char *seqfile,
                                GtEncseqDecodeArguments *args,
                                GtError *err)
{
  GtEncseqLoader *loader;
  GtEncseq *es;
  int rval = 0;

  gt_error_check(err);
  gt_assert(seqfile);

  loader = gt_encseq_loader_new();
  if (gt_encseq_options_lossless_value(args->eopts))
    gt_encseq_loader_require_lossless_support(loader);

  es = gt_encseq_loader_load(loader, seqfile, err);
  if (!es) {
    rval = -1;
  }
  else if (gt_encseq_options_mirrored_value(args->eopts)) {
    if (gt_alphabet_is_dna(gt_encseq_alphabet(es))) {
      rval = gt_encseq_mirror(es, err);
    }
    else {
      gt_error_set(err, "mirroring is only defined on DNA sequences");
      rval = -1;
    }
  }

  /* only produce output if loading (and mirroring, if any) succeeded */
  if (rval == 0)
    rval = output_sequence(es, args, seqfile, err);

  /* <es> may be NULL here if loading failed; it is deleted unconditionally */
  gt_encseq_delete(es);
  gt_encseq_loader_delete(loader);
  return rval;
}
コード例 #2
0
ファイル: encseq_lua.c プロジェクト: kowsky/genometools
/* Lua binding: mirrors the encoded sequence given as argument 1 in place.
   Raises a Lua argument error if the sequence is already mirrored, and a Lua
   error carrying the GtError message if mirroring fails.
   Pushes no values on the Lua stack (returns 0). */
static int encseq_lua_mirror(lua_State *L)
{
  GtEncseq **encseq;
  GtError *err;
  encseq = check_encseq(L, 1);
  gt_assert(*encseq);
  luaL_argcheck(L, !gt_encseq_is_mirrored(*encseq), 1, "is already mirrored");
  /* Allocate the error object only after argument validation: luaL_argcheck()
     raises a Lua error (non-local exit) when its condition is false, which
     would leak an object allocated earlier.
     NOTE(review): check_encseq() presumably raises in the same way on a
     wrong argument type -- confirm. */
  err = gt_error_new();
  /* NOTE(review): gt_lua_error() presumably takes ownership of <err> and does
     not return -- confirm against its definition. */
  if (gt_encseq_mirror(*encseq, err) != 0)
    gt_lua_error(L, err);
  gt_error_delete(err);
  return 0;
}
コード例 #3
0
ファイル: gt_encseq_bitextract.c プロジェクト: 9beckert/TIR
/* Tool runner: loads the encoded sequence named by argv[parsed_args],
   optionally mirrors it, and then, depending on the tool arguments,
   - prints the two-bit encoding extracted at position <bitpos>,
   - prints the next two-bit-encoding stop position seen from <stoppos>,
   - prints all special ranges of the sequence.
   A position at or beyond the total sequence length is reported as an error.
   Returns 0 on success and a non-zero value on failure. */
static int gt_encseq_bitextract_runner(GT_UNUSED int argc, const char **argv,
                                       GT_UNUSED int parsed_args,
                                       void *tool_arguments,
                                       GT_UNUSED GtError *err)
{
  GtEncseqBitextractArguments *arguments = tool_arguments;
  GtEncseqLoader *loader;
  GtEncseq *es;
  int rval = 0;

  gt_error_check(err);
  gt_assert(arguments);

  loader = gt_encseq_loader_new();
  es = gt_encseq_loader_load(loader, argv[parsed_args], err);
  if (!es)
    rval = -1;
  else if (arguments->mirror)
    rval = gt_encseq_mirror(es, err);

  if (rval == 0) {
    /* the read mode and the derived direction are shared by all sub-tasks */
    const GtReadmode readmode
      = gt_readmode_parse(gt_str_get(arguments->readmode), NULL);
    const bool forward = !(GT_ISDIRREVERSE(readmode));

    if (arguments->bitpos != GT_UNDEF_ULONG) {
      if (arguments->bitpos >= gt_encseq_total_length(es)) {
        gt_error_set(err, "position %lu exceeds encoded sequence length of %lu",
                     arguments->bitpos, gt_encseq_total_length(es));
        rval = -1;
      }
      else {
        char bitstring[BUFSIZ];
        GtEndofTwobitencoding etbe;
        unsigned long retval;
        GtEncseqReader *reader
          = gt_encseq_create_reader_with_readmode(es, readmode,
                                                  arguments->bitpos);

        retval = gt_encseq_extract2bitencwithtwobitencodingstoppos(
                                                         &etbe, reader, es,
                                                         readmode,
                                                         arguments->bitpos);
        gt_bitsequence_tostring(bitstring, etbe.tbe);
        printf("Twobitencoding   %s\n"
               "unitsnotspecial  %u\n"
               "position         %lu\n"
               "returnvalue      %lu\n",
               bitstring,
               etbe.unitsnotspecial,
               arguments->bitpos,
               retval);
        gt_encseq_reader_delete(reader);
      }
    }

    if (rval == 0 && arguments->stoppos != GT_UNDEF_ULONG) {
      if (arguments->stoppos >= gt_encseq_total_length(es)) {
        gt_error_set(err, "position %lu exceeds encoded sequence length of %lu",
                     arguments->stoppos, gt_encseq_total_length(es));
        rval = -1;
      }
      else {
        /* the reader is created at position 0 and then re-initialised at
           <stoppos> before the stop position is queried */
        GtEncseqReader *reader
          = gt_encseq_create_reader_with_readmode(es, readmode, 0);

        gt_encseq_reader_reinit_with_readmode(reader, es, readmode,
                                              arguments->stoppos);
        printf("%lu: %lu\n", arguments->stoppos,
                             gt_getnexttwobitencodingstoppos(forward, reader));
        gt_encseq_reader_delete(reader);
      }
    }

    if (rval == 0 && arguments->specialranges
          && gt_encseq_has_specialranges(es)) {
      /* enumerate all special ranges in the chosen direction */
      GtRange range;
      GtSpecialrangeiterator *iter = gt_specialrangeiterator_new(es, forward);

      while (gt_specialrangeiterator_next(iter, &range))
        printf("%lu:%lu\n", range.start, range.end);
      gt_specialrangeiterator_delete(iter);
    }
  }

  gt_encseq_delete(es);
  gt_encseq_loader_delete(loader);
  return rval;
}
コード例 #4
0
ファイル: esa-map.c プロジェクト: Garonenur/genometools
/* Fills <suffixarray> for the index <indexname>.
   <demand> is a bit set (SARR_* flags) selecting which tables are needed:
   description/sds/ssp tables of the encoded sequence, suftab, lcptab
   (including the table of large lcp values), bwttab and bcktab.
   If <map> is true the suftab/lcptab/llvtab/bwttab tables are memory mapped
   via gt_fa_mmap_check_size_with_suffix(); otherwise they are opened as
   buffered streams via the INITBufferedfile macro.
   On any failure gt_freesuffixarray() is called on <suffixarray>.
   Returns 0 on success and -1 on error.
   NOTE(review): INITBufferedfile is a macro defined outside this function;
   it presumably uses the locals <haserr> and <err> directly to report
   failures -- confirm before renaming them. */
static int inputsuffixarray(bool map,
                            Suffixarray *suffixarray,
                            unsigned int demand,
                            const char *indexname,
                            GtLogger *logger,
                            GtError *err)
{
  bool haserr = false;
  GtEncseqLoader *el;
  GtUword totallength = 0;

  gt_error_check(err);
  initsuffixarray(suffixarray);
  /* configure the loader: each of the des/sds/ssp tables is required only if
     the corresponding demand bit is set */
  el = gt_encseq_loader_new();
  if (!(demand & SARR_DESTAB))
    gt_encseq_loader_do_not_require_des_tab(el);
  else
    gt_encseq_loader_require_des_tab(el);
  if (!(demand & SARR_SDSTAB))
    gt_encseq_loader_do_not_require_sds_tab(el);
  else
    gt_encseq_loader_require_sds_tab(el);
  if (!(demand & SARR_SSPTAB))
    gt_encseq_loader_do_not_require_ssp_tab(el);
  else
    gt_encseq_loader_require_ssp_tab(el);
  gt_encseq_loader_set_logger(el, logger);
  suffixarray->encseq = gt_encseq_loader_load(el, indexname, err);
  gt_encseq_loader_delete(el);
  if (suffixarray->encseq == NULL)
  {
    haserr = true;
  }
  if (!haserr)
  {
    /* NOTE(review): presumably reads the unsigned integer keys of the project
       file into <suffixarray> (fields such as mirroredencseq, longest and
       numoflargelcpvalues are consulted below) -- confirm */
    haserr = scanprjfileuintkeys(suffixarray,indexname,logger,err);
  }
  /* mirror the encoded sequence if the suffix array asks for a mirrored one
     and the loaded sequence is not mirrored yet */
  if (!haserr
        && suffixarray->mirroredencseq
        && !gt_encseq_is_mirrored(suffixarray->encseq))
  {
    if (gt_encseq_mirror(suffixarray->encseq, err) != 0)
      haserr = true;
  }
  if (!haserr)
  {
    /* taken after the optional mirroring; used for the bwttab/bcktab sizes */
    totallength = gt_encseq_total_length(suffixarray->encseq);
  }
  /* suffix table */
  if (!haserr && (demand & SARR_SUFTAB))
  {
    if (map)
    {
      /* nothing is mapped when there are no sorted suffixes */
      if (suffixarray->numberofallsortedsuffixes > 0)
      {
        suffixarray->suftab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                       GT_SUFTABSUFFIX,
                                       suffixarray->numberofallsortedsuffixes,
                                       sizeof (*suffixarray->suftab),
                                       err);
        if (suffixarray->suftab == NULL)
        {
          haserr = true;
        }
      }
    } else
    {
#if defined (_LP64) || defined (_WIN64)
      /* on 64-bit builds the entry width is derived from the file size: a
         file of exactly 4 bytes per sorted suffix is streamed as uint32_t,
         anything else as GtUword */
      off_t filesize = gt_file_size_with_suffix(indexname,GT_SUFTABSUFFIX);

      if (filesize == (off_t) sizeof (uint32_t) *
                              suffixarray->numberofallsortedsuffixes)
      {
        gt_logger_log(logger,"read suftab in units of 4 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_uint32_t,uint32_t,
                         GT_SUFTABSUFFIX);
      } else
      {
        gt_logger_log(logger,"read suftab in units of 8 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                         GT_SUFTABSUFFIX);
      }
#else
      /* NOTE(review): the log message says 4 bytes while the stream is typed
         GtUword; presumably GtUword is 4 bytes wide on these builds --
         confirm */
      gt_logger_log(logger,"read suftab in units of 4 bytes");
      INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                       GT_SUFTABSUFFIX);
#endif
    }
    /* the suffix table is only accepted if <longest> is defined */
    if (!haserr && !suffixarray->longest.defined)
    {
      gt_error_set(err,"longest not defined");
      haserr = true;
    }
  }
  /* lcp table and, if present, the table of large lcp values */
  if (!haserr && (demand & SARR_LCPTAB))
  {
    if (map)
    {
      if (suffixarray->numberofallsortedsuffixes > 0)
      {
        suffixarray->lcptab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                         GT_LCPTABSUFFIX,
                                         suffixarray->numberofallsortedsuffixes,
                                         sizeof (*suffixarray->lcptab),
                                         err);
        if (suffixarray->lcptab == NULL)
        {
          haserr = true;
        }
      }
    } else
    {
      INITBufferedfile(indexname,&suffixarray->lcptabstream,GtUchar,
                       GT_LCPTABSUFFIX);
      /* position the stream one GtUchar past the start of the file, i.e. the
         first lcp entry is skipped when streaming */
      if (!haserr &&
          fseek(suffixarray->lcptabstream.fp,
                (GtWord) sizeof (GtUchar),SEEK_SET))
      {
        gt_error_set(err,"fseek(esastream) failed: %s",strerror(errno));
        haserr = true;
      }
    }
    if (!haserr && !suffixarray->numoflargelcpvalues.defined)
    {
      gt_error_set(err,"numoflargelcpvalues not defined");
      haserr = true;
    }
    /* the large-lcp-value table is only opened if it has entries */
    if (!haserr && suffixarray->numoflargelcpvalues.valueunsignedlong > 0)
    {
      if (map)
      {
        suffixarray->llvtab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                           GT_LARGELCPTABSUFFIX,
                                           (GtUword)
                                           suffixarray->numoflargelcpvalues.
                                           valueunsignedlong,
                                           sizeof (*suffixarray->llvtab),
                                           err);
        if (suffixarray->llvtab == NULL)
        {
          haserr = true;
        }
      } else
      {
        INITBufferedfile(indexname,&suffixarray->llvtabstream,Largelcpvalue,
                         GT_LARGELCPTABSUFFIX);
      }
    }
  }
  /* bwt table: expected to hold totallength+1 entries when mapped */
  if (!haserr && (demand & SARR_BWTTAB))
  {
    if (map)
    {
      suffixarray->bwttab
        = gt_fa_mmap_check_size_with_suffix(indexname,
                                         GT_BWTTABSUFFIX,
                                         totallength+1,
                                         sizeof (*suffixarray->bwttab),
                                         err);
      if (suffixarray->bwttab == NULL)
      {
        haserr = true;
      }
    } else
    {
      INITBufferedfile(indexname,&suffixarray->bwttabstream,GtUchar,
                       GT_BWTTABSUFFIX);
    }
  }
  /* bucket table: always obtained through gt_bcktab_map(), independent of
     <map> */
  if (!haserr && (demand & SARR_BCKTAB))
  {
    suffixarray->bcktab
      = gt_bcktab_map(indexname,
                      gt_encseq_alphabetnumofchars(suffixarray->encseq),
                      suffixarray->prefixlength,
                      totallength+1,
                      true,
                      err);
    if (suffixarray->bcktab == NULL)
    {
      haserr = true;
    }
  }
  /* release everything acquired so far if any step failed */
  if (haserr)
  {
    gt_freesuffixarray(suffixarray);
  }
  return haserr ? -1 : 0;
}
コード例 #5
0
ファイル: gt_encseq2spm.c プロジェクト: 9beckert/TIR
/* Tool runner: loads the encoded sequence named by the <encseqinput>
   argument (without description support and with autosupport disabled),
   mirrors it, and then either
   - runs one of the k-mer scanning variants selected by <singlescan> (1-4),
     optionally timed when showtime is enabled, or
   - (<singlescan> == 0) runs storefirstcodes_getencseqkmers_twobitencoding(),
     optionally with per-thread suffix-prefix-match states for counting or
     output of matches.
   The single-strand mode is rejected with an error.
   Returns 0 on success and -1 on failure. */
static int gt_encseq2spm_runner(GT_UNUSED int argc,
                                GT_UNUSED const char **argv,
                                GT_UNUSED int parsed_args,
                                void *tool_arguments,
                                GtError *err)
{
    GtEncseq2spmArguments *arguments = tool_arguments;
    GtEncseqLoader *loader;
    GtEncseq *encseq;
    bool failed;

    gt_error_check(err);
    gt_assert(arguments);

    loader = gt_encseq_loader_new();
    gt_encseq_loader_drop_description_support(loader);
    gt_encseq_loader_disable_autosupport(loader);
    encseq = gt_encseq_loader_load(loader,
                                   gt_str_get(arguments->encseqinput),
                                   err);
    failed = (encseq == NULL);

    if (!failed)
    {
        if (arguments->singlestrand)
        {
            gt_error_set(err,"option -singlestand is not implemented");
            failed = true;
        }
        else
        {
            failed = (gt_encseq_mirror(encseq, err) != 0);
        }
    }

    if (!failed && arguments->singlescan > 0)
    {
        GtTimer *progress = NULL;

        if (gt_showtime_enabled())
        {
            /* progress descriptions for the scan modes 1..4, in that order */
            static char *const descriptions[] = {
                "to run fast scanning",
                "to run fast scanning with check",
                "to run fast scanning with output",
                "to run old scanning code"
            };
            char *description = NULL;

            if (arguments->singlescan >= 1 &&
                arguments->singlescan
                  <= sizeof descriptions / sizeof descriptions[0])
            {
                description = descriptions[arguments->singlescan - 1];
            }
            else
            {
                gt_error_set(err,"argument %u to option -singlescan not allowed",
                             arguments->singlescan);
                failed = true;
            }
            if (!failed)
            {
                progress = gt_timer_new_with_progress_description(description);
                gt_timer_start(progress);
            }
        }
        if (!failed)
        {
            unsigned int kmersize = 0;

            failed = gt_encseq2spm_kmersize(arguments, &kmersize, err);
            if (!failed)
            {
                if (arguments->singlescan == 4U)
                {
                    gt_rungetencseqkmers(encseq,kmersize);
                }
                else if (arguments->singlescan > 0)
                {
                    gt_firstcode_runkmerscan(encseq,
                                             arguments->singlescan - 1,
                                             kmersize,
                                             arguments->minmatchlength);
                }
            }
        }
        if (progress != NULL)
        {
            gt_timer_show_progress_final(progress, stdout);
            gt_timer_delete(progress);
        }
    }

    if (!failed && arguments->singlescan == 0)
    {
        const GtReadmode readmode = GT_READMODE_FORWARD;
#ifdef GT_THREADS_ENABLED
        const unsigned int threads = gt_jobs;
#else
        const unsigned int threads = 1U;
#endif
        GtBUstate_spmsk **states = NULL;
        GtLogger *logger;
        unsigned int kmersize, idx;

        /* one match-processing state per thread, only needed if matches are
           to be counted or output */
        if (arguments->countspms || arguments->outputspms)
        {
            states = gt_malloc(sizeof (*states) * threads);
            for (idx = 0; idx < threads; idx++)
            {
                states[idx] = gt_spmsk_inl_new(encseq,
                                               readmode,
                                               (unsigned long)
                                               arguments->minmatchlength,
                                               arguments->countspms,
                                               arguments->outputspms,
                                               gt_str_get(arguments->
                                                          encseqinput));
            }
        }
        logger = gt_logger_new(arguments->verbose,GT_LOGGER_DEFLT_PREFIX,
                               stdout);
        failed = gt_encseq2spm_kmersize(arguments, &kmersize, err);
        if (!failed)
        {
            failed = (storefirstcodes_getencseqkmers_twobitencoding(
                          encseq,
                          kmersize,
                          arguments->numofparts,
                          arguments->maximumspace,
                          arguments->minmatchlength,
                          arguments->checksuftab,       /* use false */
                          arguments->onlyaccum,         /* use false */
                          arguments->onlyallfirstcodes, /* use false */
                          arguments->addbscache_depth,  /* use 5U */
                          /* extra space needed by the function processing
                             the interval */
                          arguments->phase2extra,
                          !arguments->radixlarge,       /* use true */
                          /* use 2 without threads and 1 with threads */
                          arguments->radixparts,
                          states != NULL ? gt_spmsk_inl_process : NULL,
                          gt_spmsk_inl_process_end,
                          states,
                          logger,
                          err) != 0);
        }
        if (states != NULL)
        {
            unsigned long total = 0;

            /* deleting a state yields its match count; sum over all threads */
            for (idx = 0; idx < threads; idx++)
            {
                total += gt_spmsk_inl_delete(states[idx]);
            }
            if (arguments->countspms)
            {
                printf("number of suffix-prefix matches=%lu\n",total);
            }
            gt_free(states);
        }
        gt_logger_delete(logger);
    }

    gt_encseq_delete(encseq);
    gt_encseq_loader_delete(loader);
    return failed ? -1 : 0;
}
コード例 #6
0
ファイル: shu-encseq-gc.c プロジェクト: 9beckert/TIR
/* Unit test for gt_encseq_get_gc().
   Builds small in-memory DNA sequences and checks the GC content reported
   per sequence:
   - a sequence of only 'a' (expected 0.0) and one of only 'c' (expected 1.0),
   - "acgtacgt" and "acgtn", first without and then with special characters
     taken into account,
   - the same two sequences after mirroring, which yields four results.
   Returns 0 if all checks pass and a non-zero value otherwise; failure
   details are stored in <err>. */
int gt_encseq_gc_unit_test(GtError *err)
{
  int had_err = 0;
  double *results;
  GtEncseqBuilder *eb;
  GtEncseq *encseq;
  const char testseq1[] = "aaaaaa",
             testseq2[] = "cccccc",
             testseq3[] = "acgtacgt",
             testseq4[] = "acgtn";
  GtAlphabet *alpha;

  gt_error_check(err);

  alpha = gt_alphabet_new_dna();

  /* test a-seq */
  eb = gt_encseq_builder_new(alpha);
  gt_encseq_builder_create_ssp_tab(eb);
  gt_encseq_builder_enable_description_support(eb);
  gt_encseq_builder_add_cstr(eb, testseq1, 6UL, "only a");
  encseq = gt_encseq_builder_build(eb, err);
  if ((results = gt_encseq_get_gc(encseq,
                                  false,
                                  true,
                                  err)) != NULL)
  {
    gt_ensure(had_err, gt_double_equals_double(results[0], 0.0));
  }
  else
  {
    had_err = -1;
  }
  gt_free(results);
  gt_encseq_builder_delete(eb);
  gt_encseq_delete(encseq);

  if (!had_err)
  {
    /* test c-seq */
    eb = gt_encseq_builder_new(alpha);
    gt_encseq_builder_create_ssp_tab(eb);
    gt_encseq_builder_enable_description_support(eb);
    gt_encseq_builder_add_cstr(eb, testseq2, 6UL, "only c");
    encseq = gt_encseq_builder_build(eb, err);
    if ((results = gt_encseq_get_gc(encseq,
                                    false,
                                    true,
                                    err)) != NULL)
    {
      gt_ensure(had_err, gt_double_equals_one(results[0]));
    }
    else
    {
      had_err = -1;
    }
    gt_free(results);
    gt_encseq_builder_delete(eb);
    gt_encseq_delete(encseq);
  }

  if (!had_err)
  {
    /* test dna-seq and dna+special-seq */
    eb = gt_encseq_builder_new(alpha);
    gt_encseq_builder_create_ssp_tab(eb);
    gt_encseq_builder_enable_description_support(eb);
    gt_encseq_builder_add_cstr(eb, testseq3, 8UL, "0.5");
    gt_encseq_builder_add_cstr(eb, testseq4, 5UL, "0.5+special");
    encseq = gt_encseq_builder_build(eb, err);
    if ((results = gt_encseq_get_gc(encseq,
                                    false,
                                    true,
                                    err)) != NULL)
    {
      gt_ensure(had_err, gt_double_equals_double(results[0], 0.5));
      gt_ensure(had_err, gt_double_equals_double(results[1], 0.5));
    }
    else
    {
      had_err = -1;
    }
    gt_free(results);

    if (!had_err)
    {
      /* count special chars */
      if ((results = gt_encseq_get_gc(encseq,
                                      true,
                                      true,
                                      err)) != NULL)
      {
        gt_ensure(had_err, gt_double_equals_double(results[0], 0.5));
        gt_ensure(had_err, gt_double_equals_double(results[1], (2.0/5.0)));
      }
      else
      {
        had_err = -1;
      }
      gt_free(results);
    }

    gt_encseq_builder_delete(eb);
    gt_encseq_delete(encseq);
  }

  if (!had_err)
  {
    /* test dna-seq and dna+special-seq on a mirrored encoded sequence */
    eb = gt_encseq_builder_new(alpha);
    gt_encseq_builder_create_ssp_tab(eb);
    gt_encseq_builder_enable_description_support(eb);
    gt_encseq_builder_add_cstr(eb, testseq3, 8UL, "0.5");
    gt_encseq_builder_add_cstr(eb, testseq4, 5UL, "0.5+special");
    encseq = gt_encseq_builder_build(eb, err);
    /* add mirrored sequence */
    had_err = gt_encseq_mirror(encseq, err);

    /* Only compute GC content if mirroring succeeded: otherwise <err> may
       already be set, and it must not be handed to another callee in that
       state. */
    if (!had_err)
    {
      /* sequence wise */
      if ((results = gt_encseq_get_gc(encseq,
                                      false,
                                      true,
                                      err)) != NULL)
      {
        gt_ensure(had_err, gt_double_equals_double(results[0], 0.5));
        gt_ensure(had_err, gt_double_equals_double(results[1], 0.5));
        gt_ensure(had_err, gt_double_equals_double(results[2], 0.5));
        gt_ensure(had_err, gt_double_equals_double(results[3], 0.5));
      }
      else
      {
        had_err = -1;
      }
      gt_free(results);
    }

    if (!had_err)
    {
      /* count special chars */
      if ((results = gt_encseq_get_gc(encseq,
                                      true,
                                      true,
                                      err)) != NULL)
      {
        gt_ensure(had_err, gt_double_equals_double(results[0], 0.5));
        gt_ensure(had_err, gt_double_equals_double(results[1], (2.0/5.0)));
        gt_ensure(had_err, gt_double_equals_double(results[2], (2.0/5.0)));
        gt_ensure(had_err, gt_double_equals_double(results[3], 0.5));
      }
      else
      {
        had_err = -1;
      }
      gt_free(results);
    }
    gt_encseq_builder_delete(eb);
    gt_encseq_delete(encseq);
  }
  gt_alphabet_delete(alpha);
  return had_err;
}