コード例 #1
0
ファイル: eis-bwtseq.c プロジェクト: AnnSeidel/genometools
/**
 * Cross-check a BWT sequence index against the suffix array project it is
 * supposed to represent.
 *
 * The reference suffix array (suffix table and encoded sequence) is mapped
 * for projectName and the sequence lengths are compared unconditionally.
 * Further checks are selected by checkFlags:
 *  - VERIFY_BWTSEQ_SUFVAL: every position carrying locate information must
 *    locate to the value stored in the reference suffix table. Requesting
 *    this for an index without locate information is an error.
 *  - VERIFY_BWTSEQ_LFMAPWALK: starting at the terminator position, the
 *    LF-mapping is followed and each symbol is compared with the reference
 *    sequence read from its last position to its first. Only attempted if
 *    the index has locate information; fails if the index is not marked
 *    BWTReversiblySorted.
 *  - VERIFY_BWTSEQ_CONTEXT: randomly chosen substrings obtained from the
 *    context retriever are compared with the reference sequence.
 * If the index has locate information, the terminator position is also
 * compared with suffixArray.longest when that value is defined.
 *
 * @param bwtSeq       index to verify (must not be NULL)
 * @param projectName  name of the reference suffix array project
 * @param checkFlags   bitwise combination of the VERIFY_BWTSEQ_* flags above
 * @param tickPrint    if non-zero, a '.' is written to fp after every
 *                     tickPrint positions of the suffix value comparison
 * @param fp           stream receiving the progress ticks
 * @param verbosity    logger passed on to gt_mapsuffixarray
 * @param err          error object; set whenever the return value differs
 *                     from VERIFY_BWTSEQ_NO_ERROR
 * @return VERIFY_BWTSEQ_NO_ERROR on success, otherwise the code of the
 *         first failed check
 */
enum verifyBWTSeqErrCode
gt_BWTSeqVerifyIntegrity(BWTSeq *bwtSeq, const char *projectName,
                      int checkFlags,
                      GtUword tickPrint, FILE *fp,
                      GtLogger *verbosity, GtError *err)
{
  Suffixarray suffixArray;
  struct extBitsRetrieval extBits;
  bool suffixArrayIsInitialized = false, extBitsAreInitialized = false;
  enum verifyBWTSeqErrCode retval = VERIFY_BWTSEQ_NO_ERROR;
  /* single-pass loop: "break" jumps to the common cleanup code below */
  do
  {
    GtUword seqLen;
    gt_assert(bwtSeq && projectName && err);
    gt_error_check(err);

    initExtBitsRetrieval(&extBits);
    extBitsAreInitialized = true;

    if (gt_mapsuffixarray(&suffixArray,
                       SARR_SUFTAB | SARR_ESQTAB, projectName, verbosity, err))
    {
      gt_error_set(err, "Cannot load reference suffix array project with"
                    " demand for suffix table file and encoded sequence"
                    " for project: %s", projectName);
      retval = VERIFY_BWTSEQ_REFLOAD_ERROR;
      break;
    }
    suffixArrayIsInitialized = true;
    /* the index sequence holds one symbol more than the encoded sequence */
    seqLen = gt_encseq_total_length(suffixArray.encseq) + 1;
    if (BWTSeqLength(bwtSeq) != seqLen)
    {
      gt_error_set(err, "length mismatch for suffix array project %s and "
                "bwt sequence index", projectName);
      retval = VERIFY_BWTSEQ_LENCOMPARE_ERROR;
      break;
    }

    if (checkFlags & VERIFY_BWTSEQ_SUFVAL
        && BWTSeqHasLocateInformation(bwtSeq))
    {
      GtUword i;
      for (i = 0; i < seqLen && retval == VERIFY_BWTSEQ_NO_ERROR; ++i)
      {
        if (gt_BWTSeqPosHasLocateInfo(bwtSeq, i, &extBits))
        {
          GtUword sfxArrayValue = gt_BWTSeqLocateMatch(bwtSeq, i,
                                                             &extBits);
          if (sfxArrayValue != ESASUFFIXPTRGET(suffixArray.suftab,i))
          {
            gt_error_set(err, "Failed suffix array value comparison"
                          " at position "GT_WU": "GT_WU" != "GT_WU"",
                          i, sfxArrayValue,
                          ESASUFFIXPTRGET(suffixArray.suftab,i));
            retval = VERIFY_BWTSEQ_SUFVAL_ERROR;
            break;
          }
        }
        if (tickPrint && !((i + 1) % tickPrint))
          putc('.', fp);
      }
      if (tickPrint)
        putc('\n', fp);
      if (retval != VERIFY_BWTSEQ_NO_ERROR)
        break;
    }
    else if (checkFlags & VERIFY_BWTSEQ_SUFVAL)
    {
      gt_error_set(err, "check of suffix array values was requested,"
                " but index contains no  locate information!");
      retval = VERIFY_BWTSEQ_SUFVAL_ERROR;
      break;
    }
    else if (!(checkFlags & VERIFY_BWTSEQ_SUFVAL)
             && BWTSeqHasLocateInformation(bwtSeq))
    {
      fputs("Not checking suftab values.\n", stderr);
    }
    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      GtUword nextLocate = BWTSeqTerminatorPos(bwtSeq);
      if (suffixArray.longest.defined &&
          suffixArray.longest.valueunsignedlong != nextLocate)
      {
        gt_error_set(err, "terminator/0-rotation position mismatch "GT_WU""
                  " vs. "GT_WU"", suffixArray.longest.valueunsignedlong,
                  nextLocate);
        retval = VERIFY_BWTSEQ_TERMPOS_ERROR;
        break;
      }
      if ((checkFlags & VERIFY_BWTSEQ_LFMAPWALK)
          && (bwtSeq->featureToggles & BWTReversiblySorted))
      {
        GtUword i = seqLen;
        /* handle first symbol specially because the encseq
         * will not return the terminator symbol */
        {
          Symbol sym = BWTSeqGetSym(bwtSeq, nextLocate);
          if (sym != UNDEFBWTCHAR)
          {
            gt_error_set(err, "symbol mismatch at position "GT_WU": "
                      "%d vs. reference symbol %d", i - 1, (int)sym,
                      (int)UNDEFBWTCHAR);
            retval = VERIFY_BWTSEQ_LFMAPWALK_ERROR;
            break;
          }
          --i;
          nextLocate = BWTSeqLFMap(bwtSeq, nextLocate, &extBits);
        }
        /* walk the remaining symbols from the last reference position
         * down to position 0 */
        while (i > 0)
        {
          Symbol symRef =
                         gt_encseq_get_encoded_char(suffixArray.encseq,
                                                          --i,
                                                          suffixArray.readmode);
          Symbol symCmp = BWTSeqGetSym(bwtSeq, nextLocate);
          if (symCmp != symRef)
          {
            gt_error_set(err, "symbol mismatch at position "GT_WU": "
                      "%d vs. reference symbol %d", i, (int)symCmp,
                      (int)symRef);
            retval = VERIFY_BWTSEQ_LFMAPWALK_ERROR;
            break;
          }
          nextLocate = BWTSeqLFMap(bwtSeq, nextLocate, &extBits);
        }
        if (retval != VERIFY_BWTSEQ_NO_ERROR)
          break;
      }
      else if ((checkFlags & VERIFY_BWTSEQ_LFMAPWALK)
               && !(bwtSeq->featureToggles & BWTReversiblySorted))
      {
        gt_error_set(err, "requested complete backwards regeneration in index"
                  " without regeneration capability");
        retval = VERIFY_BWTSEQ_LFMAPWALK_IMP_ERROR;
        break;
      }
    }
    if (checkFlags & VERIFY_BWTSEQ_CONTEXT)
    {
      BWTSeqContextRetriever *bwtSeqCR =
        gt_BWTSeqCRLoad(bwtSeq, projectName, CTX_MAP_ILOG_AUTOSIZE);
      if (!bwtSeqCR)
      {
        gt_error_set(err, "cannot load BWT sequence context access table"
                  " for project %s", projectName);
        retval = VERIFY_BWTSEQ_CONTEXT_LOADFAIL;
        break;
      }
      fputs("Checking context regeneration.\n", stderr);
      {
        GtUword i, start, subSeqLen,
          maxSubSeqLen = MIN(MAX(MIN_CONTEXT_LEN, seqLen/CONTEXT_FRACTION),
                             MAX_CONTEXT_LEN),
          numTries = MIN(MAX_NUM_CONTEXT_CHECKS,
                         MAX(2, seqLen/CONTEXT_INTERVAL));
        Symbol *contextBuf = gt_malloc(sizeof (Symbol) * MAX_CONTEXT_LEN);
        GtEncseqReader *esr =
           gt_encseq_create_reader_with_readmode(suffixArray.encseq,
                                                 suffixArray.readmode,
                                                 0);
        for (i = 0; i < numTries && retval == VERIFY_BWTSEQ_NO_ERROR; ++i)
        {
          GtUword j, end, inSubSeqLen;
          subSeqLen = random()%maxSubSeqLen + 1;
          start = random()%(seqLen - subSeqLen + 1);
          end = start + subSeqLen;
          /* a sample reaching the end of the index includes the terminator,
           * which is not read from the reference but checked separately */
          inSubSeqLen = subSeqLen - ((end==seqLen)?1:0);
          gt_BWTSeqCRAccessSubseq(bwtSeqCR, start, subSeqLen, contextBuf);
          gt_encseq_reader_reinit_with_readmode(esr, suffixArray.encseq,
                                                suffixArray.readmode, start);
          for (j = 0; j < inSubSeqLen; ++j)
          {
            Symbol symRef = gt_encseq_reader_next_encoded_char(esr);
            Symbol symCmp = contextBuf[j];
            if (symCmp != symRef)
            {
              gt_error_set(err, "symbol mismatch at position "GT_WU": "
                        "%d vs. reference symbol %d", start + j, (int)symCmp,
                        (int)symRef);
              retval = VERIFY_BWTSEQ_CONTEXT_SYMFAIL;
              break;
            }
          }
          /* Compare the trailing terminator only if the loop above found no
           * mismatch: after a mismatch j still addresses the failing symbol
           * and checking it against UNDEFBWTCHAR would replace the error
           * message with one naming the wrong reference symbol. */
          while (retval == VERIFY_BWTSEQ_NO_ERROR && j < subSeqLen)
          {
            Symbol symRef = UNDEFBWTCHAR;
            Symbol symCmp = contextBuf[j];
            if (symCmp != symRef)
            {
              gt_error_set(err, "symbol mismatch at position "GT_WU": "
                        "%d vs. reference symbol %d", start + j, (int)symCmp,
                        (int)symRef);
              retval = VERIFY_BWTSEQ_CONTEXT_SYMFAIL;
              break;
            }
            ++j;
          }
        }
        if (retval == VERIFY_BWTSEQ_NO_ERROR)
          fputs("Context regeneration completed successfully.\n", stderr);
        gt_encseq_reader_delete(esr);
        gt_free(contextBuf);
      }
      gt_deleteBWTSeqCR(bwtSeqCR);
    }
  } while (0);
  /* release only what was actually set up before a possible early exit */
  if (suffixArrayIsInitialized) gt_freesuffixarray(&suffixArray);
  if (extBitsAreInitialized) destructExtBitsRetrieval(&extBits);
  return retval;
}
コード例 #2
0
ファイル: hcr.c プロジェクト: AnnSeidel/genometools
/*
 * Create a sequence decoder for the HCR file <name><HCRFILESUFFIX>.
 *
 * The file header (file info, base/quality distribution, offset where the
 * encoded data ends and the sampling starts) is read, the Huffman decoder is
 * set up and, if the file extends beyond that offset, the sampling is read.
 *
 * Returns the new decoder, or NULL if the file could not be opened or the
 * Huffman initialisation failed (err is passed to both operations).
 */
static GtHcrSeqDecoder *hcr_seq_decoder_new(GtAlphabet *alpha, const char *name,
                                            GtError *err)
{
  GtHcrSeqDecoder *decoder = gt_malloc(sizeof (*decoder));
  GtBaseQualDistr *distr = NULL;
  GtWord sampling_offset = 0;
  FILE *infile;
  GT_UNUSED size_t items_read;
  GT_UNUSED const size_t expected_items = (size_t) 1;

  /* start from a fully defined state, so the decoder can be deleted at any
     point below */
  decoder->alpha = alpha;
  decoder->alphabet_size = gt_alphabet_size(alpha);
  decoder->cur_read = 0;
  decoder->data_iter = NULL;
  decoder->file_info_rbt = NULL;
  decoder->fileinfos = NULL;
  decoder->huff_dec = NULL;
  decoder->huffman = NULL;
  decoder->sampling = NULL;
  decoder->symbols = NULL;
  decoder->filename = gt_str_new_cstr(name);
  gt_str_append_cstr(decoder->filename, HCRFILESUFFIX);

  infile = gt_fa_fopen_with_suffix(name, HCRFILESUFFIX, "rb", err);
  if (infile != NULL) {
    hcr_read_file_info(decoder, infile);

    distr = hcr_base_qual_distr_new_from_file(infile, decoder->alpha);
    decoder->qual_offset = distr->qual_offset;

    items_read = gt_xfread_one(&sampling_offset, infile);
    gt_assert(items_read == expected_items);

    decoder->start_of_encoding = decoder_calc_start_of_encoded_data(infile);

    if (seq_decoder_init_huffman(decoder, sampling_offset, distr, err) == 0) {
      size_t file_size;

      /* determine the file size to see whether a sampling follows the
         encoded data */
      gt_xfseek(infile, 0, SEEK_END);
      file_size = ftell(infile);
      gt_xfseek(infile, sampling_offset, SEEK_SET);
      decoder->sampling = (sampling_offset < file_size)
                            ? gt_sampling_read(infile)
                            : NULL;

      decoder->file_info_rbt =
        seq_decoder_init_file_info(decoder->fileinfos, decoder->num_of_files);
    }
    else {
      hcr_seq_decoder_delete(decoder);
      decoder = NULL;
    }
  }
  else {
    hcr_seq_decoder_delete(decoder);
    decoder = NULL;
  }

  hcr_base_qual_distr_delete(distr);
  gt_fa_fclose(infile);
  return decoder;
}
コード例 #3
0
ファイル: gt_seqorder.c プロジェクト: joergi-w/genometools
/*
 * Tool runner: load the encoded sequence named by argv[parsed_args] and
 * output its sequences in the order selected by the tool arguments
 * (invert, shuffle, sorthdr, sorthdrnum, or sort/revsort, tested in that
 * order).
 *
 * Returns 0 on success and -1 if the encoded sequence cannot be loaded.
 */
static int gt_seqorder_runner(GT_UNUSED int argc,
                              const char **argv,
                              int parsed_args,
                              void *tool_arguments,
                              GtError *err)
{
  GtSeqorderArguments *opts = tool_arguments;
  GtEncseqLoader *encseq_loader;
  GtEncseq *encseq;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(opts != NULL);

  /* load encseq */
  encseq_loader = gt_encseq_loader_new();
  encseq = gt_encseq_loader_load(encseq_loader, argv[parsed_args], err);
  if (encseq == NULL)
  {
    had_err = -1;
  }
  else
  {
    GtUword idx, numofseqs;

    if (!gt_encseq_has_description_support(encseq))
      gt_warning("%s has no description support", argv[parsed_args]);
    numofseqs = gt_encseq_num_of_sequences(encseq);

    if (opts->invert)
    {
      /* last sequence first */
      for (idx = 0; idx < numofseqs; idx++)
        gt_seqorder_output(numofseqs - 1 - idx, encseq);
    }
    else if (opts->shuffle || opts->sorthdr || opts->sorthdrnum)
    {
      /* these modes all fill a table of sequence numbers and then output
         the sequences in table order */
      GtUword *order = gt_malloc(sizeof (GtUword) * numofseqs);

      if (opts->shuffle)
      {
        gt_seqorder_get_shuffled_seqnums(numofseqs, order);
      }
      else if (opts->sorthdr)
      {
        gt_seqorder_get_hdrsorted_seqnums(encseq, order,
                                          seqorder_str_compare_lex);
      }
      else
      {
        gt_seqorder_get_hdrsorted_seqnums(encseq, order,
                                          seqorder_str_compare_num);
      }
      for (idx = 0; idx < numofseqs; idx++)
        gt_seqorder_output(order[idx], encseq);
      gt_free(order);
    }
    else
    {
      GtSuffixsortspace *sortspace;

      gt_assert(opts->sort || opts->revsort);
      /* Use iterator over sequence separators:
         saves a lot of binary searches */
      sortspace = gt_suffixsortspace_new(numofseqs,
                                         gt_encseq_seqstartpos(encseq,
                                                               numofseqs-1),
                                         false,NULL);
      gt_seqorder_sort(sortspace, encseq);
      for (idx = 0; idx < numofseqs; idx++)
      {
        /* ascending rank for sort, descending rank for revsort */
        const GtUword rank = opts->sort ? idx : numofseqs - 1 - idx;
        const GtUword seqnum
          = gt_encseq_seqnum(encseq,
                             gt_suffixsortspace_getdirect(sortspace, rank));

        gt_seqorder_output(seqnum, encseq);
      }
      gt_suffixsortspace_delete(sortspace, false);
    }
  }

  gt_encseq_loader_delete(encseq_loader);
  gt_encseq_delete(encseq);
  return had_err;
}
コード例 #4
0
ファイル: prsqualint.c プロジェクト: AnnSeidel/genometools
/*
 * Parse the argument lparam of option <option> as a qualified integer: a
 * positive decimal number, optionally followed by exactly one qualifier
 * character (BESTCHARACTER or PERCENTAWAYCHARACTER) as the last character.
 * A qualified number must not exceed 100.
 *
 * Returns a newly allocated Qualifiedinteger owned by the caller, or NULL
 * if lparam is malformed; in that case the error is reported via
 * ERRORLPARAM.
 */
Qualifiedinteger *gt_parsequalifiedinteger(const char *option,
                                        const char *lparam,
                                        GtError *err)
{
  GtWord readint = 0;
  size_t i;
  char *lparamcopy;
  bool haserr = false;
  Qualifiedinteger *qualint;

  /* work on a copy, since a trailing qualifier is cut off before scanning */
  lparamcopy = gt_malloc(sizeof (char) * (strlen(lparam)+1));
  qualint = gt_malloc(sizeof (*qualint));
  strcpy(lparamcopy,lparam);
  for (i=0; lparamcopy[i] != '\0'; i++)
  {
    const bool islast = (lparamcopy[i+1] == '\0');
    /* a qualifier is only valid as the final character; accepting it at any
       position would let sscanf silently ignore everything following it.
       isdigit requires a value representable as unsigned char. */
    const bool isqualifier = islast &&
                             (lparamcopy[i] == BESTCHARACTER ||
                              lparamcopy[i] == PERCENTAWAYCHARACTER);

    if (!isdigit((unsigned char) lparamcopy[i]) && !isqualifier)
    {
      ERRORLPARAM;
      haserr = true;
      break;
    }
  }
  if (!haserr && i == 0)
  {
    /* empty argument */
    ERRORLPARAM;
    haserr = true;
  }
  if (!haserr)
  {
    if (lparamcopy[i-1] == BESTCHARACTER)
    {
      lparamcopy[i-1] = '\0';
      qualint->qualtag = Qualbestof;
    } else
    {
      if (lparamcopy[i-1] == PERCENTAWAYCHARACTER)
      {
        lparamcopy[i-1] = '\0';
        qualint->qualtag = Qualpercentaway;
      } else
      {
        qualint->qualtag = Qualabsolute;
      }
    }
    /* also fails for a lone qualifier, which leaves an empty string */
    if (sscanf(lparamcopy,""GT_WD"",&readint) != 1 || readint <= 0)
    {
      ERRORLPARAM;
      haserr = true;
    }
  }
  if (!haserr &&
      (qualint->qualtag == Qualpercentaway || qualint->qualtag == Qualbestof))
  {
    if (readint > 100L)
    {
      ERRORLPARAM;
      haserr = true;
    }
  }
  gt_free(lparamcopy);
  if (haserr)
  {
    gt_free (qualint);
    return NULL;
  }
  qualint->integervalue = (GtUword) readint;
  return qualint;
}
コード例 #5
0
int
gt_bitPackStringInt16_unit_test(GtError *err)
{
  BitString bitStore = NULL;
  BitString bitStoreCopy = NULL;
  uint16_t *randSrc = NULL; /*< create random ints here for input as bit
                                *  store */
  uint16_t *randCmp = NULL; /*< used for random ints read back */
  unsigned *numBitsList = NULL;
  size_t i, numRnd;
  BitOffset offsetStart, offset;
  int had_err = 0;
  offset = offsetStart = random()%(sizeof (uint16_t) * CHAR_BIT);
  numRnd = random() % (MAX_RND_NUMS_uint16_t + 1);
  gt_log_log("offset=" GT_WU ", numRnd=" GT_WU "\n",
          (GtUword)offsetStart, (GtUword)numRnd);
  {
    BitOffset numBits = sizeof (uint16_t) * CHAR_BIT * numRnd + offsetStart;
    randSrc = gt_malloc(sizeof (uint16_t)*numRnd);
    bitStore = gt_malloc(bitElemsAllocSize(numBits) * sizeof (BitElem));
    bitStoreCopy = gt_calloc(bitElemsAllocSize(numBits), sizeof (BitElem));
    randCmp = gt_malloc(sizeof (uint16_t)*numRnd);
  }
  /* first test unsigned types */
  gt_log_log("gt_bsStoreUInt16/gt_bsGetUInt16: ");
  for (i = 0; i < numRnd; ++i)
  {
#if 16 > 32 && LONG_BIT < 16
    uint16_t v = randSrc[i] = (uint16_t)random() << 32 | random();
#else /* 16 > 32 && LONG_BIT < 16 */
    uint16_t v = randSrc[i] = random();
#endif /* 16 > 32 && LONG_BIT < 16 */
    int bits = gt_requiredUInt16Bits(v);
    gt_bsStoreUInt16(bitStore, offset, bits, v);
    offset += bits;
  }
  offset = offsetStart;
  for (i = 0; i < numRnd; ++i)
  {
    uint16_t v = randSrc[i];
    int bits = gt_requiredUInt16Bits(v);
    uint16_t r = gt_bsGetUInt16(bitStore, offset, bits);
    gt_ensure(r == v);
    if (had_err)
    {
      gt_log_log("Expected %"PRIu16", got %"PRIu16", i = " GT_WU "\n",
              v, r, (GtUword)i);
      freeResourcesAndReturn(had_err);
    }
    offset += bits;
  }
  gt_log_log("passed\n");
  if (numRnd > 0)
  {
    uint16_t v = randSrc[0], r = 0;
    unsigned numBits = gt_requiredUInt16Bits(v);
    BitOffset i = offsetStart + numBits;
    uint16_t mask = ~(uint16_t)0;
    if (numBits < 16)
      mask = ~(mask << numBits);
    gt_log_log("bsSetBit, gt_bsClearBit, bsToggleBit, gt_bsGetBit: ");
    while (v)
    {
      int lowBit = v & 1;
      v >>= 1;
      gt_ensure(lowBit == (r = gt_bsGetBit(bitStore, --i)));
      if (had_err)
      {
        gt_log_log("Expected %d, got %d, i = "GT_LLU"\n",
                lowBit, (int)r, (GtUint64)i);
        freeResourcesAndReturn(had_err);
      }
    }
    i = offsetStart + numBits;
    gt_bsClear(bitStoreCopy, offsetStart, numBits, random()&1);
    v = randSrc[0];
    while (i)
    {
      int lowBit = v & 1;
      v >>= 1;
      if (lowBit)
        bsSetBit(bitStoreCopy, --i);
      else
        gt_bsClearBit(bitStoreCopy, --i);
    }
    v = randSrc[0];
    r = gt_bsGetUInt16(bitStoreCopy, offsetStart, numBits);
    gt_ensure(r == v);
    if (had_err)
    {
      gt_log_log("Expected %"PRIu16", got %"PRIu16"\n", v, r);
      freeResourcesAndReturn(had_err);
    }
    for (i = 0; i < numBits; ++i)
      bsToggleBit(bitStoreCopy, offsetStart + i);
    r = gt_bsGetUInt16(bitStoreCopy, offsetStart, numBits);
    gt_ensure(r == (v = (~v & mask)));
    if (had_err)
    {
      gt_log_log("Expected %"PRIu16", got %"PRIu16"\n", v, r);
      freeResourcesAndReturn(had_err);
    }
    gt_log_log("passed\n");
  }
コード例 #6
0
/*
 * Run a depth first traversal over the enhanced suffix array delivered by
 * ssar, handing the tyr_* callbacks a freshly set up TyrDfsstate.
 *
 * If storeindex is the empty string, occurrence counts are passed to
 * adddistpos2distribution and the final statistics are shown. Otherwise the
 * file <storeindex><MERSUFFIX> is written (plus <storeindex><COUNTSSUFFIX>
 * if storecounts is set), followed by the mer size and the alphabet size.
 *
 * Returns 0 on success and -1 on failure, i.e. if mersize exceeds the total
 * sequence length, an output file cannot be opened or the traversal fails.
 */
static int enumeratelcpintervals(const char *inputindex,
                                 Sequentialsuffixarrayreader *ssar,
                                 const char *storeindex,
                                 bool storecounts,
                                 GtUword mersize,
                                 GtUword minocc,
                                 GtUword maxocc,
                                 bool performtest,
                                 GtLogger *logger,
                                 GtError *err)
{
  TyrDfsstate *dfs;
  unsigned int numofchars;
  bool failed = false, nostoreindex;

  gt_error_check(err);
  nostoreindex = (strlen(storeindex) == 0);

  /* set up the traversal state */
  dfs = gt_malloc(sizeof (*dfs));
  GT_INITARRAY(&dfs->occdistribution,Countwithpositions);
  dfs->esrspace = gt_encseq_create_reader_with_readmode(
                                   gt_encseqSequentialsuffixarrayreader(ssar),
                                   gt_readmodeSequentialsuffixarrayreader(ssar),
                                   0);
  dfs->mersize = mersize;
  dfs->encseq = gt_encseqSequentialsuffixarrayreader(ssar);
  numofchars = gt_alphabet_num_of_chars(gt_encseq_alphabet(dfs->encseq));
  dfs->readmode = gt_readmodeSequentialsuffixarrayreader(ssar);
  dfs->storecounts = storecounts;
  dfs->minocc = minocc;
  dfs->maxocc = maxocc;
  dfs->totallength = gt_encseq_total_length(dfs->encseq);
  dfs->performtest = performtest;
  dfs->countoutputmers = 0;
  dfs->merindexfpout = NULL;
  dfs->countsfilefpout = NULL;
  GT_INITARRAY(&dfs->largecounts,Largecount);

  /* the byte buffer is only needed when an index is written */
  if (nostoreindex)
  {
    dfs->sizeofbuffer = 0;
    dfs->bytebuffer = NULL;
  } else
  {
    dfs->sizeofbuffer = MERBYTES(mersize);
    dfs->bytebuffer = gt_malloc(sizeof *dfs->bytebuffer
                                * dfs->sizeofbuffer);
  }

  /* the test mode needs space for the current mer and the suffix table */
  if (performtest)
  {
    dfs->currentmer = gt_malloc(sizeof *dfs->currentmer
                                * dfs->mersize);
    dfs->suftab = gt_suftabSequentialsuffixarrayreader(ssar);
  } else
  {
    dfs->currentmer = NULL;
    dfs->suftab = NULL;
  }

  if (dfs->mersize > dfs->totallength)
  {
    gt_error_set(err,"mersize "GT_WU" > "GT_WU" = totallength not allowed",
                 dfs->mersize,
                 dfs->totallength);
    failed = true;
  } else
  {
    if (nostoreindex)
    {
      dfs->processoccurrencecount = adddistpos2distribution;
    } else
    {
      dfs->merindexfpout = gt_fa_fopen_with_suffix(storeindex,MERSUFFIX,
                                                   "wb",err);
      if (dfs->merindexfpout == NULL)
      {
        failed = true;
      } else if (dfs->storecounts)
      {
        dfs->countsfilefpout
          = gt_fa_fopen_with_suffix(storeindex,COUNTSSUFFIX,"wb",err);
        if (dfs->countsfilefpout == NULL)
        {
          failed = true;
        }
      }
      dfs->processoccurrencecount = outputsortedstring2index;
    }

    if (!failed)
    {
      const int dfsresult = gt_depthfirstesa(ssar,
                                             tyr_allocateDfsinfo,
                                             tyr_freeDfsinfo,
                                             tyr_processleafedge,
                                             NULL,
                                             tyr_processcompletenode,
                                             tyr_assignleftmostleaf,
                                             tyr_assignrightmostleaf,
                                             (Dfsstate*) dfs,
                                             logger,
                                             err);
      if (dfsresult != 0)
      {
        failed = true;
      }
      /* shown independently of the traversal result */
      if (nostoreindex)
      {
        showfinalstatistics(dfs,inputindex,logger);
      }
    }

    if (!failed)
    {
      if (dfs->countsfilefpout != NULL)
      {
        gt_logger_log(logger,"write "GT_WU" mercounts > "GT_WU
                      " to file \"%s%s\"",
                      dfs->largecounts.nextfreeLargecount,
                      (GtUword) MAXSMALLMERCOUNT,
                      storeindex,
                      COUNTSSUFFIX);
        gt_xfwrite(dfs->largecounts.spaceLargecount, sizeof (Largecount),
                   (size_t) dfs->largecounts.nextfreeLargecount,
                   dfs->countsfilefpout);
      }
      gt_logger_log(logger,"number of "GT_WU"-mers in index: "GT_WU"",
                    mersize,
                    dfs->countoutputmers);
      gt_logger_log(logger,"index size: %.2f megabytes\n",
                    GT_MEGABYTES(dfs->countoutputmers * dfs->sizeofbuffer +
                                 sizeof (GtUword) * EXTRAINTEGERS));
      /* now out EXTRAINTEGERS integer values */
      if (dfs->merindexfpout != NULL)
      {
        outputbytewiseUlongvalue(dfs->merindexfpout,
                                 (GtUword) dfs->mersize);
        outputbytewiseUlongvalue(dfs->merindexfpout,(GtUword) numofchars);
      }
    }
  }

  /* release all resources, whether or not an error occurred */
  gt_fa_xfclose(dfs->merindexfpout);
  gt_fa_xfclose(dfs->countsfilefpout);
  GT_FREEARRAY(&dfs->occdistribution,Countwithpositions);
  gt_free(dfs->currentmer);
  gt_free(dfs->bytebuffer);
  GT_FREEARRAY(&dfs->largecounts,Largecount);
  gt_encseq_reader_delete(dfs->esrspace);
  gt_free(dfs);
  return failed ? -1 : 0;
}
コード例 #7
0
/*
 * Feature node callback of the LTRdigest protein domain visitor.
 *
 * Searches the subgraph of fn for a feature of type
 * gt_ft_LTR_retrotransposon. If the visitor holds such a feature, its
 * sequence is extracted and translated in all three reading frames of both
 * strands, the translations are piped in FASTA format into a forked
 * "hmmscan" process started with lv->args, and the output of that process
 * is parsed and turned into hits. Finally the strand is chosen.
 *
 * Returns 0 on success, a non-zero value on failure.
 */
static int gt_ltrdigest_pdom_visitor_feature_node(GtNodeVisitor *nv,
                                                  GtFeatureNode *fn,
                                                  GtError *err)
{
  GtLTRdigestPdomVisitor *lv;
  GtFeatureNodeIterator *fni;
  GtFeatureNode *curnode = NULL;
  int had_err = 0;
  GtRange rng;
  unsigned long i;
  lv = gt_ltrdigest_pdom_visitor_cast(nv);
  gt_assert(lv);
  gt_error_check(err);

  /* traverse annotation subgraph and find LTR element */
  fni = gt_feature_node_iterator_new(fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (strcmp(gt_feature_node_get_type(curnode),
               gt_ft_LTR_retrotransposon) == 0) {
      lv->ltr_retrotrans = curnode;
    }
  }
  gt_feature_node_iterator_delete(fni);

  if (!had_err && lv->ltr_retrotrans != NULL) {
    GtCodonIterator *ci;
    GtTranslator *tr;
    GtTranslatorStatus status;
    unsigned long seqlen;
    char translated, *rev_seq;
    FILE *instream;
    GtHMMERParseStatus *pstatus;
    unsigned int frame;
    GtStr *seq;

    seq = gt_str_new();
    rng = gt_genome_node_get_range((GtGenomeNode*) lv->ltr_retrotrans);
    /* range coordinates shifted down by one */
    lv->leftLTR_5 = rng.start - 1;
    lv->rightLTR_3 = rng.end - 1;
    seqlen = gt_range_length(&rng);

    had_err = gt_extract_feature_sequence(seq,
                                          (GtGenomeNode*) lv->ltr_retrotrans,
                                          gt_symbol(gt_ft_LTR_retrotransposon),
                                          false, NULL, NULL, lv->rmap, err);

    if (!had_err) {
      for (i = 0UL; i < 3UL; i++) {
        gt_str_reset(lv->fwd[i]);
        gt_str_reset(lv->rev[i]);
      }

      /* create translations */
      ci = gt_codon_iterator_simple_new(gt_str_get(seq), seqlen, NULL);
      gt_assert(ci);
      tr = gt_translator_new(ci);
      status = gt_translator_next(tr, &translated, &frame, err);
      while (status == GT_TRANSLATOR_OK && translated) {
        gt_str_append_char(lv->fwd[frame], translated);
        /* pass err here as well: a failure in a later step must not lead
           to an error return without an error message */
        status = gt_translator_next(tr, &translated, &frame, err);
      }
      if (status == GT_TRANSLATOR_ERROR) had_err = -1;
      if (!had_err) {
        /* rev_seq is handled by explicit length, it is not 0-terminated */
        rev_seq = gt_malloc((size_t) seqlen * sizeof (char));
        strncpy(rev_seq, gt_str_get(seq), (size_t) seqlen * sizeof (char));
        (void) gt_reverse_complement(rev_seq, seqlen, NULL);
        gt_codon_iterator_delete(ci);
        ci = gt_codon_iterator_simple_new(rev_seq, seqlen, NULL);
        gt_translator_set_codon_iterator(tr, ci);
        status = gt_translator_next(tr, &translated, &frame, err);
        while (status == GT_TRANSLATOR_OK && translated) {
          gt_str_append_char(lv->rev[frame], translated);
          status = gt_translator_next(tr, &translated, &frame, err);
        }
        if (status == GT_TRANSLATOR_ERROR) had_err = -1;
        gt_free(rev_seq);
      }
      gt_codon_iterator_delete(ci);
      gt_translator_delete(tr);
    }

    /* run HMMER and handle results */
    if (!had_err) {
      /* pc: pipe parent -> child (child's stdin),
         cp: pipe child -> parent (child's stdout) */
      int pid, pc[2], cp[2];
      GT_UNUSED int rval;

      (void) signal(SIGCHLD, SIG_IGN); /* XXX: for now, ignore child's
                                               exit status */
      rval = pipe(pc);
      gt_assert(rval == 0);
      rval = pipe(cp);
      gt_assert(rval == 0);

      switch ((pid = (int) fork())) {
        case -1:
          perror("Can't fork");
          exit(1);   /* XXX: error handling */
        case 0:    /* child */
          (void) close(1);    /* close current stdout. */
          rval = dup(cp[1]);  /* make stdout go to write end of pipe. */
          (void) close(0);    /* close current stdin. */
          rval = dup(pc[0]);  /* make stdin come from read end of pipe. */
          (void) close(pc[1]);
          (void) close(cp[0]);
          (void) execvp("hmmscan", lv->args); /* XXX: read path from env */
          perror("couldn't execute hmmscan!");
          exit(1);
        default:    /* parent */
          /* one FASTA entry per frame and strand; the header ">N+\n" or
             ">N-\n" occupies exactly 4 characters */
          for (i = 0UL; i < 3UL; i++) {
            char buf[5];
            GT_UNUSED ssize_t written;
            (void) sprintf(buf, ">%lu%c\n", i, '+');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->fwd[i]),
                            (size_t) gt_str_length(lv->fwd[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
            (void) sprintf(buf, ">%lu%c\n", i, '-');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->rev[i]),
                            (size_t) gt_str_length(lv->rev[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
          }
          (void) close(pc[1]);
          /* the read end of the parent->child pipe is only used by the
             child; without closing it here, one descriptor would leak for
             every processed element */
          (void) close(pc[0]);
          (void) close(cp[1]);
          instream = fdopen(cp[0], "r");
          pstatus = gt_hmmer_parse_status_new();
          had_err = gt_ltrdigest_pdom_visitor_parse_output(lv, pstatus,
                                                           instream, err);
          (void) fclose(instream);   /* also closes cp[0] */
          if (!had_err)
            had_err = gt_ltrdigest_pdom_visitor_process_hits(lv, pstatus, err);
          gt_hmmer_parse_status_delete(pstatus);
      }
    }
    gt_str_delete(seq);
  }
  if (!had_err)
    had_err = gt_ltrdigest_pdom_visitor_choose_strand(lv);
  return had_err;
}
コード例 #8
0
/*
 * Check the parsed options of the occratio tool and derive the set of mer
 * sizes to report.
 *
 * The mer sizes are either given explicitly by option -mersizes (a strictly
 * increasing list of positive integers) or as the range from minmersize to
 * maxmersize in steps of stepmersize. Every selected mer size is marked in
 * the bit table arguments->outputvector, and the arguments of option
 * -output are combined into arguments->outputmode.
 *
 * Returns 0 if the arguments are consistent; otherwise -1 with err set.
 */
static int gt_tyr_occratio_arguments_check(int rest_argc,
                                           void *tool_arguments,
                                           GtError *err)
{
  Tyr_occratio_options *arguments = tool_arguments;
  bool haserr = false;

  Optionargmodedesc outputmodedesctable[] =
  {
    {"unique","number of unique mers",TYROCC_OUTPUTUNIQUE},
    {"nonunique","number of nonunique mers (single count)",
                 TYROCC_OUTPUTNONUNIQUE},
    {"nonuniquemulti","number of nonunique mers (multi count)",
                 TYROCC_OUTPUTNONUNIQUEMULTI},
    {"relative","fraction of unique/non-unique mers relative to all mers",
                 TYROCC_OUTPUTRELATIVE},
    {"total","number of all mers",TYROCC_OUTPUTTOTAL}
  };
  if (rest_argc != 0)
  {
    gt_error_set(err,"superfluous arguments");
    return -1;
  }
  if (gt_option_is_set(arguments->refoptionmersizes))
  {
    /* explicit list of mer sizes */
    unsigned long *mersizes = NULL;
    unsigned long idx,
                  numofmersizes = gt_str_array_size(arguments->mersizesstrings);
    if (numofmersizes == 0)
    {
      gt_error_set(err,"missing argument to option -mersizes:");
      haserr = true;
    } else
    {
      mersizes = gt_malloc(sizeof(*mersizes) * numofmersizes);
      for (idx=0; idx<numofmersizes; idx++)
      {
        long readnum;

        if (sscanf(gt_str_array_get(arguments->mersizesstrings,idx),
                   "%ld",&readnum) != 1 || readnum <= 0)
        {
          gt_error_set(err,"invalid argument \"%s\" of option -mersizes: "
                       "must be a positive integer",
                       gt_str_array_get(arguments->mersizesstrings,idx));
          haserr = true;
          break;
        }
        mersizes[idx] = (unsigned long) readnum;
        if (idx > 0 && mersizes[idx-1] >= mersizes[idx])
        {
          gt_error_set(err,"invalid argument %s to option -mersizes: "
                       "positive numbers must be strictly increasing",
                       gt_str_array_get(arguments->mersizesstrings,idx));
          haserr = true;
          break;
        }
      }
    }
    if (!haserr)
    {
      gt_assert(mersizes != NULL);
      /* the list is strictly increasing: first is minimum, last is maximum */
      arguments->minmersize = mersizes[0];
      arguments->maxmersize = mersizes[numofmersizes-1];
      INITBITTAB(arguments->outputvector,arguments->maxmersize+1);
      for (idx=0; idx<numofmersizes; idx++)
      {
        SETIBIT(arguments->outputvector,mersizes[idx]);
      }
    }
    gt_free(mersizes);
  } else
  {
    /* range of mer sizes given by minimum, maximum and step */
    if (arguments->minmersize == 0)
    {
      gt_error_set(err,"if option -mersizes is not used, then option "
                       "-minmersize is mandatory");
      haserr = true;
    }
    if (!haserr)
    {
      if (arguments->maxmersize == 0)
      {
        gt_error_set(err,"if option -mersizes is not used, then option "
                         "-maxmersize is mandatory");
        haserr = true;
      }
    }
    if (!haserr)
    {
      if (arguments->minmersize > arguments->maxmersize)
      {
        gt_error_set(err,"minimum mer size must not be larger than "
                         "maximum mer size");
        haserr = true;
      }
    }
    if (!haserr)
    {
      /* a step of 0 would never advance the loop over the range below */
      if (arguments->stepmersize == 0)
      {
        gt_error_set(err,"step value must be larger than 0");
        haserr = true;
      }
    }
    if (!haserr)
    {
      if (arguments->minmersize+arguments->stepmersize > arguments->maxmersize)
      {
        gt_error_set(err,"minimum mer size + step value must be smaller or "
                         "equal to maximum mersize");
        haserr = true;
      }
    }
    if (!haserr)
    {
      unsigned long outputval;

      INITBITTAB(arguments->outputvector,arguments->maxmersize+1);
      for (outputval = arguments->minmersize;
           outputval <= arguments->maxmersize;
           outputval += arguments->stepmersize)
      {
        SETIBIT(arguments->outputvector,outputval);
      }
    }
  }
  if (!haserr)
  {
    /* translate each argument of option -output into its mode bit */
    unsigned long idx;
    for (idx=0; idx<gt_str_array_size(arguments->outputspec); idx++)
    {
      if (optionargaddbitmask(outputmodedesctable,
                           sizeof (outputmodedesctable)/
                           sizeof (outputmodedesctable[0]),
                           &arguments->outputmode,
                           "-output",
                           gt_str_array_get(arguments->outputspec,idx),
                           err) != 0)
      {
        haserr = true;
        break;
      }
    }
  }
  if (!haserr)
  {
    /* "relative" needs at least one of the counts it relates to */
    if ((arguments->outputmode & TYROCC_OUTPUTRELATIVE) &&
        !(arguments->outputmode &
            (TYROCC_OUTPUTUNIQUE | TYROCC_OUTPUTNONUNIQUE |
                                   TYROCC_OUTPUTNONUNIQUEMULTI)))
    {
      gt_error_set(err,"argument relative to option -output requires that one "
                   "of the arguments unique, nonunique, or nonuniquemulti "
                   "is used");
      haserr = true;
    }
  }
  return haserr ? - 1: 0;
}
コード例 #9
0
/*
  Define, parse and validate the command line options of the pairwise
  comparison tool and store the results in <pw>.

  Exactly one of the input options -ss, -ff, -a and -t has to be given:
    -ss  needs exactly two strings
    -ff  needs two filenames, or the keyword "fasta" followed by two
         filenames (in this case pw->fasta is set)
    -a   needs a character list and a length >= 1
    -t   needs a text

  <parsed_args> receives the number of arguments consumed by the option
  parser; any remaining program parameter is reported as an error.

  pw->strings, pw->files and pw->text are always allocated here and
  pw->charlistlen is allocated as soon as -a comes with two arguments; they
  stay allocated on the error paths too, nothing is released in this function.

  Returns GT_OPTION_PARSER_ERROR (with <err> set) if one of the checks made
  here fails, otherwise the value delivered by gt_option_parser_parse().
*/
static GtOPrval parse_options(int *parsed_args,
                              Cmppairwiseopt *pw,
                              int argc, const char **argv, GtError *err)
{
  GtOptionParser *op;
  GtOption *optionstrings,
         *optionfiles,
         *optioncharlistlen,
         *optiontext,
         *optionshowedist,
         *optionprint;
  GtStrArray *charlistlen;
  GtOPrval oprval;

  gt_error_check(err);
  charlistlen = gt_str_array_new();
  pw->strings = gt_str_array_new();
  pw->files = gt_str_array_new();
  pw->text = gt_str_new();
  pw->charlistlen = NULL;
  pw->fastasequences0 = NULL;
  pw->fastasequences1 = NULL;
  pw->showedist = false;
  pw->print = false;
  pw->fasta = false;
  op = gt_option_parser_new("options", "Apply function to pairs of strings.");
  gt_option_parser_set_mail_address(op, "<*****@*****.**>");

  optionstrings = gt_option_new_string_array("ss", "use two strings",
                                             pw->strings);
  gt_option_parser_add_option(op, optionstrings);

  optionfiles = gt_option_new_filename_array("ff", "use two files",
                                             pw->files);
  gt_option_parser_add_option(op, optionfiles);

  optioncharlistlen = gt_option_new_string_array("a",
                                             "use character list and length",
                                             charlistlen);
  gt_option_parser_add_option(op, optioncharlistlen);

  optiontext = gt_option_new_string("t", "use text", pw->text, NULL);
  gt_option_parser_add_option(op, optiontext);

  optionshowedist = gt_option_new_bool("e", "output unit edit distance",
                      &pw->showedist, false);
  gt_option_parser_add_option(op, optionshowedist);

  optionprint = gt_option_new_bool("p", "print edist alignment",
                      &pw->print, false);
  gt_option_parser_add_option(op, optionprint);

  /* the four input options are mutually exclusive */
  gt_option_exclude(optionstrings, optionfiles);
  gt_option_exclude(optionstrings, optioncharlistlen);
  gt_option_exclude(optionstrings, optiontext);
  gt_option_exclude(optionfiles, optioncharlistlen);
  gt_option_exclude(optionfiles, optiontext);
  gt_option_exclude(optioncharlistlen, optiontext);
  gt_option_imply(optionshowedist, optionstrings);
  gt_option_imply(optionprint, optionstrings);

  oprval = gt_option_parser_parse(op, parsed_args, argc, argv, gt_versionfunc,
                                  err);
  if (oprval == GT_OPTION_PARSER_OK)
  {
    if (gt_option_is_set(optionstrings))
    {
      if (gt_str_array_size(pw->strings) != 2UL)
      {
        gt_error_set(err, "option -ss requires two string arguments");
        oprval = GT_OPTION_PARSER_ERROR;
      }
    } else if (gt_option_is_set(optionfiles))
    {
      if (gt_str_array_size(pw->files) != 2UL)
      {
        /* the only other accepted form is: fasta <file> <file> */
        if (gt_str_array_size(pw->files) == 3UL &&
            !strcmp(gt_str_array_get(pw->files,0),"fasta"))
        {
          pw->fasta = true;
        }
        if (!pw->fasta)
        {
          gt_error_set(err, "option -ff requires two filename arguments or "
                            "keyword fasta and two filename arguments in "
                            "FASTA format");
          oprval = GT_OPTION_PARSER_ERROR;
        }
      }
    } else if (gt_option_is_set(optioncharlistlen))
    {
      if (gt_str_array_size(charlistlen) != 2UL)
      {
        gt_error_set(err,
                     "option -a requires charlist and length argument");
        oprval = GT_OPTION_PARSER_ERROR;
      } else
      {
        /* initialized, because sscanf() leaves it untouched on failure */
        GtWord readint = 0;

        pw->charlistlen = gt_malloc(sizeof *pw->charlistlen);
        pw->charlistlen->charlist =
          gt_str_ref(gt_str_array_get_str(charlistlen, 0));
        pw->charlistlen->len = 0;
        if (sscanf(gt_str_array_get(charlistlen,1UL), GT_WD, &readint) != 1
            || readint < 1L)
        {
          gt_error_set(err,
                       "option -a requires charlist and length argument");
          oprval = GT_OPTION_PARSER_ERROR;
        } else
        {
          /* only a successfully parsed, positive value is stored */
          pw->charlistlen->len = (GtUword) readint;
        }
      }
    } else if (!gt_option_is_set(optiontext))
    {
      gt_error_set(err,
                   "use exactly one of the options -ss, -ff, -a, -t");
      oprval = GT_OPTION_PARSER_ERROR;
    }
  }
  gt_option_parser_delete(op);
  if (oprval == GT_OPTION_PARSER_OK && *parsed_args != argc)
  {
    gt_error_set(err, "superfluous program parameters");
    oprval = GT_OPTION_PARSER_ERROR;
  }
  gt_str_array_delete(charlistlen);
  return oprval;
}
コード例 #10
0
ファイル: gtr.c プロジェクト: yesimon/genometools
/*
  Run the runtime environment <gtr> with the given arguments.

  After logging/warning/seed setup the special modes (list, manpage creation,
  64-bit check, tests) are served first; each of them returns its own result
  immediately. Otherwise argv[0] is looked up in the toolbox and the tool is
  run; if it is not a known tool but an existing file, it is executed as a
  Lua script. Finally the interactive Lua interpreter is started if requested
  and no error occurred before.

  Returns EXIT_SUCCESS or EXIT_FAILURE (with <err> set) on the tool/script
  path.
*/
int gtr_run(GtR *gtr, int argc, const char **argv, GtError *err)
{
  char **prefixed_argv = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(gtr);

  if (gtr->debug) {
    enable_logging(gt_str_get(gtr->debugfp), &gtr->logfp);
  }
  if (gtr->quiet) {
    gt_warning_disable();
  }
  gtr->seed = gt_ya_rand_init(gtr->seed);
  gt_log_log("seed=%u", gtr->seed);

  /* special modes which end the run on their own */
  if (gtr->list) {
    return list_tools(gtr);
  }
  if (gt_str_length(gtr->manoutdir) > 0) {
    return create_manpages(gtr, gt_str_get(gtr->manoutdir), err);
  }
  if (gtr->check64bit) {
    return check64bit();
  }
  if (gtr->test) {
    return run_tests(gtr, err);
  }

  if (gt_str_length(gtr->testspacepeak)) {
    void *block,
         *mapping;

    block = gt_malloc(1 << 26); /* alloc 64 MB */
    mapping = gt_fa_xmmap_read(gt_str_get(gtr->testspacepeak), NULL);
    gt_fa_xmunmap(mapping);
    gt_free(block);
  }

  if (argc == 0) {
    if (!gtr->interactive) {
      gt_error_set(err, "neither tool nor script specified; option -help lists "
                        "possible tools");
      had_err = -1;
    }
  }
  else {
    int is_known_tool = gtr->tools && gt_toolbox_has_tool(gtr->tools, argv[0]);

    if (is_known_tool) {
      /* prefer a plain tool function, fall back to a tool object */
      GtTool *tool_object = NULL;
      GtToolfunc tool_function = gt_toolbox_get(gtr->tools, argv[0]);

      if (!tool_function) {
        tool_object = gt_toolbox_get_tool(gtr->tools, argv[0]);
        gt_assert(tool_object);
      }
      prefixed_argv = gt_cstr_array_prefix_first(argv,
                                                 gt_error_get_progname(err));
      gt_error_set_progname(err, prefixed_argv[0]);
      had_err = tool_function
                  ? tool_function(argc, (const char**) prefixed_argv, err)
                  : gt_tool_run(tool_object, argc,
                                (const char**) prefixed_argv, err);
    }
    else if (gt_file_exists(argv[0])) {
      /* not a tool, but an existing file: treat it as a script */
      gt_lua_set_script_dir(gtr->L, argv[0]);
      prefixed_argv = gt_cstr_array_prefix_first(argv,
                                                 gt_error_get_progname(err));
      gt_lua_set_arg(gtr->L, prefixed_argv[0],
                     (const char**) prefixed_argv+1);
      if (luaL_dofile(gtr->L, argv[0])) {
        /* the script failed, its message is on top of the Lua stack */
        gt_assert(lua_isstring(gtr->L, -1));
        gt_error_set(err, "could not execute script %s",
                     lua_tostring(gtr->L, -1));
        had_err = -1;
        lua_pop(gtr->L, 1);
      }
    }
    else {
      gt_error_set(err, "neither tool nor script '%s' found; option -help "
                        "lists possible tools", argv[0]);
      had_err = -1;
    }
  }
  gt_cstr_array_delete(prefixed_argv);

  if (!had_err && gtr->interactive) {
    gt_showshortversion(gt_error_get_progname(err));
    gt_lua_set_arg(gtr->L, gt_error_get_progname(err), argv);
    run_interactive_lua_interpreter(gtr->L);
  }
  return had_err ? EXIT_FAILURE : EXIT_SUCCESS;
}
コード例 #11
0
/* Allocate the storage for one GtQuerymatch object and return it; no field
   of the object is set here. */
GtQuerymatch *gt_querymatch_new(void)
{
  GtQuerymatch *querymatch = gt_malloc(sizeof *querymatch);

  return querymatch;
}
コード例 #12
0
/*
  Tool runner of the condenser search.

  If -blastn or -blastp was requested, a two step search is performed:
    1. coarse step: a BLAST database is built from "<dbpath>.fas" and the
       queries are searched in it; the numeric id parsed from each hit header
       and the hit range are collected.
    2. fine step: the collected uniques are extracted from the compressed
       database into "coarse_<basename of dbpath without suffix>.fas", a BLAST
       database is built from that file and the queries are searched again.
       Each fine hit is written as one tab separated line to arguments->outfp.
  If the fine E-value is undefined, it is derived from the bitscore, the
  average query length and the length of the extracted sequences.

  <tool_arguments> has to point to a GtCondenserSearchArguments object.
  Returns 0 on success; otherwise the error indicator of the failing step is
  returned and <err> is set.
*/
static int gt_condenser_search_runner(GT_UNUSED int argc,
                                      GT_UNUSED const char **argv,
                                      GT_UNUSED int parsed_args,
                                      void *tool_arguments,
                                      GtError *err)
{
  GtCondenserSearchArguments *arguments = tool_arguments;
  int i, had_err = 0;
  char *querypath = NULL;
  GtStr* coarse_fname = gt_str_new_cstr("coarse_");
  char *db_basename = NULL;
  char *suffix_ptr = NULL;
  GtTimer *timer = NULL;
  GtLogger *logger = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  /* only dereference the arguments after they have been checked */
  querypath = gt_str_get(arguments->querypath);

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  db_basename = gt_basename(gt_str_get(arguments->dbpath));
  /* if first char is '.' this might be a hidden file */
  if (strlen(db_basename) > (size_t) 1 &&
      (suffix_ptr = strrchr(db_basename + 1, '.')) != NULL) {
    /* remove suffix */
    *suffix_ptr = '\0';
  }
  gt_str_append_cstr(coarse_fname, db_basename);
  gt_str_append_cstr(coarse_fname, ".fas");
  gt_free(db_basename);
  db_basename = NULL;
  suffix_ptr = NULL;

  if (arguments->blastn || arguments->blastp) {
    GtMatch              *match;
    GtMatchIterator      *mp = NULL;
    GtNREncseq           *nrencseq = NULL;
    GtStr                *fastaname = gt_str_clone(arguments->dbpath);
    HitPosition          *hits;
    double                eval,
                          raw_eval = 0.0;
    GtUword               coarse_db_len = 0;
    GtMatchIteratorStatus status;
    int                   curr_hits = 0,
                          max_hits = 100;

    hits = gt_malloc(sizeof (*hits) * (size_t) max_hits);

    gt_str_append_cstr(fastaname, ".fas");

    /* every slot owns its range, so that hits can be stored in place */
    for (i=0; i < max_hits; i++) {
      hits[i].range = gt_malloc(sizeof (*hits[i].range) * (size_t) 1);
    }

    if (gt_showtime_enabled()) {
      timer = gt_timer_new_with_progress_description("initialization");
      gt_timer_start(timer);
    }

    /*extract sequences from compressed database*/
    if (!had_err) {
      nrencseq = gt_n_r_encseq_new_from_file(gt_str_get(arguments->dbpath),
                                             logger, err);
      if (nrencseq == NULL)
        had_err = -1;
    }
    if (!had_err) {
      if (arguments->ceval == GT_UNDEF_DOUBLE ||
          arguments->feval == GT_UNDEF_DOUBLE) {
        /* from NCBI BLAST tutorial:
           E = Kmne^{-lambdaS}
           calculates E-value for score S with natural scale parameters K for
           search space size and lambda for the scoring system
           E = mn2^-S'
           m being the subject (total) length, n the length of ONE query
           calculates E-value for bit-score S'
         */
        GtFastaReader *reader;
        GtCondenserSearchAvg avg = {0,0};
        reader = gt_fasta_reader_rec_new(arguments->querypath);
        had_err = gt_fasta_reader_run(reader, NULL, NULL,
                                      gt_condenser_search_cum_moving_avg,
                                      &avg,
                                      err);
        if (!had_err) {
          GtUword S = arguments->bitscore;
          gt_log_log(GT_WU " queries, avg query size: " GT_WU,
                     avg.count, avg.avg);
          raw_eval = 1/pow(2.0, (double) S) * avg.avg;
          gt_logger_log(logger, "Raw E-value set to %.4e", raw_eval);
          gt_assert(avg.avg != 0);
        }
        gt_fasta_reader_delete(reader);
      }
    }

    /*create BLAST database from compressed database fasta file*/
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create coarse BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(gt_str_get(fastaname),
                                                          err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(gt_str_get(fastaname),
                                                          err);
    }

    /* perform coarse BLAST search and collect the hits */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "coarse BLAST run", stderr);

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();
      gt_blast_process_call_set_db(call, gt_str_get(fastaname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, arguments->ceval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      while (!had_err &&
             (status = gt_match_iterator_next(mp, &match, err)) !=
             GT_MATCHER_STATUS_END)
      {
        if (status == GT_MATCHER_STATUS_OK) {
          GtUword hit_seq_id;
          char string[7];
          const char *dbseqid = gt_match_get_seqid2(match);
          if (sscanf(dbseqid,"%6s" GT_WU, string, &hit_seq_id) == 2) {
            gt_match_get_range_seq2(match, hits[curr_hits].range);
            hits[curr_hits].idx = hit_seq_id;
            curr_hits++;
            if (curr_hits == max_hits) {
              /* table is full: add another 100 slots including their ranges */
              max_hits += 100;
              hits = gt_realloc(hits, sizeof (*hits) * (size_t) max_hits);
              for (i=max_hits - 100; i < max_hits; i++) {
                hits[i].range = gt_malloc(sizeof (*hits[i].range));
              }
            }
          } else {
            gt_error_set(err, "could not parse unique db header %s", dbseqid);
            had_err = -1;
          }
          /* release the match on the error path as well, but only after the
             seqid has been used for the error message */
          gt_match_delete(match);
        } else if (status == GT_MATCHER_STATUS_ERROR) {
          had_err = -1;
        }
      }
      gt_match_iterator_delete(mp);
    }
    /*extract sequences*/
    if (!had_err) {
      GtNREncseqDecompressor *decomp;
      GtFile *coarse_hits;
      if (timer != NULL)
        gt_timer_show_progress(timer, "extract coarse search hits", stderr);
      decomp = gt_n_r_encseq_decompressor_new(nrencseq);
      coarse_hits = gt_file_new(gt_str_get(coarse_fname),"w", err);
      if (coarse_hits == NULL) {
        /* the output file could not be opened, nothing can be extracted */
        had_err = -1;
      }
      else {
        /* TODO DW do NOT extract complete uniques! these could be complete
           chromosomes!! just extract something around it? maybe +- max query
           length*/
        for (i = 0; i < curr_hits; i++) {
          gt_n_r_encseq_decompressor_add_unique_idx_to_extract(decomp,
                                                               hits[i].idx);
        }
        had_err =
          gt_n_r_encseq_decompressor_start_unique_extraction(coarse_hits,
                                                             decomp,
                                                             &coarse_db_len,
                                                             err);
        /* the length is only meaningful if the extraction succeeded */
        if (!had_err) {
          gt_assert(coarse_db_len != 0);
        }
        gt_file_delete(coarse_hits);
      }
      gt_n_r_encseq_decompressor_delete(decomp);
    }
    gt_n_r_encseq_delete(nrencseq);

    /* create BLAST database from decompressed database file */
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create fine BLAST db", stderr);
      if (arguments->blastn)
        had_err =
          gt_condenser_search_create_nucl_blastdb(gt_str_get(coarse_fname),
                                                  err);
      else
        had_err =
          gt_condenser_search_create_prot_blastdb(gt_str_get(coarse_fname),
                                                  err);
    }
    /* perform fine BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "fine BLAST run", stderr);

      if (arguments->feval == GT_UNDEF_DOUBLE) {
        eval = raw_eval * coarse_db_len;
      } else {
        eval = arguments->feval;
      }

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();

      gt_blast_process_call_set_db(call, gt_str_get(coarse_fname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, eval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      gt_logger_log(logger, "Fine E-value set to: %.4e (len)" GT_WU, eval,
                    coarse_db_len);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      if (!had_err) {
        GtUword numofhits = 0;
        while (!had_err &&
               (status = gt_match_iterator_next(mp, &match, err)) !=
               GT_MATCHER_STATUS_END) {
          if (status == GT_MATCHER_STATUS_OK) {
            GtMatchBlast *matchb = (GtMatchBlast*) match;
            GtRange range_seq1;
            GtRange range_seq2;
            numofhits++;
            gt_match_get_range_seq1(match, &range_seq1);
            gt_match_get_range_seq2(match, &range_seq2);
            gt_file_xprintf(
                    arguments->outfp,
                    "%s\t%s\t%.2f\t" GT_WU "\t" GT_WU "\t" GT_WU "\t" GT_WU "\t"
                    GT_WU "\t%g\t%.3f\n",
                    gt_match_get_seqid1(match),
                    gt_match_get_seqid2(match),
                    gt_match_blast_get_similarity(matchb),
                    gt_match_blast_get_align_length(matchb),
                    range_seq1.start,
                    range_seq1.end,
                    range_seq2.start,
                    range_seq2.end,
                    gt_match_blast_get_evalue(matchb),
                    (double) gt_match_blast_get_bitscore(matchb));
            gt_match_delete(match);
          } else if (status == GT_MATCHER_STATUS_ERROR) {
            had_err = -1;
          }
        }
        gt_log_log(GT_WU " hits found\n", numofhits);
      }
      gt_match_iterator_delete(mp);

    }
    if (!had_err)
      if (timer != NULL)
        gt_timer_show_progress_final(timer, stderr);
    gt_timer_delete(timer);

    /*cleanup*/
    for (i=0; i < max_hits; i++) {
      gt_free(hits[i].range);
    }
    gt_free(hits);
    gt_str_delete(fastaname);
  }
  gt_str_delete(coarse_fname);
  gt_logger_delete(logger);
  return had_err;
}
コード例 #13
0
ファイル: gt_idxlocali.c プロジェクト: potter-s/genometools
static void *gt_idxlocali_arguments_new(void)
{
    return gt_malloc(sizeof (IdxlocaliOptions));
}
コード例 #14
0
ファイル: gt_input_file.c プロジェクト: gemtools/gemtools
/*
 * Opens 'file_name' for reading and returns a newly allocated input-file handler.
 *
 *   file_name  Path of the file; the pointer itself is stored in the handler
 *              (the string is not copied).
 *   mmap_file  If true, the file is opened with open() and mapped read-only
 *              with mmap(). Otherwise it is opened as a stream; a regular
 *              file is then probed for the gzip and bzip2 magic bytes and
 *              reopened through the matching library when one is recognized.
 *
 * Every failure (stat, open, mmap, mutex init, missing compression support)
 * is reported through gt_cond_fatal_error()/gt_fatal_error().
 * Before returning, the buffer bookkeeping is reset and
 * gt_input_file_detect_file_format() is invoked on the handler.
 */
gt_input_file* gt_input_file_open(char* const file_name,const bool mmap_file) {
    GT_NULL_CHECK(file_name);
    // Allocate handler
    gt_input_file* input_file = gt_alloc(gt_input_file);
    // Input file
    struct stat stat_info;
    gt_cond_fatal_error(stat(file_name,&stat_info)==-1,FILE_STAT,file_name);
    input_file->file_name = file_name;
    input_file->file_size = stat_info.st_size;
    input_file->eof = (input_file->file_size==0);
    input_file->file_format = FILE_FORMAT_UNKNOWN;
    gt_cond_fatal_error(pthread_mutex_init(&input_file->input_mutex,NULL),SYS_MUTEX_INIT);
    if (mmap_file) {
        input_file->file = NULL;
        input_file->fildes = open(file_name,O_RDONLY,0); // TODO: O_NOATIME condCompl (Thanks Jordi Camps)
        gt_cond_fatal_error(input_file->fildes==-1,FILE_OPEN,file_name);
        input_file->file_buffer =
            (uint8_t*) mmap(0,input_file->file_size,PROT_READ,MAP_PRIVATE,input_file->fildes,0);
        gt_cond_fatal_error(input_file->file_buffer==MAP_FAILED,SYS_MMAP_FILE,file_name);
        input_file->file_type = MAPPED_FILE;
    } else {
        input_file->fildes = -1;
        gt_cond_fatal_error(!(input_file->file=fopen(file_name,"r")),FILE_OPEN,file_name);
        input_file->file_type = REGULAR_FILE;
        if(S_ISREG(stat_info.st_mode)) {
            // Regular file - check if gzip or bzip compressed.
            // The file may hold fewer than 4 bytes: zero the probe buffer and
            // only compare the bytes that were actually read.
            unsigned char magic[4] = {0,0,0,0};
            const size_t magic_len = fread(magic,(size_t)1,sizeof(magic),input_file->file);
            if(magic_len>=3 && magic[0]==0x1f && magic[1]==0x8b && magic[2]==0x08) {
                input_file->file_type=GZIPPED_FILE;
                fclose(input_file->file);
#ifdef HAVE_ZLIB
                gt_cond_fatal_error(!(input_file->file=(void *)gzopen(file_name,"r")),FILE_GZIP_OPEN,file_name);
#else
                gt_fatal_error(FILE_GZIP_NO_ZLIB,file_name);
#endif
            } else if(magic_len==4 && magic[0]=='B' && magic[1]=='Z' && magic[2]=='h' &&
                      magic[3]>='0' && magic[3]<='9') {
                // Rewind so that the bzip2 reader starts at the stream header
                fseek(input_file->file,0L,SEEK_SET);
                input_file->file_type=BZIPPED_FILE;
#ifdef HAVE_BZLIB
                int bz_error;
                input_file->file=BZ2_bzReadOpen(&bz_error,input_file->file,0,0,NULL,0);
                gt_cond_fatal_error(bz_error!=BZ_OK,FILE_BZIP2_OPEN,file_name);
#else
                gt_fatal_error(FILE_BZIP2_NO_BZLIB,file_name);
#endif
            } else {
                // Plain file: give back the probed bytes
                fseek(input_file->file,0L,SEEK_SET);
            }
        } else {
            // Not a regular file: the size from stat() says nothing about EOF
            input_file->eof=0;
        }
        input_file->file_buffer = gt_malloc(GT_INPUT_BUFFER_SIZE);
    }
    // Auxiliary Buffer (for synch purposes)
    input_file->buffer_size = 0;
    input_file->buffer_begin = 0;
    input_file->buffer_pos = 0;
    input_file->global_pos = 0;
    input_file->processed_lines = 0;
    // ID generator
    input_file->processed_id = 0;
    // Detect file format
    gt_input_file_detect_file_format(input_file);
    return input_file;
}
コード例 #15
0
ファイル: hcr.c プロジェクト: oeigenbrod/genometools
/*
  Create a new GtHcrEncoder for the FASTQ files named in <files>.

  All files are scanned once to
    - check that all reads of one file have the same length,
    - record per file the cumulative number of reads and the read length,
    - collect the <base, quality> pair distribution,
  from which the huffman tree for sequences and qualities is built.

  <files> and <alpha> are stored by pointer in the encoder. If <descs> is
  true, an additional description encoder is created. <qrange> is rejected if
  its start is defined and equal to its end. <timer> may be NULL.

  Returns the new encoder, or NULL if an error occurred; in the latter case
  <err> is set and everything allocated here is released again.

  NOTE(review): the first read of each file is only used for the length
  check, its symbols are not added to the distribution -- confirm that this
  is intended.
*/
GtHcrEncoder *gt_hcr_encoder_new(GtStrArray *files, GtAlphabet *alpha,
                                 bool descs, GtQualRange qrange, GtTimer *timer,
                                 GtError *err)
{
  GtBaseQualDistr *bqd;
  GtHcrEncoder *hcr_enc;
  GtSeqIterator *seqit;
  GtStrArray *file;
  int had_err = 0,
      status;
  unsigned long len1,
                len2,
                i,
                num_of_reads;
  const GtUchar *seq,
                *qual;
  char *desc;

  gt_error_check(err);
  gt_assert(alpha && files);

  if (timer != NULL)
    gt_timer_show_progress(timer, "get <base,qual> distr", stdout);

  if (qrange.start != GT_UNDEF_UINT)
    if (qrange.start == qrange.end) {
      gt_error_set(err, "qrange.start must unequal qrange.end");
      return NULL;
    }

  hcr_enc = gt_malloc(sizeof (GtHcrEncoder));
  hcr_enc->files = files;
  hcr_enc->num_of_files = gt_str_array_size(files);
  hcr_enc->num_of_reads = 0;
  hcr_enc->page_sampling = false;
  hcr_enc->regular_sampling = false;
  hcr_enc->sampling_rate = 0;
  hcr_enc->pagesize = sysconf((int) _SC_PAGESIZE);
  if (descs) {
    hcr_enc->encdesc_encoder = gt_encdesc_encoder_new();
    if (timer != NULL)
      gt_encdesc_encoder_set_timer(hcr_enc->encdesc_encoder, timer);
  }
  else
    hcr_enc->encdesc_encoder = NULL;

  hcr_enc->seq_encoder = gt_malloc(sizeof (GtHcrSeqEncoder));
  hcr_enc->seq_encoder->alpha = alpha;
  hcr_enc->seq_encoder->sampling = NULL;
  hcr_enc->seq_encoder->fileinfos = gt_calloc((size_t) hcr_enc->num_of_files,
                                   sizeof (*(hcr_enc->seq_encoder->fileinfos)));
  hcr_enc->seq_encoder->qrange = qrange;
  bqd = hcr_base_qual_distr_new(alpha, qrange);

  /* check if reads in the same file are of same length and get
     <base, quality> pair distribution; stop at the first error, because
     <err> must not be passed on once it is set */
  for (i = 0; !had_err && i < hcr_enc->num_of_files; i++) {
    /* per file values: an empty file contributes no reads */
    num_of_reads = 0;
    len1 = 0;

    file = gt_str_array_new();
    gt_str_array_add(file, gt_str_array_get_str(files, i));
    seqit = gt_seq_iterator_fastq_new(file, err);
    if (!seqit) {
      gt_error_set(err, "cannot initialize GtSeqIteratorFastQ object");
      had_err = -1;
    }
    if (!had_err) {
      gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(alpha));
      gt_seq_iterator_set_quality_buffer(seqit, &qual);
      status = gt_seq_iterator_next(seqit, &seq, &len1, &desc, err);

      if (status == 1) {
        num_of_reads = 1UL;
        while (!had_err) {
          status = gt_seq_iterator_next(seqit, &seq, &len2, &desc, err);
          if (status == -1)
            had_err = -1;
          if (status != 1)
            break;
          if (len2 != len1) {
            gt_error_set(err, "reads have to be of equal length");
            had_err = -1;
            break;
          }
          if (hcr_base_qual_distr_add(bqd, qual, seq, len1) != 0)
            had_err = -1;
          len1 = len2;
          num_of_reads++;
        }
      }
      else if (status == -1)
        had_err = -1;

      if (!had_err) {
        /* readnum is cumulative over the files processed so far */
        if (i == 0)
          hcr_enc->seq_encoder->fileinfos[i].readnum = num_of_reads;
        else
          hcr_enc->seq_encoder->fileinfos[i].readnum =
            hcr_enc->seq_encoder->fileinfos[i - 1].readnum + num_of_reads;
        hcr_enc->seq_encoder->fileinfos[i].readlength = len1;
      }
    }
    hcr_enc->num_of_reads += num_of_reads;
    gt_str_array_delete(file);
    gt_seq_iterator_delete(seqit);
  }

  if (!had_err) {
    hcr_base_qual_distr_trim(bqd);
    if (timer != NULL)
      gt_timer_show_progress(timer, "build huffman tree for sequences and"
                             " qualities", stdout);
    hcr_enc->seq_encoder->huffman =
      gt_huffman_new(bqd,
                     hcr_base_qual_distr_func,
                     (unsigned long) bqd->ncols * bqd->nrows);
    hcr_enc->seq_encoder->qual_offset = bqd->qual_offset;
    hcr_base_qual_distr_delete(bqd);
    return hcr_enc;
  }

  /* failure: the huffman tree was not built yet, so only the objects
     created above have to be released */
  hcr_base_qual_distr_delete(bqd);
  if (hcr_enc->encdesc_encoder != NULL)
    gt_encdesc_encoder_delete(hcr_enc->encdesc_encoder);
  gt_free(hcr_enc->seq_encoder->fileinfos);
  gt_free(hcr_enc->seq_encoder);
  gt_free(hcr_enc);
  return NULL;
}
コード例 #16
0
ファイル: diagram.c プロジェクト: AlexWoroschilow/uni_hamburg
/*
  Sort the feature node <node> (with the optional <parent>) into the blocks
  of diagram <d>.

  Nothing is done (return value 0) for pseudo nodes, for nodes outside of the
  visible range of the diagram and for types whose "max_show_width" style
  value is smaller than the visible range. A parent whose own
  "max_show_width" is exceeded is ignored.

  The style values "collapse_to_parent" and "group_by_parent" are queried once
  per feature type and cached in d->collapsingtypes and d->groupedtypes.
  Depending on them the node is added recursively to the parent's block, to
  its multi-feature representative, to its parent or to its own block.

  Returns 0 on success and -1 if a style query or an insertion failed.
*/
static int process_node(GtDiagram *d, GtFeatureNode *node,
                        GtFeatureNode *parent, GtError *err)
{
  GtRange node_range;
  bool *collapse_flag,
       siblings_disjoint,
       wants_parent_block,
       group_by_parent_value;
  GtShouldGroupByParent *grouping;
  const char *node_type = NULL,
             *parent_type = NULL;
  double width_value;
  GtStyleQueryStatus query_status;
  GtUword node_width_limit = GT_UNDEF_UWORD,
          parent_width_limit = GT_UNDEF_UWORD;
  int placement_rc = 0;

  gt_assert(d && node);

  gt_log_log(">> getting '%s'", gt_feature_node_get_type(node));

  /* pseudo nodes are never placed themselves */
  if (gt_feature_node_is_pseudo(node)) {
    return 0;
  }

  node_type = gt_feature_node_get_type(node);
  gt_assert(node_type);

  /* nothing to do for elements outside of the visible range */
  node_range = gt_genome_node_get_range((GtGenomeNode*) node);
  if (!gt_range_overlap(&d->range, &node_range)) {
    return 0;
  }

  /* maximal view width (in nucleotides) up to which this type is shown;
     the limit stays undefined if the style does not set it */
  query_status = gt_style_get_num(d->style, node_type, "max_show_width",
                                  &width_value, NULL, err);
  if (query_status == GT_STYLE_QUERY_ERROR) {
    return -1;
  }
  if (query_status == GT_STYLE_QUERY_OK) {
    node_width_limit = width_value;
  }

  /* the same for a real (non-pseudo) parent */
  if (parent && !gt_feature_node_is_pseudo(parent)) {
    parent_type = gt_feature_node_get_type(parent);
    query_status = gt_style_get_num(d->style, parent_type, "max_show_width",
                                    &width_value, NULL, err);
    if (query_status == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
    if (query_status == GT_STYLE_QUERY_OK) {
      parent_width_limit = width_value;
    }
  }

  /* this type is not displayed at the current zoom level */
  if (node_width_limit != GT_UNDEF_UWORD &&
      gt_range_length(&d->range) > node_width_limit) {
    return 0;
  }

  /* a parent which is not displayed is treated as not present */
  if (parent &&
      parent_width_limit != GT_UNDEF_UWORD &&
      gt_range_length(&d->range) > parent_width_limit) {
    parent = NULL;
  }

  /* look up whether the type collapses, ask the style on a cache miss */
  collapse_flag = (bool*) gt_hashmap_get(d->collapsingtypes, node_type);
  if (collapse_flag == NULL) {
    collapse_flag = gt_malloc(sizeof (bool));
    *collapse_flag = false;
    if (gt_style_get_bool(d->style, node_type, "collapse_to_parent",
                          collapse_flag, NULL, err) == GT_STYLE_QUERY_ERROR) {
      gt_free(collapse_flag);
      return -1;
    }
    gt_hashmap_add(d->collapsingtypes, (void*) node_type, collapse_flag);
  }

  /* look up the grouping mode, ask the style on a cache miss */
  grouping = (GtShouldGroupByParent*) gt_hashmap_get(d->groupedtypes,
                                                     node_type);
  if (grouping == NULL) {
    grouping = gt_malloc(sizeof (GtShouldGroupByParent));
    query_status = gt_style_get_bool(d->style, node_type, "group_by_parent",
                                     &group_by_parent_value, NULL, err);
    if (query_status == GT_STYLE_QUERY_OK) {
      *grouping = group_by_parent_value ? GT_GROUP_BY_PARENT
                                        : GT_DO_NOT_GROUP_BY_PARENT;
    }
    else if (query_status == GT_STYLE_QUERY_NOT_SET) {
      *grouping = GT_UNDEFINED_GROUPING;
    }
    else if (query_status == GT_STYLE_QUERY_ERROR) {
      gt_free(grouping);
      return -1;
    }
    gt_hashmap_add(d->groupedtypes, (void*) node_type, grouping);
  }

  /* finally decide where the feature goes */
  if (*collapse_flag && parent && !gt_feature_node_is_pseudo(parent)) {
    /* collapsing children are added to upwards blocks, but they never
       collapse into pseudo nodes */
    add_recursive(d, node, parent, node);
  }
  else if (*collapse_flag || !parent) {
    /* collapsing type without a real parent, or a root node: own block */
    placement_rc = add_to_current(d, node, parent, err);
  }
  else {
    siblings_disjoint =
      gt_feature_node_direct_children_do_not_overlap_st(parent, node);
    wants_parent_block = *grouping == GT_GROUP_BY_PARENT
                           || (siblings_disjoint
                                 && *grouping == GT_UNDEFINED_GROUPING);
    if (gt_feature_node_is_pseudo(parent) && gt_feature_node_is_multi(node)) {
      placement_rc = add_to_rep(d, node, parent, err);
    }
    else if (wants_parent_block
               && gt_feature_node_number_of_children(parent) > 1) {
      placement_rc = add_to_parent(d, node, parent, err);
    }
    else {
      placement_rc = add_to_current(d, node, parent, err);
    }
  }
  if (placement_rc < 0) {
    return -1;
  }

  /* from here on the node (or its representative) is expected to be part of
     the reverse lookup structure */
#ifndef NDEBUG
  if (gt_feature_node_is_multi(node))
  {
    GtFeatureNode *representative =
      gt_feature_node_get_multi_representative((GtFeatureNode*) node);
    gt_assert(gt_hashmap_get(d->nodeinfo, representative));
  }
  else
  {
    gt_assert(gt_hashmap_get(d->nodeinfo, node));
  }
#endif

  return 0;
}
コード例 #17
0
/* Runner of the compressed-bitsequence test tool.

   The uncompressed bit vector is either read from the file given by the
   filename option (layout as read below: one unsigned long long holding the
   number of bits, followed by the GtBitsequence words) or generated in memory
   with <arguments->size> words. It is then compressed, written to the file
   whose name gt_xtmpfp() stored in <filename>, read back from that file and,
   if requested, compared bit by bit (via gt_assert) against the original and
   the in-memory compressed version.

   Returns 0 on success and a negative value on failure. Short reads of the
   input file set an error message in <err>.
   NOTE(review): the file created via gt_xtmpfp() is not removed here --
   confirm whether that is intended. */
static int gt_compressedbits_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtCompressdbitsArguments *arguments = tool_arguments;
  int had_err = 0;
  unsigned long idx;
  unsigned long long num_of_bits = 0ULL;
  GtBitsequence *bits = NULL;
  GtCompressedBitsequence *cbs = NULL, *read_cbs = NULL;
  GtStr *filename = gt_str_new();
  FILE *fp = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  gt_assert(argc == parsed_args);

  if (gt_option_is_set(arguments->filename_op)) {
    FILE *file = NULL;
    gt_assert(arguments->filename != NULL);

    file = gt_xfopen(gt_str_get(arguments->filename), "r");
    /* header: total number of bits stored in the file */
    if ((size_t) 1 != gt_xfread(&num_of_bits,
                                sizeof (num_of_bits), (size_t) 1, file)) {
      /* a short read must not fail silently: report it through <err> */
      gt_error_set(err, "could not read number of bits from file \"%s\"",
                   gt_str_get(arguments->filename));
      had_err = -1;
    }
    if (!had_err) {
      gt_log_log("bits to read: %llu", num_of_bits);
      arguments->size = (unsigned long) GT_NUMOFINTSFORBITS(num_of_bits);
      bits = gt_malloc(sizeof (*bits) * arguments->size);
      /* payload: the words holding the bits */
      if ((size_t) arguments->size !=
          gt_xfread(bits, sizeof (*bits),
                    (size_t) arguments->size, file)) {
        gt_error_set(err, "could not read %lu words of bit data from file "
                          "\"%s\"", arguments->size,
                     gt_str_get(arguments->filename));
        had_err = -1;
      }
    }
    gt_xfclose(file);
  }
  else {
    /* no input file: synthesize <arguments->size> words of test data */
    bits = gt_calloc(sizeof (*bits), (size_t) arguments->size);
    num_of_bits = (unsigned long long) (GT_INTWORDSIZE * arguments->size);

    if (arguments->fill_random) {
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] =
          (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ gt_rand_max(ULONG_MAX));
      }
    }
    else {
      for (idx = 0; idx < arguments->size; idx++)
        bits[idx] = (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ idx);
    }
  }

  if (!had_err) {
    /* only the name stored in <filename> is needed; the stream itself is
       closed right away */
    fp = gt_xtmpfp(filename);
    gt_fa_xfclose(fp);
    fp = NULL;

    gt_log_log("filename: %s", gt_str_get(filename));
    gt_log_log("size in words: %lu", arguments->size);
    cbs = gt_compressed_bitsequence_new(
                            bits, arguments->samplerate,
                            (unsigned long) num_of_bits);
    gt_log_log("original size in MB: %2.3f",
               (sizeof (*bits) * arguments->size) / (1024.0 * 1024.0));
    gt_log_log("compressed size in MB: %2.3f",
               gt_compressed_bitsequence_size(cbs) / (1024.0 * 1024.0));
    gt_log_log("popcount table size thereof in MB: %2.3f",
               gt_popcount_tab_calculate_size(15U) / (1024.0 * 1024.0));
    had_err = gt_compressed_bitsequence_write(cbs, gt_str_get(filename), err);
  }
  if (!had_err)
  {
    read_cbs =
      gt_compressed_bitsequence_new_from_file(gt_str_get(filename), err);
    if (read_cbs == NULL)
      had_err = -1;
  }
  if (!had_err && bits != NULL && arguments->check_consistency) {
    /* the comparison is done with gt_assert only; the locals are marked
       GT_UNUSED because they are referenced from assertions alone */
    for (idx = 0; (unsigned long long) idx < num_of_bits; ++idx) {
      int GT_UNUSED bit = gt_compressed_bitsequence_access(read_cbs, idx);
      int GT_UNUSED original = GT_ISIBITSET(bits, idx) ? 1 : 0;
      gt_assert(gt_compressed_bitsequence_access(cbs, idx) == bit);
      gt_assert(original == bit);
    }
  }
  gt_compressed_bitsequence_delete(cbs);
  gt_compressed_bitsequence_delete(read_cbs);
  gt_free(bits);
  gt_str_delete(filename);
  return had_err;
}
コード例 #18
0
ファイル: mapspec.c プロジェクト: 9beckert/TIR
/* Maps the file <filename> into memory and lets every table specification
   registered by <setup> point into that mapping.

   <*mapped> always receives the result of the mmap call (NULL if mapping
   failed). The size implied by the specifications must equal the size of the
   mapped file, and the accumulated offset (including the padding inserted
   after each table to reach a multiple of GT_WORDSIZE_INBYTES) must equal
   <expectedsize> plus that padding. Returns 0 on success, -1 otherwise. */
int  gt_mapspec_read(GtMapspecSetupFunc setup, void *data,
                     const GtStr *filename, unsigned long expectedsize,
                     void **mapped, GtError *err)
{
  GtMapspec *ms = gt_malloc(sizeof (GtMapspec));
  GtMapspecification *spec, *specs_end;
  void *mapptr;
  size_t numofbytes;
  uint64_t size_from_specs;
  unsigned long byteoffset = 0, totalpadunits = 0, overhang;
  int had_err = -1; /* pessimistic: cleared only after all checks passed */

  gt_error_check(err);
  GT_INITARRAY(&ms->mapspectable, GtMapspecification);
  setup(ms, data, false);

  mapptr = gt_fa_mmap_read(gt_str_get(filename), &numofbytes, err);
  *mapped = mapptr;
  if (mapptr == NULL)
  {
    goto done;
  }

  /* the first table starts at offset 0 of the mapping */
  if (assigncorrecttype(ms->mapspectable.spaceGtMapspecification,
                        mapptr,0,err) != 0)
  {
    goto done;
  }

  size_from_specs = detexpectedaccordingtomapspec(&ms->mapspectable);
  if (size_from_specs != (uint64_t) numofbytes)
  {
    gt_error_set(err,"%lu bytes read from %s, but " Formatuint64_t
                       " expected",
                       (unsigned long) numofbytes,
                       gt_str_get(filename),
                       PRINTuint64_tcast(size_from_specs));
    goto done;
  }

  spec = ms->mapspectable.spaceGtMapspecification;
  gt_assert(spec != NULL);
  specs_end = spec + ms->mapspectable.nextfreeGtMapspecification;

  /* offset behind the first table, rounded up to the next word boundary */
  byteoffset = CALLCASTFUNC(uint64_t,unsigned_long,
                            (uint64_t) (spec->sizeofunit *
                                        spec->numofunits));
  overhang = byteoffset % (unsigned long) GT_WORDSIZE_INBYTES;
  if (overhang > 0)
  {
    unsigned long padunits = (unsigned long) GT_WORDSIZE_INBYTES - overhang;
    byteoffset += padunits;
    totalpadunits += padunits;
  }

  /* all remaining tables follow one after another, each padded likewise */
  for (spec++; spec < specs_end; spec++)
  {
    if (assigncorrecttype(spec,mapptr,byteoffset,err) != 0)
    {
      goto done;
    }
    byteoffset = CALLCASTFUNC(uint64_t,unsigned_long,
                              (uint64_t) (byteoffset +
                                          spec->sizeofunit *
                                          spec->numofunits));
    overhang = byteoffset % (unsigned long) GT_WORDSIZE_INBYTES;
    if (overhang > 0)
    {
      unsigned long padunits = (unsigned long) GT_WORDSIZE_INBYTES - overhang;
      byteoffset += padunits;
      totalpadunits += padunits;
    }
  }

  if (expectedsize + totalpadunits != byteoffset)
  {
    gt_error_set(err,"mapping: expected file size is %lu bytes, "
                     "but file has %lu bytes",
                     expectedsize,byteoffset);
    goto done;
  }
  had_err = 0;

done:
  GT_FREEARRAY(&ms->mapspectable,GtMapspecification);
  gt_free(ms);
  return had_err;
}
コード例 #19
0
/* Starts the program <path> with arguments <argv> and environment <envp> in a
   child process (created with safe_fork() and replaced via execve()) that is
   connected to the caller through two pipes.

   On success a newly allocated GtSafePipe is returned in which
     - write_fd is a stream writing to the child's standard input,
     - read_fd  is a stream reading from the child's standard output,
     - child_pid is the process id of the child.
   On failure NULL is returned, <err> is set and every descriptor/stream
   opened so far is released exactly once.
   On Windows the function is not implemented: it sets <err> and returns
   NULL. */
GtSafePipe *gt_safe_popen(const char *path,
                          char *const argv[],
                          char *const envp[],
                          GtError *err) {
#ifndef _WIN32
  /* -1 marks "descriptor not open" so that cleanup knows what to close */
  int stdin_pipe[2] = { -1, -1 }, stdout_pipe[2] = { -1, -1 }, had_err = 0;
  GtSafePipe *p = NULL;

  p = gt_malloc(sizeof (*p));
  p->read_fd = p->write_fd = NULL;
  p->child_pid = (pid_t) -1;

  if ((had_err = pipe(stdin_pipe))) {
    gt_error_set(err, "could not open stdin pipe: %s", strerror(errno));
    stdin_pipe[0] = stdin_pipe[1] = -1;
  }
  if (!had_err && (had_err = pipe(stdout_pipe))) {
    gt_error_set(err, "could not open stdout pipe: %s", strerror(errno));
    stdout_pipe[0] = stdout_pipe[1] = -1;
  }
  if (!had_err && !(p->read_fd = fdopen(stdout_pipe[0], "r"))) {
    gt_error_set(err, "could not open stdout_pipe[0] for reading: %s",
                 strerror(errno));
    had_err = -1;
  }
  if (!had_err && !(p->write_fd = fdopen(stdin_pipe[1], "w"))) {
    gt_error_set(err, "could not open stdin_pipe[1] for writing: %s",
                 strerror(errno));
    had_err = -1;
  }
  if (!had_err && (p->child_pid = safe_fork()) == (pid_t) -1) {
    gt_error_set(err, "could not fork: %s", strerror(errno));
    had_err = -1;
  }

  if (!had_err) {
    if (!p->child_pid) {
      /* this is the child process: drop the parent's pipe ends and wire the
         remaining ends to stdin/stdout */
      (void) close(stdout_pipe[0]);
      (void) close(stdin_pipe[1]);
      if (stdin_pipe[0] != 0) {
        (void) dup2(stdin_pipe[0], 0);
        (void) close(stdin_pipe[0]);
      }
      if (stdout_pipe[1] != 1) {
        (void) dup2(stdout_pipe[1], 1);
        (void) close(stdout_pipe[1]);
      }
      (void) execve(path, argv, envp);
      /* only reached if execve() failed; perror() appends the errno text */
      perror("could not execute external program");
      /* _exit() instead of exit(): the forked child must not run the parent's
         atexit handlers or flush stdio buffers inherited from the parent */
      _exit(127);
    }
    /* parent: the child's pipe ends are not needed here */
    (void) close(stdout_pipe[1]);
    (void) close(stdin_pipe[0]);
  }
  else {
    /* A descriptor wrapped by fdopen() is owned by its stream; it is closed
       by fclose() and must not be passed to close() again. */
    if (p->write_fd) {
      (void) fclose(p->write_fd);
    }
    else if (stdin_pipe[1] >= 0) {
      (void) close(stdin_pipe[1]);
    }
    if (p->read_fd) {
      (void) fclose(p->read_fd);
    }
    else if (stdout_pipe[0] >= 0) {
      (void) close(stdout_pipe[0]);
    }
    if (stdout_pipe[1] >= 0) {
      (void) close(stdout_pipe[1]);
    }
    if (stdin_pipe[0] >= 0) {
      (void) close(stdin_pipe[0]);
    }
    gt_free(p);
    p = NULL;
  }
  return p;
#else
  gt_error_set(err, "Function gt_safe_popen not implemented for windows yet");
  return NULL;
#endif
}
コード例 #20
0
ファイル: mapspec.c プロジェクト: 9beckert/TIR
/* Writes all tables registered by <setup> (called with <data>) to <fp>, one
   after another in registration order.

   After each table gt_mapspec_pad() is called with the current byte offset;
   the number of bytes it reports in <byteswritten> is added to the offset and
   to the padding total (presumably this restores word alignment -- confirm
   against gt_mapspec_pad). Finally the accumulated offset must equal
   <expectedsize> plus the total padding.

   Returns 0 on success and -1 on failure; writing stops at the first failing
   table or padding step. */
int gt_mapspec_write(GtMapspecSetupFunc setup, FILE *fp,
                     void *data, unsigned long expectedsize, GtError *err)
{
  /* the names mapspecptr, fp, err and had_err are kept as they are; the
     WRITEACTIONWITHTYPE macro is defined elsewhere and may refer to them */
  GtMapspecification *mapspecptr;
  unsigned long byteoffset = 0;
  int had_err = 0;
  unsigned long totalpadunits = 0;
  unsigned long byteswritten = 0;
  GtMapspec *ms = gt_malloc(sizeof (GtMapspec));

  gt_error_check(err);
  GT_INITARRAY(&ms->mapspectable,GtMapspecification);
  setup(ms, data, true);
  gt_assert(ms->mapspectable.spaceGtMapspecification != NULL);
  for (mapspecptr = ms->mapspectable.spaceGtMapspecification;
       mapspecptr < ms->mapspectable.spaceGtMapspecification +
                    ms->mapspectable.nextfreeGtMapspecification;
       mapspecptr++)
  {
#ifdef SKDEBUG
    printf("# gt_mapspec_flushtheindex2file");
    showmapspec(mapspecptr);
    printf(" at byteoffset %lu\n",byteoffset);
#endif
    /* empty tables are skipped, but still take part in the offset and
       padding bookkeeping below */
    if (mapspecptr->numofunits > 0)
    {
      switch (mapspecptr->typespec)
      {
        case GtCharType:
          WRITEACTIONWITHTYPE(char);
          break;
        case GtFilelengthvaluesType:
          WRITEACTIONWITHTYPE(GtFilelengthvalues);
          break;
        case GtUcharType:
          WRITEACTIONWITHTYPE(GtUchar);
          break;
        case Uint16Type:
          WRITEACTIONWITHTYPE(uint16_t);
          break;
        case Uint32Type:
          WRITEACTIONWITHTYPE(uint32_t);
          break;
        case GtUlongType:
          WRITEACTIONWITHTYPE(GtUlong);
          break;
        case Uint64Type:
          WRITEACTIONWITHTYPE(uint64_t);
          break;
        case GtBitsequenceType:
          WRITEACTIONWITHTYPE(GtBitsequence);
          break;
        case GtUlongBoundType:
          WRITEACTIONWITHTYPE(GtUlongBound);
          break;
        case GtPairBwtidxType:
          WRITEACTIONWITHTYPE(GtPairBwtidx);
          break;
        case GtTwobitencodingType:
          WRITEACTIONWITHTYPE(GtTwobitencoding);
          break;
        case GtSpecialcharinfoType:
          WRITEACTIONWITHTYPE(GtSpecialcharinfo);
          break;
        case GtBitElemType:
          WRITEACTIONWITHTYPE(BitElem);
          break;
        default:
           gt_error_set(err,"no map specification for size %lu",
                         (unsigned long) mapspecptr->sizeofunit);
           had_err = -1;
      }
    }
    if (had_err)
    {
      break;
    }
    byteoffset = CALLCASTFUNC(uint64_t,unsigned_long,
                              (uint64_t) (byteoffset +
                                          mapspecptr->sizeofunit *
                                          mapspecptr->numofunits));
    if (gt_mapspec_pad(fp,&byteswritten,byteoffset,err) != 0)
    {
      /* stop immediately: byteswritten is not trustworthy after a failure
         and no further table must be written behind a broken padding */
      had_err = -1;
      break;
    }
    byteoffset += byteswritten;
    totalpadunits += byteswritten;
  }
  if (!had_err)
  {
    if (expectedsize + totalpadunits != byteoffset)
    {
      gt_error_set(err,"expected file size is %lu bytes, "
                       "but file has %lu bytes",
                       expectedsize,
                       byteoffset);
      had_err = -1;
    }
  }
  GT_FREEARRAY(&ms->mapspectable,GtMapspecification);
  gt_free(ms);
  return had_err;
}
コード例 #21
0
ファイル: gthtrans.c プロジェクト: oeigenbrod/genometools
static void showtranslation(GthSplicedSeq *splicedseq,
                            char *frame0_in,
                            char *frame1_in,
                            char *frame2_in,
                            GtArray *exons,
                            bool gen_strand_forward,
                            unsigned long gen_total_length,
                            unsigned long gen_offset,
                            unsigned int indentlevel,
                            GthOutput *out)
{
  char *dotline, *template_out, *frame0_out, *frame1_out, *frame2_out;
  unsigned long i, exonseparatorwidth =  strlen(EXONSEPARATORSTRING),
                outlen = splicedseq->splicedseqlen +
                         ((gt_array_size(exons) - 1) * exonseparatorwidth) +
                         (splicedseq->splicedseqlen / TRANSLATIONLINEWIDTH);
  GtFile *outfp = out->outfp;

  dotline      = gt_malloc(sizeof (unsigned char) * outlen);
  template_out = gt_malloc(sizeof (unsigned char) * outlen);
  frame0_out   = gt_malloc(sizeof (unsigned char) * outlen);
  frame1_out   = gt_malloc(sizeof (unsigned char) * outlen);
  frame2_out   = gt_malloc(sizeof (unsigned char) * outlen);

  createoutputlines(dotline, template_out, frame0_out, frame1_out, frame2_out,
                    (char*) splicedseq->splicedseq, frame0_in, frame1_in,
                    frame2_in, splicedseq, exonseparatorwidth, outlen,
                    out->gs2out);

  if (out->xmlout) {
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<translation>\n");
    indentlevel++;

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<gDNA_template>");
    for (i = 0; i < outlen; i++) {
      if (template_out[i] != '\n') {
        gt_file_xfputc(template_out[i], outfp);
      }
    }
    gt_file_xprintf(outfp, "</gDNA_template>\n");

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<first_frame>");
    for (i = 0; i < outlen; i++) {
      if (frame0_out[i] != '\n') {
        gt_file_xfputc(frame0_out[i], outfp);
      }
    }
    gt_file_xprintf(outfp, "</first_frame>\n");

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<second_frame>");
    for (i = 0; i < outlen; i++) {
      if (frame1_out[i] != '\n') {
        gt_file_xfputc(frame1_out[i], outfp);
      }
    }
    gt_file_xprintf(outfp, "</second_frame>\n");

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<third_frame>");
    for (i = 0; i < outlen; i++) {
      if (frame2_out[i] != '\n') {
        gt_file_xfputc(frame2_out[i], outfp);
      }
    }
    gt_file_xprintf(outfp, "</third_frame>\n");

    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</translation>\n");
  }
  else {
    showoutputlines(dotline, template_out, frame0_out, frame1_out, frame2_out,
                    outlen, gen_strand_forward, gen_total_length,
                    gen_offset, splicedseq->positionmapping, out);
  }

  gt_free(dotline);
  gt_free(template_out);
  gt_free(frame0_out);
  gt_free(frame1_out);
  gt_free(frame2_out);
}
コード例 #22
0
/* Parses the <numofbytes> bytes at <filecontents> as FASTA-like text and
   encodes the sequence in place.

   Lines starting with '>' are skipped; for every such line except the first
   one a SEPARATOR is written. In all other lines whitespace is dropped and
   each remaining character is replaced by its code from the symbol map of
   <alphabet>. The encoded sequence is compacted towards the beginning of
   <filecontents>, which becomes the sequence buffer of the returned object.
   While parsing, the per-character counts, the number of special characters
   and the list of maximal runs of special characters (start and length) are
   recorded.

   Returns the new GtBareEncseq, or NULL (with <err> set) if a character maps
   to UNDEFCHAR.
   NOTE(review): whether gt_bare_encseq_delete() releases <filecontents> on
   the error path cannot be seen here -- confirm the ownership with callers. */
GtBareEncseq *gt_bare_encseq_parse_new(GtUchar *filecontents,size_t numofbytes,
                                       const GtAlphabet *alphabet,
                                       GtError *err)
{
  /* writeptr never overtakes readptr: at most one byte is written per byte
     consumed */
  GtUchar *writeptr = filecontents, *readptr = filecontents;
  const GtUchar *endptr = filecontents + numofbytes;
  bool firstline = true, haserr = false;
  GtUword lastspecialrange_length = 0; /* length of the currently open run */
  GtBareSpecialrange *srptr = NULL;    /* the currently open run, if any */
  GtBareEncseq *bare_encseq = gt_malloc(sizeof *bare_encseq);
  const GtUchar *smap = gt_alphabet_symbolmap(alphabet);

  bare_encseq->specialcharacters = 0;
  bare_encseq->numofchars = (GtUword) gt_alphabet_num_of_chars(alphabet);
  bare_encseq->charcount = gt_calloc((size_t) bare_encseq->numofchars,
                                     sizeof *bare_encseq->charcount);
  GT_INITARRAY(&bare_encseq->specialranges,GtBareSpecialrange);
  while (!haserr && readptr < endptr)
  {
    if (*readptr == '>')
    {
      if (!firstline)
      {
        /* a header after the first one separates two sequences; the
           separator counts as a special character */
        if (lastspecialrange_length == 0)
        {
          GT_GETNEXTFREEINARRAY(srptr,&bare_encseq->specialranges,
                                GtBareSpecialrange,128UL);
          srptr->start = (GtUword) (writeptr - filecontents);
        }
        lastspecialrange_length++;
        *writeptr++ = SEPARATOR;
        bare_encseq->specialcharacters++;
      } else
      {
        firstline = false;
      }
      /* skip the rest of the header line */
      while (readptr < endptr && *readptr != '\n')
      {
        readptr++;
      }
      /* step over the newline, but never move beyond endptr: forming a
         pointer past one-past-the-end is undefined behavior */
      if (readptr < endptr)
      {
        readptr++;
      }
    } else
    {
      while (readptr < endptr && *readptr != '\n')
      {
        if (!isspace(*readptr))
        {
          GtUchar cc = smap[*readptr];
          if (cc == UNDEFCHAR)
          {
            gt_error_set(err,"illegal input characters %c\n",*readptr);
            haserr = true;
            break;
          }
          if (ISSPECIAL(cc))
          {
            /* open a new run unless one is already in progress */
            if (lastspecialrange_length == 0)
            {
              GT_GETNEXTFREEINARRAY(srptr,&bare_encseq->specialranges,
                                    GtBareSpecialrange,128UL);
              srptr->start = (GtUword) (writeptr - filecontents);
            }
            lastspecialrange_length++;
            bare_encseq->specialcharacters++;
          } else
          {
            gt_assert((GtUword) cc < bare_encseq->numofchars);
            bare_encseq->charcount[(int) cc]++;
            /* a regular character closes the run in progress */
            if (lastspecialrange_length > 0)
            {
              gt_assert(srptr != NULL);
              srptr->length = lastspecialrange_length;
            }
            lastspecialrange_length = 0;
          }
          *writeptr++ = cc;
        }
        readptr++;
      }
      /* see above: do not advance beyond endptr */
      if (readptr < endptr)
      {
        readptr++;
      }
    }
  }
  /* close a run that extends to the end of the sequence */
  if (lastspecialrange_length > 0)
  {
    gt_assert(srptr != NULL);
    srptr->length = lastspecialrange_length;
  }
  bare_encseq->sequence = filecontents;
  bare_encseq->totallength = (GtUword) (writeptr - filecontents);
  if (haserr)
  {
    gt_bare_encseq_delete(bare_encseq);
    return NULL;
  }
  return bare_encseq;
}