/**
 * Check a BWT sequence index for consistency with the reference suffix
 * array project it belongs to.
 *
 * The length of the index is always compared with the length of the
 * reference.  Further checks are selected by bits in checkFlags:
 *  - VERIFY_BWTSEQ_SUFVAL: every position that carries locate information
 *    must produce the same value as the reference suffix table,
 *  - VERIFY_BWTSEQ_LFMAPWALK: the symbols obtained by walking the LF-mapping
 *    from the terminator position must equal the reference sequence read
 *    backwards (requires the BWTReversiblySorted feature),
 *  - VERIFY_BWTSEQ_CONTEXT: randomly chosen substrings obtained from the
 *    context retriever must equal the reference sequence.
 *
 * @param bwtSeq      index to verify, must not be NULL
 * @param projectName base name of the reference project, must not be NULL
 * @param checkFlags  bitwise OR of the VERIFY_BWTSEQ_* selectors above
 * @param tickPrint   if non-zero, a '.' is written to fp after every
 *                    tickPrint positions of the suffix value check and a
 *                    newline once that check ends
 * @param fp          stream for the progress ticks (used only if the suffix
 *                    value check runs and tickPrint is non-zero)
 * @param verbosity   logger passed on to gt_mapsuffixarray()
 * @param err         must not be NULL; receives the failure description
 * @return VERIFY_BWTSEQ_NO_ERROR if all requested checks passed, otherwise
 *         the VERIFY_BWTSEQ_* code of the first check that failed
 */
enum verifyBWTSeqErrCode
gt_BWTSeqVerifyIntegrity(BWTSeq *bwtSeq, const char *projectName,
                         int checkFlags, GtUword tickPrint, FILE *fp,
                         GtLogger *verbosity, GtError *err)
{
  Suffixarray suffixArray;
  struct extBitsRetrieval extBits;
  bool suffixArrayIsInitialized = false, extBitsAreInitialized = false;
  enum verifyBWTSeqErrCode retval = VERIFY_BWTSEQ_NO_ERROR;

  /* one-shot loop: a "break" at this level jumps to the cleanup code */
  do
  {
    GtUword seqLen;

    gt_assert(bwtSeq && projectName && err);
    gt_error_check(err);

    initExtBitsRetrieval(&extBits);
    extBitsAreInitialized = true;
    if (gt_mapsuffixarray(&suffixArray, SARR_SUFTAB | SARR_ESQTAB,
                          projectName, verbosity, err))
    {
      gt_error_set(err, "Cannot load reference suffix array project with"
                   " demand for suffix table file and encoded sequence"
                   " for project: %s", projectName);
      retval = VERIFY_BWTSEQ_REFLOAD_ERROR;
      break;
    }
    suffixArrayIsInitialized = true;

    /* the index has one position more than the encoded sequence: the
       terminator, which the encseq itself never returns */
    seqLen = gt_encseq_total_length(suffixArray.encseq) + 1;
    if (BWTSeqLength(bwtSeq) != seqLen)
    {
      gt_error_set(err, "length mismatch for suffix array project %s and "
                   "bwt sequence index", projectName);
      retval = VERIFY_BWTSEQ_LENCOMPARE_ERROR;
      break;
    }

    /* ---- comparison of located values with the suffix table ---- */
    if ((checkFlags & VERIFY_BWTSEQ_SUFVAL)
        && BWTSeqHasLocateInformation(bwtSeq))
    {
      GtUword i;
      for (i = 0; i < seqLen && retval == VERIFY_BWTSEQ_NO_ERROR; ++i)
      {
        if (gt_BWTSeqPosHasLocateInfo(bwtSeq, i, &extBits))
        {
          GtUword sfxArrayValue = gt_BWTSeqLocateMatch(bwtSeq, i, &extBits);
          if (sfxArrayValue != ESASUFFIXPTRGET(suffixArray.suftab,i))
          {
            gt_error_set(err, "Failed suffix array value comparison"
                         " at position " GT_WU ": " GT_WU " != " GT_WU "",
                         i, sfxArrayValue,
                         ESASUFFIXPTRGET(suffixArray.suftab,i));
            retval = VERIFY_BWTSEQ_SUFVAL_ERROR;
            break;
          }
        }
        if (tickPrint && !((i + 1) % tickPrint))
          putc('.', fp);
      }
      if (tickPrint)
        putc('\n', fp);
      if (retval != VERIFY_BWTSEQ_NO_ERROR)
        break;
    }
    else if (checkFlags & VERIFY_BWTSEQ_SUFVAL)
    {
      gt_error_set(err, "check of suffix array values was requested,"
                   " but index contains no locate information!");
      retval = VERIFY_BWTSEQ_SUFVAL_ERROR;
      break;
    }
    else if (BWTSeqHasLocateInformation(bwtSeq))
    {
      /* check was not requested although it would have been possible */
      fputs("Not checking suftab values.\n", stderr);
    }

    /* ---- terminator position and backward regeneration ---- */
    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      GtUword nextLocate = BWTSeqTerminatorPos(bwtSeq);
      if (suffixArray.longest.defined &&
          suffixArray.longest.valueunsignedlong != nextLocate)
      {
        gt_error_set(err, "terminator/0-rotation position mismatch " GT_WU ""
                     " vs. " GT_WU "", suffixArray.longest.valueunsignedlong,
                     nextLocate);
        retval = VERIFY_BWTSEQ_TERMPOS_ERROR;
        break;
      }
      if ((checkFlags & VERIFY_BWTSEQ_LFMAPWALK)
          && (bwtSeq->featureToggles & BWTReversiblySorted))
      {
        GtUword i = seqLen;
        /* handle first symbol specially because the encseq
         * will not return the terminator symbol */
        {
          Symbol sym = BWTSeqGetSym(bwtSeq, nextLocate);
          if (sym != UNDEFBWTCHAR)
          {
            gt_error_set(err, "symbol mismatch at position " GT_WU ": "
                         "%d vs. reference symbol %d", i - 1, (int)sym,
                         (int)UNDEFBWTCHAR);
            retval = VERIFY_BWTSEQ_LFMAPWALK_ERROR;
            break;
          }
          --i;
          nextLocate = BWTSeqLFMap(bwtSeq, nextLocate, &extBits);
        }
        /* walk the remaining positions from the end to the start */
        while (i > 0)
        {
          Symbol symRef = gt_encseq_get_encoded_char(suffixArray.encseq,
                                                     --i,
                                                     suffixArray.readmode);
          Symbol symCmp = BWTSeqGetSym(bwtSeq, nextLocate);
          if (symCmp != symRef)
          {
            /* cast explicitly so the arguments match the %d conversions */
            gt_error_set(err, "symbol mismatch at position " GT_WU ": "
                         "%d vs. reference symbol %d", i, (int)symCmp,
                         (int)symRef);
            retval = VERIFY_BWTSEQ_LFMAPWALK_ERROR;
            break;
          }
          nextLocate = BWTSeqLFMap(bwtSeq, nextLocate, &extBits);
        }
        if (retval != VERIFY_BWTSEQ_NO_ERROR)
          break;
      }
      else if (checkFlags & VERIFY_BWTSEQ_LFMAPWALK)
      {
        gt_error_set(err, "requested complete backwards regeneration in index"
                     " without regeneration capability");
        retval = VERIFY_BWTSEQ_LFMAPWALK_IMP_ERROR;
        break;
      }
    }

    /* ---- random substring retrieval through the context table ---- */
    if (checkFlags & VERIFY_BWTSEQ_CONTEXT)
    {
      BWTSeqContextRetriever *bwtSeqCR =
        gt_BWTSeqCRLoad(bwtSeq, projectName, CTX_MAP_ILOG_AUTOSIZE);
      if (!bwtSeqCR)
      {
        gt_error_set(err, "cannot load BWT sequence context access table"
                     " for project %s", projectName);
        retval = VERIFY_BWTSEQ_CONTEXT_LOADFAIL;
        break;
      }
      fputs("Checking context regeneration.\n", stderr);
      {
        GtUword i, start, subSeqLen,
          maxSubSeqLen = MIN(MAX(MIN_CONTEXT_LEN, seqLen/CONTEXT_FRACTION),
                             MAX_CONTEXT_LEN),
          numTries = MIN(MAX_NUM_CONTEXT_CHECKS,
                         MAX(2, seqLen/CONTEXT_INTERVAL));
        Symbol *contextBuf = gt_malloc(sizeof (Symbol) * MAX_CONTEXT_LEN);
        GtEncseqReader *esr =
          gt_encseq_create_reader_with_readmode(suffixArray.encseq,
                                                suffixArray.readmode,
                                                0);
        /* a substring can never be longer than the sequence; without this
           limit "seqLen - subSeqLen + 1" below wraps around (or becomes 0)
           for sequences shorter than MIN_CONTEXT_LEN */
        if (maxSubSeqLen > seqLen)
          maxSubSeqLen = seqLen;
        for (i = 0; i < numTries && retval == VERIFY_BWTSEQ_NO_ERROR; ++i)
        {
          GtUword j, end, inSubSeqLen;
          subSeqLen = random()%maxSubSeqLen + 1;
          start = random()%(seqLen - subSeqLen + 1);
          end = start + subSeqLen;
          /* the last index position holds the terminator, which cannot be
             read from the encseq */
          inSubSeqLen = subSeqLen - ((end==seqLen)?1:0);
          gt_BWTSeqCRAccessSubseq(bwtSeqCR, start, subSeqLen, contextBuf);
          gt_encseq_reader_reinit_with_readmode(esr, suffixArray.encseq,
                                                suffixArray.readmode, start);
          for (j = 0; j < inSubSeqLen; ++j)
          {
            Symbol symRef = gt_encseq_reader_next_encoded_char(esr);
            Symbol symCmp = contextBuf[j];
            if (symCmp != symRef)
            {
              gt_error_set(err, "symbol mismatch at position " GT_WU ": "
                           "%d vs. reference symbol %d", start + j,
                           (int)symCmp, (int)symRef);
              retval = VERIFY_BWTSEQ_CONTEXT_SYMFAIL;
              break;
            }
          }
          /* compare the terminator position, but only if the part above
             matched; otherwise j still points at the mismatch and the
             comparison would replace the error message with a bogus one */
          while (retval == VERIFY_BWTSEQ_NO_ERROR && j < subSeqLen)
          {
            Symbol symRef = UNDEFBWTCHAR;
            Symbol symCmp = contextBuf[j];
            if (symCmp != symRef)
            {
              gt_error_set(err, "symbol mismatch at position " GT_WU ": "
                           "%d vs. reference symbol %d", start + j,
                           (int)symCmp, (int)symRef);
              retval = VERIFY_BWTSEQ_CONTEXT_SYMFAIL;
              break;
            }
            ++j;
          }
        }
        if (retval == VERIFY_BWTSEQ_NO_ERROR)
          fputs("Context regeneration completed successfully.\n", stderr);
        gt_encseq_reader_delete(esr);
        gt_free(contextBuf);
      }
      gt_deleteBWTSeqCR(bwtSeqCR);
    }
  } while (0);

  /* release only what was successfully set up */
  if (suffixArrayIsInitialized)
    gt_freesuffixarray(&suffixArray);
  if (extBitsAreInitialized)
    destructExtBitsRetrieval(&extBits);
  return retval;
}
/**
 * Create a sequence decoder for the file <name> with suffix HCRFILESUFFIX.
 *
 * The file information and base/quality distribution are read from the
 * file, the Huffman decoder is initialised and, if the file continues
 * beyond the recorded end of the encoded data, the sampling is read.
 *
 * @param alpha alphabet; the pointer is stored in the decoder as is
 * @param name  file name without HCRFILESUFFIX
 * @param err   receives the error description on failure
 * @return the new decoder, or NULL if the file could not be opened, its
 *         size could not be determined, or the Huffman initialisation failed
 */
static GtHcrSeqDecoder *hcr_seq_decoder_new(GtAlphabet *alpha,
                                            const char *name,
                                            GtError *err)
{
  int had_err = 0;
  GtHcrSeqDecoder *seq_dec = gt_malloc(sizeof *seq_dec);
  GtBaseQualDistr *bqd = NULL;
  GtWord end_enc_start_sampling = 0;
  FILE *fp = NULL;
  GT_UNUSED size_t nread;
  GT_UNUSED const size_t one = (size_t) 1;

  /* put every member the delete function may look at into a defined state */
  seq_dec->alpha = alpha;
  seq_dec->alphabet_size = gt_alphabet_size(alpha);
  seq_dec->cur_read = 0;
  seq_dec->data_iter = NULL;
  seq_dec->file_info_rbt = NULL;
  seq_dec->fileinfos = NULL;
  seq_dec->filename = gt_str_new_cstr(name);
  seq_dec->huff_dec = NULL;
  seq_dec->huffman = NULL;
  seq_dec->sampling = NULL;
  seq_dec->symbols = NULL;
  gt_str_append_cstr(seq_dec->filename, HCRFILESUFFIX);

  fp = gt_fa_fopen_with_suffix(name, HCRFILESUFFIX, "rb", err);
  if (fp == NULL) {
    had_err = -1;
    hcr_seq_decoder_delete(seq_dec);
    seq_dec = NULL;
  }

  if (!had_err) {
    hcr_read_file_info(seq_dec, fp);
    bqd = hcr_base_qual_distr_new_from_file(fp, seq_dec->alpha);
    seq_dec->qual_offset = bqd->qual_offset;
    nread = gt_xfread_one(&end_enc_start_sampling, fp);
    gt_assert(nread == one);
    seq_dec->start_of_encoding = decoder_calc_start_of_encoded_data(fp);
    had_err = seq_decoder_init_huffman(seq_dec, end_enc_start_sampling,
                                       bqd, err);
    if (had_err) {
      hcr_seq_decoder_delete(seq_dec);
      seq_dec = NULL;
    }
  }

  if (!had_err) {
    long file_end;

    gt_xfseek(fp, 0, SEEK_END);
    /* ftell() reports failure as -1; keep the value signed so that the
       failure is detected instead of turning into a huge file size */
    file_end = ftell(fp);
    if (file_end < 0) {
      gt_error_set(err, "cannot determine size of file \"%s\"",
                   gt_str_get(seq_dec->filename));
      had_err = -1;
      hcr_seq_decoder_delete(seq_dec);
      seq_dec = NULL;
    }
    else {
      gt_xfseek(fp, end_enc_start_sampling, SEEK_SET);
      /* a sampling is present only if data follows the encoded part */
      if (end_enc_start_sampling < (GtWord) file_end)
        seq_dec->sampling = gt_sampling_read(fp);
      else
        seq_dec->sampling = NULL;
      seq_dec->file_info_rbt =
        seq_decoder_init_file_info(seq_dec->fileinfos,
                                   seq_dec->num_of_files);
    }
  }

  hcr_base_qual_distr_delete(bqd);
  gt_fa_fclose(fp);
  return seq_dec;
}
/**
 * Tool runner: load the encoded sequence named by argv[parsed_args] and
 * output its sequences in the order selected by the tool arguments
 * (inverted, shuffled, sorted by header lexicographically or numerically,
 * or sorted/reverse sorted by sequence content).
 *
 * @return 0 on success, -1 if the encoded sequence could not be loaded
 */
static int gt_seqorder_runner(GT_UNUSED int argc, const char **argv,
                              int parsed_args, void *tool_arguments,
                              GtError *err)
{
  GtSeqorderArguments *arguments = tool_arguments;
  GtEncseqLoader *loader;
  GtEncseq *encseq;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments != NULL);

  /* load encseq */
  loader = gt_encseq_loader_new();
  encseq = gt_encseq_loader_load(loader, argv[parsed_args], err);

  if (encseq == NULL)
  {
    had_err = -1;
  }
  else
  {
    GtUword idx, nofseqs;

    if (!gt_encseq_has_description_support(encseq))
      gt_warning("%s has no description support", argv[parsed_args]);

    nofseqs = gt_encseq_num_of_sequences(encseq);

    if (arguments->invert)
    {
      /* last sequence first */
      for (idx = nofseqs; idx > 0; idx--)
        gt_seqorder_output(idx - 1, encseq);
    }
    else if (arguments->shuffle || arguments->sorthdr
             || arguments->sorthdrnum)
    {
      /* all three modes fill a permutation table and then emit it */
      GtUword *seqnums = gt_malloc(sizeof (GtUword) * nofseqs);

      if (arguments->shuffle)
        gt_seqorder_get_shuffled_seqnums(nofseqs, seqnums);
      else if (arguments->sorthdr)
        gt_seqorder_get_hdrsorted_seqnums(encseq, seqnums,
                                          seqorder_str_compare_lex);
      else
        gt_seqorder_get_hdrsorted_seqnums(encseq, seqnums,
                                          seqorder_str_compare_num);

      for (idx = 0; idx < nofseqs; idx++)
        gt_seqorder_output(seqnums[idx], encseq);
      gt_free(seqnums);
    }
    else
    {
      GtSuffixsortspace *suffixsortspace;

      gt_assert(arguments->sort || arguments->revsort);
      suffixsortspace
        = gt_suffixsortspace_new(nofseqs,
                                 /* Use iterator over sequence separators:
                                    saves a lot of binary searches */
                                 gt_encseq_seqstartpos(encseq, nofseqs-1),
                                 false,NULL);
      gt_seqorder_sort(suffixsortspace, encseq);

      if (arguments->sort)
      {
        for (idx = 0; idx < nofseqs; idx++)
        {
          const GtUword pos = gt_suffixsortspace_getdirect(suffixsortspace,
                                                           idx);
          gt_seqorder_output(gt_encseq_seqnum(encseq, pos), encseq);
        }
      }
      else
      {
        /* reverse sort: walk the sorted table from its end */
        for (idx = nofseqs; idx > 0; idx--)
        {
          const GtUword pos = gt_suffixsortspace_getdirect(suffixsortspace,
                                                           idx - 1);
          gt_seqorder_output(gt_encseq_seqnum(encseq, pos), encseq);
        }
      }
      gt_suffixsortspace_delete(suffixsortspace, false);
    }
  }

  gt_encseq_loader_delete(loader);
  gt_encseq_delete(encseq);
  return had_err;
}
/**
 * Parse an option argument consisting of a positive decimal number that is
 * optionally followed by exactly one qualifier character (BESTCHARACTER or
 * PERCENTAWAYCHARACTER).
 *
 * Without qualifier the result is tagged Qualabsolute; with a qualifier it
 * is tagged Qualbestof or Qualpercentaway and the number must not exceed
 * 100.
 *
 * @param option name of the option, used in the error message
 * @param lparam argument string to parse
 * @param err    receives the error description (set via ERRORLPARAM)
 * @return newly allocated result owned by the caller, or NULL if lparam is
 *         malformed
 */
Qualifiedinteger *gt_parsequalifiedinteger(const char *option,
                                           const char *lparam,
                                           GtError *err)
{
  GtWord readint = 0;
  size_t i;
  char *lparamcopy;
  bool haserr = false;
  Qualifiedinteger *qualint;

  lparamcopy = gt_malloc(sizeof (char) * (strlen(lparam)+1));
  qualint = gt_malloc(sizeof (*qualint));
  strcpy(lparamcopy,lparam);

  for (i=0; lparamcopy[i] != '\0'; i++)
  {
    const char cc = lparamcopy[i];
    /* <ctype.h> functions need a value representable as unsigned char */
    const bool isnumeral = isdigit((int) (unsigned char) cc) ? true : false;
    /* a qualifier is acceptable only as the very last character; anywhere
       else sscanf() below would silently ignore the rest of the string */
    const bool istrailingqualifier
      = (cc == BESTCHARACTER || cc == PERCENTAWAYCHARACTER) &&
        lparamcopy[i+1] == '\0';

    if (!isnumeral && !istrailingqualifier)
    {
      ERRORLPARAM;
      haserr = true;
      break;
    }
  }
  if (!haserr && i == 0)
  {
    /* empty argument */
    ERRORLPARAM;
    haserr = true;
  }
  if (!haserr)
  {
    /* strip the qualifier (if any) so that only the number remains */
    if (lparamcopy[i-1] == BESTCHARACTER)
    {
      lparamcopy[i-1] = '\0';
      qualint->qualtag = Qualbestof;
    } else
    {
      if (lparamcopy[i-1] == PERCENTAWAYCHARACTER)
      {
        lparamcopy[i-1] = '\0';
        qualint->qualtag = Qualpercentaway;
      } else
      {
        qualint->qualtag = Qualabsolute;
      }
    }
    if (sscanf(lparamcopy,"" GT_WD "",&readint) != 1 || readint <= 0)
    {
      ERRORLPARAM;
      haserr = true;
    }
  }
  if (!haserr &&
      (qualint->qualtag == Qualpercentaway || qualint->qualtag == Qualbestof))
  {
    if (readint > 100L)
    {
      ERRORLPARAM;
      haserr = true;
    }
  }
  qualint->integervalue = (GtUword) readint;
  gt_free(lparamcopy);
  if (haserr)
  {
    gt_free (qualint);
    return NULL;
  }
  return qualint;
}
int gt_bitPackStringInt16_unit_test(GtError *err) { BitString bitStore = NULL; BitString bitStoreCopy = NULL; uint16_t *randSrc = NULL; /*< create random ints here for input as bit * store */ uint16_t *randCmp = NULL; /*< used for random ints read back */ unsigned *numBitsList = NULL; size_t i, numRnd; BitOffset offsetStart, offset; int had_err = 0; offset = offsetStart = random()%(sizeof (uint16_t) * CHAR_BIT); numRnd = random() % (MAX_RND_NUMS_uint16_t + 1); gt_log_log("offset=" GT_WU ", numRnd=" GT_WU "\n", (GtUword)offsetStart, (GtUword)numRnd); { BitOffset numBits = sizeof (uint16_t) * CHAR_BIT * numRnd + offsetStart; randSrc = gt_malloc(sizeof (uint16_t)*numRnd); bitStore = gt_malloc(bitElemsAllocSize(numBits) * sizeof (BitElem)); bitStoreCopy = gt_calloc(bitElemsAllocSize(numBits), sizeof (BitElem)); randCmp = gt_malloc(sizeof (uint16_t)*numRnd); } /* first test unsigned types */ gt_log_log("gt_bsStoreUInt16/gt_bsGetUInt16: "); for (i = 0; i < numRnd; ++i) { #if 16 > 32 && LONG_BIT < 16 uint16_t v = randSrc[i] = (uint16_t)random() << 32 | random(); #else /* 16 > 32 && LONG_BIT < 16 */ uint16_t v = randSrc[i] = random(); #endif /* 16 > 32 && LONG_BIT < 16 */ int bits = gt_requiredUInt16Bits(v); gt_bsStoreUInt16(bitStore, offset, bits, v); offset += bits; } offset = offsetStart; for (i = 0; i < numRnd; ++i) { uint16_t v = randSrc[i]; int bits = gt_requiredUInt16Bits(v); uint16_t r = gt_bsGetUInt16(bitStore, offset, bits); gt_ensure(r == v); if (had_err) { gt_log_log("Expected %"PRIu16", got %"PRIu16", i = " GT_WU "\n", v, r, (GtUword)i); freeResourcesAndReturn(had_err); } offset += bits; } gt_log_log("passed\n"); if (numRnd > 0) { uint16_t v = randSrc[0], r = 0; unsigned numBits = gt_requiredUInt16Bits(v); BitOffset i = offsetStart + numBits; uint16_t mask = ~(uint16_t)0; if (numBits < 16) mask = ~(mask << numBits); gt_log_log("bsSetBit, gt_bsClearBit, bsToggleBit, gt_bsGetBit: "); while (v) { int lowBit = v & 1; v >>= 1; gt_ensure(lowBit == (r = 
gt_bsGetBit(bitStore, --i))); if (had_err) { gt_log_log("Expected %d, got %d, i = "GT_LLU"\n", lowBit, (int)r, (GtUint64)i); freeResourcesAndReturn(had_err); } } i = offsetStart + numBits; gt_bsClear(bitStoreCopy, offsetStart, numBits, random()&1); v = randSrc[0]; while (i) { int lowBit = v & 1; v >>= 1; if (lowBit) bsSetBit(bitStoreCopy, --i); else gt_bsClearBit(bitStoreCopy, --i); } v = randSrc[0]; r = gt_bsGetUInt16(bitStoreCopy, offsetStart, numBits); gt_ensure(r == v); if (had_err) { gt_log_log("Expected %"PRIu16", got %"PRIu16"\n", v, r); freeResourcesAndReturn(had_err); } for (i = 0; i < numBits; ++i) bsToggleBit(bitStoreCopy, offsetStart + i); r = gt_bsGetUInt16(bitStoreCopy, offsetStart, numBits); gt_ensure(r == (v = (~v & mask))); if (had_err) { gt_log_log("Expected %"PRIu16", got %"PRIu16"\n", v, r); freeResourcesAndReturn(had_err); } gt_log_log("passed\n"); }
static int enumeratelcpintervals(const char *inputindex, Sequentialsuffixarrayreader *ssar, const char *storeindex, bool storecounts, GtUword mersize, GtUword minocc, GtUword maxocc, bool performtest, GtLogger *logger, GtError *err) { TyrDfsstate *state; bool haserr = false; unsigned int alphasize; gt_error_check(err); state = gt_malloc(sizeof (*state)); GT_INITARRAY(&state->occdistribution,Countwithpositions); state->esrspace = gt_encseq_create_reader_with_readmode( gt_encseqSequentialsuffixarrayreader(ssar), gt_readmodeSequentialsuffixarrayreader(ssar), 0); state->mersize = (GtUword) mersize; state->encseq = gt_encseqSequentialsuffixarrayreader(ssar); alphasize = gt_alphabet_num_of_chars(gt_encseq_alphabet(state->encseq)); state->readmode = gt_readmodeSequentialsuffixarrayreader(ssar); state->storecounts = storecounts; state->minocc = minocc; state->maxocc = maxocc; state->totallength = gt_encseq_total_length(state->encseq); state->performtest = performtest; state->countoutputmers = 0; state->merindexfpout = NULL; state->countsfilefpout = NULL; GT_INITARRAY(&state->largecounts,Largecount); if (strlen(storeindex) == 0) { state->sizeofbuffer = 0; state->bytebuffer = NULL; } else { state->sizeofbuffer = MERBYTES(mersize); state->bytebuffer = gt_malloc(sizeof *state->bytebuffer * state->sizeofbuffer); } if (performtest) { state->currentmer = gt_malloc(sizeof *state->currentmer * state->mersize); state->suftab = gt_suftabSequentialsuffixarrayreader(ssar); } else { state->currentmer = NULL; state->suftab = NULL; } if (state->mersize > state->totallength) { gt_error_set(err,"mersize "GT_WU" > "GT_WU" = totallength not allowed", state->mersize, state->totallength); haserr = true; } else { if (strlen(storeindex) == 0) { state->processoccurrencecount = adddistpos2distribution; } else { state->merindexfpout = gt_fa_fopen_with_suffix(storeindex,MERSUFFIX, "wb",err); if (state->merindexfpout == NULL) { haserr = true; } else { if (state->storecounts) { state->countsfilefpout = 
gt_fa_fopen_with_suffix(storeindex,COUNTSSUFFIX,"wb",err); if (state->countsfilefpout == NULL) { haserr = true; } } } state->processoccurrencecount = outputsortedstring2index; } if (!haserr) { if (gt_depthfirstesa(ssar, tyr_allocateDfsinfo, tyr_freeDfsinfo, tyr_processleafedge, NULL, tyr_processcompletenode, tyr_assignleftmostleaf, tyr_assignrightmostleaf, (Dfsstate*) state, logger, err) != 0) { haserr = true; } if (strlen(storeindex) == 0) { showfinalstatistics(state,inputindex,logger); } } if (!haserr) { if (state->countsfilefpout != NULL) { gt_logger_log(logger,"write "GT_WU" mercounts > "GT_WU " to file \"%s%s\"", state->largecounts.nextfreeLargecount, (GtUword) MAXSMALLMERCOUNT, storeindex, COUNTSSUFFIX); gt_xfwrite(state->largecounts.spaceLargecount, sizeof (Largecount), (size_t) state->largecounts.nextfreeLargecount, state->countsfilefpout); } } if (!haserr) { gt_logger_log(logger,"number of "GT_WU"-mers in index: "GT_WU"", mersize, state->countoutputmers); gt_logger_log(logger,"index size: %.2f megabytes\n", GT_MEGABYTES(state->countoutputmers * state->sizeofbuffer + sizeof (GtUword) * EXTRAINTEGERS)); } } /* now out EXTRAINTEGERS integer values */ if (!haserr && state->merindexfpout != NULL) { outputbytewiseUlongvalue(state->merindexfpout, (GtUword) state->mersize); outputbytewiseUlongvalue(state->merindexfpout,(GtUword) alphasize); } gt_fa_xfclose(state->merindexfpout); gt_fa_xfclose(state->countsfilefpout); GT_FREEARRAY(&state->occdistribution,Countwithpositions); gt_free(state->currentmer); gt_free(state->bytebuffer); GT_FREEARRAY(&state->largecounts,Largecount); gt_encseq_reader_delete(state->esrspace); gt_free(state); return haserr ? -1 : 0; }
/**
 * Visitor callback for feature nodes: search the annotation subgraph of fn
 * for an LTR_retrotransposon feature, translate its sequence in all three
 * frames of both strands, feed the translations to "hmmscan" through a pipe
 * and process the reported hits.
 *
 * @param nv  node visitor, must be a GtLTRdigestPdomVisitor
 * @param fn  feature node whose subgraph is traversed
 * @param err receives the error description on failure
 * @return 0 on success, a non-zero value on failure
 */
static int gt_ltrdigest_pdom_visitor_feature_node(GtNodeVisitor *nv,
                                                  GtFeatureNode *fn,
                                                  GtError *err)
{
  GtLTRdigestPdomVisitor *lv;
  GtFeatureNodeIterator *fni;
  GtFeatureNode *curnode = NULL;
  int had_err = 0;
  GtRange rng;
  unsigned long i;

  lv = gt_ltrdigest_pdom_visitor_cast(nv);
  gt_assert(lv);
  gt_error_check(err);

  /* traverse annotation subgraph and find LTR element */
  fni = gt_feature_node_iterator_new(fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (strcmp(gt_feature_node_get_type(curnode),
               gt_ft_LTR_retrotransposon) == 0) {
      lv->ltr_retrotrans = curnode;
    }
  }
  gt_feature_node_iterator_delete(fni);

  if (!had_err && lv->ltr_retrotrans != NULL) {
    GtCodonIterator *ci;
    GtTranslator *tr;
    GtTranslatorStatus status;
    unsigned long seqlen;
    char translated, *rev_seq;
    FILE *instream;
    GtHMMERParseStatus *pstatus;
    unsigned int frame;
    GtStr *seq;

    seq = gt_str_new();
    rng = gt_genome_node_get_range((GtGenomeNode*) lv->ltr_retrotrans);
    lv->leftLTR_5 = rng.start - 1;
    lv->rightLTR_3 = rng.end - 1;
    seqlen = gt_range_length(&rng);

    had_err = gt_extract_feature_sequence(seq,
                                       (GtGenomeNode*) lv->ltr_retrotrans,
                                       gt_symbol(gt_ft_LTR_retrotransposon),
                                       false, NULL, NULL, lv->rmap, err);

    if (!had_err) {
      for (i = 0UL; i < 3UL; i++) {
        gt_str_reset(lv->fwd[i]);
        gt_str_reset(lv->rev[i]);
      }

      /* create translations */
      ci = gt_codon_iterator_simple_new(gt_str_get(seq), seqlen, NULL);
      gt_assert(ci);
      tr = gt_translator_new(ci);
      status = gt_translator_next(tr, &translated, &frame, err);
      while (status == GT_TRANSLATOR_OK && translated) {
        gt_str_append_char(lv->fwd[frame], translated);
        /* pass err (not NULL) so that a failure reported in a later
           iteration does not end in an error return without a message */
        status = gt_translator_next(tr, &translated, &frame, err);
      }
      if (status == GT_TRANSLATOR_ERROR) had_err = -1;
      if (!had_err) {
        /* rev_seq is not NUL-terminated; it is only used together with
           its explicit length seqlen */
        rev_seq = gt_malloc((size_t) seqlen * sizeof (char));
        strncpy(rev_seq, gt_str_get(seq), (size_t) seqlen * sizeof (char));
        (void) gt_reverse_complement(rev_seq, seqlen, NULL);
        gt_codon_iterator_delete(ci);
        ci = gt_codon_iterator_simple_new(rev_seq, seqlen, NULL);
        gt_translator_set_codon_iterator(tr, ci);
        status = gt_translator_next(tr, &translated, &frame, err);
        while (status == GT_TRANSLATOR_OK && translated) {
          gt_str_append_char(lv->rev[frame], translated);
          status = gt_translator_next(tr, &translated, &frame, err);
        }
        if (status == GT_TRANSLATOR_ERROR) had_err = -1;
        gt_free(rev_seq);
      }
      gt_codon_iterator_delete(ci);
      gt_translator_delete(tr);
    }

    /* run HMMER and handle results */
    if (!had_err) {
      int pid, pc[2], cp[2];
      GT_UNUSED int rval;

      (void) signal(SIGCHLD, SIG_IGN); /* XXX: for now, ignore child's
                                               exit status */
      /* pc: parent -> child (hmmscan stdin),
         cp: child -> parent (hmmscan stdout) */
      rval = pipe(pc);
      gt_assert(rval == 0);
      rval = pipe(cp);
      gt_assert(rval == 0);

      switch ((pid = (int) fork())) {
        case -1:
          perror("Can't fork");
          exit(1);   /* XXX: error handling */
        case 0:    /* child */
          (void) close(1);    /* close current stdout. */
          rval = dup(cp[1]);  /* make stdout go to write end of pipe. */
          (void) close(0);    /* close current stdin. */
          rval = dup(pc[0]);  /* make stdin come from read end of pipe. */
          (void) close(pc[1]);
          (void) close(cp[0]);
          (void) execvp("hmmscan", lv->args); /* XXX: read path from env */
          perror("couldn't execute hmmscan!");
          exit(1);
        default:    /* parent */
          /* send the six translations as FASTA records named
             <frame><strand>, e.g. ">0+" */
          for (i = 0UL; i < 3UL; i++) {
            char buf[5];
            GT_UNUSED ssize_t written;
            (void) sprintf(buf, ">%lu%c\n", i, '+');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->fwd[i]),
                            (size_t) gt_str_length(lv->fwd[i])
                              * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
            (void) sprintf(buf, ">%lu%c\n", i, '-');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->rev[i]),
                            (size_t) gt_str_length(lv->rev[i])
                              * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
          }
          (void) close(pc[1]);
          (void) close(cp[1]);
          instream = fdopen(cp[0], "r");
          if (instream == NULL) {
            /* fdopen() may fail; do not parse or fclose() a NULL stream */
            gt_error_set(err, "cannot open output pipe of hmmscan "
                              "for reading");
            (void) close(cp[0]);
            had_err = -1;
          }
          else {
            pstatus = gt_hmmer_parse_status_new();
            had_err = gt_ltrdigest_pdom_visitor_parse_output(lv, pstatus,
                                                             instream, err);
            (void) fclose(instream);
            if (!had_err)
              had_err = gt_ltrdigest_pdom_visitor_process_hits(lv, pstatus,
                                                               err);
            gt_hmmer_parse_status_delete(pstatus);
          }
      }
    }
    gt_str_delete(seq);
  }
  if (!had_err)
    had_err = gt_ltrdigest_pdom_visitor_choose_strand(lv);
  return had_err;
}
/**
 * Validate the parsed arguments of the occratio tool and derive the set of
 * mer sizes to output.
 *
 * The mer sizes come either from option -mersizes (a strictly increasing
 * list of positive integers) or from the range given by minmersize,
 * maxmersize and stepmersize.  For every selected mer size the
 * corresponding bit is set in arguments->outputvector.  Afterwards the
 * values of option -output are translated into the arguments->outputmode
 * bit mask.
 *
 * @param rest_argc      number of remaining arguments, must be 0
 * @param tool_arguments the Tyr_occratio_options to check
 * @param err            receives the error description
 * @return 0 if the arguments are consistent, -1 otherwise
 */
static int gt_tyr_occratio_arguments_check(int rest_argc,
                                           void *tool_arguments,
                                           GtError *err)
{
  Tyr_occratio_options *arguments = tool_arguments;
  bool haserr = false;
  Optionargmodedesc outputmodedesctable[] =
  {
    {"unique","number of unique mers",TYROCC_OUTPUTUNIQUE},
    {"nonunique","number of nonunique mers (single count)",
                 TYROCC_OUTPUTNONUNIQUE},
    {"nonuniquemulti","number of nonunique mers (multi count)",
                 TYROCC_OUTPUTNONUNIQUEMULTI},
    {"relative","fraction of unique/non-unique mers relative to all mers",
                 TYROCC_OUTPUTRELATIVE},
    {"total","number of all mers",TYROCC_OUTPUTTOTAL}
  };

  if (rest_argc != 0)
  {
    gt_error_set(err,"superfluous arguments");
    return -1;
  }
  if (gt_option_is_set(arguments->refoptionmersizes))
  {
    /* explicit list of mer sizes */
    unsigned long *mersizes = NULL;
    unsigned long idx,
                  numofmersizes = gt_str_array_size(arguments->mersizesstrings);
    if (numofmersizes == 0)
    {
      gt_error_set(err,"missing argument to option -mersizes:");
      haserr = true;
    } else
    {
      mersizes = gt_malloc(sizeof(*mersizes) * numofmersizes);
      for (idx=0; idx<numofmersizes; idx++)
      {
        long readnum;

        if (sscanf(gt_str_array_get(arguments->mersizesstrings,idx),
                   "%ld",&readnum) != 1 || readnum <= 0)
        {
          gt_error_set(err,"invalid argument \"%s\" of option -mersizes: "
                       "must be a positive integer",
                       gt_str_array_get(arguments->mersizesstrings,idx));
          haserr = true;
          break;
        }
        mersizes[idx] = (unsigned long) readnum;
        if (idx > 0 && mersizes[idx-1] >= mersizes[idx])
        {
          gt_error_set(err,"invalid argument %s to option -mersizes: "
                       "positive numbers must be strictly increasing",
                       gt_str_array_get(arguments->mersizesstrings,idx));
          haserr = true;
          break;
        }
      }
    }
    if (!haserr)
    {
      gt_assert(mersizes != NULL);
      /* the list is strictly increasing: first is min, last is max */
      arguments->minmersize = mersizes[0];
      arguments->maxmersize = mersizes[numofmersizes-1];
      INITBITTAB(arguments->outputvector,arguments->maxmersize+1);
      for (idx=0; idx<numofmersizes; idx++)
      {
        SETIBIT(arguments->outputvector,mersizes[idx]);
      }
    }
    gt_free(mersizes);
  } else
  {
    /* range of mer sizes given by minimum, maximum and step */
    if (arguments->minmersize == 0)
    {
      gt_error_set(err,"if option -mersizes is not used, then option "
                       "-minmersize is mandatory");
      haserr = true;
    }
    if (!haserr)
    {
      if (arguments->maxmersize == 0)
      {
        gt_error_set(err,"if option -mersizes is not used, then option "
                         "-maxmersize is mandatory");
        haserr = true;
      }
    }
    if (!haserr)
    {
      if (arguments->minmersize > arguments->maxmersize)
      {
        gt_error_set(err,"minimum mer size must not be larger than "
                         "maximum mer size");
        haserr = true;
      }
    }
    if (!haserr)
    {
      /* a step of 0 would never advance the loop below */
      if (arguments->stepmersize == 0)
      {
        gt_error_set(err,"step value must be a positive integer");
        haserr = true;
      }
    }
    if (!haserr)
    {
      if (arguments->minmersize+arguments->stepmersize >
          arguments->maxmersize)
      {
        gt_error_set(err,"minimum mer size + step value must be smaller or "
                         "equal to maximum mersize");
        haserr = true;
      }
    }
    if (!haserr)
    {
      unsigned long outputval;

      INITBITTAB(arguments->outputvector,arguments->maxmersize+1);
      for (outputval = arguments->minmersize;
           outputval <= arguments->maxmersize;
           outputval += arguments->stepmersize)
      {
        SETIBIT(arguments->outputvector,outputval);
      }
    }
  }
  if (!haserr)
  {
    /* translate every -output keyword into its bit of outputmode */
    unsigned long idx;
    for (idx=0; idx<gt_str_array_size(arguments->outputspec); idx++)
    {
      if (optionargaddbitmask(outputmodedesctable,
                              sizeof (outputmodedesctable)/
                              sizeof (outputmodedesctable[0]),
                              &arguments->outputmode,
                              "-output",
                              gt_str_array_get(arguments->outputspec,idx),
                              err) != 0)
      {
        haserr = true;
        break;
      }
    }
  }
  if (!haserr)
  {
    /* "relative" is meaningful only together with one of the count modes */
    if ((arguments->outputmode & TYROCC_OUTPUTRELATIVE) &&
        !(arguments->outputmode &
            (TYROCC_OUTPUTUNIQUE | TYROCC_OUTPUTNONUNIQUE |
                                   TYROCC_OUTPUTNONUNIQUEMULTI)))
    {
      gt_error_set(err,"argument relative to option -output requires that one "
                   "of the arguments unique, nonunique, or nonuniquemulti "
                   "is used");
      haserr = true;
    }
  }
  return haserr ? -1 : 0;
}
/**
 * Parse the command line of the pairwise comparison tool into pw.
 *
 * Exactly one of the input options -ss, -ff, -a and -t is expected:
 *  - -ss takes two strings,
 *  - -ff takes two file names, or the keyword "fasta" followed by two file
 *    names (then pw->fasta is set),
 *  - -a takes a character list and a length >= 1 (stored in
 *    pw->charlistlen),
 *  - -t takes a text.
 * Options -e and -p imply -ss.
 *
 * @param parsed_args receives the number of parsed arguments
 * @param pw          option record to fill
 * @param argc        number of command line arguments
 * @param argv        command line arguments
 * @param err         receives the error description
 * @return GT_OPTION_PARSER_OK on success, otherwise the parser's return
 *         value or GT_OPTION_PARSER_ERROR
 */
static GtOPrval parse_options(int *parsed_args,
                              Cmppairwiseopt *pw,
                              int argc, const char **argv, GtError *err)
{
  GtOptionParser *op;
  GtOption *optionstrings,
         *optionfiles,
         *optioncharlistlen,
         *optiontext,
         *optionshowedist,
         *optionprint;
  GtStrArray *charlistlen;
  GtOPrval oprval;

  gt_error_check(err);
  charlistlen = gt_str_array_new();
  pw->strings = gt_str_array_new();
  pw->files = gt_str_array_new();
  pw->text = gt_str_new();
  pw->charlistlen = NULL;
  pw->fastasequences0 = NULL;
  pw->fastasequences1 = NULL;
  pw->showedist = false;
  pw->print = false;
  pw->fasta = false;

  op = gt_option_parser_new("options", "Apply function to pairs of strings.");
  gt_option_parser_set_mail_address(op, "<*****@*****.**>");

  optionstrings = gt_option_new_string_array("ss", "use two strings",
                                             pw->strings);
  gt_option_parser_add_option(op, optionstrings);

  optionfiles = gt_option_new_filename_array("ff", "use two files",
                                             pw->files);
  gt_option_parser_add_option(op, optionfiles);

  optioncharlistlen = gt_option_new_string_array("a",
                                                 "use character list and length",
                                                 charlistlen);
  gt_option_parser_add_option(op, optioncharlistlen);

  optiontext = gt_option_new_string("t", "use text", pw->text, NULL);
  gt_option_parser_add_option(op, optiontext);

  optionshowedist = gt_option_new_bool("e", "output unit edit distance",
                                       &pw->showedist, false);
  gt_option_parser_add_option(op, optionshowedist);

  optionprint = gt_option_new_bool("p", "print edist alignment",
                                   &pw->print, false);
  gt_option_parser_add_option(op, optionprint);

  /* the four input options are mutually exclusive */
  gt_option_exclude(optionstrings, optionfiles);
  gt_option_exclude(optionstrings, optioncharlistlen);
  gt_option_exclude(optionstrings, optiontext);
  gt_option_exclude(optionfiles, optioncharlistlen);
  gt_option_exclude(optionfiles, optiontext);
  gt_option_exclude(optioncharlistlen, optiontext);
  gt_option_imply(optionshowedist, optionstrings);
  gt_option_imply(optionprint, optionstrings);

  oprval = gt_option_parser_parse(op, parsed_args, argc, argv, gt_versionfunc,
                                  err);
  if (oprval == GT_OPTION_PARSER_OK)
  {
    if (gt_option_is_set(optionstrings))
    {
      if (gt_str_array_size(pw->strings) != 2UL)
      {
        gt_error_set(err, "option -ss requires two string arguments");
        oprval = GT_OPTION_PARSER_ERROR;
      }
    }
    else if (gt_option_is_set(optionfiles))
    {
      if (gt_str_array_size(pw->files) != 2UL)
      {
        /* the only other accepted form: "fasta" plus two file names */
        if (gt_str_array_size(pw->files) == 3UL &&
            !strcmp(gt_str_array_get(pw->files,0),"fasta"))
        {
          pw->fasta = true;
        }
        if (!pw->fasta)
        {
          gt_error_set(err, "option -ff requires two filename arguments or "
                            "keyword fasta and two filename arguments in "
                            "FASTA format");
          oprval = GT_OPTION_PARSER_ERROR;
        }
      }
    }
    else if (gt_option_is_set(optioncharlistlen))
    {
      /* initialised because it is stored below even if sscanf() fails */
      GtWord readint = 0;

      if (gt_str_array_size(charlistlen) != 2UL)
      {
        gt_error_set(err,
                     "option -a requires charlist and length argument");
        oprval = GT_OPTION_PARSER_ERROR;
      }
      else
      {
        pw->charlistlen = gt_malloc(sizeof *pw->charlistlen);
        pw->charlistlen->charlist =
          gt_str_ref(gt_str_array_get_str(charlistlen, 0));
        if (sscanf(gt_str_array_get(charlistlen,1UL), GT_WD, &readint) != 1
            || readint < 1L)
        {
          gt_error_set(err,
                       "option -a requires charlist and length argument");
          oprval = GT_OPTION_PARSER_ERROR;
        }
        pw->charlistlen->len = (GtUword) readint;
      }
    }
    else if (!gt_option_is_set(optiontext))
    {
      gt_error_set(err,
                   "use exactly one of the options -ss, -ff, -a, -t");
      oprval = GT_OPTION_PARSER_ERROR;
    }
  }
  gt_option_parser_delete(op);
  if (oprval == GT_OPTION_PARSER_OK && *parsed_args != argc)
  {
    gt_error_set(err, "superfluous program parameters");
    oprval = GT_OPTION_PARSER_ERROR;
  }
  gt_str_array_delete(charlistlen);
  return oprval;
}
/**
 * Run the runtime with the given (already option-stripped) argument vector.
 *
 * Depending on the settings stored in gtr one of the special modes (list
 * tools, create man pages, 64 bit check, unit tests) is executed and its
 * result returned directly.  Otherwise argv[0] is looked up in the toolbox
 * and run as a tool; if no such tool exists and argv[0] names an existing
 * file, it is executed as a Lua script.  In interactive mode the Lua
 * interpreter is started afterwards.
 *
 * @param gtr  runtime object, must not be NULL
 * @param argc number of entries in argv
 * @param argv tool or script name followed by its arguments
 * @param err  receives the error description
 * @return the result of the special mode if one was selected, otherwise
 *         EXIT_SUCCESS or EXIT_FAILURE
 */
int gtr_run(GtR *gtr, int argc, const char **argv, GtError *err)
{
  GtToolfunc toolfunc = NULL;
  GtTool *tool = NULL;
  char **nargv = NULL;
  void *mem, *map;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(gtr);

  if (gtr->debug)
    enable_logging(gt_str_get(gtr->debugfp), &gtr->logfp);
  if (gtr->quiet)
    gt_warning_disable();
  gtr->seed = gt_ya_rand_init(gtr->seed);
  gt_log_log("seed=%u", gtr->seed);

  /* special modes: each one returns immediately */
  if (gtr->list)
    return list_tools(gtr);
  if (gt_str_length(gtr->manoutdir) > 0)
    return create_manpages(gtr, gt_str_get(gtr->manoutdir), err);
  if (gtr->check64bit)
    return check64bit();
  if (gtr->test)
    return run_tests(gtr, err);

  if (gt_str_length(gtr->testspacepeak)) {
    mem = gt_malloc(1 << 26); /* alloc 64 MB */
    map = gt_fa_xmmap_read(gt_str_get(gtr->testspacepeak), NULL);
    gt_fa_xmunmap(map);
    gt_free(mem);
  }

  if (argc == 0 && !gtr->interactive) {
    gt_error_set(err, "neither tool nor script specified; option -help lists "
                      "possible tools");
    had_err = -1;
  }

  if (!had_err && argc) {
    if (!gtr->tools || !gt_toolbox_has_tool(gtr->tools, argv[0])) {
      /* no tool found -> try to open script */
      if (gt_file_exists(argv[0])) {
        /* export script */
        gt_lua_set_script_dir(gtr->L, argv[0]);
        /* run script */
        nargv = gt_cstr_array_prefix_first(argv, gt_error_get_progname(err));
        gt_lua_set_arg(gtr->L, nargv[0], (const char**) nargv+1);
        if (luaL_dofile(gtr->L, argv[0])) {
          /* error */
          gt_assert(lua_isstring(gtr->L, -1)); /* error message on top */
          gt_error_set(err, "could not execute script %s",
                       lua_tostring(gtr->L, -1));
          had_err = -1;
          lua_pop(gtr->L, 1); /* pop error message */
        }
      }
      else {
        /* neither tool nor script found */
        gt_error_set(err, "neither tool nor script '%s' found; option -help "
                          "lists possible tools", argv[0]);
        had_err = -1;
      }
    }
    else {
      /* run tool: either a plain tool function or a GtTool object */
      if (!(toolfunc = gt_toolbox_get(gtr->tools, argv[0]))) {
        tool = gt_toolbox_get_tool(gtr->tools, argv[0]);
        gt_assert(tool);
      }
      nargv = gt_cstr_array_prefix_first(argv, gt_error_get_progname(err));
      gt_error_set_progname(err, nargv[0]);
      if (toolfunc)
        had_err = toolfunc(argc, (const char**) nargv, err);
      else
        had_err = gt_tool_run(tool, argc, (const char**) nargv, err);
    }
  }
  gt_cstr_array_delete(nargv);

  if (!had_err && gtr->interactive) {
    gt_showshortversion(gt_error_get_progname(err));
    gt_lua_set_arg(gtr->L, gt_error_get_progname(err), argv);
    run_interactive_lua_interpreter(gtr->L);
  }
  if (had_err)
    return EXIT_FAILURE;
  return EXIT_SUCCESS;
}
GtQuerymatch *gt_querymatch_new(void) { return gt_malloc(sizeof (GtQuerymatch)); }
/* Tool runner for the condenser search.

   If blastn or blastp was requested, the following steps are performed:
   1. load the compressed database named by arguments->dbpath,
   2. if a coarse or fine E-value is undefined, derive a raw E-value from the
      average query length and the requested bit score,
   3. create a BLAST database from "<dbpath>.fas" and run the coarse search,
      collecting the unique-sequence index and range of every hit,
   4. extract the hit uniques into "coarse_<db basename>.fas",
   5. create a BLAST database from that file and run the fine search, printing
      one tab-separated line per hit to arguments->outfp.

   Returns 0 on success and a non-zero value on failure, in which case <err>
   is set by the failing step. */
static int gt_condenser_search_runner(GT_UNUSED int argc,
                                      GT_UNUSED const char **argv,
                                      GT_UNUSED int parsed_args,
                                      void *tool_arguments,
                                      GtError *err)
{
  GtCondenserSearchArguments *arguments = tool_arguments;
  int i, had_err = 0;
  char *querypath;
  GtStr *coarse_fname;
  char *db_basename = NULL;
  char *suffix_ptr = NULL;
  GtTimer *timer = NULL;
  GtLogger *logger = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  /* only touch <arguments> after it has been asserted to be non-NULL */
  querypath = gt_str_get(arguments->querypath);
  coarse_fname = gt_str_new_cstr("coarse_");
  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  db_basename = gt_basename(gt_str_get(arguments->dbpath));
  /* if first char is '.' this might be a hidden file */
  if (strlen(db_basename) > (size_t) 1 &&
      (suffix_ptr = strrchr(db_basename + 1, '.')) != NULL) {
    /* remove suffix */
    *suffix_ptr = '\0';
  }
  gt_str_append_cstr(coarse_fname, db_basename);
  gt_str_append_cstr(coarse_fname, ".fas");
  gt_free(db_basename);
  db_basename = NULL;
  suffix_ptr = NULL;

  if (arguments->blastn || arguments->blastp) {
    GtMatch *match;
    GtMatchIterator *mp = NULL;
    GtNREncseq *nrencseq = NULL;
    GtStr *fastaname = gt_str_clone(arguments->dbpath);
    HitPosition *hits;
    double eval, raw_eval = 0.0;
    GtUword coarse_db_len = 0;
    GtMatchIteratorStatus status;
    int curr_hits = 0, max_hits = 100;

    hits = gt_malloc(sizeof (*hits) * (size_t) max_hits);
    gt_str_append_cstr(fastaname, ".fas");

    /* every slot owns its range object, so hits can be filled in place */
    for (i = 0; i < max_hits; i++) {
      hits[i].range = gt_malloc(sizeof (*hits[i].range) * (size_t) 1);
    }

    if (gt_showtime_enabled()) {
      timer = gt_timer_new_with_progress_description("initialization");
      gt_timer_start(timer);
    }

    /*extract sequences from compressed database*/
    if (!had_err) {
      nrencseq = gt_n_r_encseq_new_from_file(gt_str_get(arguments->dbpath),
                                             logger, err);
      if (nrencseq == NULL)
        had_err = -1;
    }
    if (!had_err) {
      if (arguments->ceval == GT_UNDEF_DOUBLE ||
          arguments->feval == GT_UNDEF_DOUBLE) {
        /* from NCBI BLAST tutorial:
           E = Kmne^{-lambdaS}
           calculates E-value for score S with natural scale parameters K for
           search space size and lambda for the scoring system
           E = mn2^-S'
           m being the subject (total) length, n the length of ONE query
           calculates E-value for bit-score S' */
        GtFastaReader *reader;
        GtCondenserSearchAvg avg = {0,0};
        reader = gt_fasta_reader_rec_new(arguments->querypath);
        had_err = gt_fasta_reader_run(reader, NULL, NULL,
                                      gt_condenser_search_cum_moving_avg,
                                      &avg,
                                      err);
        if (!had_err) {
          GtUword S = arguments->bitscore;
          gt_log_log(GT_WU " queries, avg query size: " GT_WU,
                     avg.count, avg.avg);
          raw_eval = 1/pow(2.0, (double) S) * avg.avg;
          gt_logger_log(logger, "Raw E-value set to %.4e", raw_eval);
          gt_assert(avg.avg != 0);
        }
        gt_fasta_reader_delete(reader);
      }
    }

    /*create BLAST database from compressed database fasta file*/
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create coarse BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(
                                                   gt_str_get(fastaname), err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(
                                                   gt_str_get(fastaname), err);
    }

    /* coarse BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "coarse BLAST run", stderr);

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();
      gt_blast_process_call_set_db(call, gt_str_get(fastaname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, arguments->ceval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      while (!had_err &&
             (status = gt_match_iterator_next(mp, &match, err)) !=
             GT_MATCHER_STATUS_END) {
        if (status == GT_MATCHER_STATUS_OK) {
          GtUword hit_seq_id;
          char string[7];
          const char *dbseqid = gt_match_get_seqid2(match);
          if (sscanf(dbseqid, "%6s" GT_WU, string, &hit_seq_id) == 2) {
            gt_match_get_range_seq2(match, hits[curr_hits].range);
            hits[curr_hits].idx = hit_seq_id;
            curr_hits++;
            if (curr_hits == max_hits) {
              /* grow the hit table by another 100 slots */
              max_hits += 100;
              hits = gt_realloc(hits, sizeof (*hits) * max_hits);
              for (i = max_hits - 100; i < max_hits; i++) {
                hits[i].range = gt_malloc(sizeof (*hits[i].range));
              }
            }
          }
          else {
            gt_error_set(err, "could not parse unique db header %s", dbseqid);
            had_err = -1;
          }
          /* the match is released on both paths; <dbseqid> is not used after
             this point */
          gt_match_delete(match);
        }
        else if (status == GT_MATCHER_STATUS_ERROR) {
          had_err = -1;
        }
      }
      gt_match_iterator_delete(mp);
    }

    /*extract sequences*/
    if (!had_err) {
      GtNREncseqDecompressor *decomp;
      GtFile *coarse_hits;

      if (timer != NULL)
        gt_timer_show_progress(timer, "extract coarse search hits", stderr);

      decomp = gt_n_r_encseq_decompressor_new(nrencseq);
      coarse_hits = gt_file_new(gt_str_get(coarse_fname), "w", err);
      if (coarse_hits == NULL) {
        /* do not attempt the extraction without an output file */
        had_err = -1;
      }
      else {
        /* TODO DW do NOT extract complete uniques! these could be complete
           chromosomes!! just extract something around it? maybe +- max query
           length*/
        for (i = 0; i < curr_hits; i++) {
          gt_n_r_encseq_decompressor_add_unique_idx_to_extract(decomp,
                                                               hits[i].idx);
        }
        had_err =
          gt_n_r_encseq_decompressor_start_unique_extraction(coarse_hits,
                                                             decomp,
                                                             &coarse_db_len,
                                                             err);
        /* the length is only meaningful if the extraction succeeded */
        gt_assert(had_err || coarse_db_len != 0);
        gt_file_delete(coarse_hits);
      }
      gt_n_r_encseq_decompressor_delete(decomp);
    }
    gt_n_r_encseq_delete(nrencseq);

    /* create BLAST database from decompressed database file */
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create fine BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(
                                                gt_str_get(coarse_fname), err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(
                                                gt_str_get(coarse_fname), err);
    }

    /* perform fine BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "fine BLAST run", stderr);

      if (arguments->feval == GT_UNDEF_DOUBLE) {
        eval = raw_eval * coarse_db_len;
      } else {
        eval = arguments->feval;
      }

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();

      gt_blast_process_call_set_db(call, gt_str_get(coarse_fname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, eval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      gt_logger_log(logger, "Fine E-value set to: %.4e (len)" GT_WU, eval,
                    coarse_db_len);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      if (!had_err) {
        GtUword numofhits = 0;
        while (!had_err &&
               (status = gt_match_iterator_next(mp, &match, err)) !=
               GT_MATCHER_STATUS_END) {
          if (status == GT_MATCHER_STATUS_OK) {
            GtMatchBlast *matchb = (GtMatchBlast*) match;
            GtRange range_seq1;
            GtRange range_seq2;
            numofhits++;
            gt_match_get_range_seq1(match, &range_seq1);
            gt_match_get_range_seq2(match, &range_seq2);
            gt_file_xprintf(
                    arguments->outfp,
                    "%s\t%s\t%.2f\t" GT_WU "\t" GT_WU "\t" GT_WU "\t" GT_WU "\t"
                    GT_WU "\t%g\t%.3f\n",
                    gt_match_get_seqid1(match),
                    gt_match_get_seqid2(match),
                    gt_match_blast_get_similarity(matchb),
                    gt_match_blast_get_align_length(matchb),
                    range_seq1.start,
                    range_seq1.end,
                    range_seq2.start,
                    range_seq2.end,
                    gt_match_blast_get_evalue(matchb),
                    (double) gt_match_blast_get_bitscore(matchb));
            gt_match_delete(match);
          }
          else if (status == GT_MATCHER_STATUS_ERROR) {
            had_err = -1;
          }
        }
        gt_log_log(GT_WU " hits found\n", numofhits);
      }
      gt_match_iterator_delete(mp);
    }

    if (!had_err)
      if (timer != NULL)
        gt_timer_show_progress_final(timer, stderr);
    gt_timer_delete(timer);

    /*cleanup*/
    for (i = 0; i < max_hits; i++) {
      gt_free(hits[i].range);
    }
    gt_free(hits);
    gt_str_delete(fastaname);
  }
  gt_str_delete(coarse_fname);
  gt_logger_delete(logger);
  return had_err;
}
static void *gt_idxlocali_arguments_new(void) { return gt_malloc(sizeof (IdxlocaliOptions)); }
gt_input_file* gt_input_file_open(char* const file_name,const bool mmap_file) { GT_NULL_CHECK(file_name); // Allocate handler gt_input_file* input_file = gt_alloc(gt_input_file); // Input file struct stat stat_info; unsigned char tbuf[4]; int i; gt_cond_fatal_error(stat(file_name,&stat_info)==-1,FILE_STAT,file_name); input_file->file_name = file_name; input_file->file_size = stat_info.st_size; input_file->eof = (input_file->file_size==0); input_file->file_format = FILE_FORMAT_UNKNOWN; gt_cond_fatal_error(pthread_mutex_init(&input_file->input_mutex,NULL),SYS_MUTEX_INIT); if (mmap_file) { input_file->file = NULL; input_file->fildes = open(file_name,O_RDONLY,0); // TODO: O_NOATIME condCompl (Thanks Jordi Camps) gt_cond_fatal_error(input_file->fildes==-1,FILE_OPEN,file_name); input_file->file_buffer = (uint8_t*) mmap(0,input_file->file_size,PROT_READ,MAP_PRIVATE,input_file->fildes,0); gt_cond_fatal_error(input_file->file_buffer==MAP_FAILED,SYS_MMAP_FILE,file_name); input_file->file_type = MAPPED_FILE; } else { input_file->fildes = -1; gt_cond_fatal_error(!(input_file->file=fopen(file_name,"r")),FILE_OPEN,file_name); input_file->file_type = REGULAR_FILE; if(S_ISREG(stat_info.st_mode)) { // Regular file - check if gzip or bzip compressed i=(int)fread(tbuf,(size_t)1,(size_t)4,input_file->file); if(tbuf[0]==0x1f && tbuf[1]==0x8b && tbuf[2]==0x08) { input_file->file_type=GZIPPED_FILE; fclose(input_file->file); #ifdef HAVE_ZLIB gt_cond_fatal_error(!(input_file->file=(void *)gzopen(file_name,"r")),FILE_GZIP_OPEN,file_name); #else gt_fatal_error(FILE_GZIP_NO_ZLIB,file_name); #endif } else if(tbuf[0]=='B' && tbuf[1]=='Z' && tbuf[2]=='h' && tbuf[3]>='0' && tbuf[3]<='9') { fseek(input_file->file,0L,SEEK_SET); input_file->file_type=BZIPPED_FILE; #ifdef HAVE_BZLIB input_file->file=BZ2_bzReadOpen(&i,input_file->file,0,0,NULL,0); gt_cond_fatal_error(i!=BZ_OK,FILE_BZIP2_OPEN,file_name); #else gt_fatal_error(FILE_BZIP2_NO_BZLIB,file_name); #endif } else { 
fseek(input_file->file,0L,SEEK_SET); } } else { input_file->eof=0; } input_file->file_buffer = gt_malloc(GT_INPUT_BUFFER_SIZE); } // Auxiliary Buffer (for synch purposes) input_file->buffer_size = 0; input_file->buffer_begin = 0; input_file->buffer_pos = 0; input_file->global_pos = 0; input_file->processed_lines = 0; // ID generator input_file->processed_id = 0; // Detect file format gt_input_file_detect_file_format(input_file); return input_file; }
GtHcrEncoder *gt_hcr_encoder_new(GtStrArray *files, GtAlphabet *alpha, bool descs, GtQualRange qrange, GtTimer *timer, GtError *err) { GtBaseQualDistr *bqd; GtHcrEncoder *hcr_enc; GtSeqIterator *seqit; GtStrArray *file; int had_err = 0, status; unsigned long len1, len2, i, num_of_reads = 0; const GtUchar *seq, *qual; char *desc; gt_error_check(err); gt_assert(alpha && files); if (timer != NULL) gt_timer_show_progress(timer, "get <base,qual> distr", stdout); if (qrange.start != GT_UNDEF_UINT) if (qrange.start == qrange.end) { gt_error_set(err, "qrange.start must unequal qrange.end"); return NULL; } hcr_enc = gt_malloc(sizeof (GtHcrEncoder)); hcr_enc->files = files; hcr_enc->num_of_files = gt_str_array_size(files); hcr_enc->num_of_reads = 0; hcr_enc->page_sampling = false; hcr_enc->regular_sampling = false; hcr_enc->sampling_rate = 0; hcr_enc->pagesize = sysconf((int) _SC_PAGESIZE); if (descs) { hcr_enc->encdesc_encoder = gt_encdesc_encoder_new(); if (timer != NULL) gt_encdesc_encoder_set_timer(hcr_enc->encdesc_encoder, timer); } else hcr_enc->encdesc_encoder = NULL; hcr_enc->seq_encoder = gt_malloc(sizeof (GtHcrSeqEncoder)); hcr_enc->seq_encoder->alpha = alpha; hcr_enc->seq_encoder->sampling = NULL; hcr_enc->seq_encoder->fileinfos = gt_calloc((size_t) hcr_enc->num_of_files, sizeof (*(hcr_enc->seq_encoder->fileinfos))); hcr_enc->seq_encoder->qrange = qrange; bqd = hcr_base_qual_distr_new(alpha, qrange); /* check if reads in the same file are of same length and get <base, quality> pair distribution */ for (i = 0; i < hcr_enc->num_of_files; i++) { file = gt_str_array_new(); gt_str_array_add(file, gt_str_array_get_str(files, i)); seqit = gt_seq_iterator_fastq_new(file, err); if (!seqit) { gt_error_set(err, "cannot initialize GtSeqIteratorFastQ object"); had_err = -1; } if (!had_err) { gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(alpha)); gt_seq_iterator_set_quality_buffer(seqit, &qual); status = gt_seq_iterator_next(seqit, &seq, &len1, &desc, err); if 
(status == 1) { num_of_reads = 1UL; while (!had_err) { status = gt_seq_iterator_next(seqit, &seq, &len2, &desc, err); if (status == -1) had_err = -1; if (status != 1) break; if (len2 != len1) { gt_error_set(err, "reads have to be of equal length"); had_err = -1; break; } if (hcr_base_qual_distr_add(bqd, qual, seq, len1) != 0) had_err = -1; len1 = len2; num_of_reads++; } } else if (status == -1) had_err = -1; if (!had_err) { if (i == 0) hcr_enc->seq_encoder->fileinfos[i].readnum = num_of_reads; else hcr_enc->seq_encoder->fileinfos[i].readnum = hcr_enc->seq_encoder->fileinfos[i - 1].readnum + num_of_reads; hcr_enc->seq_encoder->fileinfos[i].readlength = len1; } } hcr_enc->num_of_reads += num_of_reads; gt_str_array_delete(file); gt_seq_iterator_delete(seqit); } if (!had_err) hcr_base_qual_distr_trim(bqd); if (!had_err) { if (timer != NULL) gt_timer_show_progress(timer, "build huffman tree for sequences and" " qualities", stdout); hcr_enc->seq_encoder->huffman = gt_huffman_new(bqd, hcr_base_qual_distr_func, (unsigned long) bqd->ncols * bqd->nrows); } if (!had_err) { hcr_enc->seq_encoder->qual_offset = bqd->qual_offset; hcr_base_qual_distr_delete(bqd); return hcr_enc; } return NULL; }
/* Decide how the feature node <node> (with optional <parent>) is placed in
   diagram <d>.

   Pseudo nodes and nodes outside the visible range are ignored, as are nodes
   whose type has a "max_show_width" smaller than the visible range. The
   style options "collapse_to_parent" and "group_by_parent" are looked up once
   per feature type and cached in d->collapsingtypes and d->groupedtypes.

   Returns 0 on success (including the ignored cases) and -1 if a style query
   or one of the add_to_* helpers fails. */
static int process_node(GtDiagram *d, GtFeatureNode *node,
                        GtFeatureNode *parent, GtError *err)
{
  GtRange node_range;
  bool *collapse_flag;
  GtShouldGroupByParent *grouping;
  const char *type = NULL, *parent_type = NULL;
  double width;
  GtStyleQueryStatus status;
  GtUword own_limit = GT_UNDEF_UWORD,
          parent_limit = GT_UNDEF_UWORD;
  int rc;

  gt_assert(d && node);

  gt_log_log(">> getting '%s'", gt_feature_node_get_type(node));

  /* pseudo nodes are never drawn themselves */
  if (gt_feature_node_is_pseudo(node))
    return 0;

  type = gt_feature_node_get_type(node);
  gt_assert(type);

  /* nothing to do for elements outside the visible range */
  node_range = gt_genome_node_get_range((GtGenomeNode*) node);
  if (!gt_range_overlap(&d->range, &node_range))
    return 0;

  /* maximal view width (in nucleotides) at which this type is shown */
  status = gt_style_get_num(d->style, type, "max_show_width", &width, NULL,
                            err);
  if (status == GT_STYLE_QUERY_ERROR)
    return -1;
  if (status == GT_STYLE_QUERY_OK)
    own_limit = width;

  /* the same limit for a real (non-pseudo) parent; otherwise it stays
     undefined */
  if (parent && !gt_feature_node_is_pseudo(parent)) {
    parent_type = gt_feature_node_get_type(parent);
    status = gt_style_get_num(d->style, parent_type, "max_show_width", &width,
                              NULL, err);
    if (status == GT_STYLE_QUERY_ERROR)
      return -1;
    if (status == GT_STYLE_QUERY_OK)
      parent_limit = width;
  }

  /* this type is not displayed at the current zoom level */
  if (own_limit != GT_UNDEF_UWORD && gt_range_length(&d->range) > own_limit)
    return 0;

  /* a parent that is not displayed is treated as absent */
  if (parent && parent_limit != GT_UNDEF_UWORD
        && gt_range_length(&d->range) > parent_limit)
    parent = NULL;

  /* "collapse_to_parent" setting, cached per feature type */
  collapse_flag = (bool*) gt_hashmap_get(d->collapsingtypes, type);
  if (collapse_flag == NULL) {
    collapse_flag = gt_malloc(sizeof (bool));
    *collapse_flag = false;
    if (gt_style_get_bool(d->style, type, "collapse_to_parent", collapse_flag,
                          NULL, err) == GT_STYLE_QUERY_ERROR) {
      gt_free(collapse_flag);
      return -1;
    }
    gt_hashmap_add(d->collapsingtypes, (void*) type, collapse_flag);
  }

  /* "group_by_parent" setting, cached per feature type */
  grouping = (GtShouldGroupByParent*) gt_hashmap_get(d->groupedtypes, type);
  if (grouping == NULL) {
    bool by_parent;
    grouping = gt_malloc(sizeof (GtShouldGroupByParent));
    status = gt_style_get_bool(d->style, type, "group_by_parent", &by_parent,
                               NULL, err);
    if (status == GT_STYLE_QUERY_OK) {
      *grouping = by_parent ? GT_GROUP_BY_PARENT : GT_DO_NOT_GROUP_BY_PARENT;
    }
    else if (status == GT_STYLE_QUERY_NOT_SET) {
      *grouping = GT_UNDEFINED_GROUPING;
    }
    else if (status == GT_STYLE_QUERY_ERROR) {
      gt_free(grouping);
      return -1;
    }
    gt_hashmap_add(d->groupedtypes, (void*) type, grouping);
  }

  /* decide where to place this feature: */
  if (*collapse_flag) {
    /* user has specified collapsing to parent for this type */
    if (parent && !gt_feature_node_is_pseudo(parent)) {
      /* collapsing child nodes are added to upwards blocks,
         but never collapse into pseudo nodes */
      add_recursive(d, node, parent, node);
      rc = 0;
    }
    else {
      /* if no parent or only pseudo-parent, do not collapse */
      rc = add_to_current(d, node, parent, err);
    }
  }
  else if (!parent) {
    /* root nodes always get their own block */
    rc = add_to_current(d, node, parent, err);
  }
  else {
    bool no_overlap, grouped;

    no_overlap = gt_feature_node_direct_children_do_not_overlap_st(parent,
                                                                   node);
    grouped = *grouping == GT_GROUP_BY_PARENT
                || (no_overlap && *grouping == GT_UNDEFINED_GROUPING);

    if (gt_feature_node_is_pseudo(parent) && gt_feature_node_is_multi(node)) {
      /* multi-features below a pseudo node go to their representative */
      rc = add_to_rep(d, node, parent, err);
    }
    else if (grouped && gt_feature_node_number_of_children(parent) > 1) {
      rc = add_to_parent(d, node, parent, err);
    }
    else {
      rc = add_to_current(d, node, parent, err);
    }
  }
  if (rc < 0)
    return -1;

  /* we can now assume that this node (or its representative)
     has been processed into the reverse lookup structure */
#ifndef NDEBUG
  if (gt_feature_node_is_multi(node))
  {
    GtFeatureNode *rep;
    rep = gt_feature_node_get_multi_representative((GtFeatureNode*) node);
    gt_assert(gt_hashmap_get(d->nodeinfo, rep));
  }
  else
    gt_assert(gt_hashmap_get(d->nodeinfo, node));
#endif

  return 0;
}
/* Tool runner that builds a compressed bit sequence, writes it to a
   temporary file and reads it back.

   The input bits are either read from arguments->filename (a bit count
   followed by the packed words) or generated: arguments->size words, each
   the pattern 0xAAAA... XORed with a random value or with the word index.
   If arguments->check_consistency is set, every bit of the re-read sequence
   is compared with the in-memory sequence and the original bits via
   gt_assert().

   Returns 0 on success, a non-zero value with <err> set otherwise. */
static int gt_compressedbits_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtCompressdbitsArguments *arguments = tool_arguments;
  int had_err = 0;
  unsigned long idx;
  unsigned long long num_of_bits = 0ULL;
  GtBitsequence *bits = NULL;
  GtCompressedBitsequence *cbs = NULL,
                          *read_cbs = NULL;
  GtStr *filename = gt_str_new();
  FILE *fp = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  gt_assert(argc == parsed_args);

  if (gt_option_is_set(arguments->filename_op)) {
    FILE *file = NULL;
    gt_assert(arguments->filename != NULL);

    file = gt_xfopen(gt_str_get(arguments->filename), "r");
    if ((size_t) 1 != gt_xfread(&num_of_bits,
                                sizeof (num_of_bits), (size_t) 1, file)) {
      /* a short read must not be reported without an error message */
      gt_error_set(err, "could not read number of bits from file %s",
                   gt_str_get(arguments->filename));
      had_err = -1;
    }
    if (!had_err) {
      gt_log_log("bits to read: %llu", num_of_bits);
      arguments->size = (unsigned long) GT_NUMOFINTSFORBITS(num_of_bits);
      bits = gt_malloc(sizeof (*bits) * arguments->size);
      if ((size_t) arguments->size !=
          gt_xfread(bits, sizeof (*bits),
                    (size_t) arguments->size, file)) {
        gt_error_set(err, "could not read %lu words of bits from file %s",
                     arguments->size, gt_str_get(arguments->filename));
        had_err = -1;
      }
    }
    gt_xfclose(file);
  }
  else {
    bits = gt_calloc(sizeof (*bits), (size_t) arguments->size);
    num_of_bits = (unsigned long long) (GT_INTWORDSIZE * arguments->size);

    if (arguments->fill_random) {
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] =
          (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ gt_rand_max(ULONG_MAX));
      }
    }
    else {
      for (idx = 0; idx < arguments->size; idx++)
        bits[idx] = (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ idx);
    }
  }

  if (!had_err) {
    /* the temporary file is only needed for its name; close it right away */
    fp = gt_xtmpfp(filename);
    gt_fa_xfclose(fp);
    fp = NULL;

    gt_log_log("filename: %s", gt_str_get(filename));
    gt_log_log("size in words: %lu", arguments->size);
    cbs = gt_compressed_bitsequence_new(
                            bits, arguments->samplerate,
                            (unsigned long) num_of_bits);
    gt_log_log("original size in MB: %2.3f",
               (sizeof (*bits) * arguments->size) / (1024.0 * 1024.0));
    gt_log_log("compressed size in MB: %2.3f",
               gt_compressed_bitsequence_size(cbs) / (1024.0 * 1024.0));
    gt_log_log("popcount table size thereof in MB: %2.3f",
               gt_popcount_tab_calculate_size(15U) / (1024.0 * 1024.0));
    had_err = gt_compressed_bitsequence_write(cbs, gt_str_get(filename), err);
  }
  if (!had_err)
  {
    read_cbs =
      gt_compressed_bitsequence_new_from_file(gt_str_get(filename), err);
    if (read_cbs == NULL)
      had_err = -1;
  }
  if (!had_err && bits != NULL && arguments->check_consistency) {
    for (idx = 0; (unsigned long long) idx < num_of_bits; ++idx) {
      int GT_UNUSED bit = gt_compressed_bitsequence_access(read_cbs, idx);
      int GT_UNUSED original = GT_ISIBITSET(bits, idx) ? 1 : 0;
      gt_assert(gt_compressed_bitsequence_access(cbs, idx) == bit);
      gt_assert(original == bit);
    }
  }
  gt_compressed_bitsequence_delete(cbs);
  gt_compressed_bitsequence_delete(read_cbs);
  gt_free(bits);
  gt_str_delete(filename);
  return had_err;
}
/* Map the file <filename> into memory and bind the tables described by the
   map specification to it.

   <setup> is called with a fresh GtMapspec, <data> and false to register the
   tables. The file is then mapped (the mapping is stored in <*mapped>, NULL
   if mapping failed) and each registered table is assigned its position via
   assigncorrecttype(); after every table the offset is rounded up to a
   multiple of GT_WORDSIZE_INBYTES.

   Returns 0 on success. Returns -1 and sets <err> if the file cannot be
   mapped, if its size differs from what the specification implies, or if the
   final offset differs from <expectedsize> plus the padding. The mapping is
   not released here on failure. */
int gt_mapspec_read(GtMapspecSetupFunc setup, void *data,
                    const GtStr *filename, unsigned long expectedsize,
                    void **mapped, GtError *err)
{
  GtMapspec *ms = gt_malloc(sizeof (GtMapspec));
  GtMapspecification *spec;
  void *mapping;
  size_t mappedbytes;
  uint64_t specbytes;
  unsigned long offset = 0,
                padtotal = 0;
  int had_err = 0;

  gt_error_check(err);
  GT_INITARRAY(&ms->mapspectable, GtMapspecification);
  setup(ms, data, false);

  mapping = gt_fa_mmap_read(gt_str_get(filename), &mappedbytes, err);
  if (mapping == NULL)
    had_err = -1;
  *mapped = mapping;

  /* the first table always starts at offset 0 */
  if (!had_err &&
      assigncorrecttype(ms->mapspectable.spaceGtMapspecification,
                        mapping, 0, err) != 0)
    had_err = -1;

  /* the file must be exactly as large as the specification demands */
  if (!had_err) {
    specbytes = detexpectedaccordingtomapspec(&ms->mapspectable);
    if (specbytes != (uint64_t) mappedbytes) {
      gt_error_set(err,"%lu bytes read from %s, but " Formatuint64_t
                       " expected",
                       (unsigned long) mappedbytes, gt_str_get(filename),
                       PRINTuint64_tcast(specbytes));
      had_err = -1;
    }
  }

  if (!had_err) {
    spec = ms->mapspectable.spaceGtMapspecification;
    gt_assert(spec != NULL);

    /* skip over the first table and pad to the next word boundary */
    offset = CALLCASTFUNC(uint64_t,unsigned_long,
                          (uint64_t) (spec->sizeofunit * spec->numofunits));
    if (offset % (unsigned long) GT_WORDSIZE_INBYTES > 0) {
      size_t pad = GT_WORDSIZE_INBYTES - (offset % GT_WORDSIZE_INBYTES);
      offset += (unsigned long) pad;
      padtotal += (unsigned long) pad;
    }

    /* bind all remaining tables, each at the current padded offset */
    spec++;
    while (spec < ms->mapspectable.spaceGtMapspecification +
                  ms->mapspectable.nextfreeGtMapspecification) {
      if (assigncorrecttype(spec, mapping, offset, err) != 0) {
        had_err = -1;
        break;
      }
      offset = CALLCASTFUNC(uint64_t,unsigned_long,
                            (uint64_t) (offset +
                                        spec->sizeofunit * spec->numofunits));
      if (offset % (unsigned long) GT_WORDSIZE_INBYTES > 0) {
        size_t pad = GT_WORDSIZE_INBYTES - (offset % GT_WORDSIZE_INBYTES);
        offset += (unsigned long) pad;
        padtotal += (unsigned long) pad;
      }
      spec++;
    }
  }

  if (!had_err && expectedsize + padtotal != offset) {
    gt_error_set(err,"mapping: expected file size is %lu bytes, "
                     "but file has %lu bytes",
                     expectedsize,offset);
    had_err = -1;
  }

  GT_FREEARRAY(&ms->mapspectable,GtMapspecification);
  gt_free(ms);
  return had_err;
}
/* Start the program <path> with arguments <argv> and environment <envp> as a
   child process whose standard input and standard output are connected to
   the caller through two pipes.

   On success a new GtSafePipe is returned: read_fd reads the child's stdout,
   write_fd writes to the child's stdin and child_pid holds the process id.
   On failure NULL is returned, <err> is set and every descriptor opened here
   is closed exactly once.

   On Windows the function is not implemented and always fails. */
GtSafePipe *gt_safe_popen(const char *path, char *const argv[],
                          char *const envp[], GtError *err)
{
#ifndef _WIN32
  int stdin_pipe[2], stdout_pipe[2], had_err = 0;
  /* record which pipes exist, so the cleanup only touches valid fds */
  int have_stdin_pipe = 0, have_stdout_pipe = 0;
  GtSafePipe *p = NULL;

  p = gt_malloc(sizeof (*p));
  p->read_fd = p->write_fd = NULL;
  p->child_pid = (pid_t) -1;

  if (pipe(stdin_pipe)) {
    gt_error_set(err, "could not open stdin pipe: %s", strerror(errno));
    had_err = -1;
  }
  else
    have_stdin_pipe = 1;

  if (!had_err) {
    if (pipe(stdout_pipe)) {
      gt_error_set(err, "could not open stdout pipe: %s", strerror(errno));
      had_err = -1;
    }
    else
      have_stdout_pipe = 1;
  }

  if (!had_err && !(p->read_fd = fdopen(stdout_pipe[0], "r"))) {
    gt_error_set(err, "could not open stdout_pipe[0] for reading: %s",
                 strerror(errno));
    had_err = -1;
  }

  if (!had_err && !(p->write_fd = fdopen(stdin_pipe[1], "w"))) {
    gt_error_set(err, "could not open stdin_pipe[1] for writing: %s",
                 strerror(errno));
    had_err = -1;
  }

  if (!had_err && (p->child_pid = safe_fork()) == (pid_t) -1) {
    gt_error_set(err, "could not fork: %s", strerror(errno));
    had_err = -1;
  }

  if (!had_err) {
    if (!p->child_pid) {
      /* this is the child process: drop the parent's pipe ends and move the
         remaining ends onto stdin and stdout */
      (void) close(stdout_pipe[0]);
      (void) close(stdin_pipe[1]);
      if (stdin_pipe[0] != 0) {
        (void) dup2(stdin_pipe[0], 0);
        (void) close(stdin_pipe[0]);
      }
      if (stdout_pipe[1] != 1) {
        (void) dup2(stdout_pipe[1], 1);
        (void) close(stdout_pipe[1]);
      }
      (void) execve(path, argv, envp);
      perror("could not execute external program: ");
      perror(strerror(errno));
      /* _exit() instead of exit(): do not flush stdio buffers inherited
         from the parent and do not run its atexit handlers */
      _exit(127);
    }
    /* parent: the child's pipe ends are not needed here */
    (void) close(stdout_pipe[1]);
    (void) close(stdin_pipe[0]);
  }
  else {
    /* A descriptor wrapped by fdopen() is closed by fclose(); closing it
       again with close() could hit an unrelated descriptor that reused the
       number, so each one is released exactly once. */
    if (p->write_fd)
      (void) fclose(p->write_fd);
    else if (have_stdin_pipe)
      (void) close(stdin_pipe[1]);
    if (have_stdin_pipe)
      (void) close(stdin_pipe[0]);

    if (p->read_fd)
      (void) fclose(p->read_fd);
    else if (have_stdout_pipe)
      (void) close(stdout_pipe[0]);
    if (have_stdout_pipe)
      (void) close(stdout_pipe[1]);

    gt_free(p);
    p = NULL;
  }
  return p;
#else
  gt_error_set(err, "Function gt_safe_popen not implemented for windows yet");
  return NULL;
#endif
}
/* Write the tables described by a map specification to <fp>.

   <setup> is called with a fresh GtMapspec, <data> and true to register the
   tables. Every table with at least one unit is written according to its
   type; after each table gt_mapspec_pad() is called with the current byte
   offset and the number of bytes it reports is added to the offset.

   Returns 0 on success. Returns -1 and sets <err> if a table has an unknown
   type, if writing or padding fails, or if the final offset differs from
   <expectedsize> plus the padding. Processing stops at the first error. */
int gt_mapspec_write(GtMapspecSetupFunc setup, FILE *fp,
                     void *data, unsigned long expectedsize, GtError *err)
{
  /* NOTE(review): the local names fp, mapspecptr, err and had_err are kept
     unchanged because WRITEACTIONWITHTYPE is invoked with the type only and
     presumably refers to them - verify before renaming */
  GtMapspecification *mapspecptr;
  unsigned long byteoffset = 0;
  int had_err = 0;
  unsigned long totalpadunits = 0;
  unsigned long byteswritten;
  GtMapspec *ms = gt_malloc(sizeof (GtMapspec));

  gt_error_check(err);
  GT_INITARRAY(&ms->mapspectable,GtMapspecification);
  setup(ms, data, true);
  gt_assert(ms->mapspectable.spaceGtMapspecification != NULL);
  for (mapspecptr = ms->mapspectable.spaceGtMapspecification;
       mapspecptr < ms->mapspectable.spaceGtMapspecification +
                    ms->mapspectable.nextfreeGtMapspecification;
       mapspecptr++)
  {
#ifdef SKDEBUG
    printf("# gt_mapspec_flushtheindex2file");
    showmapspec(mapspecptr);
    printf(" at byteoffset %lu\n",byteoffset);
#endif
    if (mapspecptr->numofunits > 0)
    {
      switch (mapspecptr->typespec)
      {
        case GtCharType:
          WRITEACTIONWITHTYPE(char);
          break;
        case GtFilelengthvaluesType:
          WRITEACTIONWITHTYPE(GtFilelengthvalues);
          break;
        case GtUcharType:
          WRITEACTIONWITHTYPE(GtUchar);
          break;
        case Uint16Type:
          WRITEACTIONWITHTYPE(uint16_t);
          break;
        case Uint32Type:
          WRITEACTIONWITHTYPE(uint32_t);
          break;
        case GtUlongType:
          WRITEACTIONWITHTYPE(GtUlong);
          break;
        case Uint64Type:
          WRITEACTIONWITHTYPE(uint64_t);
          break;
        case GtBitsequenceType:
          WRITEACTIONWITHTYPE(GtBitsequence);
          break;
        case GtUlongBoundType:
          WRITEACTIONWITHTYPE(GtUlongBound);
          break;
        case GtPairBwtidxType:
          WRITEACTIONWITHTYPE(GtPairBwtidx);
          break;
        case GtTwobitencodingType:
          WRITEACTIONWITHTYPE(GtTwobitencoding);
          break;
        case GtSpecialcharinfoType:
          WRITEACTIONWITHTYPE(GtSpecialcharinfo);
          break;
        case GtBitElemType:
          WRITEACTIONWITHTYPE(BitElem);
          break;
        default:
          gt_error_set(err,"no map specification for size %lu",
                       (unsigned long) mapspecptr->sizeofunit);
          had_err = -1;
      }
    }
    if (had_err)
    {
      break;
    }
    byteoffset = CALLCASTFUNC(uint64_t,unsigned_long,
                              (uint64_t) (byteoffset +
                                          mapspecptr->sizeofunit *
                                          mapspecptr->numofunits));
    if (gt_mapspec_pad(fp,&byteswritten,byteoffset,err) != 0)
    {
      /* stop immediately: <byteswritten> may not have been set and no
         further table must be written after a failed padding */
      had_err = -1;
      break;
    }
    byteoffset += byteswritten;
    totalpadunits += byteswritten;
  }
  if (!had_err)
  {
    if (expectedsize + totalpadunits != byteoffset)
    {
      gt_error_set(err,"expected file size is %lu bytes, "
                       "but file has %lu bytes",
                       expectedsize,
                       byteoffset);
      had_err = -1;
    }
  }
  GT_FREEARRAY(&ms->mapspectable,GtMapspecification);
  gt_free(ms);
  return had_err;
}
/* Output the translation of <splicedseq> in the three reading frames
   <frame0_in>, <frame1_in> and <frame2_in>.

   The output lines (dot line, genomic template and the three frames) are
   built by createoutputlines(). In XML mode the template and the frames are
   written as elements of a <translation> element, with newline characters
   left out; otherwise showoutputlines() prints them. <exons> is only used
   for its size when computing the length of the output buffers. */
static void showtranslation(GthSplicedSeq *splicedseq,
                            char *frame0_in,
                            char *frame1_in,
                            char *frame2_in,
                            GtArray *exons,
                            bool gen_strand_forward,
                            unsigned long gen_total_length,
                            unsigned long gen_offset,
                            unsigned int indentlevel,
                            GthOutput *out)
{
  const unsigned long exonseparatorwidth = strlen(EXONSEPARATORSTRING);
  /* sequence itself + exon separators + one extra character per output line */
  const unsigned long outlen =
    splicedseq->splicedseqlen +
    ((gt_array_size(exons) - 1) * exonseparatorwidth) +
    (splicedseq->splicedseqlen / TRANSLATIONLINEWIDTH);
  GtFile *outfp = out->outfp;
  char *dotline = gt_malloc(sizeof (unsigned char) * outlen),
       *template_out = gt_malloc(sizeof (unsigned char) * outlen),
       *frame0_out = gt_malloc(sizeof (unsigned char) * outlen),
       *frame1_out = gt_malloc(sizeof (unsigned char) * outlen),
       *frame2_out = gt_malloc(sizeof (unsigned char) * outlen);
  unsigned long pos;

  createoutputlines(dotline, template_out, frame0_out, frame1_out, frame2_out,
                    (char*) splicedseq->splicedseq, frame0_in, frame1_in,
                    frame2_in, splicedseq, exonseparatorwidth, outlen,
                    out->gs2out);

  if (!out->xmlout) {
    showoutputlines(dotline, template_out, frame0_out, frame1_out, frame2_out,
                    outlen, gen_strand_forward, gen_total_length, gen_offset,
                    splicedseq->positionmapping, out);
  }
  else {
    /* the child elements are indented one level deeper than <translation> */
    const unsigned int inner = indentlevel + 1;

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<translation>\n");

    gth_indent(outfp, inner);
    gt_file_xprintf(outfp, "<gDNA_template>");
    for (pos = 0; pos < outlen; pos++) {
      const char cc = template_out[pos];
      if (cc == '\n')
        continue;
      gt_file_xfputc(cc, outfp);
    }
    gt_file_xprintf(outfp, "</gDNA_template>\n");

    gth_indent(outfp, inner);
    gt_file_xprintf(outfp, "<first_frame>");
    for (pos = 0; pos < outlen; pos++) {
      const char cc = frame0_out[pos];
      if (cc == '\n')
        continue;
      gt_file_xfputc(cc, outfp);
    }
    gt_file_xprintf(outfp, "</first_frame>\n");

    gth_indent(outfp, inner);
    gt_file_xprintf(outfp, "<second_frame>");
    for (pos = 0; pos < outlen; pos++) {
      const char cc = frame1_out[pos];
      if (cc == '\n')
        continue;
      gt_file_xfputc(cc, outfp);
    }
    gt_file_xprintf(outfp, "</second_frame>\n");

    gth_indent(outfp, inner);
    gt_file_xprintf(outfp, "<third_frame>");
    for (pos = 0; pos < outlen; pos++) {
      const char cc = frame2_out[pos];
      if (cc == '\n')
        continue;
      gt_file_xfputc(cc, outfp);
    }
    gt_file_xprintf(outfp, "</third_frame>\n");

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</translation>\n");
  }

  gt_free(dotline);
  gt_free(template_out);
  gt_free(frame0_out);
  gt_free(frame1_out);
  gt_free(frame2_out);
}
/* Parse the FASTA-like buffer <filecontents> of <numofbytes> bytes in place
   and return a new GtBareEncseq for it.

   Lines starting with '>' are skipped; every such line except the first
   contributes one SEPARATOR symbol. All other non-whitespace characters are
   translated through the symbol map of <alphabet> and written back to the
   front of the same buffer. Runs of consecutive special symbols (separators
   included) are recorded as special ranges, and the occurrences of each
   non-special symbol are counted.

   The returned object refers to <filecontents> as its sequence. If a
   character maps to UNDEFCHAR, <err> is set, the object is deleted again and
   NULL is returned. */
GtBareEncseq *gt_bare_encseq_parse_new(GtUchar *filecontents,size_t numofbytes,
                                       const GtAlphabet *alphabet,
                                       GtError *err)
{
  GtUchar *dest = filecontents,
          *src = filecontents;
  const GtUchar *const stop = filecontents + numofbytes;
  const GtUchar *symbolmap = gt_alphabet_symbolmap(alphabet);
  bool no_header_seen = true,
       failed = false;
  GtUword open_range_length = 0;   /* > 0 while inside a special range */
  GtBareSpecialrange *range = NULL;
  GtBareEncseq *bare_encseq = gt_malloc(sizeof *bare_encseq);

  bare_encseq->specialcharacters = 0;
  bare_encseq->numofchars = (GtUword) gt_alphabet_num_of_chars(alphabet);
  bare_encseq->charcount = gt_calloc((size_t) bare_encseq->numofchars,
                                     sizeof *bare_encseq->charcount);
  GT_INITARRAY(&bare_encseq->specialranges,GtBareSpecialrange);

  while (!failed && src < stop)
  {
    if (*src == '>')
    {
      if (no_header_seen)
      {
        /* the first header does not separate anything */
        no_header_seen = false;
      } else
      {
        /* a separator is a special symbol: open a range or extend it */
        if (open_range_length == 0)
        {
          GT_GETNEXTFREEINARRAY(range,&bare_encseq->specialranges,
                                GtBareSpecialrange,128UL);
          range->start = (GtUword) (dest - filecontents);
        }
        open_range_length++;
        *dest++ = SEPARATOR;
        bare_encseq->specialcharacters++;
      }
      /* skip the remainder of the header line */
      while (src < stop && *src != '\n')
      {
        src++;
      }
    } else
    {
      for (/* Nothing */; src < stop && *src != '\n'; src++)
      {
        GtUchar cc;

        if (isspace(*src))
        {
          continue;
        }
        cc = symbolmap[*src];
        if (cc == UNDEFCHAR)
        {
          gt_error_set(err,"illegal input characters %c\n",*src);
          failed = true;
          break;
        }
        if (ISSPECIAL(cc))
        {
          if (open_range_length == 0)
          {
            GT_GETNEXTFREEINARRAY(range,&bare_encseq->specialranges,
                                  GtBareSpecialrange,128UL);
            range->start = (GtUword) (dest - filecontents);
          }
          open_range_length++;
          bare_encseq->specialcharacters++;
        } else
        {
          gt_assert((GtUword) cc < bare_encseq->numofchars);
          bare_encseq->charcount[(int) cc]++;
          /* an ordinary symbol closes the currently open special range */
          if (open_range_length > 0)
          {
            gt_assert(range != NULL);
            range->length = open_range_length;
          }
          open_range_length = 0;
        }
        *dest++ = cc;
      }
    }
    /* step over the newline (or past the end of the buffer) */
    src++;
  }
  /* close a special range that reaches the end of the input */
  if (open_range_length > 0)
  {
    gt_assert(range != NULL);
    range->length = open_range_length;
  }
  bare_encseq->sequence = filecontents;
  bare_encseq->totallength = (GtUword) (dest - filecontents);
  if (failed)
  {
    gt_bare_encseq_delete(bare_encseq);
    return NULL;
  }
  return bare_encseq;
}