/*
 * Public entry point of the consensus spliced alignment computation.
 *
 * Packs the caller-supplied alignment set, its dimensions, the accessor
 * callbacks and the user data into a local ConsensusSA record and runs
 * compute_csas() on it.
 *
 * set_of_sas, number_of_sas and size_of_sa must be non-zero/non-NULL, the
 * three accessor callbacks must be given, and the set must already be sorted
 * according to set_of_sas_is_sorted(); all of this is only asserted.
 * process_splice_form and userdata are stored without being checked here.
 */
void gt_consensus_sa(const void *set_of_sas, unsigned long number_of_sas,
                     size_t size_of_sa, GetGenomicRangeFunc get_genomic_range,
                     GetStrandFunc get_strand, GetExonsFunc get_exons,
                     ProcessSpliceFormFunc process_splice_form,
                     void *userdata)
{
  ConsensusSA csa;

  /* preconditions */
  gt_assert(set_of_sas && number_of_sas && size_of_sa);
  gt_assert(get_genomic_range && get_strand && get_exons);
  gt_assert(set_of_sas_is_sorted(set_of_sas, number_of_sas, size_of_sa,
                                 get_genomic_range));

  gt_log_log("-from %lu", get_genomic_range(set_of_sas).start);
  gt_log_log("csa number_of_sas=%lu", number_of_sas);

  /* accessor callbacks */
  csa.get_genomic_range = get_genomic_range;
  csa.get_strand = get_strand;
  csa.get_exons = get_exons;
  csa.process_splice_form = process_splice_form;
  csa.userdata = userdata;

  /* the alignment set itself */
  csa.set_of_sas = set_of_sas;
  csa.number_of_sas = number_of_sas;
  csa.size_of_sa = size_of_sa;

  compute_csas(&csa);

  gt_log_log("csa finished");
}
/**
 * Initialises a BWT sequence object on top of an encoded index sequence.
 *
 * The terminator symbol is added to range 1 of the alphabet, the fields of
 * bwtSeq are filled in, the cumulative count table is computed from the
 * per-symbol ranks over the whole index, and finally the locate handling is
 * set up via gt_BWTSeqInitLocateHandling().
 *
 * @param bwtSeq object to initialise, must not be NULL
 * @param seqIdx index sequence to build upon, must not be NULL
 * @param alphabet ownership of alphabet is with the newly produced
 * sequence object if return value is not 0
 * @param counts table stored as bwtSeq->count; entries 0..alphabetSize
 * (alphabet size after the terminator was added) are written
 * @param rangeSort stored as bwtSeq->rangeSort
 * @param defaultRangeSort handed on to gt_BWTSeqInitLocateHandling()
 * @return 1 on success, 0 if the alphabet reports size 0
 */
static int initBWTSeqFromEncSeqIdx(BWTSeq *bwtSeq, struct encIdxSeq *seqIdx, MRAEnc *alphabet, GtUword *counts, enum rangeSortMode *rangeSort, const enum rangeSortMode *defaultRangeSort)
{
  size_t alphabetSize;
  Symbol bwtTerminatorFlat;
  EISHint hint;
  gt_assert(bwtSeq && seqIdx);
  /* note: the alphabet pointer is stored even if the size check below
   * makes the function return 0 */
  bwtSeq->alphabet = alphabet;
  alphabetSize = gt_MRAEncGetSize(alphabet);
  if (!alphabetSize)
    /* weird error, shouldn't happen, but I prefer error return to
     * segfault in case someone tampered with the input */
    return 0;
  /* FIXME: this should probably be handled in chardef.h to have a
   * unique mapping */
  /* FIXME: this assumes there is exactly two ranges */
  gt_MRAEncAddSymbolToRange(alphabet, bwtTerminatorSym, 1);
  /* adding the terminator must have grown the alphabet by exactly one */
  gt_assert(gt_MRAEncGetSize(alphabet) == alphabetSize + 1);
  alphabetSize = gt_MRAEncGetSize(alphabet);
  /* flat code that UNDEFBWTCHAR maps to; the terminator is counted under
   * this symbol and corrected for in the count table below */
  bwtSeq->bwtTerminatorFallback = bwtTerminatorFlat = MRAEncMapSymbol(alphabet, UNDEFBWTCHAR);
  bwtSeq->bwtTerminatorFallbackRange = 1;
  bwtSeq->count = counts;
  bwtSeq->rangeSort = rangeSort;
  bwtSeq->seqIdx = seqIdx;
  bwtSeq->alphabetSize = alphabetSize;
  bwtSeq->hint = hint = newEISHint(seqIdx);
  {
    Symbol i;
    GtUword len = EISLength(seqIdx), *count = bwtSeq->count;
    /* count[s] accumulates the ranks of all symbols smaller than s */
    count[0] = 0;
    for (i = 0; i < bwtTerminatorFlat; ++i)
      count[i + 1] = count[i] + EISSymTransformedRank(seqIdx, i, len, hint);
    /* handle character which the terminator has been mapped to specially */
    count[i + 1] = count[i] + EISSymTransformedRank(seqIdx, i, len, hint) - 1;
    gt_assert(count[i + 1] >= count[i]);
    /* now we can finish the rest of the symbols */
    for (i += 2; i < alphabetSize; ++i)
      count[i] = count[i - 1] + EISSymTransformedRank(seqIdx, i - 1, len, hint);
    /* and finally place the 1-count for the terminator */
    count[i] = count[i - 1] + 1;
#ifdef EIS_DEBUG
    gt_log_log("count[alphabetSize]="GT_WU", len="GT_WU"",count[alphabetSize], len);
    for (i = 0; i <= alphabetSize; ++i)
      gt_log_log("count[%u]="GT_WU"", (unsigned)i, count[i]);
#endif
    /* sanity check: the last cumulative entry has to equal the length of
     * the index */
    gt_assert(count[alphabetSize] == len);
  }
  gt_BWTSeqInitLocateHandling(bwtSeq, defaultRangeSort);
  return 1;
}
/*
 * Writes one debug log line per chaining fragment: index, start/end/length
 * in both dimensions and the weight. Lengths are computed as
 * end - start + 1.
 *
 * fragments points to num_of_fragments consecutive GtFragment records.
 */
static void log_fragments(GtFragment *fragments,
                          unsigned long num_of_fragments)
{
  unsigned long i;

  gt_log_log("show chaining fragments");
  for (i = 0; i < num_of_fragments; i++) {
    GtFragment *frag = fragments + i;
    /* the second field is the END position in dimension 1, hence "e1" */
    gt_log_log("#%lu: s1=%lu, e1=%lu, l1=%lu, s2=%lu, e2=%lu, l2=%lu, w=%lu",
               i,
               frag->startpos1, frag->endpos1,
               frag->endpos1 - frag->startpos1 + 1,
               frag->startpos2, frag->endpos2,
               frag->endpos2 - frag->startpos2 + 1,
               frag->weight);
  }
}
/*
 * Calls makeblastdb for <dbfile> through a shell pipe.
 *
 * The command line is "makeblastdb -dbtype <dbtype> -in <dbfile>"; both
 * arguments are appended verbatim (no quoting), so callers must not pass
 * untrusted or whitespace-containing values.
 * Everything the child writes to its standard output is forwarded line by
 * line to the debug log.
 *
 * Returns 0 on success. Returns -1 with <err> set if the pipe could not be
 * opened, if pclose() itself failed, or if the command finished with a
 * non-zero status.
 */
static inline int gt_condenser_search_create_blastdb(const char* dbfile,
                                                     const char *dbtype,
                                                     GtError *err)
{
  int had_err = 0, pipe_status;
  GtStr *call = gt_str_new_cstr("makeblastdb -dbtype ");
  char *call_str;
  FILE* fpipe;

  gt_str_append_cstr(call, dbtype);
  gt_str_append_cstr(call, " -in ");
  gt_str_append_cstr(call, dbfile);
  call_str = gt_str_get(call);
  gt_log_log("executed call: %s", call_str);
  if ((fpipe = popen(call_str, "r")) == NULL) {
    gt_error_set(err, "Could not open pipe to call makeblastdb");
    had_err = -1;
  }
  if (!had_err) {
    char *newline = NULL;
    char line[BUFSIZ + 1];
    line[BUFSIZ] = '\0';
    /* drain the pipe completely before closing it */
    while (fgets(line, (int) BUFSIZ, fpipe) != NULL) {
      if ((newline = strrchr(line, '\n')) != NULL) {
        *newline = '\0';
        newline = NULL;
      }
      gt_log_log("%.*s", BUFSIZ, line);
    }
  }
  gt_str_delete(call);
  if (!had_err) {
    pipe_status = pclose(fpipe);
    if (pipe_status != 0) {
      had_err = -1;
      /* pclose() reports its own failure by returning -1; testing errno
         alone could pick up a stale value from an earlier call */
      if (pipe_status == -1)
        gt_error_set(err, "Error calling makeblastdb.");
#ifndef _WIN32
      else if (WEXITSTATUS(pipe_status) == 127)
        gt_error_set(err, "shell returned 127, makeblastdb not installed?");
      else
        gt_error_set(err, "makeblastdb error, returned %d",
                     WEXITSTATUS(pipe_status));
#else
      /* without the wait-status macros report the raw status, so that a
         -1 return value never comes without an error message */
      else
        gt_error_set(err, "makeblastdb error, returned %d", pipe_status);
#endif
    }
  }
  return had_err;
}
/*
 * Chooses the k-mer size and stores it in *kmersize.
 *
 * If -forcek was given (arguments->forcek > 0) that value is used, but it is
 * rejected when it exceeds the minimum match length or GT_UNITSIN2BITENC.
 * Otherwise the smaller of GT_UNITSIN2BITENC and the minimum match length
 * is taken.
 *
 * Returns true (with err set) if the forced value was rejected, false
 * otherwise. *kmersize is written and logged in either case.
 */
static bool gt_encseq2spm_kmersize(GtEncseq2spmArguments *arguments,
                                   unsigned int *kmersize, GtError *err)
{
  const unsigned int max_kmersize = (unsigned int) GT_UNITSIN2BITENC;
  bool haserr = false;

  gt_assert(kmersize != NULL);
  if (arguments->forcek > 0)
  {
    const unsigned int forced = arguments->forcek;

    *kmersize = forced;
    if (forced > arguments->minmatchlength)
    {
      gt_error_set(err,"argument %u to option -forcek > l", forced);
      haserr = true;
    }
    else if (forced > max_kmersize)
    {
      gt_error_set(err,
                   "argument %u to option -forcek > %u (machine word size/2)",
                   forced, max_kmersize);
      haserr = true;
    }
  }
  else
  {
    *kmersize = MIN((unsigned int) GT_UNITSIN2BITENC,
                    arguments->minmatchlength);
  }
  gt_log_log("kmersize=%u", *kmersize);
  gt_assert(*kmersize > 0);
  return haserr;
}
/*
 * Inserts <node> into the block of its <parent> that matches the node's type.
 *
 * Does nothing (returns 0) when there is no parent. Otherwise the node's
 * info element gets <parent> recorded as its parent, and the block keyed by
 * (node type, parent) in the parent's info element is looked up. If there is
 * none yet, a new block is created from the parent, typed after the node,
 * captioned via assign_block_caption() and registered.
 *
 * Returns 0 on success and -1 if assigning the caption failed; in that case
 * the freshly created block is deleted again.
 */
static int add_to_parent(GtDiagram *d, GtFeatureNode *node,
                         GtFeatureNode *parent, GtError *err)
{
  NodeInfoElement *parent_info, *node_info;
  GtBlock *target = NULL;

  gt_assert(d && node);
  if (parent == NULL)
    return 0;

  parent_info = nodeinfo_get(d, parent);
  node_info = nodeinfo_get(d, node);
  gt_log_log("adding %s to parent %p", gt_feature_node_get_type(node), parent);
  node_info->parent = parent;

  target = nodeinfo_find_block(parent_info, gt_feature_node_get_type(node),
                               parent);
  if (target == NULL) {
    /* first node of this type below the parent: set up a new block */
    target = gt_block_new_from_node(parent);
    gt_block_set_type(target, gt_feature_node_get_type(node));
    if (assign_block_caption(d, node, parent, target, err) < 0) {
      gt_block_delete(target);
      return -1;
    }
    nodeinfo_add_block(parent_info, gt_feature_node_get_type(node), parent,
                       target);
  }
  gt_assert(target);
  gt_block_insert_element(target, node);
  return 0;
}
/*
 * Switches <sfxmappedrange> over to the temporary file <tmpfilename>.
 *
 * Stores a clone of the file name, the writable flag and <usedptrptr>, and
 * derives the number of units from <numofentries> (for bit sequence tables
 * via GT_NUMOFINTSFORBITS, otherwise one unit per entry). Afterwards the
 * memory *usedptrptr points to is freed and the pointer is reset to NULL.
 *
 * <usedptrptr> may be NULL, in which case nothing is freed; this matches
 * gt_Sfxmappedrange_delete(), which also treats a NULL usedptrptr as legal.
 */
void gt_Sfxmappedrange_usetmp(GtSfxmappedrange *sfxmappedrange,
                              const GtStr *tmpfilename,
                              void **usedptrptr,
                              GtUword numofentries,
                              bool writable)
{
  gt_assert(sfxmappedrange != NULL);
  sfxmappedrange->ptr = NULL;
  sfxmappedrange->usedptrptr = usedptrptr;
  sfxmappedrange->filename = gt_str_clone(tmpfilename);
  sfxmappedrange->writable = writable;
  if (sfxmappedrange->type == GtSfxGtBitsequence)
  {
    sfxmappedrange->numofunits = GT_NUMOFINTSFORBITS(numofentries);
  } else
  {
    sfxmappedrange->numofunits = (size_t) numofentries;
  }
  gt_log_log("use file %s for table %s ("GT_WU" units of "GT_WU" bytes)",
             gt_str_get(sfxmappedrange->filename),
             gt_str_get(sfxmappedrange->tablename),
             (GtUword) sfxmappedrange->numofunits,
             (GtUword) sfxmappedrange->sizeofunit);
  /* release the in-memory copy of the table, if the caller handed one in */
  if (sfxmappedrange->usedptrptr != NULL)
  {
    gt_free(*sfxmappedrange->usedptrptr);
    *sfxmappedrange->usedptrptr = NULL;
  }
}
/*
 * Maps the complete file behind <sfxmappedrange> with gt_fa_mmap_read() and
 * remembers the mapping in sfxmappedrange->entire.
 *
 * Returns the mapping on success. Returns NULL if mapping failed or if the
 * mapped size differs from gt_Sfxmappedrange_size_entire(); in the latter
 * case <err> is set and the mapping is released again.
 */
void *gt_Sfxmappedrange_map_entire(GtSfxmappedrange *sfxmappedrange,
                                   GtError *err)
{
  size_t mappedsize;
  const char *path;

  gt_assert(sfxmappedrange != NULL);
  path = gt_str_get(sfxmappedrange->filename);
  sfxmappedrange->entire = gt_fa_mmap_read(path, &mappedsize,err);
  if (sfxmappedrange->entire == NULL)
  {
    return NULL;
  }
  if (mappedsize == gt_Sfxmappedrange_size_entire(sfxmappedrange))
  {
    gt_log_log("map %s completely ("GT_WU" units of size %u)",
               gt_str_get(sfxmappedrange->tablename),
               (GtUword) sfxmappedrange->numofunits,
               (unsigned int) sfxmappedrange->sizeofunit);
    return sfxmappedrange->entire;
  }
  /* size mismatch: report it and undo the mapping */
  gt_error_set(err, "map file %s: mapped size = "GT_WU" != "
                    GT_WU" = expected size",
               gt_str_get(sfxmappedrange->filename),
               (GtUword) mappedsize,
               (GtUword) gt_Sfxmappedrange_size_entire(sfxmappedrange));
  gt_fa_xmunmap(sfxmappedrange->entire);
  sfxmappedrange->entire = NULL;
  return NULL;
}
/*
 * Consumes a line that starts with XRF_BLANK_CHAR.
 *
 * After the first blank, further blanks are consumed one by one. The line
 * ends at a carriage return (an immediately following end-of-line character
 * is swallowed too), at end-of-line or at end-of-file. If a comment
 * character is met, the rest is handed to the comment line parser and its
 * result is returned. Any other character makes gt_io_expect() fail.
 *
 * Returns 0 on success, otherwise the error code of the failing call.
 */
static int gt_xrf_abbr_parse_tree_blank_line(GtIO *xrf_abbr_file,
                                             GtError *err)
{
  int had_err;

  gt_error_check(err);
  gt_log_log("blank");
  for (had_err = gt_io_expect(xrf_abbr_file, XRF_BLANK_CHAR, err);
       !had_err;
       had_err = gt_io_expect(xrf_abbr_file, XRF_BLANK_CHAR, err)) {
    char cc = gt_io_peek(xrf_abbr_file);

    if (cc == XRF_COMMENT_CHAR)
      return gt_xrf_abbr_parse_tree_comment_line(xrf_abbr_file, err);
    if (cc == GT_CARRIAGE_RETURN) {
      gt_io_next(xrf_abbr_file);
      if (gt_io_peek(xrf_abbr_file) == GT_END_OF_LINE)
        gt_io_next(xrf_abbr_file);
      break;
    }
    if ((cc == GT_END_OF_LINE) || (cc == GT_END_OF_FILE)) {
      gt_io_next(xrf_abbr_file);
      break;
    }
    /* anything else has to be another blank; checked by the loop step */
  }
  return had_err;
}
/*
 * Consumes a comment line: the leading XRF_COMMENT_CHAR and everything up to
 * and including the line terminator.
 *
 * A carriage return ends the line (an immediately following end-of-line
 * character is swallowed too), as does end-of-line. End-of-file ends the
 * line without consuming anything.
 *
 * Returns 0 on success or the error code of gt_io_expect() if the line does
 * not start with the comment character.
 */
static int gt_xrf_abbr_parse_tree_comment_line(GtIO *xrf_abbr_file,
                                               GtError *err)
{
  int had_err;

  gt_error_check(err);
  gt_log_log("comment");
  had_err = gt_io_expect(xrf_abbr_file, XRF_COMMENT_CHAR, err);
  if (had_err)
    return had_err;

  for (;;) {
    int next_char = gt_io_peek(xrf_abbr_file);

    if (next_char == GT_CARRIAGE_RETURN) {
      gt_io_next(xrf_abbr_file);
      if (gt_io_peek(xrf_abbr_file) == GT_END_OF_LINE)
        gt_io_next(xrf_abbr_file);
      return had_err;
    }
    if (next_char == GT_END_OF_LINE) {
      gt_io_next(xrf_abbr_file);
      return had_err;
    }
    if (next_char == GT_END_OF_FILE)
      return had_err;
    /* ordinary comment text, skip it */
    gt_io_next(xrf_abbr_file);
  }
}
/*
 * Sets up the locate-related fields of <bwtSeq> from the headers stored in
 * its index.
 *
 * If there is no locate header, or its interval is 0, locating is switched
 * off: the sample interval becomes 0 and the feature toggles are reset to
 * BWTBaseFeatures. Otherwise interval, rot0Pos and feature toggles are taken
 * from the header. If additionally no rank sort header can be read,
 * bitsPerOrigRank is set to 0 and <defaultRangeSort> is copied into
 * bwtSeq->rangeSort (one entry per alphabet range).
 */
extern void BWTSeqInitLocateHandling(BWTSeq *bwtSeq,
                                     const enum rangeSortMode *defaultRangeSort)
{
  struct locateHeader locHeader;
  struct encIdxSeq *seqIdx;

  gt_assert(bwtSeq);
  seqIdx = bwtSeq->seqIdx;

  if (readLocateInfoHeader(seqIdx, &locHeader) && locHeader.locateInterval)
  {
    bwtSeq->locateSampleInterval = locHeader.locateInterval;
    bwtSeq->rot0Pos = locHeader.rot0Pos;
    /* FIXME: this really deserves its own header */
    bwtSeq->featureToggles = locHeader.featureToggles;
    if (!readRankSortHeader(seqIdx, &bwtSeq->bitsPerOrigRank,
                            bwtSeq->alphabet, bwtSeq->rangeSort))
    {
      /* no stored sort modes: fall back to the caller-supplied defaults */
      AlphabetRangeID numRanges = MRAEncGetNumRanges(bwtSeq->alphabet);
      bwtSeq->bitsPerOrigRank = 0;
      memcpy(bwtSeq->rangeSort, defaultRangeSort,
             numRanges * sizeof (defaultRangeSort[0]));
    }
    return;
  }

  gt_log_log("Index does not contain locate information.\n"
             "Localization of matches will not be supported!");
  bwtSeq->locateSampleInterval = 0;
  bwtSeq->featureToggles = BWTBaseFeatures;
}
/*
 * Raises the recorded space peak of <fcsl> to the current sum of work space
 * and split space, if that sum exceeds the peak seen so far, and logs the
 * new peak in megabytes.
 */
static void gt_firstcodes_updatemax(GtFirstcodesspacelog *fcsl)
{
  if (fcsl->workspace + fcsl->splitspace <= fcsl->spacepeak)
  {
    return; /* no new maximum */
  }
  fcsl->spacepeak = fcsl->workspace + fcsl->splitspace;
  gt_log_log("update spacepeak to %.2f MB",
             GT_MEGABYTES(fcsl->spacepeak));
}
/*
 * Parses a contained reads list file.
 *
 * The first byte selects the format: GT_CNTLIST_BIN_HEADER,
 * GT_CNTLIST_BIT_HEADER or GT_CNTLIST_ASCII_HEADER. For the ASCII format the
 * byte is pushed back before the format parser runs. The remaining
 * arguments are passed through to the format specific parser.
 *
 * Returns -1 if the file cannot be opened, 1 (with <err> set) for an empty
 * file or an unknown header byte, and otherwise the return value of the
 * format specific parser. The file is closed on every path after opening.
 */
int gt_cntlist_parse(const char *filename, bool alloc_cntlist,
                     GtBitsequence **cntlist, GtUword *nofreads, GtError *err)
{
  FILE *infp;
  int c, retval = 0;

  gt_log_log("parse contained reads list file: %s", filename);
  infp = gt_fa_fopen(filename, "rb", err);
  if (infp == NULL)
    return -1;

  c = gt_xfgetc(infp);
  if (c == EOF)
  {
    gt_error_set(err, "%s: unexpected end of file", filename);
    retval = 1;
  }
  else if (c == GT_CNTLIST_BIN_HEADER)
  {
    gt_log_log("contained reads list format: BIN");
    retval = gt_cntlist_parse_bin(infp, alloc_cntlist, cntlist, nofreads,
                                  err);
  }
  else if (c == GT_CNTLIST_BIT_HEADER)
  {
    gt_log_log("contained reads list format: BIT");
    retval = gt_cntlist_parse_bit(infp, alloc_cntlist, cntlist, nofreads,
                                  err);
  }
  else if (c == GT_CNTLIST_ASCII_HEADER)
  {
    /* the ASCII parser wants to see the header byte itself */
    gt_xungetc(c, infp);
    gt_log_log("contained reads list format: ASCII");
    retval = gt_cntlist_parse_ascii(infp, alloc_cntlist, cntlist, nofreads,
                                    err);
  }
  else
  {
    gt_error_set(err, "%s: unrecognized format", filename);
    retval = 1;
  }
  gt_fa_fclose(infp);
  return retval;
}
/*
 * Benchmark helper: decodes <amount> randomly chosen reads from <hcrd> and
 * writes description, sequence and qualities of each to the debug log.
 *
 * If <timer> is NULL a private timer is created, started and deleted again
 * before returning. A timer passed in by the caller only gets a progress
 * entry and stays owned by the caller.
 *
 * Decoding stops at the first failure. Returns 0 on success, otherwise the
 * error code of gt_hcr_decoder_decode() (with <err> set by that call).
 *
 * NOTE(review): sequence and qualities are decoded into BUFSIZ-sized stack
 * buffers; presumably reads are expected to fit - confirm against
 * gt_hcr_decoder_decode().
 */
static int gt_compreads_decompress_benchmark(GtHcrDecoder *hcrd,
                                             unsigned long amount,
                                             GtTimer *timer,
                                             GtError *err)
{
  char qual[BUFSIZ] = {0},
       seq[BUFSIZ] = {0};
  int had_err = 0;
  /* only a timer created in here may be deleted in here */
  const bool owns_timer = (timer == NULL);
  unsigned long readnum,
                max_rand = gt_hcr_decoder_num_of_reads(hcrd) - 1,
                count;
  GtStr *timer_comment = gt_str_new_cstr("extracting ");
  GtStr *desc = gt_str_new();

  gt_str_append_ulong(timer_comment, amount);
  gt_str_append_cstr(timer_comment, " reads of ");
  gt_str_append_ulong(timer_comment, max_rand + 1);
  gt_str_append_cstr(timer_comment, "!");

  if (owns_timer) {
    timer = gt_timer_new_with_progress_description("extract random reads");
    gt_timer_start(timer);
  }
  else {
    gt_timer_show_progress(timer, "extract random reads", stdout);
  }

  gt_log_log("%s",gt_str_get(timer_comment));
  /* stop at the first decoding error instead of idling through the rest */
  for (count = 0; !had_err && count < amount; count++) {
    readnum = gt_rand_max(max_rand);
    gt_log_log("get read: %lu", readnum);
    had_err = gt_hcr_decoder_decode(hcrd, readnum, seq, qual, desc, err);
    gt_log_log("%s",gt_str_get(desc));
    gt_log_log("%s",seq);
    gt_log_log("%s",qual);
  }
  gt_str_delete(timer_comment);
  gt_str_delete(desc);
  if (owns_timer)
    gt_timer_delete(timer);
  return had_err;
}
/*
 * Runs the BLAST call described by <call> and returns a pipe to read its
 * output from.
 *
 * Before the real call, call->version_call is executed as an installation
 * check. Its output is forwarded to the debug log and its exit status is
 * checked (not on _WIN32): when call->all is set the check is expected to
 * exit with status 1, otherwise with status 0.
 *
 * Returns the pipe opened with popen() on call->str, to be closed by the
 * caller, or NULL with <err> set if the installation check or opening the
 * pipe failed.
 */
FILE *gt_blast_process_call_run(GtBlastProcessCall *call, GtError *err)
{
  int had_err = 0;
#ifndef _WIN32
  int errcode;
#endif
  FILE *blastout = NULL,
       *installcheck = NULL;

  gt_assert(call->query && call->db);

  installcheck = popen(call->version_call, "r");
  if (installcheck == NULL) {
    gt_error_set(err, "Could not open pipe to run %s: %s",
                 call->version_call, strerror(errno));
    had_err = -1;
  }
  /* this assures that we get the output if debugging is set, and also we
     prevent BROKEN_PIPE error if pclose(3) is called before the version call
     exits */
  if (!had_err) {
    char *newline = NULL;
    char line[BUFSIZ + 1];
    line[BUFSIZ] = '\0';
    while (fgets(line, (int) BUFSIZ, installcheck) != NULL) {
      if ((newline = strrchr(line, '\n')) != NULL) {
        *newline = '\0';
        newline = NULL;
      }
      gt_log_log("%.*s", BUFSIZ, line);
    }
  }
  if (!had_err) {
#ifndef _WIN32
    errcode = pclose(installcheck);
    if (errcode == -1) {
      /* pclose() itself failed; there is no exit status to inspect */
      gt_error_set(err, "Error calling %s.", call->version_call);
      had_err = -1;
    }
    /* the two modes expect different statuses; requiring errcode == 0
       unconditionally would reject the status 1 expected for call->all */
    else if ((call->all && WEXITSTATUS(errcode) != 1) ||
             (!call->all && errcode != 0)) {
      if (WEXITSTATUS(errcode) == 127)
        gt_error_set(err, "shell returned 127, BLAST not installed?");
      else
        gt_error_set(err, "%s error, returned %d",
                     call->version_call, WEXITSTATUS(errcode));
      had_err = -1;
    }
#endif
  }
  if (!had_err) {
    blastout = popen(gt_str_get(call->str), "r");
    if (blastout == NULL) {
      gt_error_set(err, "Could not open pipe to run BLAST process: %s",
                   strerror(errno));
    }
  }
  return blastout;
}
/*
 * Walks from <node> up the recorded parent links until the node that is its
 * own parent is reached, and inserts <original_node> into that node's block.
 *
 * On the way up, each visited node gets <parent> stored as its parent in its
 * info element. The walk stops silently when <parent> is NULL or when the
 * parent has no info element in d->nodeinfo.
 *
 * At the target the block is keyed by the node type and by the multi-feature
 * representative of <original_node> (GT_UNDEF_REPR if it is no multi-
 * feature); it is created from the target node if it does not exist yet.
 */
static void add_recursive(GtDiagram *d, GtFeatureNode *node,
                          GtFeatureNode* parent, GtFeatureNode *original_node)
{
  NodeInfoElement *node_info;
  GtFeatureNode *representative = GT_UNDEF_REPR;
  GtBlock *target;

  gt_assert(d && node && original_node);
  if (parent == NULL)
    return;

  node_info = nodeinfo_get(d, node);
  if (gt_feature_node_is_multi(original_node))
    representative = gt_feature_node_get_multi_representative(original_node);

  if (parent != node) {
    /* not at target type block yet, set up reverse entry and follow */
    NodeInfoElement *parent_info;

    node_info->parent = parent;
    parent_info = gt_hashmap_get(d->nodeinfo, parent);
    if (parent_info != NULL) {
      gt_log_log("recursion: %s -> %s", gt_feature_node_get_type(node),
                 gt_feature_node_get_type(parent));
      add_recursive(d, parent, parent_info->parent, original_node);
    }
    return;
  }

  /* end of recursion, insert into target block */
  target = nodeinfo_find_block(node_info, gt_feature_node_get_type(node),
                               representative);
  if (target == NULL) {
    target = gt_block_new_from_node(node);
    nodeinfo_add_block(node_info, gt_feature_node_get_type(node),
                       representative, target);
  }
  gt_block_insert_element(target, original_node);
  gt_log_log("add %s to target %s", gt_feature_node_get_type(original_node),
             gt_block_get_type(target));
}
/*
 * Writes the encoding held by <hcr_enc> to files named after <name>.
 *
 * If a description encoder is attached, the FASTA headers of the input files
 * are encoded first; afterwards sequence and quality data are written. When
 * logging is enabled a short overview (sampling mode, number of reads and
 * nucleotides, bits per nucleotide estimated from the size of the file
 * <name> + HCRFILESUFFIX) is written to the debug log.
 *
 * <timer> may be NULL; otherwise a progress entry is shown.
 *
 * Returns 0 on success. Returns -1 if the header iterator could not be
 * created, otherwise the error code of the failing encoding step.
 */
int gt_hcr_encoder_encode(GtHcrEncoder *hcr_enc, const char *name,
                          GtTimer *timer, GtError *err)
{
  int had_err = 0;
  GtStr *name1;

  gt_error_check(err);
  if (timer != NULL)
    gt_timer_show_progress(timer, "write encoding", stdout);
  if (hcr_enc->encdesc_encoder != NULL) {
    GtCstrIterator *cstr_iterator =
      gt_fasta_header_iterator_new(hcr_enc->files, err);
    /* the constructor takes an error object, i.e. it may fail; do not hand
       a NULL iterator to the encoder (assumes <err> was set by the
       constructor in that case - TODO confirm) */
    if (cstr_iterator == NULL)
      had_err = -1;
    else {
      had_err = gt_encdesc_encoder_encode(hcr_enc->encdesc_encoder,
                                          cstr_iterator, name, err);
      gt_cstr_iterator_delete(cstr_iterator);
    }
  }
  if (!had_err)
    had_err = hcr_write_seq_qual_data(name, hcr_enc, timer, err);
  if (!had_err && gt_log_enabled()) {
    name1 = gt_str_new_cstr(name);
    gt_str_append_cstr(name1, HCRFILESUFFIX);
    gt_log_log("sequences with qualities encoding overview:");
    gt_log_log("**>");
    if (hcr_enc->page_sampling)
      gt_log_log("applied sampling technique: sampling every " GT_WU
                 "th page", hcr_enc->sampling_rate);
    else if (hcr_enc->regular_sampling)
      gt_log_log("applied sampling technique: sampling every " GT_WU
                 "th read", hcr_enc->sampling_rate);
    else
      gt_log_log("applied sampling technique: none");
    gt_log_log("total number of encoded nucleotide sequences with qualities: "
               GT_WU, hcr_enc->num_of_reads);
    gt_log_log("total number of encoded nucleotides: " GT_LLU,
               hcr_enc->seq_encoder->total_num_of_symbols);
    gt_log_log("bits per nucleotide encoding: %f",
               (gt_file_estimate_size(gt_str_get(name1)) * 8.0) /
               hcr_enc->seq_encoder->total_num_of_symbols);
    gt_log_log("<**");
    gt_str_delete(name1);
  }
  return had_err;
}
/*
 * Chain processing callback: called once per chain found by the global
 * chaining.
 *
 * <data> points to the running chain number. That number is appended to the
 * `chains` array of the hit stored in the `data` field of every fragment of
 * chain <c>, and is incremented afterwards. Size, score and fragment
 * coordinates of the chain are written to the debug log.
 */
static void gt_ltrdigest_pdom_visitor_chainproc(GtChain *c, GtFragment *f,
                                         GT_UNUSED unsigned long nof_frags,
                                         GT_UNUSED unsigned long gap_length,
                                         void *data)
{
  unsigned long i,
                *chainno = (unsigned long*) data;

  /* the chain size is compared against an unsigned index below, so it is
     printed unsigned; the score keeps its signed conversion */
  gt_log_log("resulting chain has %lu GtFragments, score %ld",
             gt_chain_size(c),
             gt_chain_get_score(c));
  for (i = 0; i < gt_chain_size(c); i++)
  {
    GtFragment frag;
    frag = f[gt_chain_get_fragnum(c, i)];
    gt_log_log("(%lu %lu) (%lu %lu)", frag.startpos1, frag.endpos1,
                                      frag.startpos2, frag.endpos2);
    gt_array_add(((GtHMMERSingleHit*) frag.data)->chains, *chainno);
  }
  (*chainno)++;
  gt_log_log("\n");
}
/*
 * Creates a block of its own for <node> and registers it with the node's
 * info element, with the node recorded as its own parent.
 *
 * The caption is taken from the "block_caption" style setting. If that is
 * not set, it is composed as "<parent>/<node>" from the names or IDs of
 * parent and node: the parent part is "-" when the parent has no name/ID or
 * no children, and is left out entirely without a parent. No caption is set
 * when caption display is switched off for this type or when neither name
 * is available.
 *
 * Returns 0 on success, -1 if querying the style failed.
 */
static int add_to_current(GtDiagram *d, GtFeatureNode *node,
                          GtFeatureNode *parent, GtError *err)
{
  const char *nodetype;
  bool show_caption = true;
  NodeInfoElement *node_info;
  GtBlock *block;
  GtStr *caption;
  GtStyleQueryStatus query;

  gt_assert(d && node);
  nodetype = gt_feature_node_get_type(node);
  if (get_caption_display_status(d, nodetype, &show_caption, err) < 0)
    return -1;

  /* Get nodeinfo element and set itself as parent */
  node_info = nodeinfo_get(d, node);
  gt_log_log("adding %s to self", nodetype);
  node_info->parent = node;

  /* create new GtBlock tuple and add to node info */
  block = gt_block_new_from_node(node);
  caption = gt_str_new();
  query = gt_style_get_str(d->style, nodetype, "block_caption", caption,
                           node, err);
  if (query == GT_STYLE_QUERY_ERROR) {
    gt_str_delete(caption);
    gt_block_delete(block);
    return -1;
  }
  if (query == GT_STYLE_QUERY_NOT_SET) {
    const char *parent_label = get_node_name_or_id(parent),
               *node_label = get_node_name_or_id(node);

    if (!show_caption || (parent_label == NULL && node_label == NULL)) {
      /* nothing to show */
      gt_str_delete(caption);
      caption = NULL;
    }
    else {
      if (parent != NULL) {
        if (parent_label != NULL && gt_feature_node_has_children(parent))
          gt_str_append_cstr(caption, parent_label);
        else
          gt_str_append_cstr(caption, "-");
        gt_str_append_cstr(caption, "/");
      }
      if (node_label != NULL)
        gt_str_append_cstr(caption, node_label);
    }
  }
  gt_block_set_caption(block, caption);
  gt_block_insert_element(block, node);
  nodeinfo_add_block(node_info, gt_feature_node_get_type(node),
                     GT_UNDEF_REPR, block);
  return 0;
}
/*
 * Repositions <data_iter> at <pos>: the current mapping is released, the
 * data pointer is cleared and the position is updated.
 *
 * <pos> has to lie in [data_iter->start, data_iter->end); this is only
 * asserted.
 */
static void reset_data_iterator_to_pos(HcrHuffDataIterator *data_iter,
                                       size_t pos)
{
  gt_assert(data_iter);
  gt_assert(pos < data_iter->end);
  gt_assert(data_iter->start <= pos);

  /* drop the old mapping */
  gt_fa_xmunmap(data_iter->data);
  gt_log_log("reset to pos: "GT_WU"", (GtUword) pos);
  data_iter->pos = pos;
  data_iter->data = NULL;
}
/*
 * Chain processing callback: called once per chain found by the global
 * chaining.
 *
 * <data> points to the running chain number. That number is appended to the
 * `chains` array of the hit stored in the `data` field of every fragment of
 * chain <c>, and is incremented afterwards. Size, score and fragment
 * coordinates of the chain are written to the debug log.
 */
static void gt_ltrdigest_pdom_visitor_chainproc(GtChain *c, GtFragment *f,
                                                GT_UNUSED GtUword nof_frags,
                                                GT_UNUSED GtUword gap_length,
                                                void *data)
{
  GtUword i,
          *chainno = (GtUword*) data;

  /* the chain size is compared against an unsigned index below, so it is
     printed with the unsigned conversion; the score keeps the signed one */
  gt_log_log("resulting chain has "GT_WU" GtFragments, score "GT_WD"",
             gt_chain_size(c),
             gt_chain_get_score(c));
  for (i = 0; i < gt_chain_size(c); i++)
  {
    GtFragment frag;
    frag = f[gt_chain_get_fragnum(c, i)];
    gt_log_log("("GT_WU" "GT_WU") ("GT_WU" "GT_WU")", frag.startpos1,
                                                      frag.endpos1,
                                                      frag.startpos2,
                                                      frag.endpos2);
    gt_array_add(((GtHMMERSingleHit*) frag.data)->chains, *chainno);
  }
  (*chainno)++;
  gt_log_log("\n");
}
/*
 * Moves <data_iter> to position <pos>. The mapping held so far is unmapped
 * and forgotten, then the new position is stored.
 *
 * Asserted preconditions: data_iter != NULL and
 * data_iter->start <= pos < data_iter->end.
 */
static void reset_data_iterator_to_pos(HcrHuffDataIterator *data_iter,
                                       size_t pos)
{
  gt_assert(data_iter);
  gt_assert(pos < data_iter->end);
  gt_assert(data_iter->start <= pos);

  gt_fa_xmunmap(data_iter->data);
  gt_log_log("reset to pos: %lu", (unsigned long) pos);

  /* no mapping is active until data is mapped again */
  data_iter->pos = pos;
  data_iter->data = NULL;
}
/*
 * Logs the space currently reported by gt_ma_get_space_current() and
 * gt_fa_get_space_current(), as total and as separate figures, in
 * megabytes. Nothing is logged unless gt_ma_bookkeeping_enabled() is true.
 *
 * <label> is inserted after "used space"; NULL is treated as "".
 */
static inline void gt_readjoiner_assembly_show_current_space(const char *label)
{
  GtUword ma_space, fa_space;
  const char *shown_label = "";

  if (!gt_ma_bookkeeping_enabled())
    return;

  ma_space = gt_ma_get_space_current();
  fa_space = gt_fa_get_space_current();
  if (label != NULL)
    shown_label = label;
  gt_log_log("used space %s: %.2f MB (ma: %.2f MB; fa: %.2f MB)",
             shown_label, GT_MEGABYTES(ma_space + fa_space),
             GT_MEGABYTES(ma_space), GT_MEGABYTES(fa_space));
}
/*
 * Handles a line that carries no tag: a blank line is passed to the blank
 * line parser, a comment line to the comment line parser. Anything else is
 * skipped by a single character.
 *
 * Returns the (boolean-converted) result of the line parser that was called,
 * i.e. true if it reported an error, and false if only one character was
 * skipped.
 */
static bool gt_xrf_abbr_parse_tree_ignored_line(GtIO *xrf_abbr_file,
                                                GtError *err)
{
  bool result = false;

  gt_error_check(err);
  gt_log_log("ignored");
  if (gt_io_peek(xrf_abbr_file) == XRF_BLANK_CHAR) {
    result = gt_xrf_abbr_parse_tree_blank_line(xrf_abbr_file, err);
  }
  else if (gt_io_peek(xrf_abbr_file) == XRF_COMMENT_CHAR) {
    result = gt_xrf_abbr_parse_tree_comment_line(xrf_abbr_file, err);
  }
  else {
    gt_io_next(xrf_abbr_file);
  }
  return result;
}
/*
 * Hashmap iteration callback: processes the hits of one HMMER model.
 *
 * <key> is the model name, <val> the GtHMMERModelHit and <data> the
 * GtLTRdigestPdomVisitor. The hit list of one strand is selected by
 * comparing best_fwd and best_rev (forward on <= 0). A single hit is
 * assigned chain number 0 directly. Several hits are converted to
 * fragments, sorted and chained with gt_globalchaining_max(), whose
 * callback assigns the chain numbers. Finally every hit is attached via
 * gt_ltrdigest_pdom_visitor_attach_hit().
 *
 * Always returns 0; <err> is not used.
 */
static int gt_ltrdigest_pdom_visitor_process_hit(GT_UNUSED void *key, void *val,
                                                 void *data,
                                                 GT_UNUSED GtError *err)
{
  GtHMMERModelHit *mh = (GtHMMERModelHit*) val;
  GtLTRdigestPdomVisitor *lv = (GtLTRdigestPdomVisitor*) data;
  const char *mdl = (const char*) key;
  GtArray *hits = NULL;
  GtUword nof_hits;
  GtFragment *frags;

  if (gt_double_compare(mh->best_fwd, mh->best_rev) <= 0)
    hits = mh->fwd_hits;
  else
    hits = mh->rev_hits;
  gt_assert(hits);
  nof_hits = gt_array_size(hits);
  if (nof_hits == 0)
    return 0;
  if (nof_hits > 1UL) {
    /* the chain callback reads and increments this counter through the
       pointer passed below, so it has to start from a defined value */
    GtUword i, chainno = 0;

    frags = gt_malloc((size_t) nof_hits * sizeof (GtFragment));
    for (i = 0; i < nof_hits; i++) {
      GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, i);
      gt_assert(h);
      frags[i].startpos1 = h->hmmfrom;
      frags[i].endpos1   = h->hmmto;
      frags[i].startpos2 = h->alifrom;
      frags[i].endpos2   = h->alito;
      /* weight: aligned length times hit score */
      frags[i].weight    = (GtWord) (h->alito - h->alifrom + 1) * h->score;
      frags[i].data      = h;
    }
    qsort(frags, (size_t) nof_hits, sizeof (GtFragment),
          gt_ltrdigest_pdom_visitor_fragcmp);
    gt_log_log("%s: chaining "GT_WU" frags", mdl, nof_hits);
    gt_globalchaining_max(frags, nof_hits,
                          (GtUword) lv->chain_max_gap_length,
                          gt_ltrdigest_pdom_visitor_chainproc, &chainno);
    gt_free(frags);
    for (i = 0; i < nof_hits; i++) {
      GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, i);
      (void) gt_ltrdigest_pdom_visitor_attach_hit(lv, mh, h);
    }
  } else {
    GtUword chainno = 0UL;
    GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, 0);
    gt_array_add(h->chains, chainno);
    (void) gt_ltrdigest_pdom_visitor_attach_hit(lv, mh, h);
  }
  return 0;
}
/*
 * Decides whether the numeric <field> uses Huffman coding (field->use_hc)
 * and flags which distribution that requires.
 *
 * With delta coding, *needs_delta_dist is set if the deltas are not constant
 * and there are at most GT_ENCDESC_MAX_NUM_VAL_HUF distinct delta values.
 * Without delta coding, *needs_value_dist is set if the values are not
 * constant and the number of distinct values is positive and at most
 * GT_ENCDESC_MAX_NUM_VAL_HUF.
 *
 * The two output flags are only ever set to true, never cleared.
 */
static void numeric_field_check_distri_dependence(DescField *field,
                                                  bool *needs_delta_dist,
                                                  bool *needs_value_dist)
{
  field->use_hc = false;

  if (field->use_delta_coding) {
    if (field->is_delta_cons ||
        !(field->delta_values_size <= GT_ENCDESC_MAX_NUM_VAL_HUF))
      return;
    *needs_delta_dist = true;
    field->use_hc = true;
    gt_log_log("delta_values_size: "GT_WU"", field->delta_values_size);
    return;
  }

  if (field->is_value_cons ||
      !(field->num_values_size > 0) ||
      !(field->num_values_size <= GT_ENCDESC_MAX_NUM_VAL_HUF))
    return;
  *needs_value_dist = true;
  field->use_hc = true;
  gt_log_log("num_values_size: "GT_WU"", field->num_values_size);
}
/*
 * Attaches the best suitable PBS hit of <results> as a child feature of
 * element->mainnode.
 *
 * If *canonical_strand is still unknown, the top-ranked hit is used and its
 * strand becomes the canonical one. Otherwise hits are tried in rank order
 * until one lies on the canonical strand; if there is none, nothing is
 * attached.
 *
 * The new feature has type GT_PBS_TYPE, source <tag>, the hit's score and
 * strand, and its coordinates shifted by one (GFF3 is 1-based). It carries
 * the attributes "trna" (only if the hit has one), "trnaoffset",
 * "pbsoffset" and "edist". The strand of the main node is set to the strand
 * of the hit as well.
 */
static void pbs_attach_results_to_gff3(GtPBSResults *results,
                                       GtLTRElement *element,
                                       GtStrand *canonical_strand,
                                       GtStr *tag)
{
  char buffer[BUFSIZ];
  unsigned long rank = 0;
  GtRange pbs_range;
  GtGenomeNode *gf;
  GtFeatureNode *pbs_feature;
  GtPBSHit* hit = gt_pbs_results_get_ranked_hit(results, rank++);

  if (*canonical_strand != GT_STRAND_UNKNOWN)
  {
    /* do we have to satisfy a strand constraint?
     * then find best-scoring PBS on the given canonical strand */
    while (gt_pbs_hit_get_strand(hit) != *canonical_strand
             && rank < gt_pbs_results_get_number_of_hits(results))
    {
      gt_log_log("dropping PBS because of nonconsistent strand: %s\n",
                 gt_feature_node_get_attribute(element->mainnode, "ID"));
      hit = gt_pbs_results_get_ranked_hit(results, rank++);
    }
    /* if there is none, do not report a PBS */
    if (gt_pbs_hit_get_strand(hit) != *canonical_strand)
      return;
  }
  else
  {
    *canonical_strand = gt_pbs_hit_get_strand(hit);
  }

  pbs_range = gt_pbs_hit_get_coords(hit);
  /* GFF3 is 1-based */
  pbs_range.start += 1;
  pbs_range.end += 1;

  gf = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*)
                                                    element->mainnode),
                           GT_PBS_TYPE,
                           pbs_range.start,
                           pbs_range.end,
                           gt_pbs_hit_get_strand(hit));
  pbs_feature = (GtFeatureNode*) gf;
  gt_feature_node_set_source(pbs_feature, tag);
  gt_feature_node_set_score(pbs_feature, (float) gt_pbs_hit_get_score(hit));
  if (gt_pbs_hit_get_trna(hit) != NULL)
  {
    gt_feature_node_add_attribute(pbs_feature, "trna",
                                  gt_pbs_hit_get_trna(hit));
  }
  gt_feature_node_set_strand(element->mainnode, gt_pbs_hit_get_strand(hit));

  (void) snprintf(buffer, BUFSIZ-1, "%lu", gt_pbs_hit_get_tstart(hit));
  gt_feature_node_add_attribute(pbs_feature, "trnaoffset", buffer);

  (void) snprintf(buffer, BUFSIZ-1, "%lu", gt_pbs_hit_get_offset(hit));
  gt_feature_node_add_attribute(pbs_feature, "pbsoffset", buffer);

  (void) snprintf(buffer, BUFSIZ-1, "%lu", gt_pbs_hit_get_edist(hit));
  gt_feature_node_add_attribute(pbs_feature, "edist", buffer);

  gt_feature_node_add_child(element->mainnode, pbs_feature);
}
/*
 * Runs hmmsearch on the unique (coarse) FASTA file of <ces>.
 *
 * The call is
 *   <hmmsearch_path> --noali --notextw --domtblout <table_filename>
 *                    <hmm_filename> <coarse fasta>
 * with an empty environment, started through gt_safe_popen(). If logging is
 * enabled, the output of the process is copied to the debug log line by
 * line. The pipe is closed afterwards; the result of closing it is ignored.
 *
 * Returns 0 if the process could be started, -1 otherwise.
 */
static int hmmsearch_call_coarse_search(GtCondenseq* ces,
                                        char *hmmsearch_path,
                                        char *table_filename,
                                        char *hmm_filename,
                                        GtLogger *logger,
                                        GtError *err)
{
  char *hmmenv[] = { NULL },
       **hmmargs = NULL;
  int had_err = 0;
  GtSafePipe *pipe = NULL;
  GtStr *coarse_fas = gt_condenseq_unique_fasta_file(ces);

  gt_assert(coarse_fas != NULL);

  /* Array has to end with NULL */
  hmmargs = gt_calloc((size_t) 8, sizeof (*hmmargs));
  /* borrowed from the caller or from coarse_fas */
  hmmargs[0] = hmmsearch_path;
  hmmargs[4] = table_filename;
  hmmargs[5] = hmm_filename;
  hmmargs[6] = gt_str_get(coarse_fas);
  /* private copies, freed again below */
  hmmargs[1] = gt_cstr_dup("--noali");
  hmmargs[2] = gt_cstr_dup("--notextw");
  hmmargs[3] = gt_cstr_dup("--domtblout");

  gt_logger_log(logger, "calling: %s", hmmsearch_path);

  pipe = gt_safe_popen(hmmsearch_path, hmmargs, hmmenv, err);
  if (pipe == NULL)
    had_err = -1;

  gt_free(hmmargs[1]);
  gt_free(hmmargs[2]);
  gt_free(hmmargs[3]);
  gt_free(hmmargs);
  gt_str_delete(coarse_fas);

  /* pipe test for splint */
  if (had_err || pipe == NULL)
    return had_err;

  if (gt_log_enabled()) {
    GtStr *line = gt_str_new();
    while (gt_str_read_next_line(line, pipe->read_fd) == 0) {
      gt_log_log("%s", gt_str_get(line));
      gt_str_reset(line);
    }
    gt_str_delete(line);
  }
  (void) gt_safe_pclose(pipe);
  return had_err;
}
/*
 * Destroys <sfxmappedrange>; NULL is accepted and ignored.
 *
 * Both mappings (ptr and entire) are released, the pointer referenced by
 * usedptrptr (if any) is reset to NULL, the backing file (if a file name is
 * set) is removed, and finally the strings and the object itself are freed.
 */
void gt_Sfxmappedrange_delete(GtSfxmappedrange *sfxmappedrange)
{
  if (sfxmappedrange != NULL)
  {
    gt_log_log("delete table %s",gt_str_get(sfxmappedrange->tablename));

    /* release the mappings */
    gt_fa_xmunmap(sfxmappedrange->ptr);
    sfxmappedrange->ptr = NULL;
    gt_fa_xmunmap(sfxmappedrange->entire);
    sfxmappedrange->entire = NULL;

    if (sfxmappedrange->usedptrptr != NULL)
    {
      *sfxmappedrange->usedptrptr = NULL;
    }

    /* remove the backing file */
    if (sfxmappedrange->filename != NULL)
    {
      gt_log_log("remove \"%s\"",gt_str_get(sfxmappedrange->filename));
      gt_xunlink(gt_str_get(sfxmappedrange->filename));
    }

    gt_str_delete(sfxmappedrange->tablename);
    gt_str_delete(sfxmappedrange->filename);
    gt_free(sfxmappedrange);
  }
}
/*
 * Parses one "tag<separator> value" line.
 *
 * The tag characters are collected in <tag>, then XRF_SEPARATOR_CHAR is
 * expected, blanks after it are skipped and the value characters are
 * collected in <value>. The line has to end with a comment (handed to the
 * comment line parser) or with GT_END_OF_LINE. A tag that is not a valid
 * label only causes a warning.
 *
 * The parsed tag and value are written to the debug log even if parsing
 * failed. Returns 0 on success, otherwise the error code of the failing
 * step.
 */
static int gt_xrf_abbr_parse_tree_tag_line(GtIO *xrf_abbr_file, GtStr *tag,
                                           GtStr *value, GtError *err)
{
  int had_err = 0;

  gt_error_check(err);
  gt_log_log("tag");
  gt_assert(xrf_abbr_file && tag && value);

  /* tag: at least one character */
  for (;;) {
    had_err = gt_xrf_abbr_parse_tree_proc_any_char(xrf_abbr_file, tag, false,
                                                   err);
    if (had_err || !gt_xrf_abbr_parse_tree_any_char(xrf_abbr_file, false))
      break;
  }

  if (!had_err)
    had_err = gt_io_expect(xrf_abbr_file, XRF_SEPARATOR_CHAR, err);

  /* blanks between separator and value */
  while (!had_err && gt_io_peek(xrf_abbr_file) == XRF_BLANK_CHAR)
    gt_io_next(xrf_abbr_file);

  /* value: at least one character */
  if (!had_err) {
    for (;;) {
      had_err = gt_xrf_abbr_parse_tree_proc_any_char(xrf_abbr_file, value,
                                                     true, err);
      if (had_err || !gt_xrf_abbr_parse_tree_any_char(xrf_abbr_file, true))
        break;
    }
  }

  /* end of line, possibly preceded by a comment */
  if (!had_err) {
    if (gt_io_peek(xrf_abbr_file) == XRF_COMMENT_CHAR)
      had_err = gt_xrf_abbr_parse_tree_comment_line(xrf_abbr_file, err);
    else
      had_err = gt_io_expect(xrf_abbr_file, GT_END_OF_LINE, err);
  }

  if (!had_err && !gt_xrf_abbr_parse_tree_valid_label(gt_str_get(tag))) {
    gt_warning("file \"%s\": line "GT_WU": unknown label \"%s\"",
                gt_io_get_filename(xrf_abbr_file),
                gt_io_get_line_number(xrf_abbr_file),
                gt_str_get(tag));
  }
  gt_log_log("parsed line %s/%s", gt_str_get(tag), gt_str_get(value));
  return had_err;
}