/*
  Accounts for the leaf <leafnumber> hanging below the node <father>.

  The leaf position is translated into a file number via
  gt_encseq_filenum() and, if <state->file_to_genome_map> is set, further
  into the genome number stored in that map.

  For the first successor of <father> the per-genome counter array
  <father->gnumdist> is allocated on demand (state->numofdbfiles entries)
  and reset via resetgnumdist_shulen(). For every later successor
  shu_compute_leaf_edge_contrib() is called with the current counters
  before they are updated. In both cases the counter of the leaf's genome
  is incremented afterwards.

  <err> is unused; the function always returns 0.
*/
static int processleafedge_shulen(bool firstsucc,
                                  unsigned long fatherdepth,
                                  GtBUinfo_shulen *father,
                                  unsigned long leafnumber,
                                  GtBUstate_shulen *state,
                                  GT_UNUSED GtError *err)
{
  unsigned long filenum, gnum;

#ifdef SHUDEBUG
  printf("processleafedge %lu firstsucc=%s, "
         " depth(father)=%lu, path=",
         leafnumber,
         firstsucc ? "true" : "false",
         fatherdepth);
  if (fatherdepth > 0)
  {
    gt_encseq_showatstartposwithdepth(stdout,
                                      state->encseq,
                                      GT_READMODE_FORWARD,
                                      leafnumber,
                                      fatherdepth);
  }
  printf("\n");
#endif

  /* file number of the leaf, optionally mapped to its genome number */
  filenum = gt_encseq_filenum(state->encseq, leafnumber);
  gnum = (state->file_to_genome_map != NULL)
           ? state->file_to_genome_map[filenum]
           : filenum;

  if (!firstsucc)
  {
#ifdef SHUDEBUG
    shownode(__LINE__, state, "father", father);
#endif
    /* contribution is computed against the counters collected so far,
       i.e. before this leaf is counted */
    shu_compute_leaf_edge_contrib(state, father->gnumdist, gnum, fatherdepth);
  } else
  {
    gt_assert(father != NULL);
    if (father->gnumdist == NULL)
    {
      father->gnumdist = gt_malloc(sizeof (*father->gnumdist) *
                                   state->numofdbfiles);
    }
    resetgnumdist_shulen(father, state->numofdbfiles);
#ifdef SHUDEBUG
    shownode(__LINE__, state, "father", father);
#endif
  }

  father->gnumdist[gnum]++;
#ifdef SHUDEBUG
  printf("gnumdist[id=%lu,filenum=%lu]=%lu\n", father->id, gnum,
         father->gnumdist[gnum]);
  state->lastleafnumber = leafnumber;
#endif
  return 0;
}
/*
  Lua binding: returns the file number of the position given as argument 2
  in the encoded sequence given as argument 1.

  Raises a Lua argument error (via luaL_argcheck) if the position is not
  inside [0, total length of the encoded sequence). Pushes one number onto
  the Lua stack and returns 1 (the number of results).
*/
static int encseq_lua_filenum(lua_State *L)
{
  GtEncseq **encseq;
  lua_Number requested;
  GtUword pos, total_length;

  encseq = check_encseq(L, 1);
  requested = luaL_checknumber(L, 2);
  total_length = gt_encseq_total_length(*encseq);

  /* Validate in the floating-point domain before converting: converting a
     negative, NaN or too large floating-point value to an unsigned integer
     type is undefined behaviour in C. Values rejected here are mapped to
     <total_length>, which makes the exact integer check below fail with
     the usual message. */
  if (requested >= 0 && requested <= (lua_Number) total_length)
  {
    pos = (GtUword) requested;
  } else
  {
    pos = total_length;
  }
  luaL_argcheck(L, pos < total_length, 2,
                "cannot exceed total length of encoded sequence");
  lua_pushnumber(L, gt_encseq_filenum(*encseq, pos));
  return 1;
}
/*
  Determines the file number and the file-relative sequence number of the
  first sequence in <esc->encseq> whose description is matched by <seqid>
  (as decided by gt_grep(), which receives <seqid> as its pattern argument).

  Results are memoised in <esc->grep_cache> (created on first use), keyed by
  the <seqid> string, so repeated queries for the same ID skip the scan.

  On success <*filenum> and <*seqnum> are set and 0 is returned. If gt_grep()
  fails its error code is returned; if no description matches, <err> is set
  and -1 is returned.
*/
static int gt_encseq_col_do_grep_desc(GtEncseqCol *esc, GtUword *filenum,
                                      GtUword *seqnum, GtStr *seqid,
                                      GtError *err)
{
  const GtSeqInfo *cached;
  GtSeqInfo found;
  GtUword idx = 0;
  bool match = false;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(esc && filenum && seqnum && seqid);

  /* lazily create the lookup cache */
  if (esc->grep_cache == NULL)
    esc->grep_cache = gt_seq_info_cache_new();

  /* answer from the cache if this ID has been resolved before */
  cached = gt_seq_info_cache_get(esc->grep_cache, gt_str_get(seqid));
  if (cached != NULL) {
    *filenum = cached->filenum;
    *seqnum = cached->seqnum;
    return 0;
  }

  /* linear scan over all sequence descriptions */
  while (!had_err && idx < gt_encseq_num_of_sequences(esc->encseq)) {
    GtUword desc_len;
    const char *desc = gt_encseq_description(esc->encseq, &desc_len, idx);
    /* zero-filled copy with one extra byte, so the text handed to gt_grep()
       is NUL-terminated (the description comes with an explicit length;
       presumably it is not terminated itself) */
    char *desc_copy = gt_calloc(desc_len + 1, sizeof (char));

    memcpy(desc_copy, desc, desc_len * sizeof (char));
    had_err = gt_grep(&match, gt_str_get(seqid), desc_copy, err);
    gt_free(desc_copy);

    if (!had_err && match) {
      GtUword startpos = gt_encseq_seqstartpos(esc->encseq, idx);

      found.filenum = gt_encseq_filenum(esc->encseq, startpos);
      *filenum = found.filenum;
      /* sequence number relative to the first sequence of its file */
      found.seqnum = idx - gt_encseq_filenum_first_seqnum(esc->encseq,
                                                          *filenum);
      *seqnum = found.seqnum;
      gt_seq_info_cache_add(esc->grep_cache, gt_str_get(seqid), &found);
      break;
    }
    idx++;
  }

  if (!had_err && !match) {
    gt_error_set(err, "no description matched sequence ID '%s'",
                 gt_str_get(seqid));
    had_err = -1;
  }
  return had_err;
}
static int gt_gdiffcalc_runner(int argc, const char **argv, int parsed_args, void *tool_arguments, GT_UNUSED GtError *err) { GtGenomediffArguments *arguments = tool_arguments; int had_err = 0, i; GtUword lcounter = 0, zcounter = 0; double **shusums = NULL; GtEncseq *encseq = NULL; GtLogger *logger; GtShuUnitFileInfo *unit_info = NULL; GtTimer *timer = NULL; gt_error_check(err); gt_assert(arguments); logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stdout); gt_assert(logger); for (i = parsed_args; i < argc; i++) { gt_str_array_add_cstr(arguments->filenames, argv[i]); } if (gt_showtime_enabled()) { timer = gt_timer_new_with_progress_description("load encseq"); gt_timer_start(timer); gt_assert(timer); } if (arguments->with_units) { gt_logger_log(logger, "unitfile option set, filename is %s\n", gt_str_get(arguments->unitfile)); } if (!had_err) { GtEncseqLoader *el = gt_encseq_loader_new_from_options(arguments->loadopts, err); encseq = gt_encseq_loader_load(el, gt_str_get(arguments->indexname), err); gt_encseq_loader_delete(el); } if (encseq == NULL) had_err = -1; if (timer != NULL) gt_timer_show_progress(timer, "load units", stdout); if (!had_err) { unit_info = gt_shu_unit_info_new(encseq); if (arguments->with_units) had_err = gt_shu_unit_file_info_read(arguments->unitfile, unit_info, logger, err); } if (timer != NULL) gt_timer_show_progress(timer, "read table", stdout); if (!had_err) { GtIO *table_file = NULL; GtTokenizer *tokenizer = NULL; GtStr *line = NULL; gt_assert(unit_info != NULL); gt_array2dim_calloc(shusums, unit_info->num_of_genomes, unit_info->num_of_genomes); table_file = gt_io_new(gt_str_array_get(arguments->filenames, 0), "r"); tokenizer = gt_tokenizer_new(table_file); line = gt_tokenizer_get_token(tokenizer); while (line != NULL && !had_err) { char *cline = gt_str_get(line); char *elem = strtok(cline, ";"); zcounter = 0; while (elem != NULL && !had_err) { if (*elem != '#') { if (1 != sscanf(elem, "%lf", &shusums[lcounter][zcounter])) 
{ had_err = 1; gt_error_set(err, "couldn't scan"); break; } gt_logger_log(logger,"wert: %lf", shusums[lcounter][zcounter]); zcounter++; } else { gt_logger_log(logger, "name: %s", elem++); } elem = strtok(NULL, ";"); } gt_tokenizer_next_token(tokenizer); gt_str_delete(line); line = gt_tokenizer_get_token(tokenizer); lcounter++; gt_logger_log(logger, "line "GT_WD"", lcounter); } } if (!had_err) { GtUword num_of_seq, file_idx, seq_idx, startpos; GT_UNUSED GtUword oldpos = 0; gt_assert(unit_info != NULL); gt_assert(lcounter == zcounter); gt_assert(lcounter == unit_info->num_of_genomes); num_of_seq = gt_encseq_num_of_sequences(unit_info->encseq); for (seq_idx = 0; seq_idx < num_of_seq; seq_idx++) { startpos = gt_encseq_seqstartpos(unit_info->encseq, seq_idx); file_idx = gt_encseq_filenum(unit_info->encseq, startpos); gt_log_log("seq: "GT_WU" starts at: "GT_WU"\n" "belonges to file: "GT_WU" which is part of genome: %s", seq_idx, startpos, file_idx, gt_str_array_get(unit_info->genome_names, unit_info->map_files[file_idx])); gt_assert(oldpos <= startpos); oldpos = startpos; } } if (!had_err && shusums != NULL) { had_err = gt_genomediff_calculate_div_from_avg(shusums, arguments, unit_info, logger, timer, err); gt_array2dim_delete(shusums); } if (timer != NULL) { gt_timer_show_progress_final(timer, stdout); gt_timer_delete(timer); } gt_logger_delete(logger); gt_encseq_delete(encseq); gt_shu_unit_info_delete(unit_info); return had_err; }