/* Example for the sparse array2dim macros: allocate and fill the following
   sparse matrix, in which row r only stores the six cells of the columns
   r..r+5:

             11111
   012345678901234
  +---------------+
 0|000000         |
 1| 111111        |
 2|  222222       |
 3|   333333      |
 4|    444444     |
 5|     555555    |
 6|      666666   |
 7|       777777  |
 8|        888888 |
 9|         999999|
  +---------------+

   Always returns 0. */
int gt_array2dim_sparse_example(GT_UNUSED GtError *err)
{
  GtRowInfo ri[10];
  int **a2dim, row, col;

  gt_error_check(err);

  /* describe each row: it starts at column <row> and holds 6 cells */
  for (row = 0; row < 10; row++) {
    ri[row].length = 6;
    ri[row].offset = row;
  }

  /* allocate the zero-initialized sparse matrix (10 rows, 60 cells = 10 * 6) */
  gt_array2dim_sparse_calloc(a2dim, 10, 60, ri);

  /* write the row number into every stored cell of that row */
  for (row = 0; row < 10; row++) {
    for (col = row; col < row + 6; col++) {
      a2dim[row][col] = row;
    }
  }

  /* spot checks: first cell, a cell in the middle, and the very last cell */
  gt_assert(a2dim[0][0] == 0);
  gt_assert(a2dim[5][5] == 5);
  gt_assert(a2dim[9][14] == 9);

  /* release the matrix */
  gt_array2dim_delete(a2dim);

  return 0;
}
/* Release <bqd>: first its 2-dim table <distr>, then the structure itself.
   Passing NULL is allowed and does nothing. */
static void hcr_base_qual_distr_delete(GtBaseQualDistr *bqd)
{
  if (bqd != NULL) {
    gt_array2dim_delete(bqd->distr);
    gt_free(bqd);
  }
}
/* Release the score matrix <sm> together with its alphabet and its 2-dim
   score table. Passing NULL is allowed and does nothing. */
void gt_score_matrix_delete(GtScoreMatrix *sm)
{
  if (sm != NULL) {
    gt_alphabet_delete(sm->alphabet);
    gt_array2dim_delete(sm->scores);
    gt_free(sm);
  }
}
/* Run the bottom-up shulen traversal over <ssar> for the files of <encseq>
   and, if the traversal succeeded, print the resulting shulength distribution
   (without file names). The temporary traversal state is created and released
   here. Returns 0 on success and -1 if the traversal failed (<err> is handed
   to the traversal). */
int gt_multiesa2shulengthdist_print(Sequentialsuffixarrayreader *ssar,
                                    const GtEncseq *encseq,
                                    GtError *err)
{
  int retval;
  GtBUstate_shulen *bustate = gt_malloc(sizeof (*bustate));

  /* set up the traversal state */
  bustate->numofdbfiles = gt_encseq_num_of_files(encseq);
  bustate->encseq = encseq;
#ifdef GENOMEDIFF_PAPER_IMPL
  bustate->leafdist = gt_malloc(sizeof (*bustate->leafdist) *
                                bustate->numofdbfiles);
#endif
#ifdef SHUDEBUG
  bustate->nextid = 0;
#endif
  bustate->shulengthdist = shulengthdist_new(bustate->numofdbfiles);

  if (gt_esa_bottomup_shulen(ssar, bustate, err) != 0) {
    retval = -1;
  } else {
    shulengthdist_print(NULL,
                        (const uint64_t * const*) bustate->shulengthdist,
                        bustate->numofdbfiles);
    retval = 0;
  }

  /* tear down the traversal state, regardless of the outcome */
  gt_array2dim_delete(bustate->shulengthdist);
#ifdef GENOMEDIFF_PAPER_IMPL
  gt_free(bustate->leafdist);
#endif
  gt_free(bustate);

  return retval;
}
/* Release <dp_scores_protein>: its 3-dim <codon2amino> table, its 2-dim
   <score> table and finally the structure itself. Passing NULL is allowed and
   does nothing. */
void gth_dp_scores_protein_delete(GthDPScoresProtein *dp_scores_protein)
{
  if (dp_scores_protein != NULL) {
    gt_array3dim_delete(dp_scores_protein->codon2amino);
    gt_array2dim_delete(dp_scores_protein->score);
    gt_free(dp_scores_protein);
  }
}
/* Release <bucketspec2> and the three tables it holds (<subbuckettab>,
   <superbuckettab>, <order>). Unlike most delete functions, <bucketspec2> must
   not be NULL; this is asserted. */
void gt_bucketspec2_delete(GtBucketspec2 *bucketspec2)
{
  gt_assert(bucketspec2 != NULL);

  /* member tables first ... */
  gt_array2dim_delete(bucketspec2->subbuckettab);
  gt_free(bucketspec2->superbuckettab);
  gt_free(bucketspec2->order);

  /* ... then the container */
  gt_free(bucketspec2);
}
/* create an local alignment in square space, to use it in linear context you * have to generate an spacemanager before, in any other case it can be NULL */ GtWord alignment_in_square_space_local_generic(GtLinspaceManagement *spacemanager, GtAlignment *align, const GtUchar *useq, GtUword ustart, GtUword ulen, const GtUchar *vseq, GtUword vstart, GtUword vlen, const GtScoreHandler *scorehandler) { GtWord score = 0, **Ltabcolumn; GtMaxcoordvalue *max; gt_assert(align != NULL); if (spacemanager == NULL) { /*use it in normally case*/ gt_array2dim_malloc(Ltabcolumn, (ulen+1), (vlen+1)); max = gt_max_new(); } else { /*use it in lineraspace context*/ Ltabcolumn = (GtWord **) gt_linspaceManagement_change_to_square(spacemanager,ulen,vlen); max = gt_linspaceManagement_get_maxspace(spacemanager); } score = fillDPtab_in_square_space_local(Ltabcolumn, max, useq, ustart, ulen, vseq, vstart, vlen, scorehandler); /* reconstruct local alignment from 2dimarray Ltabcolumn */ reconstructalignment_from_Ltab(align, Ltabcolumn, max, useq, ustart, ulen, vseq,vstart,vlen,scorehandler); if (gt_max_get_length_safe(max)) { ustart = ustart+(gt_max_get_start(max)).a; vstart = vstart+(gt_max_get_start(max)).b; ulen = gt_max_get_row_length(max); vlen = gt_max_get_col_length(max); gt_alignment_set_seqs(align, &useq[ustart], ulen, &vseq[vstart], vlen); } if (spacemanager == NULL) { gt_array2dim_delete(Ltabcolumn); gt_max_delete(max); } return score; }
/* Align <u> (length <ulen>) with <v> (length <vlen>) under affine gap costs:
   a (ulen+1) x (vlen+1) DP table is filled, a new alignment over both
   sequences is created and completed by the traceback, and the table is
   released again. Both sequences have to be non-NULL and non-empty
   (asserted). The caller receives the new alignment object. */
GtAlignment* gt_affinealign(const char *u, unsigned long ulen,
                            const char *v, unsigned long vlen,
                            int replacement_cost, int gap_opening_cost,
                            int gap_extension_cost)
{
  GtAlignment *alignment;
  AffinealignDPentry **table;

  gt_assert(u && ulen && v && vlen);

  /* forward phase */
  gt_array2dim_malloc(table, ulen+1, vlen+1);
  affinealign_fill_table(table, u, ulen, v, vlen,
                         replacement_cost, gap_opening_cost,
                         gap_extension_cost);

  /* backward phase */
  alignment = gt_alignment_new_with_seqs((const GtUchar *) u, ulen,
                                         (const GtUchar *) v, vlen);
  affinealign_traceback(alignment, table, ulen, vlen);

  gt_array2dim_delete(table);
  return alignment;
}
/* Align <u> (length <ulen>) with <v> (length <vlen>) under affine gap costs
   with separate match and mismatch costs: a (ulen+1) x (vlen+1) DP table is
   filled, a new alignment over both sequences is created and completed by the
   traceback, and the table is released again. Only the sequence pointers are
   asserted to be non-NULL; there is no assertion on <ulen> or <vlen>. The
   caller receives the new alignment object. */
GtAlignment* gt_affinealign(const char *u, GtUword ulen,
                            const char *v, GtUword vlen,
                            int matchcost, int mismatchcost,
                            int gap_opening_cost, int gap_extension_cost)
{
  GtAlignment *alignment;
  AffinealignDPentry **table;

  gt_assert(u && v);

  /* forward phase */
  gt_array2dim_malloc(table, ulen+1, vlen+1);
  affinealign_fill_table(table, u, ulen, v, vlen,
                         matchcost, mismatchcost,
                         gap_opening_cost, gap_extension_cost);

  /* backward phase */
  alignment = gt_alignment_new_with_seqs((const GtUchar *) u, ulen,
                                         (const GtUchar *) v, vlen);
  affinealign_traceback(alignment, table, ulen, vlen);

  gt_array2dim_delete(table);
  return alignment;
}
/* Fill a temporary (ulen+1) x (vlen+1) square-space DP table for the given
   substrings of <useq> and <vseq> and return its bottom-right entry, i.e. the
   value at [ulen][vlen]. No alignment is reconstructed. <scorehandler> must
   not be NULL (asserted). */
GtUword distance_only_global_alignment(const GtUchar *useq,
                                       GtUword ustart,
                                       GtUword ulen,
                                       const GtUchar *vseq,
                                       GtUword vstart,
                                       GtUword vlen,
                                       const GtScoreHandler *scorehandler)
{
  GtUword result, **dptab;

  gt_assert(scorehandler);

  gt_array2dim_malloc(dptab, (ulen+1), (vlen+1));
  fillDPtab_in_square_space(dptab, useq, ustart, ulen, vseq, vstart, vlen,
                            scorehandler);

  /* read the result before the table goes away */
  result = dptab[ulen][vlen];
  gt_array2dim_delete(dptab);

  return result;
}
/* Example usage of the array2dim macros: allocate a 2-dim array, access it
   with the usual a2dim[row][column] syntax and release it again.
   Always returns 0. */
int gt_array2dim_example(GT_UNUSED GtError *err)
{
  int row, col;
  double **a2dim;

  gt_error_check(err);

  /* create a 10 x 20 double array */
  gt_array2dim_malloc(a2dim, 10, 20);

  /* ... (use array a2dim in conventional way via a2dim[row][column]);
     note that this example leaves row 0 and column 0 unwritten */
  for (row = 1; row < 10; row++) {
    for (col = 1; col < 20; col++) {
      a2dim[row][col] = row + col;
    }
  }

  /* free */
  gt_array2dim_delete(a2dim);

  return 0;
}
/* Drop the leading rows below <min_qual> from the distribution table of
   <bqd>: the rows min_qual..max_qual are copied into a freshly allocated
   table that replaces the old one; <nrows> and <qual_offset> are updated
   accordingly. Nothing happens if <min_qual> is 0. */
static void hcr_base_qual_distr_trim(GtBaseQualDistr *bqd)
{
  GtUint64 **trimmed;
  unsigned new_rows, row, col;

  if (bqd->min_qual == 0) {
    return; /* table already starts at its first used row */
  }

  new_rows = bqd->max_qual - bqd->min_qual + 1;
  gt_array2dim_calloc(trimmed, (GtUword) new_rows, bqd->ncols);

  /* row <row> of the new table is row <row + min_qual> of the old one */
  for (row = 0; row < new_rows; row++) {
    for (col = 0; col < bqd->ncols; col++) {
      trimmed[row][col] = bqd->distr[row + bqd->min_qual][col];
    }
  }

  /* swap in the trimmed table */
  gt_array2dim_delete(bqd->distr);
  bqd->distr = trimmed;
  bqd->nrows = new_rows;
  bqd->qual_offset = bqd->min_qual;
}
/* Release the bottom-up traversal state <bustate> (NULL is accepted and
   ignored). If no <gd_info> is given, the shulength distribution is printed
   with the file names of the unit info, and both the distribution table and
   the unit info are released here; with a <gd_info> these two are left
   untouched. The interval stack and the state itself are always released. */
void gt_sfx_multiesashulengthdist_delete(GtBUstate_shulen *bustate,
                                         GenomediffInfo *gd_info)
{
  if (bustate != NULL) {
    gt_assert(bustate->shulengthdist != NULL);

    if (gd_info == NULL) {
      /* stand-alone mode: report the result, then drop table and unit info */
      shulengthdist_print(bustate->unit_info->file_names,
                          (const uint64_t * const*) bustate->shulengthdist,
                          bustate->numofdbfiles);
      gt_array2dim_delete(bustate->shulengthdist);
      gt_shu_unit_info_delete(bustate->unit_info);
    }

    gt_GtArrayGtBUItvinfo_delete_shulen(bustate->stack,bustate);
#ifdef GENOMEDIFF_PAPER_IMPL
    gt_free(bustate->leafdist);
#endif
    gt_free(bustate);
  }
}
/* create an global alignment in square space, to use it in linear context you * have to generate an spacemanager before, in any other case it can be NULL */ GtUword alignment_in_square_space_generic (GtLinspaceManagement *spacemanager, GtAlignment *align, const GtUchar *useq, GtUword ustart, GtUword ulen, const GtUchar *vseq, GtUword vstart, GtUword vlen, const GtScoreHandler *scorehandler) { GtUword **E, distance; gt_assert(align && scorehandler); if (spacemanager == NULL) { /*use it in normally case*/ gt_array2dim_malloc(E, (ulen+1), (vlen+1)); } else { /*use it in lineraspace context*/ E = gt_linspaceManagement_change_to_square(spacemanager,ulen,vlen); } fillDPtab_in_square_space(E, useq, ustart, ulen, vseq, vstart, vlen, scorehandler); distance = E[ulen][vlen]; /* reconstruct alignment from 2dimarray E */ reconstructalignment_from_EDtab(align, E, useq, ustart, ulen, vseq, vstart, vlen, scorehandler); if (spacemanager == NULL) { gt_array2dim_delete(E); } return distance; }
/* Compute the path matrix for a contig by column-wise dynamic programming over
   the combined-score matrix and hand both matrices to
   mg_compute_gene_prediction().

   combinedscore_matrix: per-row, per-column scores (read via .matrix_score)
   hit_information:      passed through to mg_compute_gene_prediction()
   rows:                 number of matrix rows processed in every column
   contig_len:           number of columns
   parsestruct_ptr:      passed through to mg_compute_gene_prediction()
   err:                  error object, passed through as well

   Returns the return value of mg_compute_gene_prediction(). The path matrix
   is allocated and released inside this function. */
int mg_computepath(CombinedScoreMatrixEntry **combinedscore_matrix,
                   HitInformation *hit_information,
                   unsigned long rows,
                   unsigned long contig_len,
                   ParseStruct *parsestruct_ptr, GtError * err)
{
  int had_err = 0;

  /* the matrix used for the path computation */
  PathMatrixEntry **path_matrix;

  /* row_index: counter for the matrix rows;
     precursor_index: counter for the precursors (from 0 to at most 2);
     maxpath_frame: stores the previous frame (as matrix row) from which the
     maximum value was computed */
  unsigned short row_index = 0,
                 precursor_index = 0,
                 precursors_row = 0,
                 maxpath_frame = 0;

  /* position in the query DNA */
  unsigned long column_index = 0;

  /* variables for the current frame, the previous frame (holds one value out
     of precursors[]), the row of the previous frame (precursors_row above),
     and the array with the precursor frames */
  short current_frame = 0,
        precursors_frame = 0,
        precursors[NUM_PRECURSORS];

  /* q is the value that is added when leaving or entering a gene on the
     forward or reverse strand, respectively.
     Note: max_new/max_old start at 1 here but are reset to DBL_MIN after
     every processed cell, so only the very first cell is computed against an
     initial maximum of 1. */
  double q = ARGUMENTSSTRUCT(leavegene_value),
         max_new = 1,
         max_old = 1;

  /* allocate the path matrix - size corresponding to the combined-score
     matrix.
     NOTE(review): the row count is hard-coded to 7 while all loops below run
     up to <rows>; presumably rows <= 7 always holds - confirm with callers. */
  gt_array2dim_calloc(path_matrix, 7, contig_len);

  gt_error_check(err);

  /* the first column of the path matrix is taken over from the first column
     of the combined-score matrix; each row is its own predecessor */
  for (row_index = 0; row_index < rows; row_index++)
  {
    path_matrix[row_index][0].score =
      combinedscore_matrix[row_index][0].matrix_score;
    path_matrix[row_index][0].path_frame = row_index;
  }

  /* column-wise computation of the optimal path */
  for (column_index = 1; column_index < contig_len; column_index++)
  {
    for (row_index = 0; row_index < rows; row_index++)
    {
      /* the row counter is converted into the corresponding reading frame */
      current_frame = get_current_frame(row_index);

      /* determine the possible precursor reading frames from the current
         reading frame and the position in the query DNA sequence */
      compute_precursors(current_frame, column_index, precursors);

      /* compute the maximum over the possible precursors; the list ends at
         NUM_PRECURSORS entries or at the first UNDEFINED entry */
      for (precursor_index = 0;
           precursor_index < NUM_PRECURSORS
             && (precursors[precursor_index] != UNDEFINED);
           ++precursor_index)
      {
        /* current precursor reading frame - there are at most 3 possible
           precursors */
        precursors_frame = precursors[precursor_index];

        /* the precursor reading frame is converted into the corresponding
           matrix row */
        precursors_row = get_matrix_row(precursors_frame);

        /* the DP algorithm distinguishes 3 cases.
           case 1: change from the reverse to the forward strand or vice
           versa - q is added twice */
        if ((current_frame < 0 && precursors_frame > 0) ||
            (current_frame > 0 && precursors_frame < 0))
        {
          max_new = path_matrix[precursors_row][column_index-1].score +
                    combinedscore_matrix[row_index][column_index].matrix_score
                    + 2*q;
        }
        /* case 2: simple change of the reading frame, i.e. from + to + or
           from - to -, respectively - q is added once */
        else if (current_frame != 0 && precursors_frame != current_frame)
        {
          max_new = path_matrix[precursors_row][column_index-1].score +
                    combinedscore_matrix[row_index][column_index].matrix_score
                    + q;
        }
        /* case 3: the reading frame is kept, or change from coding to
           non-coding or vice versa - q is not added */
        else
        {
          max_new = path_matrix[precursors_row][column_index-1].score +
                    combinedscore_matrix[row_index][column_index]
                    .matrix_score;
        }

        /* determine the maximum of the at most 3 possibilities and remember
           the row the maximum originates from */
        if (gt_double_compare(max_new, max_old) > 0)
        {
          max_old = max_new;
          maxpath_frame = precursors_row;
        }
      }

      /* store the maximum and the "precursor" row; reset the variables.
         NOTE(review): DBL_MIN is the smallest positive normalized double, not
         the most negative one, so a candidate score <= 0 can never replace
         the reset value; if "minus infinity" was intended this would have to
         be -DBL_MAX - confirm before changing, results depend on it. */
      path_matrix[row_index][column_index].score      = max_old;
      path_matrix[row_index][column_index].path_frame = maxpath_frame;

      max_new = DBL_MIN;
      max_old = DBL_MIN;
      maxpath_frame = 0;
    }
  }

  /* call the gene prediction on both matrices */
  had_err = mg_compute_gene_prediction(combinedscore_matrix,
                                       path_matrix,
                                       contig_len,
                                       hit_information,
                                       parsestruct_ptr, err);

  gt_array2dim_delete(path_matrix);

  return had_err;
}
/* Runner of the gdiffcalc tool.
   Loads the encoded sequence named by the index name option, sets up the
   unit (genome) information, reads a ';'-separated table of shu sums from the
   first file argument into a num_of_genomes x num_of_genomes matrix and passes
   that matrix to gt_genomediff_calculate_div_from_avg().

   argc/argv/parsed_args: command line; every argument from <parsed_args> on
                          is appended to arguments->filenames
   tool_arguments:        the GtGenomediffArguments of the tool
   err:                   error object, set on failure

   Returns 0 on success and a non-zero value on error. All objects created
   here are released on every path, with the exception noted at the table
   reader below. */
static int gt_gdiffcalc_runner(int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GT_UNUSED GtError *err)
{
  GtGenomediffArguments *arguments = tool_arguments;
  int had_err = 0,
      i;
  GtUword lcounter = 0,
          zcounter = 0;
  double **shusums = NULL;
  GtEncseq *encseq = NULL;
  GtLogger *logger;
  GtShuUnitFileInfo *unit_info = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(arguments->verbose,
                         GT_LOGGER_DEFLT_PREFIX,
                         stdout);
  gt_assert(logger);

  for (i = parsed_args; i < argc; i++) {
    gt_str_array_add_cstr(arguments->filenames, argv[i]);
  }

  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("load encseq");
    /* check the timer before it is used, not afterwards */
    gt_assert(timer);
    gt_timer_start(timer);
  }

  if (arguments->with_units) {
    gt_logger_log(logger, "unitfile option set, filename is %s\n",
                  gt_str_get(arguments->unitfile));
  }

  if (!had_err) {
    GtEncseqLoader *el = gt_encseq_loader_new_from_options(arguments->loadopts,
                                                           err);
    encseq = gt_encseq_loader_load(el, gt_str_get(arguments->indexname), err);
    gt_encseq_loader_delete(el);
  }
  if (encseq == NULL)
    had_err = -1;

  if (timer != NULL)
    gt_timer_show_progress(timer, "load units", stdout);

  if (!had_err) {
    unit_info = gt_shu_unit_info_new(encseq);
    if (arguments->with_units)
      had_err = gt_shu_unit_file_info_read(arguments->unitfile, unit_info,
                                           logger, err);
  }

  if (timer != NULL)
    gt_timer_show_progress(timer, "read table", stdout);

  if (!had_err) {
    GtIO *table_file = NULL;
    GtTokenizer *tokenizer = NULL;
    GtStr *line = NULL;

    gt_assert(unit_info != NULL);
    gt_array2dim_calloc(shusums, unit_info->num_of_genomes,
                        unit_info->num_of_genomes);

    /* NOTE(review): table_file and tokenizer are never released in this
       function; the matching delete call (and whether the tokenizer owns the
       IO object) is not visible here - confirm and add it. */
    table_file = gt_io_new(gt_str_array_get(arguments->filenames, 0), "r");
    tokenizer = gt_tokenizer_new(table_file);
    line = gt_tokenizer_get_token(tokenizer);

    /* every token is one table row; its elements are separated by ';' and
       elements starting with '#' are names, not values */
    while (line != NULL && !had_err) {
      char *cline = gt_str_get(line);
      char *elem = strtok(cline, ";");

      zcounter = 0;
      while (elem != NULL && !had_err) {
        if (*elem != '#') {
          /* the table comes from a file: never write outside the matrix */
          if (lcounter >= unit_info->num_of_genomes ||
              zcounter >= unit_info->num_of_genomes) {
            had_err = -1;
            gt_error_set(err, "table has more rows or columns than genomes");
            break;
          }
          if (1 != sscanf(elem, "%lf", &shusums[lcounter][zcounter])) {
            had_err = 1;
            gt_error_set(err, "couldn't scan");
            break;
          }
          gt_logger_log(logger,"wert: %lf", shusums[lcounter][zcounter]);
          zcounter++;
        }
        else {
          gt_logger_log(logger, "name: %s", elem);
        }
        elem = strtok(NULL, ";");
      }
      gt_tokenizer_next_token(tokenizer);
      gt_str_delete(line);
      line = gt_tokenizer_get_token(tokenizer);
      lcounter++;
      /* lcounter is unsigned, so use the unsigned format */
      gt_logger_log(logger, "line "GT_WU"", lcounter);
    }
    /* the loop also stops on errors; do not leak the pending token then */
    if (line != NULL)
      gt_str_delete(line);
  }

  if (!had_err) {
    GtUword num_of_seq, file_idx, seq_idx, startpos;
    GT_UNUSED GtUword oldpos = 0;

    gt_assert(unit_info != NULL);
    gt_assert(lcounter == zcounter);
    gt_assert(lcounter == unit_info->num_of_genomes);

    /* log for every sequence where it starts and to which file and genome it
       belongs; start positions have to be non-decreasing */
    num_of_seq = gt_encseq_num_of_sequences(unit_info->encseq);

    for (seq_idx = 0; seq_idx < num_of_seq; seq_idx++) {
      startpos = gt_encseq_seqstartpos(unit_info->encseq, seq_idx);
      file_idx = gt_encseq_filenum(unit_info->encseq, startpos);
      gt_log_log("seq: "GT_WU" starts at: "GT_WU"\n"
                 "belonges to file: "GT_WU" which is part of genome: %s",
                 seq_idx, startpos, file_idx,
                 gt_str_array_get(unit_info->genome_names,
                                  unit_info->map_files[file_idx]));
      gt_assert(oldpos <= startpos);
      oldpos = startpos;
    }
  }

  /* the matrix is released on success AND on error */
  if (shusums != NULL) {
    if (!had_err) {
      had_err = gt_genomediff_calculate_div_from_avg(shusums, arguments,
                                                     unit_info, logger, timer,
                                                     err);
    }
    gt_array2dim_delete(shusums);
  }

  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  gt_logger_delete(logger);
  gt_encseq_delete(encseq);
  gt_shu_unit_info_delete(unit_info);

  return had_err;
}
/* Runner of the genomediff tool.
   With more than one file argument the files are encoded (as DNA) into the
   index named by the index name option. With a single argument that argument
   is appended to the index name and, for the esa/pck modes, the project file
   of the index is scanned for a "mirrored=1" entry. Afterwards the encoded
   sequence is loaded, the unit (genome) information is set up, the shu sums
   are computed (either directly via gt_genomediff_shulen_sum() or via the
   suffixerator run) and passed to gt_genomediff_kr_calc().

   argc/argv/parsed_args: command line; every argument from <parsed_args> on
                          is appended to arguments->filenames
   tool_arguments:        the GtGenomediffArguments of the tool
   err:                   error object, set on failure

   Returns 0 on success and a non-zero value on error. All objects created
   here are released on every path. */
static int gt_genomediff_runner(int argc, const char **argv,
                                int parsed_args, void *tool_arguments,
                                GtError *err)
{
  bool mirrored = false;
  int had_err = 0,
      i;
  GtEncseq *encseq = NULL;
  GtGenomediffArguments *arguments = tool_arguments;
  GtLogger *logger;
  GtShuUnitFileInfo *unit_info = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(arguments->verbose,
                         GT_LOGGER_DEFLT_PREFIX,
                         stdout);
  gt_assert(logger);

  for (i = parsed_args; i < argc; i++) {
    gt_str_array_add_cstr(arguments->filenames, argv[i]);
  }

  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("start");
    /* check the timer before it is used, not afterwards */
    gt_assert(timer);
    gt_timer_start(timer);
  }

  if (arguments->with_units) {
    gt_logger_log(logger, "unitfile option set, filename is %s\n",
                  gt_str_get(arguments->unitfile));
  }

  if (timer != NULL)
    gt_timer_show_progress(timer, "start shu search", stdout);

  if (gt_str_array_size(arguments->filenames) > 1UL) {
    GtEncseqEncoder *ee = gt_encseq_encoder_new();
    gt_encseq_encoder_set_timer(ee, timer);
    gt_encseq_encoder_set_logger(ee, logger);
    /* kr only makes sense for dna, so we can check this already with ee */
    gt_encseq_encoder_set_input_dna(ee);
    had_err = gt_encseq_encoder_encode(ee, arguments->filenames,
                                       gt_str_get(arguments->indexname), err);
    gt_encseq_encoder_delete(ee);
  }
  else {
    gt_str_append_str(arguments->indexname,
                      gt_str_array_get_str(arguments->filenames, 0));
    if (arguments->with_esa || arguments->with_pck) {
      GtStr *current_line = gt_str_new();
      FILE *prj_fp;
      const char *buffer;
      char **elements = NULL;

      prj_fp = gt_fa_fopen_with_suffix(gt_str_get(arguments->indexname),
                                       GT_PROJECTFILESUFFIX,"rb",err);
      if (prj_fp == NULL)
        had_err = -1;

      /* scan the "key=value" lines of the project file for mirrored=1 */
      while (!had_err && gt_str_read_next_line(current_line, prj_fp) != EOF) {
        buffer = gt_str_get(current_line);
        elements = gt_cstr_split(buffer, '=');
        if (elements != NULL) {
          /* key or value can be missing (e.g. a line without '='), so both
             are checked before they are printed or compared */
          if (elements[0] != NULL) {
            gt_log_log("%s", elements[0]);
            if (strcmp("mirrored", elements[0]) == 0 && elements[1] != NULL) {
              gt_log_log("%s", elements[1]);
              if (strcmp("1", elements[1]) == 0) {
                mirrored = true;
                gt_log_log("sequences are treated as mirrored");
              }
            }
          }
          /* release the split result of this line right away */
          gt_free(elements[0]);
          gt_free(elements[1]);
          gt_free(elements);
          elements = NULL;
        }
        gt_str_reset(current_line);
      }
      gt_str_delete(current_line);
      /* only close what was actually opened */
      if (prj_fp != NULL)
        gt_fa_xfclose(prj_fp);
    }
  }

  if (!had_err) {
    GtEncseqLoader *el = gt_encseq_loader_new_from_options(arguments->loadopts,
                                                           err);
    if (mirrored)
      gt_encseq_loader_mirror(el);
    encseq = gt_encseq_loader_load(el, gt_str_get(arguments->indexname), err);
    gt_encseq_loader_delete(el);
  }
  if (encseq == NULL)
    had_err = -1;

  if (!had_err) {
    unit_info = gt_shu_unit_info_new(encseq);
    if (arguments->with_units)
      had_err = gt_shu_unit_file_info_read(arguments->unitfile, unit_info,
                                           logger, err);
  }

  if (!had_err) {
    uint64_t **shusums = NULL;

    if (arguments->with_esa || arguments->with_pck) {
      shusums = gt_genomediff_shulen_sum(arguments, unit_info,
                                         logger, timer, err);
      if (shusums == NULL)
        had_err = -1;
    }
    else {
      const bool doesa = true;
      GenomediffInfo gd_info;
      Suffixeratoroptions sopts;

      sopts.beverbose = arguments->verbose;
      sopts.indexname = arguments->indexname;
      sopts.db = NULL;
      sopts.encopts = NULL;
      sopts.genomediff = true;
      sopts.inputindex = arguments->indexname;
      sopts.loadopts = arguments->loadopts;
      sopts.showprogress = false;
      sopts.idxopts = arguments->idxopts;

      gt_assert(unit_info != NULL);
      /* the suffixerator run fills this matrix via gd_info */
      gt_array2dim_calloc(shusums, unit_info->num_of_genomes,
                          unit_info->num_of_genomes);
      gd_info.shulensums = shusums;
      gd_info.unit_info = unit_info;
      had_err = gt_runsuffixerator(doesa, &sopts, &gd_info, logger, err);
    }

    /* the matrix is released on success AND on error */
    if (shusums != NULL) {
      if (!had_err) {
        had_err = gt_genomediff_kr_calc(shusums, arguments, unit_info,
                                        arguments->with_pck, logger, timer,
                                        err);
      }
      gt_array2dim_delete(shusums);
    }
  }

  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  gt_logger_delete(logger);
  gt_encseq_delete(encseq);
  gt_shu_unit_info_delete(unit_info);

  return had_err;
}