/* Creates the argument record for the sequniq tool: one GtSequniqArguments
   obtained from gt_calloc(), with its <ofi> member set to a new output file
   info object. Returned as an untyped pointer. */
static void* gt_sequniq_arguments_new(void)
{
  GtSequniqArguments *args;

  args = gt_calloc((size_t) 1, sizeof *args);
  args->ofi = gt_output_file_info_new();
  return args;
}
/* Creates the argument record for the seqorder tool: one GtSeqorderArguments
   obtained from gt_calloc(), returned as an untyped pointer. */
static void* gt_seqorder_arguments_new(void)
{
  GtSeqorderArguments *args;

  args = gt_calloc((size_t) 1, sizeof *args);
  return args;
}
/* Returns a new GtFeatureNodeObserver obtained from gt_calloc().
   The parameter list is spelled (void) so that the definition is a proper
   prototype; an empty list () leaves the parameters unspecified in C before
   C23 and disables argument checking for callers. */
GtFeatureNodeObserver* gt_feature_node_observer_new(void)
{
  GtFeatureNodeObserver *fno = gt_calloc(1, sizeof *fno);
  return fno;
}
static int gt_snp_annotator_visitor_prepare_gene(GtSNPAnnotatorVisitor *sav, GtError *err) { GtFeatureNodeIterator *fni, *mrnafni; GtFeatureNode *curnode, *last_mRNA = NULL; GtStr *mrnaseq, *seqid; int had_err = 0; mrnaseq = gt_str_new(); seqid = gt_genome_node_get_seqid((GtGenomeNode*) sav->gene); fni = gt_feature_node_iterator_new(sav->gene); while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) { if (gt_feature_node_get_type(curnode) == sav->mRNA_type) { GtFeatureNode *curnode2; if (last_mRNA) { char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char)); (void) strncpy(mrna_charseq, gt_str_get(mrnaseq), gt_str_length(mrnaseq)); if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) { had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq), err); } if (!had_err) { gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq); last_mRNA = curnode; gt_str_reset(mrnaseq); } } else last_mRNA = curnode; if (!had_err) { mrnafni = gt_feature_node_iterator_new(curnode); while (!had_err && (curnode2 = gt_feature_node_iterator_next(mrnafni))) { if (gt_feature_node_get_type(curnode2) == sav->CDS_type) { char *tmp; GtRange rng = gt_genome_node_get_range((GtGenomeNode*) curnode2); had_err = gt_region_mapping_get_sequence(sav->rmap, &tmp, seqid, rng.start, rng.end, err); if (!had_err) { gt_str_append_cstr_nt(mrnaseq, tmp, gt_range_length(&rng)); gt_free(tmp); } } } gt_feature_node_iterator_delete(mrnafni); } } } if (!had_err && last_mRNA) { char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char)); (void) strncpy(mrna_charseq, gt_str_get(mrnaseq), gt_str_length(mrnaseq)); if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) { had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq), err); } if (!had_err) { gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq); } } gt_feature_node_iterator_delete(fni); gt_str_delete(mrnaseq); return had_err; }
/* Creates the argument record for the seqfilter tool: one SeqFilterArguments
   obtained from gt_calloc(), with its <ofi> member set to a new output file
   info object. Returned as an untyped pointer. */
static void* gt_seqfilter_arguments_new(void)
{
  SeqFilterArguments *args;

  args = gt_calloc(1, sizeof *args);
  args->ofi = gt_output_file_info_new();
  return args;
}
/* Stream step of the LTRdigest file output stream.
   Pulls the next node from the input stream into <*gn>. If it is a feature
   node, its subgraph is run through the stream's visitor <ls->lv>, which is
   expected to fill <ls->element>. If a main node was found, one row is written
   to the tabular output file (element/LTR coordinates, TSDs, PPT, PBS and
   protein domain order) and the PPT, PBS, protein domain, 5'/3' LTR and whole
   element sequences are written to their FASTA output files.
   Non-feature nodes are passed through untouched (returns 0).
   Returns 0 on success, otherwise the error code of the failing call. */
int gt_ltrfileout_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                              GtError *err)
{
  GtLTRdigestFileOutStream *ls;
  GtFeatureNode *fn;
  GtRange lltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          rltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          ppt_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          pbs_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD};
  int had_err;
  GtUword i=0;

  gt_error_check(err);
  ls = gt_ltrdigest_file_out_stream_cast(ns);

  /* initialize this element */
  memset(&ls->element, 0, sizeof (GtLTRElement));

  /* get annotations from parser */
  had_err = gt_node_stream_next(ls->in_stream, gn, err);
  if (!had_err && *gn) {
    GtFeatureNodeIterator* gni;
    GtFeatureNode *mygn;

    /* only process feature nodes */
    if (!(fn = gt_feature_node_try_cast(*gn)))
      return 0;

    ls->element.pdomorder = gt_array_new(sizeof (const char*));

    /* fill LTRElement structure from GFF3 subgraph */
    gni = gt_feature_node_iterator_new(fn);
    for (mygn = fn; mygn != NULL; mygn = gt_feature_node_iterator_next(gni))
      (void) gt_genome_node_accept((GtGenomeNode*) mygn,
                                   (GtNodeVisitor*) ls->lv, err);
    gt_feature_node_iterator_delete(gni);
  }

  if (!had_err && ls->element.mainnode != NULL) {
    char desc[GT_MAXFASTAHEADER];
    GtFeatureNode *ltr3, *ltr5;
    GtStr *sdesc, *sreg, *seq;

    /* find sequence in GtEncseq */
    sreg = gt_genome_node_get_seqid((GtGenomeNode*) ls->element.mainnode);
    sdesc = gt_str_new();
    had_err = gt_region_mapping_get_description(ls->rmap, sdesc, sreg, err);

    if (!had_err) {
      GtRange rng;
      /* seqid: description truncated to seqnamelen, blanks replaced by '_' */
      ls->element.seqid = gt_calloc((size_t) ls->seqnamelen+1, sizeof (char));
      (void) snprintf(ls->element.seqid,
                      MIN((size_t) gt_str_length(sdesc),
                          (size_t) ls->seqnamelen)+1,
                      "%s", gt_str_get(sdesc));
      gt_cstr_rep(ls->element.seqid, ' ', '_');
      if (gt_str_length(sdesc) > (GtUword) ls->seqnamelen)
        ls->element.seqid[ls->seqnamelen] = '\0';

      (void) gt_ltrelement_format_description(&ls->element, ls->seqnamelen,
                                              desc,
                                              (size_t) (GT_MAXFASTAHEADER-1));

      /* output basic retrotransposon data */
      lltr_rng = gt_genome_node_get_range((GtGenomeNode*)
                                          ls->element.leftLTR);
      rltr_rng = gt_genome_node_get_range((GtGenomeNode*)
                                          ls->element.rightLTR);
      rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.mainnode);
      gt_file_xprintf(ls->tabout_file,
                      GT_WU"\t"GT_WU"\t"GT_WU"\t%s\t"GT_WU"\t"GT_WU"\t"GT_WU"\t"
                      GT_WU"\t"GT_WU"\t"GT_WU"\t",
                      rng.start, rng.end,
                      gt_ltrelement_length(&ls->element),
                      ls->element.seqid,
                      lltr_rng.start, lltr_rng.end,
                      gt_ltrelement_leftltrlen(&ls->element),
                      rltr_rng.start, rltr_rng.end,
                      gt_ltrelement_rightltrlen(&ls->element));
    }
    /* release the description on every path; it used to be deleted only in
       the success branch above and leaked when the lookup failed */
    gt_str_delete(sdesc);

    seq = gt_str_new();

    /* output TSDs */
    if (!had_err && ls->element.leftTSD != NULL) {
      GtRange tsd_rng;
      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftTSD);
      had_err = gt_extract_feature_sequence(seq,
                                      (GtGenomeNode*) ls->element.leftTSD,
                                      gt_symbol(gt_ft_target_site_duplication),
                                      false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file, ""GT_WU"\t"GT_WU"\t%s\t",
                        tsd_rng.start, tsd_rng.end, gt_str_get(seq));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    if (!had_err && ls->element.rightTSD != NULL) {
      GtRange tsd_rng;
      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightTSD);
      had_err = gt_extract_feature_sequence(seq,
                                      (GtGenomeNode*) ls->element.rightTSD,
                                      gt_symbol(gt_ft_target_site_duplication),
                                      false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file, ""GT_WU"\t"GT_WU"\t%s\t",
                        tsd_rng.start, tsd_rng.end, gt_str_get(seq));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    /* output PPT */
    if (!had_err && ls->element.ppt != NULL) {
      GtStrand ppt_strand = gt_feature_node_get_strand(ls->element.ppt);
      ppt_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.ppt);
      had_err = gt_extract_feature_sequence(seq,
                                            (GtGenomeNode*) ls->element.ppt,
                                            gt_symbol(gt_ft_RR_tract),
                                            false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&ppt_rng),
                            GT_FSWIDTH, ls->pptout_file);
        /* last column: distance between the PPT and the adjacent LTR, chosen
           by PPT strand */
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%s\t%c\t%d\t",
                        ppt_rng.start, ppt_rng.end, gt_str_get(seq),
                        GT_STRAND_CHARS[ppt_strand],
                        (ppt_strand == GT_STRAND_FORWARD ?
                           abs((int) (rltr_rng.start - ppt_rng.end)) :
                           abs((int) (lltr_rng.end - ppt_rng.start))));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t");

    /* output PBS */
    if (!had_err && ls->element.pbs != NULL) {
      GtStrand pbs_strand;
      pbs_strand = gt_feature_node_get_strand(ls->element.pbs);
      pbs_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.pbs);
      had_err = gt_extract_feature_sequence(seq,
                                          (GtGenomeNode*) ls->element.pbs,
                                          gt_symbol(gt_ft_primer_binding_site),
                                          false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&pbs_rng),
                            GT_FSWIDTH, ls->pbsout_file);
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%c\t%s\t%s\t%s\t%s\t%s\t",
                        pbs_rng.start, pbs_rng.end,
                        GT_STRAND_CHARS[pbs_strand],
                        gt_feature_node_get_attribute(ls->element.pbs, "trna"),
                        gt_str_get(seq),
                        gt_feature_node_get_attribute(ls->element.pbs,
                                                      "pbsoffset"),
                        gt_feature_node_get_attribute(ls->element.pbs,
                                                      "trnaoffset"),
                        gt_feature_node_get_attribute(ls->element.pbs,
                                                      "edist"));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t\t\t\t");

    /* output protein domains */
    if (!had_err && ls->element.pdoms != NULL) {
      GtStr *pdomorderstr = gt_str_new();
      for (i=0; !had_err && i<gt_array_size(ls->element.pdomorder); i++) {
        const char* key = *(const char**) gt_array_get(ls->element.pdomorder,
                                                       i);
        GtArray *entry = (GtArray*) gt_hashmap_get(ls->element.pdoms, key);
        had_err = write_pdom(ls, entry, key, ls->rmap, desc, err);
      }

      /* the domain order string follows the orientation of the element */
      if (GT_STRAND_REVERSE == gt_feature_node_get_strand(ls->element.mainnode))
        gt_array_reverse(ls->element.pdomorder);

      for (i=0 ;!had_err && i<gt_array_size(ls->element.pdomorder); i++) {
        const char* name = *(const char**) gt_array_get(ls->element.pdomorder,
                                                        i);
        gt_str_append_cstr(pdomorderstr, name);
        if (i != gt_array_size(ls->element.pdomorder)-1)
          gt_str_append_cstr(pdomorderstr, "/");
      }
      gt_file_xprintf(ls->tabout_file, "%s", gt_str_get(pdomorderstr));
      gt_str_delete(pdomorderstr);
    }

    /* output LTRs (we just expect them to exist) */
    switch (gt_feature_node_get_strand(ls->element.mainnode)) {
      case GT_STRAND_REVERSE:
        ltr5 = ls->element.rightLTR;
        ltr3 = ls->element.leftLTR;
        break;
      case GT_STRAND_FORWARD:
      default:
        ltr5 = ls->element.leftLTR;
        ltr3 = ls->element.rightLTR;
        break;
    }

    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr5,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false, NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr5out_file);
      gt_str_reset(seq);
    }
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr3,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false, NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr3out_file);
      gt_str_reset(seq);
    }

    /* output complete oriented element */
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq,
                                          (GtGenomeNode*) ls->element.mainnode,
                                          gt_symbol(gt_ft_LTR_retrotransposon),
                                          false, NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc,gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->elemout_file);
      gt_str_reset(seq);
    }
    gt_file_xprintf(ls->tabout_file, "\n");
    gt_str_delete(seq);
  }

  /* per-element scratch data is released before returning */
  gt_hashmap_delete(ls->element.pdoms);
  gt_array_delete(ls->element.pdomorder);
  gt_free(ls->element.seqid);
  return had_err;
}
/* Creates the argument record for the seqmutate tool: one MutateArguments
   obtained from gt_calloc(), with its <ofi> member set to a new output file
   info object. Returned as an untyped pointer. */
static void* gt_seqmutate_arguments_new(void)
{
  MutateArguments *args;

  args = gt_calloc(1, sizeof *args);
  args->ofi = gt_outputfileinfo_new();
  return args;
}
/* Creates the argument record for the mergefeat tool: one InterFeatArguments
   obtained from gt_calloc(), with its <ofi> member set to a new output file
   info object. Returned as an untyped pointer. */
static void* gt_mergefeat_arguments_new(void)
{
  InterFeatArguments *args;

  args = gt_calloc(1, sizeof *args);
  args->ofi = gt_output_file_info_new();
  return args;
}
/* Creates the argument record for the script_filter tool: one
   GtScriptFilterArguments obtained from gt_calloc(), returned as an untyped
   pointer. */
static void* gt_script_filter_arguments_new(void)
{
  GtScriptFilterArguments *args;

  args = gt_calloc(1, sizeof *args);
  return args;
}
/* Creates the argument record for the seqtranslate tool: one
   GtTranslateArguments obtained from gt_calloc(), with its <ofi> member set to
   a new output file info object. Returned as an untyped pointer. */
static void* gt_seqtranslate_arguments_new(void)
{
  GtTranslateArguments *args;

  args = gt_calloc(1, sizeof *args);
  args->ofi = gt_output_file_info_new();
  return args;
}
/* Returns a new GtError object obtained from gt_calloc(). */
GtError* gt_error_new(void)
{
  GtError *new_err = gt_calloc(1, sizeof *new_err);
  return new_err;
}
/* Parses the per-domain hit section of the HMMER output read from <instream>,
   using <buf> as the line buffer, until a line starting with "Internal" is
   reached or an error occurs.
   A line starting with ">>" names a new model: the first blank-delimited
   token after it becomes <status>->cur_model and the current frame is marked
   as finished. Each following line that matches the hit table format is
   turned into a GtHMMERSingleHit and added to <status>. If at least one hit
   was read for the current model, the alignment section is handed to
   gt_ltrdigest_pdom_visitor_parse_alignments(); otherwise the next line is
   fetched.
   Returns 0 on success, otherwise the error code of the failing call. */
static int gt_ltrdigest_pdom_visitor_parse_domainhits(
                                                   GtLTRdigestPdomVisitor *lv,
                                                   GtHMMERParseStatus *status,
                                                   char *buf,
                                                   FILE *instream,
                                                   GtError *err)
{
  int had_err;
  GtUword hit_count = 0;

  gt_assert(lv && instream && status);
  gt_error_check(err);

  had_err = pdom_parser_get_next_line(buf, instream, err);
  gt_assert(buf != NULL);

  while (!had_err && strncmp("Internal", buf, (size_t) 8) != 0) {
    GtUword no, hmmfrom, hmmto, alifrom, alito;
    double score, evalue;
    char threshold_ok = '-';

    if (buf[0] == '>' && buf[1] == '>') {
      /* model header line: the name starts after ">> " */
      char *model_name = strtok(buf+3, " ");
      gt_str_reset(status->cur_model);
      gt_str_append_cstr(status->cur_model, model_name);
      had_err = pdom_parser_get_next_line(buf, instream, err);
      /* NOTE(review): 17 bytes are compared although the literal below is
         shorter, so the comparison includes its terminating NUL -- confirm
         the literal's leading whitespace against the HMMER output format */
      if (!had_err && strncmp(" [No individual", buf, (size_t) 17) != 0) {
        /* advance two more lines to reach the first table row */
        had_err = pdom_parser_get_next_line(buf, instream, err);
        if (!had_err)
          had_err = pdom_parser_get_next_line(buf, instream, err);
      }
      hit_count = 0UL;
      gt_hmmer_parse_status_mark_frame_finished(status);
    }

    /* consume all consecutive hit table rows */
    while (!had_err) {
      GtHMMERSingleHit *hit;
      int nof_fields = sscanf(buf,
                              ""GT_WU" %c %lf %*f %*f %lf "GT_WU" "GT_WU" %*s "
                              GT_WU" "GT_WU"",
                              &no, &threshold_ok, &score, &evalue,
                              &hmmfrom, &hmmto, &alifrom, &alito);
      if (nof_fields != 8)
        break;
      hit = gt_calloc((size_t) 1, sizeof (*hit));
      hit->hmmfrom = hmmfrom;
      hit->hmmto = hmmto;
      hit->alifrom = alifrom;
      hit->alito = alito;
      hit->score = score;
      hit->evalue = evalue;
      hit->strand = status->strand;
      hit->frame = (GtUword) status->frame;
      hit->reported = (threshold_ok == '!');
      hit->chains = gt_array_new(sizeof (GtUword));
      gt_hmmer_parse_status_add_hit(status, hit);
      hit_count++;
      had_err = pdom_parser_get_next_line(buf, instream, err);
    }

    if (had_err)
      continue; /* loop condition terminates the outer loop */

    if (hit_count > 0) {
      had_err = gt_ltrdigest_pdom_visitor_parse_alignments(lv, status, buf,
                                                           instream, err);
    }
    else
      had_err = pdom_parser_get_next_line(buf, instream, err);
  }
  return had_err;
}
/* Returns a new GtDiscDistri object obtained from gt_calloc(). */
GtDiscDistri* gt_disc_distri_new(void)
{
  GtDiscDistri *distri = gt_calloc(1, sizeof *distri);
  return distri;
}
/* Returns a new GtDlist obtained from gt_calloc() whose <cmp_func> member is
   set to the given comparison function. */
GtDlist* gt_dlist_new(GtCompare cmp_func)
{
  GtDlist *list;

  list = gt_calloc(1, sizeof *list);
  list->cmp_func = cmp_func;
  return list;
}
/* Searches for primer binding site candidates in <element>.
   Two windows of 2*<o>->radius+1 characters are taken: one from <seq>
   centered on the offset given by the left LTR length, one from <rev_seq>
   centered on the offset given by the right LTR length. Every sequence in
   <o>->trna_lib is reverse-complemented and aligned with gt_swalign() against
   both windows using a DNA score function built from the scores in <o>; each
   alignment is passed to gt_pbs_add_hit() for the forward and reverse strand,
   respectively.
   Returns the results object with its hits sorted by gt_pbs_hit_compare().
   The return value of gt_reverse_complement() is deliberately ignored here. */
GtPBSResults* gt_pbs_find(const char *seq, const char *rev_seq,
                          GtLTRElement *element, GtPBSOptions *o, GtError *err)
{
  GtAlphabet *dna_alpha = gt_alphabet_new_dna();
  GtScoreFunction *scorefunc = gt_dna_scorefunc_new(dna_alpha,
                                                    o->ali_score_match,
                                                    o->ali_score_mismatch,
                                                    o->ali_score_insertion,
                                                    o->ali_score_deletion);
  GtPBSResults *results;
  GtSeq *fwd_window, *rev_window;
  unsigned long trna_idx;

  gt_assert(seq && rev_seq && scorefunc && dna_alpha && element);

  results = gt_pbs_results_new(element, o);

  fwd_window = gt_seq_new(seq + (gt_ltrelement_leftltrlen(element))
                              - (o->radius),
                          2*o->radius + 1,
                          dna_alpha);
  rev_window = gt_seq_new(rev_seq + (gt_ltrelement_rightltrlen(element))
                                  - (o->radius),
                          2*o->radius + 1,
                          dna_alpha);

  trna_idx = 0;
  while (trna_idx < gt_bioseq_number_of_sequences(o->trna_lib)) {
    GtSeq *trna = gt_bioseq_get_seq(o->trna_lib, trna_idx);
    unsigned long trna_len = gt_seq_length(trna);
    char *revcomp_buf = gt_calloc(trna_len, sizeof (char));
    GtSeq *trna_revcomp;
    GtAlignment *alignment;

    /* build the reverse complement of the tRNA; the buffer is handed over to
       the sequence object created by gt_seq_new_own() */
    memcpy(revcomp_buf, gt_seq_get_orig(trna), sizeof (char)*trna_len);
    (void) gt_reverse_complement(revcomp_buf, trna_len, err);
    trna_revcomp = gt_seq_new_own(revcomp_buf, trna_len, dna_alpha);

    /* forward strand window */
    alignment = gt_swalign(fwd_window, trna_revcomp, scorefunc);
    gt_pbs_add_hit(results->hits, alignment, o, trna_len,
                   gt_seq_get_description(trna), GT_STRAND_FORWARD, results);
    gt_alignment_delete(alignment);

    /* reverse strand window */
    alignment = gt_swalign(rev_window, trna_revcomp, scorefunc);
    gt_pbs_add_hit(results->hits, alignment, o, trna_len,
                   gt_seq_get_description(trna), GT_STRAND_REVERSE, results);
    gt_alignment_delete(alignment);

    gt_seq_delete(trna_revcomp);
    trna_idx++;
  }

  gt_seq_delete(fwd_window);
  gt_seq_delete(rev_window);
  gt_score_function_delete(scorefunc);
  gt_alphabet_delete(dna_alpha);

  gt_array_sort(results->hits, gt_pbs_hit_compare);
  return results;
}
/* Returns a new GthBSSMParam object obtained from gt_calloc(). */
GthBSSMParam* gth_bssm_param_new(void)
{
  GthBSSMParam *param = gt_calloc(1, sizeof *param);
  return param;
}
/* Creates the argument record for the encseq check tool: one
   GtEncseqCheckArguments obtained from gt_calloc(), returned as an untyped
   pointer. */
static void* gt_encseq_check_arguments_new(void)
{
  GtEncseqCheckArguments *args;

  args = gt_calloc(1, sizeof *args);
  return args;
}
/* Collects the IDs of all sequences of <orig_es> into <condenseq>->orig_ids.
   The ID length of each description is determined with condenseq_idlen().
   First pass: build a histogram of ID lengths to compare two layouts, namely
   padding every ID to the maximum length ("const len") versus storing the
   exact IDs plus an integer set of their end positions (sdstab). Const len is
   chosen if the padding overhead is smaller than the size reported by
   gt_intset_best_memory_size(); the decision is reported via <logger>.
   Second pass: copy the IDs into the newly allocated buffer, advancing by the
   maximum length (const len) or by the actual length (sdstab, recording each
   end position in the set). */
static void condenseq_process_descriptions(GtCondenseq *condenseq,
                                           const GtEncseq *orig_es,
                                           GtLogger *logger)
{
  const char *desc;
  char *write_pos;
  GtUword *len_counts;
  GtUword counts_size = (GtUword) 128,
          desclen,
          idlen,
          seqnum,
          len,
          end_sum = 0,
          longest = 0,
          shortest = GT_UWORD_MAX,
          padding_bytes = 0,
          sds_bytes,
          written = 0;
  bool const_len;

  condenseq->ids_total_len = 0;
  len_counts = gt_calloc((size_t) counts_size, sizeof (*len_counts));

  /* pass 1: histogram of ID lengths, grown on demand */
  for (seqnum = 0; seqnum < condenseq->orig_num_seq; ++seqnum) {
    desc = gt_encseq_description(orig_es, &desclen, seqnum);
    idlen = condenseq_idlen(desc, desclen);
    if (counts_size <= idlen) {
      len_counts = gt_realloc(len_counts,
                              (size_t) (idlen + 1) * sizeof (*len_counts));
      /* zero the newly added tail */
      for (len = counts_size; len <= idlen; len++)
        len_counts[len] = 0;
      counts_size = idlen + 1;
    }
    len_counts[idlen]++;
    if (idlen > longest)
      longest = idlen;
    if (idlen < shortest)
      shortest = idlen;
    end_sum += idlen;
  }

  /* calculate memory we would waste if we assume equal length, and size if we
     store actual descriptions */
  for (len = shortest; len < longest; len++) {
    padding_bytes += len_counts[len] * (longest - len);
    condenseq->ids_total_len += len_counts[len] * len;
  }
  /* the bucket the loop stopped at is not covered above */
  condenseq->ids_total_len += len * len_counts[len];

  sds_bytes = (GtUword) gt_intset_best_memory_size(end_sum,
                                                   condenseq->orig_num_seq);
  const_len = padding_bytes < sds_bytes;

  if (!const_len) {
    gt_logger_log(logger,
                  "Condenseq descriptions will use sdstab with size "
                  GT_WU ". Const length would have wasted " GT_WU " bytes.",
                  sds_bytes, padding_bytes);
    condenseq->sdstab = gt_intset_best_new(end_sum, condenseq->orig_num_seq);
  }
  else {
    gt_logger_log(logger,
                  "Condenseq descriptions will use const len, " GT_WU
                  ", \"wasting\" " GT_WU " bytes. SDS would use " GT_WU
                  " bytes",
                  longest, padding_bytes, sds_bytes);
    condenseq->id_len = longest;
    condenseq->ids_total_len = longest * condenseq->orig_num_seq;
  }

  /* pass 2: copy the IDs into one contiguous buffer */
  condenseq->orig_ids = gt_calloc((size_t) condenseq->ids_total_len,
                                  sizeof (*condenseq->orig_ids));
  write_pos = condenseq->orig_ids;
  for (seqnum = 0; seqnum < condenseq->orig_num_seq; ++seqnum) {
    GtUword step;
    desc = gt_encseq_description(orig_es, &desclen, seqnum);
    idlen = condenseq_idlen(desc, desclen);
    gt_assert(idlen <= longest);
    (void) memcpy(write_pos, desc, (size_t) idlen);
    step = const_len ? longest : idlen;
    write_pos += step;
    written += step;
    if (!const_len)
      gt_intset_add(condenseq->sdstab, written);
  }
  gt_assert(written == condenseq->ids_total_len);
  gt_free(len_counts);
}
int gt_ltrdigest_file_out_stream_write_metadata(GtLTRdigestFileOutStream *ls, int tests_to_run, const char *trnafilename, const char *gfffilename, GtRange ppt_len, GtRange ubox_len, unsigned int ppt_radius, GtRange alilen, unsigned int max_edist, GtRange offsetlen, GtRange trnaoffsetlen, unsigned int pbs_radius, GtStrArray *hmm_files, unsigned int chain_max_gap_length, double evalue_cutoff, GtError *err) { int buflen = 1024; GtFile *metadata_file; char *buffer, fn[GT_MAXFILENAMELEN]; (void) snprintf(fn, (size_t) (GT_MAXFILENAMELEN-1), "%s_conditions.csv", ls->fileprefix); metadata_file = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, fn, "w+", err); if (!metadata_file) return -1; buffer = gt_calloc((size_t) (buflen+1), sizeof (char)); /* get working directory */ while (getcwd(buffer, (size_t) buflen) == NULL) { buflen += 1024; buffer = gt_realloc(buffer, (buflen+1) * sizeof (char)); } gt_assert(buffer && strlen(buffer) > 0); /* append working dir to relative paths if necessary */ if (gfffilename == NULL) { gt_file_xprintf(metadata_file, "GFF3 input used\t<stdin>\n"); } else { if (gfffilename[0] != GT_PATH_SEPARATOR) gt_file_xprintf(metadata_file, "GFF3 input used\t%s/%s\n", buffer, gfffilename); else gt_file_xprintf(metadata_file, "GFF3 input used\t%s\n", gfffilename); } if (tests_to_run & GT_LTRDIGEST_RUN_PPT) { gt_file_xprintf(metadata_file, "PPT length\t"GT_WU"-"GT_WU"nt\t8-30nt\n", ppt_len.start, ppt_len.end); gt_file_xprintf(metadata_file, "U-box length\t"GT_WU"-"GT_WU"nt\t3-30nt\n", ubox_len.start, ubox_len.end); gt_file_xprintf(metadata_file, "PPT search radius\t%u\t30\n", ppt_radius); } if (tests_to_run & GT_LTRDIGEST_RUN_PBS) { if (trnafilename[0] != GT_PATH_SEPARATOR) gt_file_xprintf(metadata_file, "tRNA library for PBS detection\t%s/%s\n", buffer, trnafilename); else gt_file_xprintf(metadata_file, "tRNA library for PBS detection\t%s\n", trnafilename); gt_file_xprintf(metadata_file, "allowed PBS/tRNA alignment length" " range\t"GT_WU"-"GT_WU"nt\t11-30nt\n", 
alilen.start, alilen.end); gt_file_xprintf(metadata_file, "PBS/tRNA maximum unit edit distance\t%u\t1\n", max_edist); gt_file_xprintf(metadata_file, "allowed PBS offset from 5' LTR range" "\t"GT_WU"-"GT_WU"nt\t0-5nt\n", offsetlen.start, offsetlen.end); gt_file_xprintf(metadata_file, "allowed PBS offset from 3' tRNA end" " range\t"GT_WU"-"GT_WU"nt\t0-5nt\n", trnaoffsetlen.start, trnaoffsetlen.end); gt_file_xprintf(metadata_file, "PBS search radius\t%d\t30\n", pbs_radius); } if (tests_to_run & GT_LTRDIGEST_RUN_PDOM) { GtUword i; gt_file_xprintf(metadata_file, "Protein domain models\t"GT_WU" (", gt_str_array_size(hmm_files)); for (i=0;i<gt_str_array_size(hmm_files);i++) { gt_file_xprintf(metadata_file, "%s", gt_str_array_get(hmm_files, i)); if (i != gt_str_array_size(hmm_files)-1) gt_file_xprintf(metadata_file, ", "); } gt_file_xprintf(metadata_file, ")\n"); gt_file_xprintf(metadata_file, "pHMM e-value cutoff \t%g\t%g\n", evalue_cutoff, 0.000001); gt_file_xprintf(metadata_file, "maximal allowed gap length between fragments to chain" " \t%u\t%u\n", chain_max_gap_length, 50); } gt_file_xprintf(metadata_file, "\n"); if (metadata_file != NULL) gt_file_delete(metadata_file); gt_free(buffer); return 0; }
/* Transfers the contents of <condenseq> to or from <fp> via <io_func>; the
   same sequence of fields serves both directions. Scalar fields go through
   gt_condenseq_io_one(), which picks up <fp>, <io_func> and <err> from this
   scope. Field order: original length, format version, number of sequences,
   number of link entries, number of unique entries, the link entries, the
   unique entries, ssptab (only for more than one sequence), id_len, sdstab
   (only if id_len is GT_UNDEF_UWORD), total ID length and the ID buffer.
   The <links> and <uniques> arrays are allocated here if they are still NULL.
   Returns 0 on success and a non-zero value on the first failure; a format
   version mismatch is reported through <err>. */
static int condenseq_io(GtCondenseq *condenseq, FILE* fp, GtIOFunc io_func,
                        GtError *err)
{
  int had_err = 0;
  int file_format = GT_CONDENSEQ_VERSION;
  GtUword idx;

  had_err = gt_condenseq_io_one(condenseq->orig_length);
  if (had_err)
    return had_err;

  had_err = gt_condenseq_io_one(file_format);
  if (had_err)
    return had_err;
  if (file_format != GT_CONDENSEQ_VERSION) {
    gt_error_set(err, "condenseq index is format version %d, current is "
                 "%d -- please re-encode", file_format, GT_CONDENSEQ_VERSION);
    return -1;
  }

  had_err = gt_condenseq_io_one(condenseq->orig_num_seq);
  if (had_err)
    return had_err;

  had_err = gt_condenseq_io_one(condenseq->ldb_nelems);
  if (had_err)
    return had_err;

  if (condenseq->ldb_nelems == 0) {
    gt_warning("compression of condenseq did not succeed in finding any "
               "compressable similarities, maybe the input is to small or "
               "the chosen parameters should be reconsidered.");
  }
  if (condenseq->links == NULL) {
    condenseq->links = gt_calloc((size_t) condenseq->ldb_nelems,
                                 sizeof (*condenseq->links));
    condenseq->ldb_allocated = condenseq->ldb_nelems;
  }

  had_err = gt_condenseq_io_one(condenseq->udb_nelems);
  if (had_err)
    return had_err;

  gt_assert(condenseq->udb_nelems > 0);
  if (condenseq->uniques == NULL) {
    condenseq->uniques = gt_malloc(sizeof (*condenseq->uniques) *
                                   condenseq->udb_nelems );
    condenseq->udb_allocated = condenseq->udb_nelems;
  }

  /* link entries, then unique entries */
  for (idx = 0; idx < condenseq->ldb_nelems; idx++) {
    had_err = condenseq_linkentry_io(&condenseq->links[idx], fp, io_func,
                                     err);
    if (had_err)
      return had_err;
  }
  for (idx = 0; idx < condenseq->udb_nelems; idx++) {
    had_err = condenseq_uniqueentry_io(&condenseq->uniques[idx], fp, io_func,
                                       err);
    if (had_err)
      return had_err;
  }

  /* ssptab is only handled for more than one sequence */
  if (condenseq->orig_num_seq > (GtUword) 1) {
    condenseq->ssptab = gt_intset_io(condenseq->ssptab, fp, err);
    if (condenseq->ssptab == NULL)
      return 1;
  }

  had_err = gt_condenseq_io_one(condenseq->id_len);
  if (had_err)
    return had_err;

  /* an undefined id_len means the ID end positions are kept in sdstab */
  if (condenseq->id_len == GT_UNDEF_UWORD) {
    condenseq->sdstab = gt_intset_io(condenseq->sdstab, fp, err);
    if (condenseq->sdstab == NULL)
      return 1;
  }

  had_err = gt_condenseq_io_one(condenseq->ids_total_len);
  if (had_err)
    return had_err;

  condenseq->orig_ids = gt_realloc(condenseq->orig_ids,
                                   (size_t) condenseq->ids_total_len);
  had_err = io_func(condenseq->orig_ids, sizeof (*condenseq->orig_ids),
                    (size_t) condenseq->ids_total_len, fp, err);
  return had_err;
}
/* Creates the argument record for the encseq bitextract tool: one
   GtEncseqBitextractArguments obtained from gt_calloc(), with its <readmode>
   member set to a new string. Returned as an untyped pointer. */
static void* gt_encseq_bitextract_arguments_new(void)
{
  GtEncseqBitextractArguments *args;

  args = gt_calloc(1, sizeof *args);
  args->readmode = gt_str_new();
  return args;
}
/* Returns a new GtToolinfo object obtained from gt_calloc(). */
GtToolinfo* gt_toolinfo_new(void)
{
  GtToolinfo *info = gt_calloc(1, sizeof *info);
  return info;
}
/* Creates the argument record for the encseq encode tool: one
   GtEncseqEncodeArguments obtained from gt_calloc(), with its <indexname>
   member set to a new string. Returned as an untyped pointer. */
static void* gt_encseq_encode_arguments_new(void)
{
  GtEncseqEncodeArguments *args;

  args = gt_calloc(1, sizeof *args);
  args->indexname = gt_str_new();
  return args;
}
/* Returns a new GtTypecheckInfo obtained from gt_calloc() whose <typecheck>
   member is set to a new string. */
GtTypecheckInfo* gt_typecheck_info_new(void)
{
  GtTypecheckInfo *info;

  info = gt_calloc(1, sizeof *info);
  info->typecheck = gt_str_new();
  return info;
}
GtHcrEncoder *gt_hcr_encoder_new(GtStrArray *files, GtAlphabet *alpha, bool descs, GtQualRange qrange, GtTimer *timer, GtError *err) { GtBaseQualDistr *bqd; GtHcrEncoder *hcr_enc; GtSeqIterator *seqit; GtStrArray *file; int had_err = 0, status; GtUword len1, len2, i, num_of_reads = 0; const GtUchar *seq, *qual; char *desc; gt_error_check(err); gt_assert(alpha && files); if (timer != NULL) gt_timer_show_progress(timer, "get <base,qual> distr", stdout); if (qrange.start != GT_UNDEF_UINT) if (qrange.start == qrange.end) { gt_error_set(err, "qrange.start must unequal qrange.end"); return NULL; } hcr_enc = gt_malloc(sizeof (GtHcrEncoder)); hcr_enc->files = files; hcr_enc->num_of_files = gt_str_array_size(files); hcr_enc->num_of_reads = 0; hcr_enc->page_sampling = false; hcr_enc->regular_sampling = false; hcr_enc->sampling_rate = 0; hcr_enc->pagesize = gt_pagesize(); if (descs) { hcr_enc->encdesc_encoder = gt_encdesc_encoder_new(); if (timer != NULL) gt_encdesc_encoder_set_timer(hcr_enc->encdesc_encoder, timer); } else hcr_enc->encdesc_encoder = NULL; hcr_enc->seq_encoder = gt_malloc(sizeof (GtHcrSeqEncoder)); hcr_enc->seq_encoder->alpha = alpha; hcr_enc->seq_encoder->sampling = NULL; hcr_enc->seq_encoder->fileinfos = gt_calloc((size_t) hcr_enc->num_of_files, sizeof (*(hcr_enc->seq_encoder->fileinfos))); hcr_enc->seq_encoder->qrange = qrange; bqd = hcr_base_qual_distr_new(alpha, qrange); /* check if reads in the same file are of same length and get <base, quality> pair distribution */ for (i = 0; i < hcr_enc->num_of_files; i++) { file = gt_str_array_new(); gt_str_array_add(file, gt_str_array_get_str(files, i)); seqit = gt_seq_iterator_fastq_new(file, err); if (!seqit) { gt_error_set(err, "cannot initialize GtSeqIteratorFastQ object"); had_err = -1; } if (!had_err) { gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(alpha)); gt_seq_iterator_set_quality_buffer(seqit, &qual); status = gt_seq_iterator_next(seqit, &seq, &len1, &desc, err); if (status == 1) { 
num_of_reads = 1UL; while (!had_err) { status = gt_seq_iterator_next(seqit, &seq, &len2, &desc, err); if (status == -1) had_err = -1; if (status != 1) break; if (len2 != len1) { gt_error_set(err, "reads have to be of equal length"); had_err = -1; break; } if (hcr_base_qual_distr_add(bqd, qual, seq, len1) != 0) had_err = -1; len1 = len2; num_of_reads++; } } else if (status == -1) had_err = -1; if (!had_err) { if (i == 0) hcr_enc->seq_encoder->fileinfos[i].readnum = num_of_reads; else hcr_enc->seq_encoder->fileinfos[i].readnum = hcr_enc->seq_encoder->fileinfos[i - 1].readnum + num_of_reads; hcr_enc->seq_encoder->fileinfos[i].readlength = len1; } } hcr_enc->num_of_reads += num_of_reads; gt_str_array_delete(file); gt_seq_iterator_delete(seqit); } if (!had_err) hcr_base_qual_distr_trim(bqd); if (!had_err) { if (timer != NULL) gt_timer_show_progress(timer, "build huffman tree for sequences and" " qualities", stdout); hcr_enc->seq_encoder->huffman = gt_huffman_new(bqd, hcr_base_qual_distr_func, (GtUword) bqd->ncols * bqd->nrows); } if (!had_err) { hcr_enc->seq_encoder->qual_offset = bqd->qual_offset; hcr_base_qual_distr_delete(bqd); return hcr_enc; } return NULL; }
/* Returns a new GtXRFCheckInfo obtained from gt_calloc() whose <xrfcheck>
   member is set to a new string. */
GtXRFCheckInfo* gt_xrfcheck_info_new(void)
{
  GtXRFCheckInfo *info;

  info = gt_calloc(1, sizeof *info);
  info->xrfcheck = gt_str_new();
  return info;
}
/* Creates the argument record for the gff3validator tool: one
   GFF3ValidatorArguments obtained from gt_calloc(), with its <tci> member set
   to a new typecheck info object. Returned as an untyped pointer. */
static void* gt_gff3validator_arguments_new(void)
{
  GFF3ValidatorArguments *args;

  args = gt_calloc(1, sizeof *args);
  args->tci = gt_typecheck_info_new();
  return args;
}
int gt_bitPackStringInt8_unit_test(GtError *err) { BitString bitStore = NULL; BitString bitStoreCopy = NULL; uint8_t *randSrc = NULL; /*< create random ints here for input as bit * store */ uint8_t *randCmp = NULL; /*< used for random ints read back */ unsigned *numBitsList = NULL; size_t i, numRnd; BitOffset offsetStart, offset; int had_err = 0; offset = offsetStart = random()%(sizeof (uint8_t) * CHAR_BIT); numRnd = random() % (MAX_RND_NUMS_uint8_t + 1); gt_log_log("offset=%lu, numRnd=%lu\n", (long unsigned)offsetStart, (long unsigned)numRnd); { BitOffset numBits = sizeof (uint8_t) * CHAR_BIT * numRnd + offsetStart; randSrc = gt_malloc(sizeof (uint8_t)*numRnd); bitStore = gt_malloc(bitElemsAllocSize(numBits) * sizeof (BitElem)); bitStoreCopy = gt_calloc(bitElemsAllocSize(numBits), sizeof (BitElem)); randCmp = gt_malloc(sizeof (uint8_t)*numRnd); } /* first test unsigned types */ gt_log_log("gt_bsStoreUInt8/gt_bsGetUInt8: "); for (i = 0; i < numRnd; ++i) { #if 8 > 32 && LONG_BIT < 8 uint8_t v = randSrc[i] = (uint8_t)random() << 32 | random(); #else /* 8 > 32 && LONG_BIT < 8 */ uint8_t v = randSrc[i] = random(); #endif /* 8 > 32 && LONG_BIT < 8 */ int bits = gt_requiredUInt8Bits(v); gt_bsStoreUInt8(bitStore, offset, bits, v); offset += bits; } offset = offsetStart; for (i = 0; i < numRnd; ++i) { uint8_t v = randSrc[i]; int bits = gt_requiredUInt8Bits(v); uint8_t r = gt_bsGetUInt8(bitStore, offset, bits); gt_ensure(had_err, r == v); if (had_err) { gt_log_log("Expected %"PRIu8", got %"PRIu8", i = %lu\n", v, r, (unsigned long)i); freeResourcesAndReturn(had_err); } offset += bits; } gt_log_log("passed\n"); if (numRnd > 0) { uint8_t v = randSrc[0], r = 0; unsigned numBits = gt_requiredUInt8Bits(v); BitOffset i = offsetStart + numBits; uint8_t mask = ~(uint8_t)0; if (numBits < 8) mask = ~(mask << numBits); gt_log_log("bsSetBit, gt_bsClearBit, bsToggleBit, gt_bsGetBit: "); while (v) { int lowBit = v & 1; v >>= 1; gt_ensure(had_err, lowBit == (r = gt_bsGetBit(bitStore, 
--i))); if (had_err) { gt_log_log("Expected %d, got %d, i = %llu\n", lowBit, (int)r, (unsigned long long)i); freeResourcesAndReturn(had_err); } } i = offsetStart + numBits; gt_bsClear(bitStoreCopy, offsetStart, numBits, random()&1); v = randSrc[0]; while (i) { int lowBit = v & 1; v >>= 1; if (lowBit) bsSetBit(bitStoreCopy, --i); else gt_bsClearBit(bitStoreCopy, --i); } v = randSrc[0]; r = gt_bsGetUInt8(bitStoreCopy, offsetStart, numBits); gt_ensure(had_err, r == v); if (had_err) { gt_log_log("Expected %"PRIu8", got %"PRIu8"\n", v, r); freeResourcesAndReturn(had_err); } for (i = 0; i < numBits; ++i) bsToggleBit(bitStoreCopy, offsetStart + i); r = gt_bsGetUInt8(bitStoreCopy, offsetStart, numBits); gt_ensure(had_err, r == (v = (~v & mask))); if (had_err) { gt_log_log("Expected %"PRIu8", got %"PRIu8"\n", v, r); freeResourcesAndReturn(had_err); } gt_log_log("passed\n"); }
/* Returns a new GtSplitter object obtained from gt_calloc(). */
GtSplitter* gt_splitter_new(void)
{
  GtSplitter *splitter = gt_calloc(1, sizeof *splitter);
  return splitter;
}
/* Creates the argument record for the splitfasta tool: one
   SplitfastaArguments obtained from gt_calloc(), with its <splitdesc> member
   set to a new string. Returned as an untyped pointer. */
static void* gt_splitfasta_arguments_new(void)
{
  SplitfastaArguments *args;

  args = gt_calloc(1, sizeof *args);
  args->splitdesc = gt_str_new();
  return args;
}