/*
  Create a sequence collection holding one GtBioseq per entry of
  <sequence_files>, which must be non-NULL and non-empty.
  Returns the new collection, or NULL (after deleting the partially built
  collection) as soon as gt_bioseq_new() fails for one of the files.
*/
GtSeqCol* gt_bioseq_col_new(GtStrArray *sequence_files, GtError *err)
{
  GtSeqCol *seq_col;
  GtBioseqCol *bioseq_col;
  GtUword file_idx;

  gt_error_check(err);
  gt_assert(sequence_files);
  gt_assert(gt_str_array_size(sequence_files));

  seq_col = gt_seq_col_create(gt_bioseq_col_class());
  bioseq_col = gt_bioseq_col_cast(seq_col);
  bioseq_col->num_of_seqfiles = gt_str_array_size(sequence_files);
  /* zero-initialised, so slots after a failing file stay NULL */
  bioseq_col->bioseqs = gt_calloc(bioseq_col->num_of_seqfiles,
                                  sizeof (GtBioseq*));

  for (file_idx = 0; file_idx < bioseq_col->num_of_seqfiles; file_idx++) {
    bioseq_col->bioseqs[file_idx] =
      gt_bioseq_new(gt_str_array_get(sequence_files, file_idx), err);
    if (bioseq_col->bioseqs[file_idx] == NULL) {
      /* stop at the first file that cannot be loaded */
      gt_bioseq_col_delete(seq_col);
      return NULL;
    }
  }

  bioseq_col->matchdescstart = false;
  return seq_col;
}
/*
  Fill the empty string <description> with
    <type>_<counter>
  followed, in this order, by the optional parts
    " (joined)"                    if <join> is set,
    " (translated)"                if <translate> is set,
    " [seqid <seqid>]"             if <seqid> is not NULL (must be non-empty),
    " [target IDs <id>,<id>,...]"  if <target_ids> is not NULL and non-empty.
*/
static void construct_description(GtStr *description, const char *type,
                                  GtUword counter, bool join, bool translate,
                                  GtStr *seqid, GtStrArray *target_ids)
{
  GtUword idx, num_of_targets;

  gt_assert(!gt_str_length(description));

  gt_str_append_cstr(description, type);
  gt_str_append_char(description, '_');
  gt_str_append_ulong(description, counter);

  if (join) {
    gt_str_append_cstr(description, " (joined)");
  }
  if (translate) {
    gt_str_append_cstr(description, " (translated)");
  }
  if (seqid) {
    gt_assert(gt_str_length(seqid));
    gt_str_append_cstr(description, " [seqid ");
    gt_str_append_str(description, seqid);
    gt_str_append_char(description, ']');
  }

  if (!target_ids) {
    return;
  }
  num_of_targets = gt_str_array_size(target_ids);
  if (num_of_targets == 0) {
    return;
  }

  gt_str_append_cstr(description, " [target IDs ");
  for (idx = 0; idx < num_of_targets; idx++) {
    if (idx > 0) {
      /* comma only between IDs, never before the first one */
      gt_str_append_char(description, ',');
    }
    gt_str_append_cstr(description, gt_str_array_get(target_ids, idx));
  }
  gt_str_append_char(description, ']');
}
static void showsimpleoptions(const Cmppairwiseopt *opt) { if (gt_str_array_size(opt->strings) > 0) { if (!opt->showedist) printf("# two strings \"%s\" \"%s\"\n", gt_str_array_get(opt->strings,0), gt_str_array_get(opt->strings,1UL)); return; } if (gt_str_array_size(opt->files) > 0) { printf("# two files \"%s\" \"%s\"\n", gt_str_array_get(opt->files,0), gt_str_array_get(opt->files,1UL)); return; } if (opt->charlistlen != NULL) { printf("# alphalen \"%s\" " GT_WU "\n", gt_str_get(opt->charlistlen->charlist), opt->charlistlen->len); return; } if (gt_str_length(opt->text) > 0) { printf("# text \"%s\"\n", gt_str_get(opt->text)); return; } }
/*
  Run <checkfunction> on the input selected in <opt>; the modes are tested in
  the order strings, files, charlistlen, text and only the first one set is
  used.
  - strings: the two strings are checked twice, first with the leading flag
    set to true, then with false; returns 2.
  - files: with opt->fasta every sequence of fastasequences0 is checked
    against every sequence of fastasequences1 (flag true); otherwise the two
    files are handed to gt_runcheckfunctionontwofiles(); returns 2.
  - charlistlen / text: returns whatever the respective runner returns.
  It is a programming error (assertion) if no mode is set.
*/
static GtUword applycheckfunctiontosimpleoptions(
                                  Checkcmppairfuntype checkfunction,
                                  const Cmppairwiseopt *opt)
{
  if (gt_str_array_size(opt->strings) > 0)
  {
    const char *first = gt_str_array_get(opt->strings,0),
               *second = gt_str_array_get(opt->strings,1UL);
    const GtUword firstlen = (GtUword) strlen(first),
                  secondlen = (GtUword) strlen(second);

    checkfunction(true, (const GtUchar *) first, firstlen,
                  (const GtUchar *) second, secondlen);
    checkfunction(false, (const GtUchar *) first, firstlen,
                  (const GtUchar *) second, secondlen);
    return 2UL; /* number of testcases */
  }
  if (gt_str_array_size(opt->files) > 0)
  {
    if (!opt->fasta)
    {
      gt_runcheckfunctionontwofiles(checkfunction,
                                    gt_str_array_get(opt->files,0),
                                    gt_str_array_get(opt->files,1UL));
    } else
    {
      GtUword idx0, idx1;
      const GtUword num0 = gt_str_array_size(opt->fastasequences0),
                    num1 = gt_str_array_size(opt->fastasequences1);

      for (idx0 = 0; idx0 < num0; idx0++)
      {
        const char *seq0 = gt_str_array_get(opt->fastasequences0,idx0);
        const GtUword len0 = (GtUword) strlen(seq0);

        for (idx1 = 0; idx1 < num1; idx1++)
        {
          const char *seq1 = gt_str_array_get(opt->fastasequences1,idx1);

          checkfunction(true, (const GtUchar *) seq0, len0,
                        (const GtUchar *) seq1, (GtUword) strlen(seq1));
        }
      }
    }
    return 2UL;
  }
  if (opt->charlistlen != NULL)
  {
    return gt_runcheckfunctiononalphalen(checkfunction,
                                         gt_str_get(opt->charlistlen->charlist),
                                         opt->charlistlen->len);
  }
  if (gt_str_length(opt->text) > 0)
  {
    return gt_runcheckfunctionontext(checkfunction, gt_str_get(opt->text));
  }
  gt_assert(false);
  return 0;
}
/*
  Allocate and initialise a GtFastaBuffer reading from the files named in
  <filenametab>. The arguments <symbolmap>, <plainformat>, <descptr> and
  <characterdistribution> are stored as given.
  If <filelengthtab> is not NULL, a zero-initialised table with one
  Filelengthvalues entry per file is allocated, stored in *<filelengthtab>
  and also referenced from the buffer.
*/
GtFastaBuffer* gt_fastabuffer_new(const GtStrArray *filenametab,
                                  const GtUchar *symbolmap,
                                  bool plainformat,
                                  Filelengthvalues **filelengthtab,
                                  GtQueue *descptr,
                                  unsigned long *characterdistribution)
{
  GtFastaBuffer *buffer = gt_calloc(1, sizeof (GtFastaBuffer));

  /* values supplied by the caller */
  buffer->filenametab = filenametab;
  buffer->symbolmap = symbolmap;
  buffer->plainformat = plainformat;
  buffer->descptr = descptr;
  buffer->characterdistribution = characterdistribution;

  /* initial reading state */
  buffer->filenum = 0;
  buffer->firstoverallseq = true;
  buffer->firstseqinfile = true;
  buffer->nextfile = true;
  buffer->nextfree = 0;
  buffer->nextread = 0;
  buffer->complete = false;
  buffer->lastspeciallength = 0;

  if (filelengthtab == NULL) {
    buffer->filelengthtab = NULL;
  } else {
    *filelengthtab = gt_calloc(gt_str_array_size(filenametab),
                               sizeof (Filelengthvalues));
    buffer->filelengthtab = *filelengthtab;
  }

  GT_INITARRAY(&buffer->headerbuffer, char);
  return buffer;
}
/*
  Parse the three arguments of option -algbds from <algbounds> into
  sfxstrategy->maxinsertionsort, ->maxbltriesort and ->maxcountingsort and
  check that they are given in non-decreasing order.
  Returns 0 on success and -1 on failure; in the latter case <err> is set.

  The individual values are read by the macro GT_IDXOPTS_READMAXBOUND, which
  is defined outside of this function.
  NOTE(review): the macro presumably relies on the locals <arg>, <readint>
  and <haserr>, so their names must be kept -- confirm at its definition.
*/
int gt_parse_algbounds(Sfxstrategy *sfxstrategy,
                       const GtStrArray *algbounds,
                       GtError *err)
{
  bool haserr = false;
  const char *arg;
  GtWord readint;

  if (gt_str_array_size(algbounds) != 3UL)
  {
    gt_error_set(err,"option -algbds must have exactly 3 arguments");
    haserr = true;
  }
  GT_IDXOPTS_READMAXBOUND(maxinsertionsort, 0);
  GT_IDXOPTS_READMAXBOUND(maxbltriesort, 1UL);
  /* Only compare the bounds while no error has been recorded: otherwise the
     values may not have been read and the comparison would replace the
     original error message by a misleading one. */
  if (!haserr &&
      sfxstrategy->maxinsertionsort > sfxstrategy->maxbltriesort)
  {
    gt_error_set(err,"first argument of option -algbds must not be larger "
                     "than second argument");
    haserr = true;
  }
  GT_IDXOPTS_READMAXBOUND(maxcountingsort, 2UL);
  if (!haserr &&
      sfxstrategy->maxbltriesort > sfxstrategy->maxcountingsort)
  {
    gt_error_set(err,"second argument of option -algbds must not be larger "
                     "than third argument");
    haserr = true;
  }
  return haserr ? -1 : 0;
}
int gt_seqiterator_fastq_next(GtSeqIterator *seqit, const GtUchar **sequence, unsigned long *len, char **desc, GtError *err) { int errstatus = 0; GtSeqIteratorFastQ *seqitf; gt_assert(seqit); seqitf = gt_seqiterator_fastq_cast((GtSeqIterator*) seqit); gt_assert(seqit && len && desc); seqitf = gt_seqiterator_fastq_cast(seqit); gt_str_reset(seqitf->qualsbuffer); gt_str_reset(seqitf->qdescbuffer); gt_str_reset(seqitf->sequencebuffer); gt_str_reset(seqitf->descbuffer); /* parse file */ errstatus = parse_fastq_block(seqitf, err); if (!errstatus) { *sequence = (GtUchar*) gt_str_get(seqitf->sequencebuffer); *len = gt_str_length(seqitf->sequencebuffer); *desc = gt_str_get(seqitf->descbuffer); if (seqitf->qualities) *seqitf->qualities = (GtUchar*) gt_str_get(seqitf->qualsbuffer); errstatus = 1; } else { if (errstatus == EOF) { /* we could not get a next entry from this file */ /* can we open another? */ if (seqitf->filenum+1 < gt_str_array_size(seqitf->filenametab)) { const char *filename; filename = gt_str_array_get(seqitf->filenametab, ++seqitf->filenum); gt_file_delete(seqitf->curfile); seqitf->curfile = gt_file_xopen(filename, "r"); seqitf->curline = 1; /* get first entry from next file*/ errstatus = parse_fastq_block(seqitf, err); if (!errstatus) { *sequence = (GtUchar*) gt_str_get(seqitf->sequencebuffer); *len = gt_str_length(seqitf->sequencebuffer); *desc = gt_str_get(seqitf->descbuffer); if (seqitf->qualities) *seqitf->qualities = (GtUchar*) gt_str_get(seqitf->qualsbuffer); errstatus = 1; } else { errstatus = -1; } } else { /* all entries read from all files */ errstatus = 0; } } else { errstatus = -1; } } return errstatus; }
/*
  Encode the non-empty list of sequence files <infiles> into an encoded
  sequence named <indexname>, using an encoder configured from <opts>.
  Progress is logged to stderr with prefix "# " if <verbose> is set;
  <esq_no_header> disables the esq header in the encoder.
  Returns 0 on success, -1 if the encoder cannot be created, and otherwise
  the error code of gt_encseq_encoder_encode().
*/
static int encode_sequence_files(GtStrArray *infiles, GtEncseqOptions *opts,
                                 const char *indexname, bool verbose,
                                 bool esq_no_header, GtError *err)
{
  GtEncseqEncoder *encoder;
  GtLogger *verbose_logger;
  int had_err;

  gt_error_check(err);
  gt_assert(infiles && gt_str_array_size(infiles) > 0 && opts);

  verbose_logger = gt_logger_new(verbose, "# ", stderr);
  encoder = gt_encseq_encoder_new_from_options(opts, err);
  if (encoder == NULL) {
    had_err = -1;
  } else {
    gt_encseq_encoder_set_logger(encoder, verbose_logger);
    if (esq_no_header) {
      gt_encseq_encoder_disable_esq_header(encoder);
    }
    had_err = gt_encseq_encoder_encode(encoder, infiles, indexname, err);
  }

  /* the encoder is deleted on both paths, i.e. also when it is NULL */
  gt_encseq_encoder_delete(encoder);
  gt_logger_delete(verbose_logger);
  return had_err;
}
/*
  Tool runner: print the chosen settings (index kind and name, every query
  file, threshold) as "# ..." lines to stdout and then call runidxlocali().
  All command line arguments must have been consumed by option parsing.
  Returns 0 if runidxlocali() returns 0, and -1 otherwise.
*/
static int gt_idxlocali_runner (GT_UNUSED int argc,
                                GT_UNUSED const char **argv,
                                GT_UNUSED int parsed_args,
                                void *tool_arguments, GtError * err)
{
  IdxlocaliOptions *arguments = tool_arguments;
  unsigned long queryidx, numofqueries;

  gt_error_check (err);
  gt_assert (arguments != NULL);
  gt_assert (parsed_args == argc);

  printf ("# indexname(%s)=%s\n", arguments->withesa ? "esa" : "pck",
          gt_str_get (arguments->indexname));
  numofqueries = gt_str_array_size (arguments->queryfiles);
  for (queryidx = 0; queryidx < numofqueries; queryidx++)
  {
    printf ("# queryfile=%s\n",
            gt_str_array_get (arguments->queryfiles, queryidx));
  }
  printf ("# threshold=%lu\n", arguments->threshold);

  return runidxlocali (arguments, err) != 0 ? -1 : 0;
}
/*
  Open one suffix array stream per index name in <indexnametab>; the i-th
  stream is stored in suffixarraytable[i] and nextpostable[i] is set to 0.
  *<numofchars> receives the alphabet size of the first index.
  Returns 0 on success and -1 as soon as streamsuffixarray() fails; entries
  for later indices are left untouched in that case.
*/
static int inputthesequences(unsigned int *numofchars,
                             unsigned long *nextpostable,
                             Suffixarray *suffixarraytable,
                             const GtStrArray *indexnametab,
                             unsigned int demand,
                             GtLogger *logger,
                             GtError *err)
{
  unsigned long current = 0;

  gt_error_check(err);
  while (current < gt_str_array_size(indexnametab))
  {
    const char *name = gt_str_array_get(indexnametab,current);

    if (streamsuffixarray(&suffixarraytable[current], demand, name, logger,
                          err) != 0)
    {
      return -1;
    }
    if (current == 0)
    {
      /* the alphabet size is taken from the first index only */
      *numofchars = gt_alphabet_num_of_chars(
                          gt_encseq_alphabet(suffixarraytable[current].encseq));
    }
    nextpostable[current] = 0;
    current++;
  }
  return 0;
}
/*
  Tool runner: read the GFF3 files named by the remaining command line
  arguments (unsorted), collect IDs with a collect-ids visitor into a string
  table and, if the whole input could be processed, print every collected ID
  on its own line to stdout.
  Returns the error code of gt_node_stream_pull().
*/
static int gt_seqids_runner(GT_UNUSED int argc, const char **argv,
                            int parsed_args, GT_UNUSED void *tool_arguments,
                            GtError *err)
{
  GtCstrTable *id_table;
  GtNodeStream *gff3_stream, *collect_stream;
  int had_err;

  gt_error_check(err);

  id_table = gt_cstr_table_new();
  gff3_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                               argv + parsed_args);
  collect_stream = gt_visitor_stream_new(gff3_stream,
                                         gt_collect_ids_visitor_new(id_table));

  had_err = gt_node_stream_pull(collect_stream, err);
  if (had_err == 0) {
    GtStrArray *ids = gt_cstr_table_get_all(id_table);
    GtUword idx = 0;

    while (idx < gt_str_array_size(ids)) {
      printf("%s\n", gt_str_array_get(ids, idx));
      idx++;
    }
    gt_str_array_delete(ids);
  }

  gt_node_stream_delete(collect_stream);
  gt_node_stream_delete(gff3_stream);
  gt_cstr_table_delete(id_table);
  return had_err;
}
/* single sequences (-ss): store a new GtStr holding a copy of the <idx>-th
   entry of <strings> in the first slot of the sequence table; <idx> must be
   smaller than the number of strings */
static void get_onesequence(const GtSequenceTable *sequence_table,
                            const GtStrArray *strings,
                            GtUword idx)
{
  const char *selected;

  gt_assert(sequence_table != NULL && strings != NULL &&
            idx < gt_str_array_size(strings));

  selected = gt_str_array_get(strings,idx);
  sequence_table->seqarray[0] = gt_str_new_cstr(selected);
}
/*
  Return the sum of gt_file_estimate_size() over all files named in
  <filenames>; the result is 0 for an empty array.
*/
off_t gt_files_estimate_total_size(const GtStrArray *filenames)
{
  off_t sum = 0;
  GtUword idx = 0;

  while (idx < gt_str_array_size(filenames))
  {
    const char *name = gt_str_array_get(filenames, idx);

    sum += gt_file_estimate_size(name);
    idx++;
  }
  return sum;
}
/*
  Run <checkfunction> on the input selected in <opt>; the modes are tested in
  the order strings, files, charlistlen, text and only the first one set is
  used.
  - strings: the two strings are checked twice, first with the leading flag
    set to true, then with false; returns 2.
  - files: the two files are handed to gt_runcheckfunctionontwofiles();
    returns 2.
  - charlistlen / text: returns whatever the respective runner returns.
  It is a programming error (assertion) if no mode is set.
*/
static unsigned long applycheckfunctiontosimpleoptions(
                                  Checkcmppairfuntype checkfunction,
                                  const Cmppairwiseopt *opt)
{
  if (gt_str_array_size(opt->strings) > 0)
  {
    const char *first = gt_str_array_get(opt->strings,0),
               *second = gt_str_array_get(opt->strings,1UL);
    const unsigned long firstlen = (unsigned long) strlen(first),
                        secondlen = (unsigned long) strlen(second);

    checkfunction(true, (const GtUchar *) first, firstlen,
                  (const GtUchar *) second, secondlen);
    checkfunction(false, (const GtUchar *) first, firstlen,
                  (const GtUchar *) second, secondlen);
    return 2UL; /* number of testcases */
  }
  if (gt_str_array_size(opt->files) > 0)
  {
    gt_runcheckfunctionontwofiles(checkfunction,
                                  gt_str_array_get(opt->files,0),
                                  gt_str_array_get(opt->files,1UL));
    return 2UL;
  }
  if (opt->charlistlen != NULL)
  {
    return gt_runcheckfunctiononalphalen(checkfunction,
                                         gt_str_get(opt->charlistlen->charlist),
                                         opt->charlistlen->len);
  }
  if (gt_str_length(opt->text) > 0)
  {
    return gt_runcheckfunctionontext(checkfunction,gt_str_get(opt->text));
  }
  gt_assert(false);
  return 0;
}
/*
  Push a new Lua table onto the stack of <L> which maps the keys 1..n to the
  n strings stored in <sa> (in array order). The table is left on top of the
  stack.
*/
void gt_lua_push_strarray_as_table(lua_State *L, GtStrArray *sa)
{
  unsigned long idx = 0;

  gt_assert(L && sa);
  lua_newtable(L);
  while (idx < gt_str_array_size(sa)) {
    /* Lua tables are 1-based, the string array is 0-based */
    lua_pushinteger(L, idx+1);
    lua_pushstring(L, gt_str_array_get(sa, idx));
    /* table[key] = value; pops key and value, table stays at the top */
    lua_rawset(L, -3);
    idx++;
  }
}
/*
  Deliver the next node of the LTR cluster stream in *<gn>.

  On the first call the complete input stream is consumed: a reference to
  every node is buffered in lcs->nodes and every node is visited by the
  prepare-seq visitor lcs->lcv. Afterwards process_feature() is run for each
  feature key reported by that visitor. Only if all of this succeeds the
  stream switches to delivery mode.
  In delivery mode the buffered nodes are handed out one per call; *<gn> is
  set to NULL once all of them have been delivered. This also covers the
  first call on an empty input stream (previously the empty node buffer was
  indexed without a bounds check).

  Returns 0 on success and the error code of the failing step otherwise.
*/
static int gt_ltr_cluster_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                      GtError *err)
{
  GtLTRClusterStream *lcs;
  GtGenomeNode *ref_gn;
  int had_err = 0;
  unsigned long i = 0;

  gt_error_check(err);
  lcs = gt_ltr_cluster_stream_cast(ns);

  if (lcs->first_next) {
    /* buffer and visit all input nodes */
    while (!(had_err = gt_node_stream_next(lcs->in_stream, gn, err)) && *gn) {
      gt_assert(*gn && !had_err);
      ref_gn = gt_genome_node_ref(*gn);
      gt_array_add(lcs->nodes, ref_gn);
      had_err = gt_genome_node_accept(*gn, (GtNodeVisitor*) lcs->lcv, err);
      if (had_err) {
        gt_genome_node_delete(*gn);
        *gn = NULL;
        break;
      }
    }
    lcs->feat_to_encseq =
      gt_ltr_cluster_prepare_seq_visitor_get_encseqs(lcs->lcv);
    lcs->feat_to_encseq_keys =
      gt_ltr_cluster_prepare_seq_visitor_get_features(lcs->lcv);
    if (!had_err) {
      for (i = 0; i < gt_str_array_size(lcs->feat_to_encseq_keys); i++) {
        had_err = process_feature(lcs,
                                  gt_str_array_get(lcs->feat_to_encseq_keys,
                                                   i),
                                  err);
        if (had_err)
          break;
      }
    }
    if (had_err)
      return had_err;
    lcs->first_next = false;
  }

  /* hand out the buffered nodes, one per call */
  if (lcs->next_index >= gt_array_size(lcs->nodes))
    *gn = NULL;
  else {
    *gn = *(GtGenomeNode**) gt_array_get(lcs->nodes, lcs->next_index);
    lcs->next_index++;
  }
  return 0;
}
GtQuerysubstringmatchiterator *gt_querysubstringmatchiterator_new( const GtEncseq *dbencseq, GtUword totallength, const ESASuffixptr *suftabpart, GtReadmode db_readmode, GtUword numberofsuffixes, const GtStrArray *query_files, const GtEncseq *query_encseq, GtReadmode query_readmode, unsigned int userdefinedleastlength, GtError *err) { GtQuerysubstringmatchiterator *qsmi = gt_malloc(sizeof *qsmi); qsmi->dbencseq = dbencseq; qsmi->suftabpart = suftabpart; qsmi->db_readmode = db_readmode; qsmi->numberofsuffixes = numberofsuffixes; qsmi->totallength = totallength; qsmi->userdefinedleastlength = (GtUword) userdefinedleastlength; qsmi->queryunitnum = 0; qsmi->desc = NULL; qsmi->query_for_seqit = NULL; qsmi->query_seqlen = 0; qsmi->queryrep.sequence = NULL; qsmi->queryrep.encseq = query_encseq; qsmi->queryrep.readmode = query_readmode; qsmi->queryrep.startpos = 0; qsmi->dbstart = 0; qsmi->matchlength = 0; qsmi->querysubstring.queryrep = &qsmi->queryrep; qsmi->mmsi = gt_mmsearchiterator_new_empty(); qsmi->mmsi_defined = false; if (query_files == NULL || gt_str_array_size(query_files) == 0) { gt_assert(query_encseq != NULL); qsmi->seqit = NULL; qsmi->query_encseq_numofsequences = (uint64_t) gt_encseq_num_of_sequences(query_encseq); } else { gt_assert(query_encseq == NULL); qsmi->seqit = gt_seq_iterator_sequence_buffer_new(query_files, err); if (qsmi->seqit == NULL) { gt_querysubstringmatchiterator_delete(qsmi); return NULL; } gt_seq_iterator_set_symbolmap(qsmi->seqit, gt_alphabet_symbolmap(gt_encseq_alphabet(dbencseq))); } return qsmi; }
/*
  Handle the comma separated list of targets in <target>: for each target,
  its first blank separated token (the target ID) is GFF3-unescaped and then
  searched, with gt_string_matching_bmh(), in the description of every
  sequence of every file in <seqfiles>; show_target() is called for matches.

  A fresh string is used for the unescaped ID of every target, so that the ID
  of a previous target can never be part of the search pattern of a later
  one, regardless of whether gt_gff3_unescape() appends to or overwrites its
  output string.

  Returns 0 on success, the error code of gt_gff3_unescape() if unescaping
  fails and -1 if a sequence file cannot be opened.
*/
static int extracttarget_from_seqfiles(const char *target,
                                       GtStrArray *seqfiles, GtError *err)
{
  char *escaped_target;
  GtSplitter *splitter;
  unsigned long i;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(target && seqfiles);

  splitter = gt_splitter_new();
  /* the splitter works on a private copy of <target> */
  escaped_target = gt_cstr_dup(target);
  gt_splitter_split(splitter, escaped_target, strlen(escaped_target), ',');

  for (i = 0; !had_err && i < gt_splitter_size(splitter); i++) {
    GtSplitter *blank_splitter;
    GtStr *unescaped_target;
    const char *target_id;
    char *token = gt_splitter_get_token(splitter, i);

    blank_splitter = gt_splitter_new();
    unescaped_target = gt_str_new();
    gt_splitter_split(blank_splitter, token, strlen(token), ' ');
    target_id = gt_splitter_get_token(blank_splitter, 0);
    had_err = gt_gff3_unescape(unescaped_target, target_id, strlen(target_id),
                               err);
    if (!had_err) {
      unsigned long j;
      for (j = 0; j < gt_str_array_size(seqfiles); j++) {
        unsigned long k;
        GtBioseq *bioseq;
        if (!(bioseq = gt_bioseq_new(gt_str_array_get(seqfiles, j), err))) {
          had_err = -1;
          break;
        }
        for (k = 0; k < gt_bioseq_number_of_sequences(bioseq); k++) {
          TargetInfo target_info;
          const char *desc = gt_bioseq_get_description(bioseq, k);
          target_info.bioseq = bioseq;
          target_info.seqnum = k;
          gt_string_matching_bmh(desc, strlen(desc),
                                 gt_str_get(unescaped_target),
                                 gt_str_length(unescaped_target), show_target,
                                 &target_info);
        }
        gt_bioseq_delete(bioseq);
      }
    }
    gt_str_delete(unescaped_target);
    gt_splitter_delete(blank_splitter);
  }

  gt_free(escaped_target);
  gt_splitter_delete(splitter);
  return had_err;
}
/*
  Parse intermediate output and pass it on via <saprocessfunc> / <data>.
  If <consensusfiles> is not empty, its files are opened and parsed one after
  the other (stopping after the first failure); with <showverbose> set, a
  status line is shown for each file. If <consensusfiles> is empty, the
  input is read from stdin, which is left open.
  Returns the error code of gt_parse_intermediate_output().
*/
int gth_process_intermediate_files(GthInput *input, GtStrArray *consensusfiles,
                                   GthSAProcessFunc saprocessfunc, void *data,
                                   GthShowVerbose showverbose, GtError *err)
{
  GtUword filenum;
  int had_err = 0;

  gt_error_check(err);

  if (!gt_str_array_size(consensusfiles)) {
    /* no files given: read from stdin without closing the handle */
    GtFile *stdin_file = gt_file_new_from_fileptr(stdin);

    had_err = gt_parse_intermediate_output(input, saprocessfunc, data, "stdin",
                                           stdin_file, err);
    gt_file_delete_without_handle(stdin_file);
    return had_err;
  }

  /* process all files */
  for (filenum = 0;
       !had_err && filenum < gt_str_array_size(consensusfiles);
       filenum++) {
    const char *filename = gt_str_array_get(consensusfiles, filenum);
    GtFile *infile = gt_file_xopen(filename, "r");

    if (showverbose) {
      show_parse_file_status(showverbose, filenum,
                             gt_str_array_size(consensusfiles), filename);
    }
    had_err = gt_parse_intermediate_output(input, saprocessfunc, data,
                                           filename, infile, err);
    gt_file_delete(infile);
  }
  return had_err;
}
/*
  Write the VCF fields of result set <r> to the file of <v>, each followed by
  a tab. To the last field a ';' is appended, followed by one tag for each
  non-zero result: "EX;" (exon), "NSF;" (frms), "NSM;" (miss), "NSN;" (nons),
  "ASS;" (threeprime) and "DSS;" (fiveprime), in this order.
  Nothing is written if the result set has no VCF fields.
*/
void vcfoutput_write(VcfOutput *v, ResultSet *r)
{
  gt_assert(v);
  gt_assert(r);

  GtStr *field = gt_str_new();
  unsigned long num_of_fields = gt_str_array_size(resultset_get_vcf_array(r));
  unsigned long idx = 0;

  while (idx < gt_str_array_size(resultset_get_vcf_array(r))) {
    gt_str_set(field, gt_str_array_get(resultset_get_vcf_array(r), idx));

    if (idx + 1 == num_of_fields) {
      /* the annotation tags go into the last field only */
      gt_str_append_cstr(field, ";");
      if (resultset_get_exon(r) != 0)
        gt_str_append_cstr(field, "EX;");
      if (resultset_get_frms(r) != 0)
        gt_str_append_cstr(field, "NSF;");
      if (resultset_get_miss(r) != 0)
        gt_str_append_cstr(field, "NSM;");
      if (resultset_get_nons(r) != 0)
        gt_str_append_cstr(field, "NSN;");
      if (resultset_get_threeprime(r) != 0)
        gt_str_append_cstr(field, "ASS;");
      if (resultset_get_fiveprime(r) != 0)
        gt_str_append_cstr(field, "DSS;");
    }

    gt_str_append_cstr(field, "\t");
    gt_file_xwrite(v->file, gt_str_get(field), gt_str_length(field));
    gt_str_reset(field);
    idx++;
  }

  gt_str_delete(field);
}
/*
  Advance the codon iterator of <translator> until a codon equal to one of
  the entries of <codons> is read.
  Every entry of <codons> must have length GT_CODON_LENGTH; otherwise <err>
  is set and GT_TRANSLATOR_ERROR is returned before anything is read.
  Returns GT_TRANSLATOR_OK if a codon was found; *<pos> is then set to the
  current position of the codon iterator minus one.
  Returns GT_TRANSLATOR_END if the iterator reported its end, and
  GT_TRANSLATOR_ERROR for any other non-zero iterator status.
*/
GtTranslatorStatus gt_translator_find_codon(GtTranslator *translator,
                                            GtStrArray *codons,
                                            GtUword *pos,
                                            GtError *err)
{
  char first, second, third;
  unsigned int frame;
  GtUword idx;
  GtCodonIteratorStatus status;

  gt_assert(translator && codons && pos);
  gt_error_check(err);

  /* reject codon lists containing entries of the wrong length */
  for (idx = 0; idx < gt_str_array_size(codons); idx++) {
    const char *candidate = gt_str_array_get(codons, idx);
    int len = (int) strlen(candidate);

    if (len != GT_CODON_LENGTH) {
      gt_error_set(err, "invalid codon length for codon %s: %d",
                   gt_str_array_get(codons, idx), len);
      return GT_TRANSLATOR_ERROR;
    }
  }

  for (;;) {
    status = gt_codon_iterator_next(translator->ci, &first, &second, &third,
                                    &frame, err);
    if (status)
      break;
    for (idx = 0; idx < gt_str_array_size(codons); idx++) {
      const char *candidate = gt_str_array_get(codons, idx);

      if (first == candidate[0] && second == candidate[1]
            && third == candidate[2]) {
        *pos = gt_codon_iterator_current_position(translator->ci)-1;
        return GT_TRANSLATOR_OK;
      }
    }
  }

  return status == GT_CODON_ITERATOR_END ? GT_TRANSLATOR_END
                                         : GT_TRANSLATOR_ERROR;
}
/*
  Check the arguments of the tyr search tool after option parsing.
  - There must be no remaining command line arguments.
  - Every entry of arguments->showmodespec must be a keyword of the output
    mode table; the corresponding bits are added to arguments->showmode.
  - arguments->strandspec must be a keyword of the strand table; the
    corresponding bits are added to arguments->strand.
  Returns 0 on success and -1 on failure.

  The strand specification is now checked under the option name "-strand"
  instead of "-output", which had been copied from the output mode check.
  NOTE(review): the name is presumably only used for error messages by
  optionargaddbitmask() -- confirm that the option is registered as "strand".
*/
static int gt_tyr_search_arguments_check(int rest_argc,
                                         void *tool_arguments,
                                         GtError *err)
{
  Optionargmodedesc showmodedesctable[] =
  {
    {"qseqnum","query sequence number",SHOWQSEQNUM},
    {"qpos","query position",SHOWQPOS},
    {"counts","number of occurrence counts",SHOWCOUNTS},
    {"sequence","mer-sequence",SHOWSEQUENCE}
  };
  Optionargmodedesc stranddesctable[] =
  {
    {"f","forward strand",STRAND_FORWARD},
    {"p","reverse strand",STRAND_REVERSE},
    {"fp","forward and reverse strand",STRAND_FORWARD | STRAND_REVERSE}
  };
  unsigned long idx;
  Tyr_search_options *arguments = tool_arguments;

  if (rest_argc != 0)
  {
    gt_error_set(err,"superfluous arguments");
    return -1;
  }
  for (idx=0; idx<gt_str_array_size(arguments->showmodespec); idx++)
  {
    if (optionargaddbitmask(showmodedesctable,
                            sizeof (showmodedesctable)/
                            sizeof (showmodedesctable[0]),
                            &arguments->showmode,
                            "-output",
                            gt_str_array_get(arguments->showmodespec,idx),
                            err) != 0)
    {
      return -1;
    }
  }
  if (optionargaddbitmask(stranddesctable,
                          sizeof (stranddesctable)/
                          sizeof (stranddesctable[0]),
                          &arguments->strand,
                          "-strand",
                          gt_str_get(arguments->strandspec),err) != 0)
  {
    return -1;
  }
  return 0;
}
/*
  Lua binding: expects an encoded sequence as first argument and returns one
  value, a table mapping the keys 1..n to the n file names of that encoded
  sequence.
*/
static int encseq_lua_filenames(lua_State *L)
{
  GtEncseq **encseq = check_encseq(L, 1);
  const GtStrArray *names = gt_encseq_filenames(*encseq);
  GtUword idx = 0;

  lua_newtable(L);
  while (idx < gt_str_array_size(names)) {
    /* Lua tables are 1-based, the string array is 0-based */
    lua_pushinteger(L, idx+1);
    lua_pushstring(L, gt_str_array_get(names, idx));
    /* table[key] = value; pops key and value, table stays at the top */
    lua_rawset(L, -3);
    idx++;
  }
  return 1;
}
/*
  Deliver the next node in *<gn>.
  As long as the input stream delivers nodes, they are passed on after having
  been visited by s->collect_vis. Once the input stream is exhausted, the IDs
  stored in s->seqid_table are fetched (once) and for each of them, one per
  call, a sequence node with the complete sequence obtained from the region
  mapping s->rm is delivered. *<gn> is NULL when all IDs have been handled.
  Returns 0 on success and the error code of the failing step otherwise.
*/
static int sequence_node_add_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                         GtError *err)
{
  GtSequenceNodeAddStream *s;
  GtStr *seqid, *seqstr;
  GtUword len;
  char *seq = NULL;
  int had_err;

  gt_error_check(err);
  s = gt_sequence_node_add_stream_cast(ns);

  /* stream nodes as long as we have some, record seen seqids */
  had_err = gt_node_stream_next(s->in_stream, gn, err);
  if (!had_err && *gn) {
    had_err = gt_genome_node_accept(*gn, s->collect_vis, err);
  }
  if (had_err || *gn) {
    return had_err;
  }

  /* the input is exhausted: switch to the collected sequence IDs */
  if (!s->seqids) {
    s->seqids = gt_cstr_table_get_all(s->seqid_table);
  }
  gt_assert(s->seqids);
  if (s->cur_seqid >= gt_str_array_size(s->seqids)) {
    *gn = NULL;
    return 0;
  }

  seqid = gt_str_new();
  seqstr = gt_str_new();
  gt_str_append_cstr(seqid, gt_str_array_get(s->seqids, s->cur_seqid));
  had_err = gt_region_mapping_get_sequence_length(s->rm, &len, seqid, err);
  if (!had_err) {
    /* fetch the whole sequence, i.e. positions 1 to len */
    had_err = gt_region_mapping_get_sequence(s->rm, &seq, seqid, 1, len, err);
  }
  if (!had_err) {
    gt_str_append_cstr_nt(seqstr, seq, len);
    *gn = gt_sequence_node_new(gt_str_get(seqid), seqstr);
  }
  /* the ID counts as handled even if its sequence could not be fetched */
  s->cur_seqid++;
  gt_free(seq);
  gt_str_delete(seqid);
  gt_str_delete(seqstr);
  return had_err;
}
/*
  Unit test for the translation table module: checks that the number of
  scheme descriptions equals GT_NUMOFTRANSSCHEMES.
  Returns 0 if the check passes; otherwise the value set by gt_ensure().
  The array of scheme descriptions is now released after the check.
  NOTE(review): this assumes that gt_trans_table_get_scheme_descriptions()
  hands ownership of the array to the caller -- confirm at its definition.
  The remaining checks below are disabled (commented out).
*/
int gt_trans_table_unit_test(GtError *err)
{
  int had_err = 0;
  GtStrArray *schemes;
  gt_error_check(err);

  /* check retrieval of table descriptions */
  schemes = gt_trans_table_get_scheme_descriptions();
  gt_ensure(
        gt_str_array_size(schemes) == (GtUword) GT_NUMOFTRANSSCHEMES);
  gt_str_array_delete(schemes);

  /* check switching translation scheme */
  /* test_errnum = gt_translator_set_translation_scheme(tr, 3, test_err);
  gt_ensure(!test_errnum && !gt_error_is_set(test_err)); */

  /* check switching to invalid translation scheme */
  /* test_errnum = gt_translator_set_translation_scheme(tr, 7, test_err);
  gt_ensure(test_errnum && gt_error_is_set(test_err)); */

  /* switch back to default translation scheme */
  /* gt_error_unset(test_err);
  test_errnum = gt_translator_set_translation_scheme(tr, 1, test_err);
  gt_ensure(!test_errnum && !gt_error_is_set(test_err)); */

  /* check single codon translation */
  /*
   * char *bases = "AaCcGgTt";
   * gt_error_unset(test_err);
  for (i=0; i<8; i++) {
    char c1 = bases[i];
    for (j=0; j<8; j++) {
      char c2 = bases[j];
      for (k=0; k<8; k++) {
        char c3 = bases[k], ret1, ret2;
        test_errnum = gt_translator_codon2amino(tr, c1, c2, c3, &ret1,
                                                test_err);
        gt_ensure(!test_errnum && !gt_error_is_set(test_err));
        ret2 = gt_transa(tr->scheme->aminos, true, c1, c2, c3, NULL,
                         test_err);
        gt_ensure(ret1 == ret2);
      }
    }
  } */

  return had_err;
}
/*
  For every sequence of the single query file named in <queryfilenames>, add
  the value of gt_esa2shulengthquery() with respect to <suffixarray> to
  *<totalgmatchlength>. The query sequences are read with the symbol map of
  the alphabet of the suffix array's encoded sequence.
  Returns 0 on success and -1 if the sequence iterator cannot be created or
  reading a sequence fails.
*/
int gt_esa2shulengthqueryfiles(unsigned long *totalgmatchlength,
                               const Suffixarray *suffixarray,
                               const GtStrArray *queryfilenames,
                               GtError *err)
{
  GtSeqIterator *queryiterator;
  GtAlphabet *alphabet;
  const GtUchar *query;
  unsigned long querylen;
  char *desc = NULL;
  int retval;

  gt_error_check(err);
  alphabet = gt_encseq_alphabet(suffixarray->encseq);
  gt_assert(gt_str_array_size(queryfilenames) == 1UL);

  queryiterator = gt_seq_iterator_sequence_buffer_new(queryfilenames, err);
  if (queryiterator == NULL)
  {
    return -1;
  }
  gt_seq_iterator_set_symbolmap(queryiterator,
                                gt_alphabet_symbolmap(alphabet));

  /* a positive value signals a sequence, 0 the end and < 0 an error */
  while ((retval = gt_seq_iterator_next(queryiterator, &query, &querylen,
                                        &desc, err)) > 0)
  {
    *totalgmatchlength += gt_esa2shulengthquery(suffixarray,query,querylen);
  }
  gt_seq_iterator_delete(queryiterator);
  return retval < 0 ? -1 : 0;
}
/*
  Replace the IDs in the target attribute <target> of feature node <fn>:
  <target> is parsed into IDs, ranges and strands; each ID is replaced by the
  ID which gt_regular_seqid_save() derives from the description that
  <region_mapping> delivers for it. If this succeeds for all IDs, the target
  attribute of <fn> is rebuilt from the new IDs and the unchanged ranges and
  strands.
  Returns 0 on success and the error code of the failing step otherwise; the
  attribute of <fn> is not touched in that case.
*/
static int m2i_change_target_seqids(GtFeatureNode *fn, const char *target,
                                    GtRegionMapping *region_mapping,
                                    GtError *err)
{
  GtStrArray *ids;
  GtArray *ranges, *strands;
  GtStr *description, *replacement;
  unsigned long idx;
  int had_err;

  gt_error_check(err);
  gt_assert(fn && target && region_mapping);

  ids = gt_str_array_new();
  ranges = gt_array_new(sizeof (GtRange));
  strands = gt_array_new(sizeof (GtStrand));
  description = gt_str_new();
  replacement = gt_str_new();

  had_err = gt_gff3_parser_parse_all_target_attributes(target, false, ids,
                                                       ranges, strands, "", 0,
                                                       err);
  idx = 0;
  while (!had_err && idx < gt_str_array_size(ids)) {
    GtStr *old_id;

    gt_str_reset(description);
    gt_str_reset(replacement);
    old_id = gt_str_array_get_str(ids, idx);
    had_err = gt_region_mapping_get_description(region_mapping, description,
                                                old_id, err);
    if (!had_err) {
      gt_regular_seqid_save(replacement, description);
    }
    /* executed in the error case as well; <replacement> is empty then */
    gt_str_array_set(ids, idx, replacement);
    idx++;
  }

  if (!had_err) {
    GtStr *rebuilt = gt_str_new();

    gt_gff3_parser_build_target_str(rebuilt, ids, ranges, strands);
    gt_feature_node_set_attribute(fn, GT_GFF_TARGET, gt_str_get(rebuilt));
    gt_str_delete(rebuilt);
  }

  gt_str_delete(replacement);
  gt_str_delete(description);
  gt_array_delete(strands);
  gt_array_delete(ranges);
  gt_str_array_delete(ids);
  return had_err;
}
/*
  Tool runner: encode the sequence files given as remaining command line
  arguments.
  If no index name was specified, it is derived from the base name of the
  input file; this is only possible for a single input file, for more files
  option -indexname is mandatory. After successful encoding, statistics are
  shown if requested.
  An empty list of input files is now reported as an error; before, the
  first element of the empty array was accessed and the assertion in
  encode_sequence_files() was violated.
  Returns 0 on success and a negative value on failure.
*/
static int gt_encseq_encode_runner(GT_UNUSED int argc, const char **argv,
                                   int parsed_args,
                                   GT_UNUSED void *tool_arguments,
                                   GtError *err)
{
  int had_err = 0,
      i;
  GtEncseqEncodeArguments *arguments =
                                      (GtEncseqEncodeArguments*) tool_arguments;
  GtStrArray *infiles;

  gt_error_check(err);

  infiles = gt_str_array_new();
  for (i = parsed_args; i < argc; i++) {
    gt_str_array_add_cstr(infiles, argv[i]);
  }

  if (gt_str_array_size(infiles) == 0UL) {
    gt_error_set(err,"at least one input file must be given");
    had_err = -1;
  } else if (gt_str_length(arguments->indexname) == 0UL) {
    if (gt_str_array_size(infiles) > 1UL) {
      gt_error_set(err,"if more than one input file is given, then "
                       "option -indexname is mandatory");
      had_err = -1;
    } else {
      /* single input file: use its base name as index name */
      char *basenameptr;
      basenameptr = gt_basename(gt_str_array_get(infiles, 0UL));
      gt_str_set(arguments->indexname, basenameptr);
      gt_free(basenameptr);
    }
  }

  if (!had_err) {
    gt_assert(gt_str_length(arguments->indexname) > 0UL);
    had_err = encode_sequence_files(infiles,
                                    arguments->eopts,
                                    gt_str_get(arguments->indexname),
                                    arguments->verbose,
                                    arguments->no_esq_header,
                                    err);
  }

  if (!had_err && arguments->showstats)
    show_encoded_statistics(infiles, gt_str_get(arguments->indexname));

  gt_str_array_delete(infiles);
  return had_err;
}
/*
  Entry point of the mergeesa tool: parse the options, print the name of the
  index to store and the names of all input indices as "# ..." lines to
  stdout and merge the indices with gt_performtheindexmerging().
  Returns 0 on success (also if option parsing requests to exit, e.g. after
  showing the help) and -1 on failure.
  The strings allocated here are now released on the "requests exit" path,
  too; before, they were leaked there.
*/
int gt_mergeesa(int argc, const char **argv, GtError *err)
{
  GtStr *storeindex;
  GtStrArray *indexnametab;
  bool haserr = false;
  int parsed_args;

  gt_error_check(err);

  storeindex = gt_str_new();
  indexnametab = gt_str_array_new();
  switch (parse_options(storeindex, indexnametab, &parsed_args, argc, argv,
                        err)) {
    case GT_OPTION_PARSER_OK: break;
    case GT_OPTION_PARSER_ERROR:
         haserr = true; break;
    case GT_OPTION_PARSER_REQUESTS_EXIT:
         /* nothing to do, but do not leak what was allocated above */
         gt_str_delete(storeindex);
         gt_str_array_delete(indexnametab);
         return 0;
  }
  if (!haserr)
  {
    GtUword i;
    GtLogger *logger;

    printf("# storeindex=%s\n",gt_str_get(storeindex));
    for (i=0; i<gt_str_array_size(indexnametab); i++)
    {
      printf("# input=%s\n",gt_str_array_get(indexnametab,i));
    }
    logger = gt_logger_new(false, GT_LOGGER_DEFLT_PREFIX, stdout);
    if (gt_performtheindexmerging(storeindex,
                                  indexnametab,
                                  logger,
                                  err) != 0)
    {
      haserr = true;
    }
    gt_logger_delete(logger);
  }
  gt_str_delete(storeindex);
  gt_str_array_delete(indexnametab);
  return haserr ? -1 : 0;
}
static int feature_in_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *error) { GtFeatureInStream *stream = feature_in_stream_cast(ns); gt_error_check(error); if (!stream->init) { feature_in_stream_init(stream); stream->init = true; } if (gt_queue_size(stream->regioncache) > 0) { GtGenomeNode *region = gt_queue_get(stream->regioncache); *gn = region; return 0; } if (stream->featurecache == NULL || gt_array_size(stream->featurecache) == 0) { if (stream->featurecache != NULL) { gt_array_delete(stream->featurecache); stream->featurecache = NULL; } if (stream->seqindex == gt_str_array_size(stream->seqids)) { *gn = NULL; return 0; } const char *seqid = gt_str_array_get(stream->seqids, stream->seqindex++); stream->featurecache = gt_feature_index_get_features_for_seqid(stream->fi, seqid, error); gt_array_sort(stream->featurecache, (GtCompare)gt_genome_node_compare); gt_array_reverse(stream->featurecache); } GtGenomeNode *feat = *(GtGenomeNode **)gt_array_pop(stream->featurecache); *gn = gt_genome_node_ref(feat); return 0; }