/* Formats a given position number for short display in the ruler.

   Writes the label to <txt>, using at most <buflen> bytes (snprintf
   semantics). Positions >= 1000 are divided by the largest fitting unit
   (1000 -> 'k', 1000000 -> 'M', 1000000000 -> 'G') and printed as a
   fixed-point number followed by the unit letter and <unitstr>. Smaller
   positions are printed as an integer preceded by a single space.

   The number of decimals is floor(log10(pos)) minus the number of trailing
   decimal zeros of <pos>, e.g. 1500000 -> "1.5M", 1234567 -> "1.234567M".
   NOTE(review): for positions with more digits than the unit strips (e.g.
   12000000 -> "12.0M") this prints trailing zeros; the label text is kept
   unchanged here because callers may depend on it -- confirm before
   changing.

   The logarithm is only evaluated for pos >= 1000, so pos == 0 never reaches
   log10() (whose infinite result must not be converted to int). */
void gt_format_ruler_label(char *txt, GtUword pos, const char *unitstr,
                           size_t buflen)
{
  GtUword unit = 0;
  char prefix = '\0';
  gt_assert(txt);

  /* pick the largest unit that fits */
  if (pos >= 1000000000) {
    unit = 1000000000;
    prefix = 'G';
  }
  else if (pos >= 1000000) {
    unit = 1000000;
    prefix = 'M';
  }
  else if (pos >= 1000) {
    unit = 1000;
    prefix = 'k';
  }

  if (unit != 0) {
    double fpos = (double) pos / (double) unit;
    int decimals = (int) floor(log10((double) pos));
    GtUword rest;
    /* every trailing decimal zero saves one digit after the point;
       terminates because pos >= 1000 is non-zero */
    for (rest = pos; rest % 10 == 0; rest /= 10)
      decimals--;
    /* precision is passed via '*', no run-time built format string needed */
    (void) snprintf(txt, buflen, "%.*f%c%s", decimals, fpos, prefix, unitstr);
  }
  else {
    /*@ignore@*/
    (void) snprintf(txt, buflen, " "GT_WU"%s", pos, unitstr);
    /*@end@*/
  }
}
/* Appends the eight leading GFF3 columns of <fn> to <outstr>: seqid, source,
   type, start, end, score, strand and phase. Every column, including the last
   one, is followed by a tab. A defined score is formatted with "%.3g", an
   undefined one is written as '.'. */
void gt_gff3_output_leading_str(GtFeatureNode *fn, GtStr *outstr)
{
  GtGenomeNode *node;
  gt_assert(fn && outstr);
  node = (GtGenomeNode*) fn;

  /* seqid */
  gt_str_append_str(outstr, gt_genome_node_get_seqid(node));
  gt_str_append_char(outstr, '\t');

  /* source */
  gt_str_append_cstr(outstr, gt_feature_node_get_source(fn));
  gt_str_append_char(outstr, '\t');

  /* type */
  gt_str_append_cstr(outstr, gt_feature_node_get_type(fn));
  gt_str_append_char(outstr, '\t');

  /* start */
  gt_str_append_ulong(outstr, gt_genome_node_get_start(node));
  gt_str_append_char(outstr, '\t');

  /* end */
  gt_str_append_ulong(outstr, gt_genome_node_get_end(node));
  gt_str_append_char(outstr, '\t');

  /* score */
  if (!gt_feature_node_score_is_defined(fn)) {
    gt_str_append_char(outstr, '.');
  }
  else {
    char score[BUFSIZ];
    (void) snprintf(score, sizeof score, "%.3g",
                    gt_feature_node_get_score(fn));
    gt_str_append_cstr(outstr, score);
  }
  gt_str_append_char(outstr, '\t');

  /* strand */
  gt_str_append_char(outstr,
                     GT_STRAND_CHARS[gt_feature_node_get_strand(fn)]);
  gt_str_append_char(outstr, '\t');

  /* phase */
  gt_str_append_char(outstr, GT_PHASE_CHARS[gt_feature_node_get_phase(fn)]);
  gt_str_append_char(outstr, '\t');
}
/* Region node callback of the GFF3 visitor. After calling
   gff3_version_string() it emits the line
   "<GT_GFF_SEQUENCE_REGION> <seqid> <start> <end>\n" for <rn>: appended to
   the visitor's output string if one is set, printed to the visitor's output
   file otherwise. Always returns 0. */
static int gff3_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                    GT_UNUSED GtError *err)
{
  GtGFF3Visitor *visitor;
  GtGenomeNode *region;
  gt_error_check(err);
  visitor = gff3_visitor_cast(nv);
  gt_assert(nv && rn);
  region = (GtGenomeNode*) rn;
  gff3_version_string(nv);

  if (visitor->outstr) {
    /* collect the line in the output string */
    gt_str_append_cstr(visitor->outstr, GT_GFF_SEQUENCE_REGION);
    gt_str_append_cstr(visitor->outstr, " ");
    gt_str_append_cstr(visitor->outstr,
                       gt_str_get(gt_genome_node_get_seqid(region)));
    gt_str_append_char(visitor->outstr, ' ');
    gt_str_append_ulong(visitor->outstr, gt_genome_node_get_start(region));
    gt_str_append_char(visitor->outstr, ' ');
    gt_str_append_ulong(visitor->outstr, gt_genome_node_get_end(region));
    gt_str_append_char(visitor->outstr, '\n');
  }
  else {
    /* print the line directly to the output file */
    gt_file_xprintf(visitor->outfp, "%s %s "GT_WU" "GT_WU"\n",
                    GT_GFF_SEQUENCE_REGION,
                    gt_str_get(gt_genome_node_get_seqid(region)),
                    gt_genome_node_get_start(region),
                    gt_genome_node_get_end(region));
  }
  return 0;
}
/* Creates sa->gff3_target_attribute (which must not exist yet) with the
   content "<id> <start> <end> <strand>". <id> is the GFF3-escaped reference
   id, prefixed with "<GT_MD5_SEQID_PREFIX><md5>:" if <md5ids> is set. <start>
   is the reference cutoff start plus 1, <end> is the total reference length
   minus the reference cutoff end. */
static void set_gff3_target_attribute(GthSA *sa, bool md5ids)
{
  GtStr *target;
  gt_assert(sa && !sa->gff3_target_attribute);
  target = gt_str_new();
  sa->gff3_target_attribute = target;

  /* optional MD5 prefix */
  if (md5ids) {
    gt_assert(sa->ref_md5);
    gt_str_append_cstr(target, GT_MD5_SEQID_PREFIX);
    gt_str_append_str(target, sa->ref_md5);
    gt_str_append_char(target, ':');
  }

  /* escaped reference id */
  gt_gff3_escape(target, gt_str_get(sa->ref_id), gt_str_length(sa->ref_id));
  gt_str_append_char(target, ' ');

  /* start position; XXX: use reference dpstartpos */
  gt_str_append_ulong(target, gth_sa_referencecutoff_start(sa) + 1);
  gt_str_append_char(target, ' ');

  /* end position; XXX */
  gt_str_append_ulong(target, gth_sa_ref_total_length(sa) -
                              gth_sa_referencecutoff_end(sa));
  gt_str_append_char(target, ' ');

  /* strand */
  gt_str_append_char(target,
                     GT_STRAND_CHARS[sa->ref_strand_forward
                                     ? GT_STRAND_FORWARD
                                     : GT_STRAND_REVERSE]);
}
/* Fills the (empty) string <description> with
   "<type>_<counter>[ (joined)][ (translated)][ [seqid <seqid>]]
   [ [target IDs <id>,<id>,...]]".
   The " (joined)" and " (translated)" parts are added if the respective flag
   is set, the seqid part if <seqid> is given (it must be non-empty then) and
   the target ID part if <target_ids> is given and contains at least one
   entry. */
static void construct_description(GtStr *description, const char *type,
                                  GtUword counter, bool join, bool translate,
                                  GtStr *seqid, GtStrArray *target_ids)
{
  gt_assert(!gt_str_length(description));

  /* "<type>_<counter>" */
  gt_str_append_cstr(description, type);
  gt_str_append_char(description, '_');
  gt_str_append_ulong(description, counter);

  if (join) {
    gt_str_append_cstr(description, " (joined)");
  }
  if (translate) {
    gt_str_append_cstr(description, " (translated)");
  }

  if (seqid) {
    gt_assert(gt_str_length(seqid));
    gt_str_append_cstr(description, " [seqid ");
    gt_str_append_str(description, seqid);
    gt_str_append_char(description, ']');
  }

  if (target_ids && gt_str_array_size(target_ids)) {
    GtUword idx;
    gt_str_append_cstr(description, " [target IDs ");
    for (idx = 0; idx < gt_str_array_size(target_ids); idx++) {
      /* comma-separate all but the first ID */
      if (idx > 0)
        gt_str_append_char(description, ',');
      gt_str_append_cstr(description, gt_str_array_get(target_ids, idx));
    }
    gt_str_append_char(description, ']');
  }
}
/* Closes all currently open subset files of <store_in_subset_file_data>.

   For every open subset file the XML trailer is written, the file is closed,
   its file name string is deleted and the corresponding slots (file, file
   name and alignment counter) are reset. If a verbose callback is set, it is
   called with "split file created: <filename> (size=<counter>)" before the
   file is closed. */
static void close_output_files(Store_in_subset_file_data
                               *store_in_subset_file_data)
{
  unsigned long i;
  GtStr *buf;

  buf = gt_str_new();
  for (i = 0; i < store_in_subset_file_data->num_of_subset_files; i++) {
    if (!store_in_subset_file_data->subset_files[i])
      continue; /* slot was never opened */

    /* an open file always has a name; check before the name is used below */
    gt_assert(store_in_subset_file_data->subset_filenames[i]);

    if (store_in_subset_file_data->gthsplitinfo->showverbose) {
      gt_str_reset(buf);
      gt_str_append_cstr(buf, "split file created: ");
      gt_str_append_str(buf, store_in_subset_file_data->subset_filenames[i]);
      gt_str_append_cstr(buf, " (size=");
      gt_str_append_ulong(buf,
                          store_in_subset_file_data->subset_file_sa_counter[i]);
      gt_str_append_cstr(buf, ")");
      store_in_subset_file_data->gthsplitinfo->showverbose(gt_str_get(buf));
    }

    /* put XML trailer in file before closing it */
    gth_xml_show_trailer(true, store_in_subset_file_data->subset_files[i]);
    gt_file_delete(store_in_subset_file_data->subset_files[i]);
    gt_str_delete(store_in_subset_file_data->subset_filenames[i]);

    /* reset the complete slot; the file name pointer must be cleared as well,
       otherwise a dangling pointer to the deleted string remains */
    store_in_subset_file_data->subset_files[i] = NULL;
    store_in_subset_file_data->subset_filenames[i] = NULL;
    store_in_subset_file_data->subset_file_sa_counter[i] = 0;
  }
  gt_str_delete(buf);
}
/* Attaches <singlehit> as a protein match feature to the LTR retrotransposon
   currently processed by <lv>.

   A feature is only created if the hit has at least one chain or if
   lv->output_all_chains is set. The new feature gets the hit's alignment and
   amino acid sequence as user data, lv->tag as source, the e-value as score,
   a "reading_frame" attribute and -- if the model has a name -- a "name"
   attribute. If all chains are to be output and the hit is part of more than
   one chain, a "chains" attribute of the form "<model>:<chain>,..." is set.
   In any case the hit's chain array is deleted afterwards. Always returns
   0. */
static int gt_ltrdigest_pdom_visitor_attach_hit(GtLTRdigestPdomVisitor *lv,
                                                GtHMMERModelHit *modelhit,
                                                GtHMMERSingleHit *singlehit)
{
  GtRange hit_range;
  gt_assert(lv && singlehit);

  hit_range = gt_ltrdigest_pdom_visitor_coords(lv, singlehit);

  if (gt_array_size(singlehit->chains) > 0 || lv->output_all_chains) {
    char frame_str[32];
    GtGenomeNode *match;
    GtFeatureNode *match_fn;

    match = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*)
                                                      lv->ltr_retrotrans),
                                gt_ft_protein_match,
                                hit_range.start,
                                hit_range.end,
                                singlehit->strand);
    match_fn = (GtFeatureNode*) match;

    /* keep alignment and protein sequence available for later output */
    gt_genome_node_add_user_data(match, "pdom_alignment",
                                 gt_str_ref(singlehit->alignment),
                                 (GtFree) gt_str_delete);
    gt_genome_node_add_user_data(match, "pdom_aaseq",
                                 gt_str_ref(singlehit->aastring),
                                 (GtFree) gt_str_delete);

    gt_feature_node_set_source(match_fn, lv->tag);
    gt_feature_node_set_score(match_fn, (float) singlehit->evalue);

    (void) snprintf(frame_str, sizeof frame_str, "%d",
                    (int) singlehit->frame);
    gt_feature_node_add_attribute(match_fn, "reading_frame", frame_str);

    if (modelhit->modelname != NULL) {
      gt_feature_node_add_attribute(match_fn, "name", modelhit->modelname);
    }

    if (gt_array_size(singlehit->chains) > 1UL && lv->output_all_chains) {
      GtStr *chain_list;
      GtUword k;
      gt_assert(singlehit->chains != NULL);
      chain_list = gt_str_new();
      for (k = 0UL; k < gt_array_size(singlehit->chains); k++) {
        /* comma-separate all but the first entry */
        if (k > 0UL) {
          gt_str_append_char(chain_list, ',');
        }
        gt_str_append_cstr(chain_list, modelhit->modelname);
        gt_str_append_char(chain_list, ':');
        gt_str_append_ulong(chain_list,
                            *(GtUword*) gt_array_get(singlehit->chains, k));
      }
      gt_feature_node_set_attribute(match_fn, "chains",
                                    gt_str_get(chain_list));
      gt_str_delete(chain_list);
    }

    gt_feature_node_add_child(lv->ltr_retrotrans, match_fn);
  }

  /* the chains are not needed anymore */
  gt_array_delete(singlehit->chains);
  singlehit->chains = NULL;
  return 0;
}
/* Benchmarks random access decoding: decodes <amount> randomly chosen reads
   with <hcrd> and logs description, sequence and qualities of each of them.

   If <timer> is NULL, a timer is created (and started) for the duration of
   this function and deleted before returning; a timer passed in by the caller
   only gets a progress message and is left to the caller. Decoding stops at
   the first error. Returns 0 on success, otherwise the error code of
   gt_hcr_decoder_decode() (with <err> presumably set by it).

   NOTE(review): if the decoder reports 0 reads, max_rand wraps around to the
   largest unsigned long value -- confirm that callers rule this out. */
static int gt_compreads_decompress_benchmark(GtHcrDecoder *hcrd,
                                             unsigned long amount,
                                             GtTimer *timer,
                                             GtError *err)
{
  char qual[BUFSIZ] = {0},
       seq[BUFSIZ] = {0};
  int had_err = 0;
  unsigned long read_idx,
                count,
                max_rand = gt_hcr_decoder_num_of_reads(hcrd) - 1;
  GtTimer *own_timer = NULL; /* set iff the timer was created here */
  GtStr *timer_comment = gt_str_new_cstr("extracting ");
  GtStr *desc = gt_str_new();

  gt_str_append_ulong(timer_comment, amount);
  gt_str_append_cstr(timer_comment, " reads of ");
  gt_str_append_ulong(timer_comment, max_rand + 1);
  gt_str_append_cstr(timer_comment, "!");

  if (timer == NULL) {
    own_timer = gt_timer_new_with_progress_description("extract random reads");
    gt_timer_start(own_timer);
  }
  else {
    gt_timer_show_progress(timer, "extract random reads", stdout);
  }

  gt_log_log("%s",gt_str_get(timer_comment));

  /* stop at the first error instead of idling through the remaining rounds */
  for (count = 0; count < amount && !had_err; count++) {
    read_idx = gt_rand_max(max_rand);
    gt_log_log("get read: %lu", read_idx);
    had_err = gt_hcr_decoder_decode(hcrd, read_idx, seq, qual, desc, err);
    gt_log_log("%s",gt_str_get(desc));
    gt_log_log("%s",seq);
    gt_log_log("%s",qual);
  }

  gt_str_delete(timer_comment);
  gt_str_delete(desc);
  /* delete only what was created here; deciding by ownership rather than by
     the global showtime flag neither leaks the local timer nor deletes a
     timer the caller still owns */
  if (own_timer != NULL)
    gt_timer_delete(own_timer);
  return had_err;
}
/* Test routine with a thread-function style signature (<data> is unused):
   NUMBER_OF_SYMBOLS times, the decimal representation of a random number up
   to MAX_SYMBOL is passed to gt_symbol() and the returned symbol is asserted
   to equal that string. Always returns NULL. */
static void* test_symbol(GT_UNUSED void *data)
{
  GtUword round;
  GtStr *number = gt_str_new();

  for (round = 0; round < NUMBER_OF_SYMBOLS; round++) {
    const char *text;
    gt_str_reset(number);
    gt_str_append_ulong(number, gt_rand_max(MAX_SYMBOL));
    text = gt_str_get(number);
    /* called unconditionally, so it also happens with assertions disabled */
    gt_symbol(text);
    gt_assert(!strcmp(gt_symbol(text), text));
  }

  gt_str_delete(number);
  return NULL;
}
/* Translates <sequence> of length <length> and shows the translations as
   FASTA entries on arguments->outfp.

   Every translated character is appended to the string in <translations>
   which belongs to its frame. Afterwards, each non-empty translation is shown
   with the header "<desc> (<frame number><+|->)", where the frame number is
   1-based and the sign is '-' if <rev> is set, and then reset. Returns -1 if
   the translator reported an error (nothing is shown then), 0 otherwise. */
static int gt_seqtranslate_do_translation(GtTranslateArguments *arguments,
                                          const char *sequence,
                                          GtUword length,
                                          const char *desc,
                                          GtStr **translations,
                                          bool rev,
                                          GtError *err)
{
  GtCodonIterator *codon_iter;
  GtTranslator *translator;
  GtTranslatorStatus status;
  GtStr *header;
  char amino;
  unsigned int frame, frame_no;

  codon_iter = gt_codon_iterator_simple_new(sequence, length, err);
  translator = gt_translator_new(codon_iter);

  /* distribute the translated characters to their frames */
  for (status = gt_translator_next(translator, &amino, &frame, err);
       status == GT_TRANSLATOR_OK;
       status = gt_translator_next(translator, &amino, &frame, err)) {
    gt_str_append_char(translations[frame], amino);
  }

  gt_codon_iterator_delete(codon_iter);
  gt_translator_delete(translator);
  if (status == GT_TRANSLATOR_ERROR)
    return -1;

  /* show all non-empty frames */
  header = gt_str_new();
  for (frame_no = 0; frame_no < 3; frame_no++) {
    if (gt_str_length(translations[frame_no]) == 0)
      continue;
    gt_str_append_cstr(header, desc);
    gt_str_append_cstr(header, " (");
    gt_str_append_ulong(header, frame_no + 1);
    gt_str_append_cstr(header, rev ? "-" : "+");
    gt_str_append_cstr(header, ")");
    gt_fasta_show_entry(gt_str_get(header),
                        gt_str_get(translations[frame_no]),
                        gt_str_length(translations[frame_no]),
                        arguments->fasta_width,
                        arguments->outfp);
    gt_str_reset(translations[frame_no]);
    gt_str_reset(header);
  }
  gt_str_delete(header);

  return 0;
}
/* Creates an ID for <fn> by appending the number of IDs requested so far for
   its feature type (including this one) to the type name, e.g. the type
   followed by "1" for the first request. The new ID is registered for <fn>
   in the visitor's feature_node_to_unique_id_str map and returned. */
static GtStr* create_unique_id(GtGFF3Visitor *gff3_visitor, GtFeatureNode *fn)
{
  const char *feature_type;
  GtStr *unique_id;
  gt_assert(gff3_visitor && fn);

  feature_type = gt_feature_node_get_type(fn);

  /* count this request first, so the counter value is the new suffix */
  gt_string_distri_add(gff3_visitor->id_counter, feature_type);

  /* "<type><count>" */
  unique_id = gt_str_new_cstr(feature_type);
  gt_str_append_ulong(unique_id,
                      gt_string_distri_get(gff3_visitor->id_counter,
                                           feature_type));

  /* remember the ID for this feature node */
  gt_hashmap_add(gff3_visitor->feature_node_to_unique_id_str, fn, unique_id);

  return unique_id;
}
static int snp_annotator_classify_snp(GtSNPAnnotatorVisitor *sav, GtFeatureNode *mRNA, GtFeatureNode *snp, GtUword variant_pos, GtUword variant_idx, char variant_char, #ifndef NDEBUG GT_UNUSED char reference_char, #endif GT_UNUSED GtError *err) { int had_err = 0; char *mrnaseq; const char *variant_effect = NULL; gt_assert(mRNA && snp && sav); gt_log_log("processing variant char %c for SNP %s\n", variant_char, gt_feature_node_get_attribute(snp, "Dbxref")); mrnaseq = gt_hashmap_get(sav->rnaseqs, mRNA); gt_assert(mrnaseq); if (mrnaseq) { char codon[3], variant_codon[3]; GtStr *effect_string; char oldamino, newamino; GT_UNUSED GtUword mrnalen; GtUword startpos = variant_pos / GT_CODON_LENGTH, variantoffset = variant_pos % GT_CODON_LENGTH; mrnalen = strlen(mrnaseq); gt_assert(variant_pos < mrnalen); variant_codon[0] = codon[0] = mrnaseq[3*startpos]; variant_codon[1] = codon[1] = mrnaseq[3*startpos+1]; variant_codon[2] = codon[2] = mrnaseq[3*startpos+2]; variant_codon[variantoffset] = variant_char; #ifndef NDEBUG gt_assert(toupper(codon[variantoffset]) == toupper(reference_char)); #endif if (gt_trans_table_is_stop_codon(sav->tt, codon[0], codon[1], codon[2])) { if (gt_trans_table_is_stop_codon(sav->tt, variant_codon[0], variant_codon[1], variant_codon[2])) { variant_effect = gt_symbol(GT_SNP_SYNONYMOUS_STOP_EFFECT); } else { variant_effect = gt_symbol(GT_SNP_STOP_LOST_EFFECT); } } else { if (gt_trans_table_is_stop_codon(sav->tt, variant_codon[0], variant_codon[1], variant_codon[2])) { variant_effect = gt_symbol(GT_SNP_NONSENSE_EFFECT); } else { had_err = gt_trans_table_translate_codon(sav->tt, codon[0], codon[1], codon[2], &oldamino, err); if (!had_err) { had_err = gt_trans_table_translate_codon(sav->tt, variant_codon[0], variant_codon[1], variant_codon[2], &newamino, err); } if (!had_err) { if (newamino == oldamino) { variant_effect = gt_symbol(GT_SNP_SYNONYMOUS_AMINO_EFFECT); } else { variant_effect = gt_symbol(GT_SNP_MISSENSE_EFFECT); } } } } if (!had_err) { const 
char *var_attrib; gt_assert(variant_effect != NULL); if ((var_attrib = gt_feature_node_get_attribute(snp, GT_GVF_VARIANT_EFFECT))) { effect_string = gt_str_new_cstr(var_attrib); gt_str_append_cstr(effect_string, ","); gt_str_append_cstr(effect_string, variant_effect); } else { effect_string = gt_str_new_cstr(variant_effect); } gt_str_append_cstr(effect_string, " "); gt_str_append_ulong(effect_string, variant_idx); gt_str_append_cstr(effect_string, " "); gt_str_append_cstr(effect_string, gt_feature_node_get_type(mRNA)); gt_str_append_cstr(effect_string, " "); gt_str_append_cstr(effect_string, gt_feature_node_get_attribute(mRNA, GT_GFF_ID)); gt_feature_node_set_attribute(snp, GT_GVF_VARIANT_EFFECT, gt_str_get(effect_string)); gt_str_reset(effect_string); gt_str_delete(effect_string); } } return had_err; }
/* Stores the spliced alignment <sa> in the split file it belongs to and
   deletes it afterwards. <data> must point to a Store_in_subset_file_data.

   Alignments rejected by the filter are discarded right away. If
   <outputfilename> differs from the one of the previous call, all split files
   opened so far are closed and the new name is remembered. The split file is
   chosen by the alignment score or the coverage of <sa> (depending on the
   split mode); values above 1.0 are treated as 1.0 and a value of 1.0 goes
   into the last file. A split file is created on first use, named
   "<first gt_file_basename_length() chars of outputfilename>.<scr|cov>
   <from>-<to><file mode suffix>", and gets the XML leader.

   Returns 0 on success (also for filtered alignments) and -1, with <err>
   set, if the split file exists already and overwriting was not forced. */
static int store_in_subset_file(void *data, GthSA *sa,
                                const char *outputfilename, GtError *err)
{
  Store_in_subset_file_data *store_in_subset_file_data =
    (Store_in_subset_file_data*) data;
  double split_determing_percentage = 0.0;
  unsigned long filenum;
  /* initialized, so it is a valid (empty) string even if the switch below
     hits the default case with assertions compiled out */
  char filenamesuffix[4] = "";
  int had_err = 0;

  gt_error_check(err);

  /* filter before we do any further processing */
  if (gth_sa_filter_filter_sa(store_in_subset_file_data->sa_filter, sa)) {
    /* and free it afterwards */
    gth_sa_delete(sa);
    /* discard */
    return 0;
  }

  /* check whether we got a new output file to process */
  if (!store_in_subset_file_data->current_outputfilename) {
    store_in_subset_file_data->current_outputfilename =
      gt_cstr_dup(outputfilename);
  }
  else if (strcmp(store_in_subset_file_data->current_outputfilename,
                  outputfilename)) {
    /* close current output files */
    close_output_files(store_in_subset_file_data);
    /* replace the remembered name; leaving the freed pointer in place would
       make the next call compare against released memory */
    gt_free(store_in_subset_file_data->current_outputfilename);
    store_in_subset_file_data->current_outputfilename =
      gt_cstr_dup(outputfilename);
  }

  /* determine in which file the current sa needs to be put */
  switch (store_in_subset_file_data->gthsplitinfo->splitmode) {
    case ALIGNMENTSCORE_SPLIT:
      split_determing_percentage = gth_sa_score(sa);
      strcpy(filenamesuffix, "scr");
      break;
    case COVERAGE_SPLIT:
      split_determing_percentage = gth_sa_coverage(sa);
      strcpy(filenamesuffix, "cov");
      break;
    default: gt_assert(0);
  }
  gt_assert(split_determing_percentage >= 0.0);
  /* XXX: change into an assertion when coverage problem is fixed */
  if (split_determing_percentage > 1.0)
    split_determing_percentage = 1.0;

  if (split_determing_percentage == 1.0)
    filenum = store_in_subset_file_data->num_of_subset_files - 1;
  else {
    filenum = floor(split_determing_percentage * 100.0 /
                    store_in_subset_file_data->gthsplitinfo->range);
  }
  gt_assert(filenum < store_in_subset_file_data->num_of_subset_files);

  /* make sure the file exists and is open */
  if (!store_in_subset_file_data->subset_files[filenum]) {
    gt_assert(store_in_subset_file_data->subset_filenames[filenum] == NULL);
    store_in_subset_file_data->subset_filenames[filenum] = gt_str_new();
    gt_str_append_cstr_nt(store_in_subset_file_data->subset_filenames[filenum],
                          outputfilename,
                          gt_file_basename_length(outputfilename));
    gt_str_append_char(store_in_subset_file_data->subset_filenames[filenum],
                       '.');
    gt_str_append_cstr(store_in_subset_file_data->subset_filenames[filenum],
                       filenamesuffix);
    gt_str_append_ulong(store_in_subset_file_data->subset_filenames[filenum],
                        filenum *
                        store_in_subset_file_data->gthsplitinfo->range);
    gt_str_append_char(store_in_subset_file_data->subset_filenames[filenum],
                       '-');
    gt_str_append_ulong(store_in_subset_file_data->subset_filenames[filenum],
                        (filenum + 1) *
                        store_in_subset_file_data->gthsplitinfo->range);
    gt_str_append_cstr(store_in_subset_file_data->subset_filenames[filenum],
                       gt_file_mode_suffix(store_in_subset_file_data
                                           ->gthsplitinfo->file_mode));

    /* if not disabled by -force, check if file already exists */
    if (!store_in_subset_file_data->gthsplitinfo->force) {
      /* NOTE(review): if the file exists, the handle opened for reading here
         stays in subset_files[filenum] after the error is reported -- confirm
         that the cleanup of the caller copes with that */
      store_in_subset_file_data->subset_files[filenum] =
        gt_file_open(store_in_subset_file_data->gthsplitinfo->file_mode,
                     gt_str_get(store_in_subset_file_data
                                ->subset_filenames[filenum]), "r", NULL);
      if (store_in_subset_file_data->subset_files[filenum]) {
        gt_error_set(err, "file \"%s\" exists already. use option -%s to "
                     "overwrite",
                     gt_str_get(store_in_subset_file_data
                                ->subset_filenames[filenum]),
                     GT_FORCE_OPT_CSTR);
        had_err = -1;
      }
    }
    if (!had_err) {
      /* open split file for writing */
      store_in_subset_file_data->subset_files[filenum] =
        gt_file_xopen_file_mode(store_in_subset_file_data->gthsplitinfo
                                ->file_mode,
                                gt_str_get(store_in_subset_file_data
                                           ->subset_filenames[filenum]), "w");
      /* store XML header in file */
      gth_xml_show_leader(true,
                          store_in_subset_file_data->subset_files[filenum]);
    }
  }

  /* put it there */
  if (!had_err) {
    gth_xml_inter_sa_visitor_set_outfp(store_in_subset_file_data->sa_visitor,
                                       store_in_subset_file_data
                                       ->subset_files[filenum]);
    gth_sa_visitor_visit_sa(store_in_subset_file_data->sa_visitor, sa);
  }

  /* adjust counter */
  if (!had_err)
    store_in_subset_file_data->subset_file_sa_counter[filenum]++;

  /* and free it afterwards */
  gth_sa_delete(sa);

  return had_err;
}
static int split_fasta_file(const char *filename, unsigned long max_filesize, bool force, GtError *err) { GtFile *srcfp = NULL, *destfp = NULL; GtStr *destfilename = NULL; unsigned long filenum = 0, bytecount = 0, separator_pos; int read_bytes, had_err = 0; char buf[BUFSIZ]; gt_error_check(err); gt_assert(filename && max_filesize); /* open source file */ srcfp = gt_file_xopen(filename, "r"); gt_assert(srcfp); /* read start characters */ if ((read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) == 0) { gt_error_set(err, "file \"%s\" is empty", filename); had_err = -1; } bytecount += read_bytes; /* make sure the file is in fasta format */ if (!had_err && buf[0] != '>') { gt_error_set(err, "file is not in FASTA format"); had_err = -1; } if (!had_err) { /* open destination file */ destfilename = gt_str_new(); gt_str_append_cstr_nt(destfilename, filename, gt_file_basename_length(filename)); gt_str_append_char(destfilename, '.'); gt_str_append_ulong(destfilename, ++filenum); gt_str_append_cstr(destfilename, gt_file_mode_suffix(gt_file_mode(srcfp))); if (!(destfp = gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w", force, err))) { had_err = -1; } if (!had_err) gt_file_xwrite(destfp, buf, read_bytes); while (!had_err && (read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) != 0) { if (bytecount + read_bytes > max_filesize) { int offset = bytecount < max_filesize ? 
max_filesize - bytecount : 0; if ((separator_pos = buf_contains_separator(buf, offset, read_bytes))) { separator_pos--; gt_assert(separator_pos < read_bytes); if (separator_pos) gt_file_xwrite(destfp, buf, separator_pos); /* close current file */ gt_file_delete(destfp); /* open new file */ gt_str_reset(destfilename); gt_str_append_cstr_nt(destfilename, filename, gt_file_basename_length(filename)); gt_str_append_char(destfilename, '.'); gt_str_append_ulong(destfilename, ++filenum); gt_str_append_cstr(destfilename, gt_file_mode_suffix(gt_file_mode(srcfp))); if (!(destfp = gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w", force, err))) { had_err = -1; break; } bytecount = read_bytes - separator_pos; /* reset */ gt_assert(buf[separator_pos] == '>'); gt_file_xwrite(destfp, buf + separator_pos, read_bytes - separator_pos); continue; } } bytecount += read_bytes; gt_file_xwrite(destfp, buf, read_bytes); } } /* free */ gt_str_delete(destfilename); /* close current file */ gt_file_delete(destfp); /* close source file */ gt_file_delete(srcfp); return had_err; }
/* Extends <current_id> in place by ".<counter>", e.g. "name" becomes
   "name.1" for a <counter> of 1. */
static void make_unique_id_string(GtStr *current_id, GtUword counter)
{
  /* separator first, then the number */
  gt_str_append_char(current_id, '.');
  gt_str_append_ulong(current_id, counter);
}
/* Makes sure that <sequence_regions> contains a region node covering the
   genomic sequence <seqnum> of file <filenum> and registers the sequence ID
   used for it in the seqid store of <srf>.

   The range is the substring specification of <input> if one is used, the
   relative genomic range otherwise. It is shifted by the start of the range
   parsed from the sequence description (if srf->use_desc_ranges is set and
   parsing succeeds) or by 1 to make it 1-based.

   - If <sequenceid> is empty, or already in use while no offset was parsed,
     the ID "<basename of genomic file>|<seqnum + 1>" is made up and a new
     region node is created for it.
   - If <sequenceid> is not in use yet, a new region node is created for it.
   - Otherwise the range of the existing region node is joined with the new
     range. */
static void make_sequence_region(GtHashmap *sequence_regions,
                                 GtStr *sequenceid,
                                 GthRegionFactory *srf,
                                 GthInput *input,
                                 unsigned long filenum,
                                 unsigned long seqnum)
{
  bool has_offset = false,
       invent_id;
  GtRange range,
          descrange;
  GtGenomeNode *region = NULL;
  gt_assert(sequence_regions && sequenceid && srf && input);

  /* determine the range covered by the sequence */
  if (!gth_input_use_substring_spec(input)) {
    range = gth_input_get_relative_genomic_range(input, filenum, seqnum);
  }
  else {
    range.start = gth_input_genomic_substring_from(input);
    range.end = gth_input_genomic_substring_to(input);
  }

  /* try to get an offset from the description */
  if (srf->use_desc_ranges) {
    GtStr *description = gt_str_new();
    gth_input_get_genomic_description(input, description, filenum, seqnum);
    if (!gt_parse_description_range(gt_str_get(description), &descrange))
      has_offset = true;
    gt_str_delete(description);
  }

  if (has_offset)
    range = gt_range_offset(&range, descrange.start);
  else
    range = gt_range_offset(&range, 1); /* 1-based */

  invent_id = !gt_str_length(sequenceid) ||
              (gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid)) &&
               !has_offset);

  if (invent_id) {
    /* sequenceid is empty or exists already (and no offset has been parsed)
       -> make one up */
    GtStr *made_up_id;
    char *file_base;
    file_base = gt_basename(gth_input_get_genomic_filename(input, filenum));
    made_up_id = gt_str_new_cstr(file_base);
    gt_free(file_base);
    gt_str_append_char(made_up_id, '|');
    gt_str_append_ulong(made_up_id, seqnum + 1); /* 1-based */
    seqid_store_add(srf->seqid_store, filenum, seqnum, made_up_id,
                    GT_UNDEF_ULONG);
    gt_assert(!gt_cstr_table_get(srf->used_seqids, gt_str_get(made_up_id)));
    gt_cstr_table_add(srf->used_seqids, gt_str_get(made_up_id));
    region = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum,
                                                seqnum),
                                range.start, range.end);
    gt_hashmap_add(sequence_regions,
                   (void*) gt_cstr_table_get(srf->used_seqids,
                                             gt_str_get(made_up_id)),
                   region);
    gt_str_delete(made_up_id);
  }
  else if (!gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid))) {
    /* no sequence region with this id exists -> create one */
    gt_cstr_table_add(srf->used_seqids, gt_str_get(sequenceid));
    seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid,
                    has_offset ? descrange.start : GT_UNDEF_ULONG);
    region = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum,
                                                seqnum),
                                range.start, range.end);
    gt_hashmap_add(sequence_regions,
                   (void*) gt_cstr_table_get(srf->used_seqids,
                                             gt_str_get(sequenceid)),
                   region);
  }
  else {
    /* sequence region with this id exists already -> modify range */
    GtRange old_range,
            joined_range;
    region = gt_hashmap_get(sequence_regions, gt_str_get(sequenceid));
    gt_assert(region);
    old_range = gt_genome_node_get_range(region);
    joined_range = gt_range_join(&old_range, &range);
    gt_genome_node_set_range(region, &joined_range);
    seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid,
                    has_offset ? descrange.start : GT_UNDEF_ULONG);
  }
  gt_assert(region);
}