/* Encode the sequence source of <bs> into index files named after
   <bioseq_indexname>.

   If <bs> reads from a regular file, that file is handed to the encoder
   directly and a signal handler is installed for the duration of the encoding
   so that incomplete files can be removed upon termination. If <bs> reads
   from stdin, the input is first copied into a temporary file, because the
   encoder is given file names only.

   Returns the result of gt_encseq_encoder_encode(), or -1 with <err> set if
   copying stdin into the temporary file failed. */
static int construct_bioseq_files(GtBioseq *bs, GtStr *bioseq_indexname,
                                  GtError *err)
{
  GtStr *sequence_filename;
  GtEncseqEncoder *ee;
  GtStrArray *indexfn;
  int had_err = 0;

  gt_error_check(err);

  /* register the signal handler to remove incomplete files upon termination */
  if (!bs->use_stdin) {
    gt_bioseq_index_filename = gt_str_get(bs->sequence_file);
    gt_sig_register_all(remove_bioseq_files);
  }

  /* if stdin is used as input, we need to create a tempfile containing the
     sequence as GtEncseq cannot be built from stdin directly */
  if (bs->use_stdin) {
    GtStr *tmpfilename = gt_str_new();
    FILE *tmpfile = gt_xtmpfp(tmpfilename);
    char buf[BUFSIZ];
    size_t nread;

    gt_assert(tmpfile);
    while ((nread = fread(buf, 1, sizeof (buf), stdin)) > 0) {
      /* a short write (e.g. disk full) must not go unnoticed, otherwise the
         index would be built from a truncated copy of the input */
      if (fwrite(buf, 1, nread, tmpfile) != nread) {
        gt_error_set(err, "cannot write sequence data read from stdin to "
                          "temporary file \"%s\"", gt_str_get(tmpfilename));
        had_err = -1;
        break;
      }
    }
    /* fread() returns 0 for both end-of-file and read errors; tell them
       apart */
    if (!had_err && ferror(stdin)) {
      gt_error_set(err, "error while reading sequence data from stdin");
      had_err = -1;
    }
    gt_fa_xfclose(tmpfile);

    if (had_err) {
      /* no signal handler was registered on the stdin path, so dropping the
         incomplete copy and its name is all that is left to do */
      (void) remove(gt_str_get(tmpfilename));
      gt_str_delete(tmpfilename);
      return had_err;
    }
    sequence_filename = tmpfilename;
  }
  else {
    sequence_filename = gt_str_ref(bs->sequence_file);
  }
  gt_assert(gt_str_length(sequence_filename) > 0);

  ee = gt_encseq_encoder_new();
  gt_encseq_encoder_enable_description_support(ee);
  gt_encseq_encoder_enable_md5_support(ee);
  gt_encseq_encoder_enable_multiseq_support(ee);
  gt_encseq_encoder_enable_lossless_support(ee);

  indexfn = gt_str_array_new();
  gt_str_array_add(indexfn, sequence_filename);
  gt_str_delete(sequence_filename);
  had_err = gt_encseq_encoder_encode(ee, indexfn,
                                     gt_str_get(bioseq_indexname), err);

  /* unregister the signal handler */
  if (!bs->use_stdin)
    gt_sig_unregister_all();

  gt_str_array_delete(indexfn);
  gt_encseq_encoder_delete(ee);
  return had_err;
}
/* Create a new left-border output buffer labelled <name>.

   The buffer is backed by a freshly created temporary file (its name is
   stored in the outfilename member) and starts with room for 1024 uint32_t
   entries. The size of that initial workspace is reported to <fcsl> under
   <name>. The caller owns the returned object. */
GtLeftborderOutbuffer *gt_leftborderbuffer_new(const char *name,
                                               GtFirstcodesspacelog *fcsl)
{
  GtLeftborderOutbuffer *outbuf = gt_malloc(sizeof (*outbuf));
  size_t workspace_bytes;

  /* counters and capacity */
  outbuf->totalwrite = 0;
  outbuf->nextfree = 0;
  outbuf->allocated = 1024UL;
  workspace_bytes = sizeof (*outbuf->spaceuint32_t) * outbuf->allocated;

  /* backing temporary file; gt_xtmpfp() is handed the string that holds
     the file name */
  outbuf->outfilename = gt_str_new();
  outbuf->fp = gt_xtmpfp(outbuf->outfilename);

  outbuf->name = gt_str_new_cstr(name);

  /* in-memory staging area, accounted for in the space log */
  outbuf->spaceuint32_t = gt_malloc(workspace_bytes);
  GT_FCI_ADDWORKSPACE(fcsl, name, workspace_bytes);

  return outbuf;
}
/* Write the table referenced by <usedptrptr> to a newly created temporary
   file and release the in-memory copy.

   <type> selects which member of <usedptrptr> is valid. numofunits elements
   of sizeofunit bytes each (both taken from <sfxmappedrange>) are written.
   Afterwards the table is freed, the caller's pointer is set to NULL, and
   <sfxmappedrange> remembers the file name, the address of the caller's
   pointer and the <writable> flag.

   The file is closed with gt_fa_xfclose() rather than gt_fa_fclose(): the
   in-memory table is gone by then, so a failure to flush the data to disk
   must not pass silently. */
static void gt_Sfxmappedrange_storetmp(GtSfxmappedrange *sfxmappedrange,
                                       GtSfxStoretype usedptrptr,
                                       GtSfxmappedrangetype type,
                                       bool writable)
{
  FILE *outfp;

  gt_assert(sfxmappedrange != NULL);
  sfxmappedrange->ptr = NULL;
  sfxmappedrange->filename = gt_str_new();
  sfxmappedrange->writable = writable;
  outfp = gt_xtmpfp(sfxmappedrange->filename);
  gt_assert(outfp != NULL);
  gt_log_log("write %s to file %s ("GT_WU" units of "GT_WU" bytes)",
             gt_str_get(sfxmappedrange->tablename),
             gt_str_get(sfxmappedrange->filename),
             (GtUword) sfxmappedrange->numofunits,
             (GtUword) sfxmappedrange->sizeofunit);

  /* the three cases differ only in the union member (and hence the pointer
     type) being used */
  switch (type)
  {
    case GtSfxGtBitsequence:
      gt_xfwrite(*(usedptrptr.bs), sfxmappedrange->sizeofunit,
                 sfxmappedrange->numofunits, outfp);
      sfxmappedrange->usedptrptr = (void**) usedptrptr.bs;
      gt_free(*(usedptrptr.bs));
      *(usedptrptr.bs) = NULL;
      break;
    case GtSfxunsignedlong:
      gt_xfwrite(*(usedptrptr.ulong), sfxmappedrange->sizeofunit,
                 sfxmappedrange->numofunits, outfp);
      sfxmappedrange->usedptrptr = (void**) usedptrptr.ulong;
      gt_free(*(usedptrptr.ulong));
      *(usedptrptr.ulong) = NULL;
      break;
    case GtSfxuint32_t:
      gt_xfwrite(*(usedptrptr.uint32), sfxmappedrange->sizeofunit,
                 sfxmappedrange->numofunits, outfp);
      sfxmappedrange->usedptrptr = (void**) usedptrptr.uint32;
      gt_free(*(usedptrptr.uint32));
      *(usedptrptr.uint32) = NULL;
      break;
  }
  gt_fa_xfclose(outfp);
}
/* Unit test for the PBS (primer binding site) search.

   A small two-entry tRNA library is written to a temporary file, a synthetic
   640 bp sequence with one PBS on each strand is searched with gt_pbs_find(),
   and both ranked hits are checked for their expected properties. The
   temporary file is removed again at the end.

   Returns <had_err>, which starts out as 0 and is handed to every ensure()
   check.

   NOTE(review): ensure() is defined elsewhere; this function relies on it
   not evaluating its expression once <had_err> is set. Statements outside of
   ensure() that dereference <res> or <hit>, or that copy a range reported by
   a hit, are therefore guarded with !had_err explicitly so that a failed
   check is reported instead of being followed by an invalid access. */
int gt_pbs_unit_test(GtError *err)
{
  int had_err = 0;
  GtLTRElement element;
  GtPBSOptions o;
  GtStr *tmpfilename;
  FILE *tmpfp;
  GtPBSResults *res;
  GtPBSHit *hit = NULL;
  double score1 = 0.0, score2 = 0.0;
  GtRange rng;
  char *rev_seq,
       *seq,
       tmp[BUFSIZ];
  const char *fullseq =           "aaaaaaaaaaaaaaaaaaaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                                   "acatactaggatgctag" /* <- PBS forward */
                                     "aatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatag"
                                   /* PBS reverse -> */ "gatcctaaggctac"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                                  "aaaaaaaaaaaaaaaaaaaa";

  /* notice previous errors */
  gt_error_check(err);

  rng.start = 0;
  rng.end = 0;

  /* create temporary tRNA library file */
  tmpfilename = gt_str_new();
  tmpfp = gt_xtmpfp(tmpfilename);
  fprintf(tmpfp, ">test1\nccccccccccccccctagcatcctagtatgtccc\n"
                 ">test2\ncccccccccgatcctagggctaccctttc\n");
  gt_fa_xfclose(tmpfp);
  ensure(had_err, gt_file_exists(gt_str_get(tmpfilename)));

  /* setup testing parameters */
  o.radius = 30;
  o.max_edist = 1;
  o.alilen.start = 11;
  o.alilen.end = 30;
  o.offsetlen.start = 0;
  o.offsetlen.end = 5;
  o.trnaoffsetlen.start = 0;
  o.trnaoffsetlen.end = 40;
  o.ali_score_match = 5;
  o.ali_score_mismatch = -10;
  o.ali_score_insertion = o.ali_score_deletion = -20;
  o.trna_lib = gt_bioseq_new(gt_str_get(tmpfilename), err);

  ensure(had_err, gt_bioseq_number_of_sequences(o.trna_lib) == 2);

  element.leftLTR_5 = 20;
  element.leftLTR_3 = 119;
  element.rightLTR_5 = 520;
  element.rightLTR_3 = 619;

  /* setup sequences: both buffers hold positions 20..619 of fullseq, the
     second one is reverse complemented in place */
  seq     = gt_malloc(600 * sizeof (char));
  rev_seq = gt_malloc(600 * sizeof (char));
  memcpy(seq,     fullseq + 20, 600);
  memcpy(rev_seq, fullseq + 20, 600);
  gt_reverse_complement(rev_seq, 600, err);

  /* try to find PBS in sequences */
  res = gt_pbs_find(seq, rev_seq, &element, &o, err);
  ensure(had_err, res != NULL);
  ensure(had_err, gt_pbs_results_get_number_of_hits(res) == 2);

  /* check first hit on forward strand */
  if (!had_err)
    hit = gt_pbs_results_get_ranked_hit(res, 0);
  ensure(had_err, hit != NULL);
  ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 17);
  ensure(had_err, gt_pbs_hit_get_edist(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_tstart(hit) == 3);
  ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test1") == 0);
  if (!had_err)
    rng = gt_pbs_hit_get_coords(hit);
  ensure(had_err, rng.start == 120);
  ensure(had_err, rng.end == 136);
  if (!had_err)
    score1 = gt_pbs_hit_get_score(hit);
  ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_FORWARD);
  /* clear the whole buffer so the copied range is always terminated */
  memset(tmp, 0, sizeof (tmp));
  /* only copy a range that passed the checks above; an unverified range
     could exceed tmp or fullseq */
  if (!had_err)
    memcpy(tmp, fullseq + (rng.start * sizeof (char)),
           (rng.end - rng.start + 1) * sizeof (char));
  ensure(had_err, strcmp(tmp, "acatactaggatgctag" ) == 0);

  /* check second hit on reverse strand */
  if (!had_err)
    hit = gt_pbs_results_get_ranked_hit(res, 1);
  ensure(had_err, hit != NULL);
  ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 14);
  ensure(had_err, gt_pbs_hit_get_edist(hit) == 1);
  ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_tstart(hit) == 6);
  ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test2") == 0);
  if (!had_err)
    rng = gt_pbs_hit_get_coords(hit);
  ensure(had_err, rng.start == 506);
  ensure(had_err, rng.end == 519);
  if (!had_err)
    score2 = gt_pbs_hit_get_score(hit);
  /* the first ranked hit must score strictly higher than the second */
  ensure(had_err, gt_double_compare(score1, score2) > 0);
  ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_REVERSE);
  memset(tmp, 0, sizeof (tmp));
  if (!had_err)
    memcpy(tmp, fullseq + (rng.start * sizeof (char)),
           (rng.end - rng.start + 1) * sizeof (char));
  ensure(had_err, strcmp(tmp, "gatcctaaggctac" ) == 0);

  /* clean up */
  gt_xremove(gt_str_get(tmpfilename));
  ensure(had_err, !gt_file_exists(gt_str_get(tmpfilename)));
  gt_str_delete(tmpfilename);
  gt_bioseq_delete(o.trna_lib);
  gt_free(rev_seq);
  gt_free(seq);
  gt_pbs_results_delete(res);

  return had_err;
}
/* Tool runner: build a compressed bit sequence, write it to a temporary
   file, read it back and optionally compare both against the plain bits.

   The plain bit vector is either read from arguments->filename (a bit count
   followed by the words holding the bits) or generated in memory with
   arguments->size words, filled with a fixed pattern XORed with a random
   number or with the word index. If arguments->check_consistency is set,
   every bit of the in-memory and the re-read compressed sequence is asserted
   to equal the original bit.

   Returns 0 on success and a negative value on failure. A short read from the
   input file is reported through <err>; previously it only set the return
   code and left <err> empty, so the failure came without any message.

   NOTE(review): the temporary file named in <filename> is not removed here;
   confirm whether that is intended (its name is only logged). */
static int gt_compressedbits_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtCompressdbitsArguments *arguments = tool_arguments;
  int had_err = 0;
  unsigned long idx;
  unsigned long long num_of_bits = 0ULL;
  GtBitsequence *bits = NULL;
  GtCompressedBitsequence *cbs = NULL,
                          *read_cbs = NULL;
  GtStr *filename = gt_str_new();
  FILE *fp = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  gt_assert(argc == parsed_args);

  if (gt_option_is_set(arguments->filename_op)) {
    FILE *file = NULL;
    gt_assert(arguments->filename != NULL);

    file = gt_xfopen(gt_str_get(arguments->filename), "r");
    /* header: number of bits stored in the file */
    if ((size_t) 1 != gt_xfread(&num_of_bits,
                                sizeof (num_of_bits), (size_t) 1, file)) {
      gt_error_set(err, "could not read the number of bits from file \"%s\"",
                   gt_str_get(arguments->filename));
      had_err = -1;
    }
    if (!had_err) {
      gt_log_log("bits to read: %llu", num_of_bits);
      arguments->size = (unsigned long) GT_NUMOFINTSFORBITS(num_of_bits);
      bits = gt_malloc(sizeof (*bits) * arguments->size);
      /* payload: the words holding the bits */
      if ((size_t) arguments->size !=
          gt_xfread(bits, sizeof (*bits), (size_t) arguments->size, file)) {
        gt_error_set(err, "could not read %lu words of bit data from file "
                          "\"%s\"", arguments->size,
                     gt_str_get(arguments->filename));
        had_err = -1;
      }
    }
    gt_xfclose(file);
  }
  else {
    bits = gt_calloc(sizeof (*bits), (size_t) arguments->size);
    num_of_bits = (unsigned long long) (GT_INTWORDSIZE * arguments->size);

    if (arguments->fill_random) {
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] =
          (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ gt_rand_max(ULONG_MAX));
      }
    }
    else {
      for (idx = 0; idx < arguments->size; idx++)
        bits[idx] = (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ idx);
    }
  }

  if (!had_err) {
    /* the temporary file is only created to obtain a file name; it is
       closed right away and written to by name below */
    fp = gt_xtmpfp(filename);
    gt_fa_xfclose(fp);
    fp = NULL;
    gt_log_log("filename: %s", gt_str_get(filename));
    gt_log_log("size in words: %lu", arguments->size);
    cbs = gt_compressed_bitsequence_new(
                            bits, arguments->samplerate,
                            (unsigned long) num_of_bits);
    gt_log_log("original size in MB: %2.3f",
               (sizeof (*bits) * arguments->size) / (1024.0 * 1024.0));
    gt_log_log("compressed size in MB: %2.3f",
               gt_compressed_bitsequence_size(cbs) / (1024.0 * 1024.0));
    gt_log_log("popcount table size thereof in MB: %2.3f",
               gt_popcount_tab_calculate_size(15U) / (1024.0 * 1024.0));
    had_err = gt_compressed_bitsequence_write(cbs, gt_str_get(filename), err);
  }
  if (!had_err) {
    read_cbs =
      gt_compressed_bitsequence_new_from_file(gt_str_get(filename), err);
    if (read_cbs == NULL)
      had_err = -1;
  }
  if (!had_err && bits != NULL && arguments->check_consistency) {
    for (idx = 0; (unsigned long long) idx < num_of_bits; ++idx) {
      int GT_UNUSED bit = gt_compressed_bitsequence_access(read_cbs, idx);
      int GT_UNUSED original = GT_ISIBITSET(bits, idx) ? 1 : 0;
      gt_assert(gt_compressed_bitsequence_access(cbs, idx) == bit);
      gt_assert(original == bit);
    }
  }
  gt_compressed_bitsequence_delete(cbs);
  gt_compressed_bitsequence_delete(read_cbs);
  gt_free(bits);
  gt_str_delete(filename);
  return had_err;
}