/*
 * Verify the intermediate output written to <outputfilename> by reading it
 * back and comparing the result against <orig_sa_collection>.
 *
 * The handle in <*outfp> is closed and the file is reopened for reading with
 * the same file mode; the new handle is stored back into <*outfp> and is NOT
 * closed by this function.
 *
 * If parsing the intermediate output fails, the error message is printed to
 * stderr and the process exits with EXIT_FAILURE.
 *
 * Returns the result of gth_sa_collections_are_equal() applied to the
 * original collection and the one that was read back.
 */
bool gth_intermediate_output_is_correct(char *outputfilename,
                                        GthSACollection *orig_sa_collection,
                                        GthInput *input,
                                        GtFile **outfp,
                                        GtError *err)
{
  SACollectionData parse_sink;
  GthSACollection *reread_collection;
  GtFileMode saved_mode;
  bool collections_match;
#ifndef NDEBUG
  GtUword numofgenomicfiles, numofreferencefiles;
#endif

  gt_error_check(err);
  gt_assert(outputfilename);
  gt_assert(*outfp);

#ifndef NDEBUG
  /* remember the file counts so we can check below that parsing did not
     change them */
  numofgenomicfiles = gth_input_num_of_gen_files(input);
  numofreferencefiles = gth_input_num_of_ref_files(input);
#endif

  /* set up the collection the parser stores into; no filter, no statistics */
  reread_collection = gth_sa_collection_new(GTH_DC_NONE);
  parse_sink.sa_collection = reread_collection;
  parse_sink.sa_filter = NULL;
  parse_sink.stat = NULL;

  /* switch the output file from writing to reading, keeping its file mode */
  saved_mode = gt_file_mode(*outfp);
  gt_file_delete(*outfp);
  *outfp = gt_file_xopen_file_mode(saved_mode, outputfilename, "r");
  gt_assert(*outfp);

  /* parse the intermediate output back in; a parse error is fatal */
  if (gt_parse_intermediate_output(input, store_in_sa_collection, &parse_sink,
                                   outputfilename, *outfp, err)) {
    fprintf(stderr, "error: %s\n", gt_error_get(err));
    exit(EXIT_FAILURE);
  }

  /* array of genomic files did not grow */
  gt_assert(numofgenomicfiles == gth_input_num_of_gen_files(input));
  /* array of reference files did not grow */
  gt_assert(numofreferencefiles == gth_input_num_of_ref_files(input));

  /* compare what was written with what was read back */
  collections_match = gth_sa_collections_are_equal(orig_sa_collection,
                                                   reread_collection);

  gth_sa_collection_delete(reread_collection);

  return collections_match;
}
static int split_fasta_file(const char *filename, unsigned long max_filesize, bool force, GtError *err) { GtFile *srcfp = NULL, *destfp = NULL; GtStr *destfilename = NULL; unsigned long filenum = 0, bytecount = 0, separator_pos; int read_bytes, had_err = 0; char buf[BUFSIZ]; gt_error_check(err); gt_assert(filename && max_filesize); /* open source file */ srcfp = gt_file_xopen(filename, "r"); gt_assert(srcfp); /* read start characters */ if ((read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) == 0) { gt_error_set(err, "file \"%s\" is empty", filename); had_err = -1; } bytecount += read_bytes; /* make sure the file is in fasta format */ if (!had_err && buf[0] != '>') { gt_error_set(err, "file is not in FASTA format"); had_err = -1; } if (!had_err) { /* open destination file */ destfilename = gt_str_new(); gt_str_append_cstr_nt(destfilename, filename, gt_file_basename_length(filename)); gt_str_append_char(destfilename, '.'); gt_str_append_ulong(destfilename, ++filenum); gt_str_append_cstr(destfilename, gt_file_mode_suffix(gt_file_mode(srcfp))); if (!(destfp = gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w", force, err))) { had_err = -1; } if (!had_err) gt_file_xwrite(destfp, buf, read_bytes); while (!had_err && (read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) != 0) { if (bytecount + read_bytes > max_filesize) { int offset = bytecount < max_filesize ? 
max_filesize - bytecount : 0; if ((separator_pos = buf_contains_separator(buf, offset, read_bytes))) { separator_pos--; gt_assert(separator_pos < read_bytes); if (separator_pos) gt_file_xwrite(destfp, buf, separator_pos); /* close current file */ gt_file_delete(destfp); /* open new file */ gt_str_reset(destfilename); gt_str_append_cstr_nt(destfilename, filename, gt_file_basename_length(filename)); gt_str_append_char(destfilename, '.'); gt_str_append_ulong(destfilename, ++filenum); gt_str_append_cstr(destfilename, gt_file_mode_suffix(gt_file_mode(srcfp))); if (!(destfp = gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w", force, err))) { had_err = -1; break; } bytecount = read_bytes - separator_pos; /* reset */ gt_assert(buf[separator_pos] == '>'); gt_file_xwrite(destfp, buf + separator_pos, read_bytes - separator_pos); continue; } } bytecount += read_bytes; gt_file_xwrite(destfp, buf, read_bytes); } } /* free */ gt_str_delete(destfilename); /* close current file */ gt_file_delete(destfp); /* close source file */ gt_file_delete(srcfp); return had_err; }