static int determine_outfp(void *data, GtError *err) { GtOutputFileInfo *ofi = (GtOutputFileInfo*) data; GtFileMode file_mode; int had_err = 0; gt_error_check(err); gt_assert(ofi); if (!gt_str_length(ofi->output_filename)) *ofi->outfp = NULL; /* no output file given -> use stdout */ else { /* outputfile given -> create generic file pointer */ gt_assert(!(ofi->gzip && ofi->bzip2)); if (ofi->gzip) file_mode = GT_FILE_MODE_GZIP; else if (ofi->bzip2) file_mode = GT_FILE_MODE_BZIP2; else file_mode = GT_FILE_MODE_UNCOMPRESSED; if (file_mode != GT_FILE_MODE_UNCOMPRESSED && strcmp(gt_str_get(ofi->output_filename) + gt_str_length(ofi->output_filename) - strlen(gt_file_mode_suffix(file_mode)), gt_file_mode_suffix(file_mode))) { gt_warning("output file '%s' doesn't have correct suffix '%s', appending " "it", gt_str_get(ofi->output_filename), gt_file_mode_suffix(file_mode)); gt_str_append_cstr(ofi->output_filename, gt_file_mode_suffix(file_mode)); } if (!ofi->force && gt_file_exists(gt_str_get(ofi->output_filename))) { gt_error_set(err, "file \"%s\" exists already, use option -%s to " "overwrite", gt_str_get(ofi->output_filename), GT_FORCE_OPT_CSTR); had_err = -1; } if (!had_err) { *ofi->outfp = gt_file_xopen_file_mode(file_mode, gt_str_get(ofi->output_filename), "w"); gt_assert(*ofi->outfp); } } return had_err; }
static int store_in_subset_file(void *data, GthSA *sa, const char *outputfilename, GtError *err) { Store_in_subset_file_data *store_in_subset_file_data = (Store_in_subset_file_data*) data; double split_determing_percentage = 0.0; unsigned long filenum; char filenamesuffix[4]; int had_err = 0; gt_error_check(err); /* filter before we do any further processing */ if (gth_sa_filter_filter_sa(store_in_subset_file_data->sa_filter, sa)) { /* and free it afterwards */ gth_sa_delete(sa); /* discard */ return 0; } /* check whether we got a new output file to process */ if (!store_in_subset_file_data->current_outputfilename) { store_in_subset_file_data->current_outputfilename = gt_cstr_dup(outputfilename); } else if (strcmp(store_in_subset_file_data->current_outputfilename, outputfilename)) { /* close current output files */ close_output_files(store_in_subset_file_data); gt_free(store_in_subset_file_data->current_outputfilename); } /* determine in which file the current sa needs to be put */ switch (store_in_subset_file_data->gthsplitinfo->splitmode) { case ALIGNMENTSCORE_SPLIT: split_determing_percentage = gth_sa_score(sa); strcpy(filenamesuffix, "scr"); break; case COVERAGE_SPLIT: split_determing_percentage = gth_sa_coverage(sa); strcpy(filenamesuffix, "cov"); break; default: gt_assert(0); } gt_assert(split_determing_percentage >= 0.0); /* XXX: change into an assertion when coverage problem is fixed */ if (split_determing_percentage > 1.0) split_determing_percentage = 1.0; if (split_determing_percentage == 1.0) filenum = store_in_subset_file_data->num_of_subset_files - 1; else { filenum = floor(split_determing_percentage * 100.0 / store_in_subset_file_data->gthsplitinfo->range); } gt_assert(filenum < store_in_subset_file_data->num_of_subset_files); /* make sure the file exists and is open */ if (!store_in_subset_file_data->subset_files[filenum]) { gt_assert(store_in_subset_file_data->subset_filenames[filenum] == NULL); store_in_subset_file_data->subset_filenames[filenum] = gt_str_new(); gt_str_append_cstr_nt(store_in_subset_file_data->subset_filenames[filenum], outputfilename, gt_file_basename_length(outputfilename)); gt_str_append_char(store_in_subset_file_data->subset_filenames[filenum], '.'); gt_str_append_cstr(store_in_subset_file_data->subset_filenames[filenum], filenamesuffix); gt_str_append_ulong(store_in_subset_file_data->subset_filenames[filenum], filenum * store_in_subset_file_data->gthsplitinfo->range); gt_str_append_char(store_in_subset_file_data->subset_filenames[filenum], '-'); gt_str_append_ulong(store_in_subset_file_data->subset_filenames[filenum], (filenum + 1) * store_in_subset_file_data->gthsplitinfo->range); gt_str_append_cstr(store_in_subset_file_data->subset_filenames[filenum], gt_file_mode_suffix(store_in_subset_file_data ->gthsplitinfo->file_mode)); /* if not disabled by -force, check if file already exists */ if (!store_in_subset_file_data->gthsplitinfo->force) { store_in_subset_file_data->subset_files[filenum] = gt_file_open(store_in_subset_file_data->gthsplitinfo->file_mode, gt_str_get(store_in_subset_file_data ->subset_filenames[filenum]), "r", NULL); if (store_in_subset_file_data->subset_files[filenum]) { gt_error_set(err, "file \"%s\" exists already. use option -%s to " "overwrite", gt_str_get(store_in_subset_file_data ->subset_filenames[filenum]), GT_FORCE_OPT_CSTR); had_err = -1; } } if (!had_err) { /* open split file for writing */ store_in_subset_file_data->subset_files[filenum] = gt_file_xopen_file_mode(store_in_subset_file_data->gthsplitinfo ->file_mode, gt_str_get(store_in_subset_file_data ->subset_filenames[filenum]), "w"); /* store XML header in file */ gth_xml_show_leader(true, store_in_subset_file_data->subset_files[filenum]); } } /* put it there */ if (!had_err) { gth_xml_inter_sa_visitor_set_outfp(store_in_subset_file_data->sa_visitor, store_in_subset_file_data ->subset_files[filenum]); gth_sa_visitor_visit_sa(store_in_subset_file_data->sa_visitor, sa); } /* adjust counter */ if (!had_err) store_in_subset_file_data->subset_file_sa_counter[filenum]++; /* and free it afterwards */ gth_sa_delete(sa); return had_err; }
static int split_fasta_file(const char *filename, unsigned long max_filesize, bool force, GtError *err) { GtFile *srcfp = NULL, *destfp = NULL; GtStr *destfilename = NULL; unsigned long filenum = 0, bytecount = 0, separator_pos; int read_bytes, had_err = 0; char buf[BUFSIZ]; gt_error_check(err); gt_assert(filename && max_filesize); /* open source file */ srcfp = gt_file_xopen(filename, "r"); gt_assert(srcfp); /* read start characters */ if ((read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) == 0) { gt_error_set(err, "file \"%s\" is empty", filename); had_err = -1; } bytecount += read_bytes; /* make sure the file is in fasta format */ if (!had_err && buf[0] != '>') { gt_error_set(err, "file is not in FASTA format"); had_err = -1; } if (!had_err) { /* open destination file */ destfilename = gt_str_new(); gt_str_append_cstr_nt(destfilename, filename, gt_file_basename_length(filename)); gt_str_append_char(destfilename, '.'); gt_str_append_ulong(destfilename, ++filenum); gt_str_append_cstr(destfilename, gt_file_mode_suffix(gt_file_mode(srcfp))); if (!(destfp = gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w", force, err))) { had_err = -1; } if (!had_err) gt_file_xwrite(destfp, buf, read_bytes); while (!had_err && (read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) != 0) { if (bytecount + read_bytes > max_filesize) { int offset = bytecount < max_filesize ? max_filesize - bytecount : 0; if ((separator_pos = buf_contains_separator(buf, offset, read_bytes))) { separator_pos--; gt_assert(separator_pos < read_bytes); if (separator_pos) gt_file_xwrite(destfp, buf, separator_pos); /* close current file */ gt_file_delete(destfp); /* open new file */ gt_str_reset(destfilename); gt_str_append_cstr_nt(destfilename, filename, gt_file_basename_length(filename)); gt_str_append_char(destfilename, '.'); gt_str_append_ulong(destfilename, ++filenum); gt_str_append_cstr(destfilename, gt_file_mode_suffix(gt_file_mode(srcfp))); if (!(destfp = gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w", force, err))) { had_err = -1; break; } bytecount = read_bytes - separator_pos; /* reset */ gt_assert(buf[separator_pos] == '>'); gt_file_xwrite(destfp, buf + separator_pos, read_bytes - separator_pos); continue; } } bytecount += read_bytes; gt_file_xwrite(destfp, buf, read_bytes); } } /* free */ gt_str_delete(destfilename); /* close current file */ gt_file_delete(destfp); /* close source file */ gt_file_delete(srcfp); return had_err; }