/*
 * Sort the entries of an already-open index file and write them back to the
 * same stream.
 *
 * index_filename  not used by this function; kept so the signature stays
 *                 compatible with existing callers.
 * index_file      stream that is read from its start, truncated to zero
 *                 length and rewritten; it therefore has to be open for both
 *                 reading and writing.
 *
 * Returns SUCCESS when the sorted index was written back. Returns FAILURE when
 * the index cannot be parsed, contains no entries, cannot be truncated, or
 * ffindex_write() does not report FFINDEX_OK.
 */
int ffindex_sort_index(char *index_filename, FILE *index_file)
{
  (void) index_filename;

  rewind(index_file);
  ffindex_index_t* index = ffindex_index_parse(index_file, 0);
  if(index == NULL)
    return FAILURE;

  int ret = FAILURE;
  if(index->n_entries != 0)
  {
    ffindex_sort_index_file(index);

    if(ftruncate(fileno(index_file), 0) == 0)
    {
      /* ftruncate() does not move the file offset, and a stream must be
       * repositioned when switching from reading to writing. Without this
       * the rewritten index could start at the old read position. */
      rewind(index_file);

      if(ffindex_write(index, index_file) == FFINDEX_OK)
        ret = SUCCESS;
    }
  }

  ffindex_index_free(index);
  return ret;
}
int main(int argc, char **argv) { bool iflag, dflag, oflag = false; std::string ffindex_sequence_db_prefix; std::string output; std::string input; int c; while ((c = getopt(argc, argv, "i:d:o:h")) != -1) { switch (c) { case 'i': iflag = 1; input = optarg; break; case 'd': dflag = 1; ffindex_sequence_db_prefix = optarg; break; case 'o': oflag = optarg; output = optarg; break; case 'h': usage(); exit(0); case '?': if (optopt == 'c') fprintf(stderr, "Option -%c requires an argument.\n", optopt); else if (isprint(optopt)) fprintf(stderr, "Unknown option `-%c'.\n", optopt); else fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt); return 1; default: abort(); } } if(!iflag || !dflag || !oflag) { usage(); exit(0); } //prepare ffindex_database std::string sequenceDataFile = ffindex_sequence_db_prefix+".ffdata"; std::string sequenceIndexFile = ffindex_sequence_db_prefix+".ffindex"; FILE *sequence_data_fh = fopen(sequenceDataFile.c_str(), "r"); FILE *sequence_index_fh = fopen(sequenceIndexFile.c_str(), "r"); if (sequence_data_fh == NULL) { std::cerr << "ERROR: Could not open ffindex sequence data file! (" << sequenceDataFile << ")!" << std::endl; exit(1); } if(sequence_index_fh == NULL) { std::cerr << "ERROR: Could not open ffindex sequence index file! (" << sequenceIndexFile << ")!" << std::endl; exit(1); } size_t sequence_data_size; char* sequence_data = ffindex_mmap_data(sequence_data_fh, &sequence_data_size); ffindex_index_t* sequence_index = ffindex_index_parse(sequence_index_fh, 80000000); if(sequence_index == NULL) { std::cerr << "ERROR: Sequence index could not be loaded!" 
<< std::endl; exit(1); } //prepare input stream std::istream* in; if (input.compare("stdin") != 0) { in = new std::ifstream(input.c_str(), std::ios::binary | std::ios::in); } else { in = &std::cin; } std::stringstream* out_buffer = new std::stringstream(); int ret = compressed_a3m::compress_a3m(in, sequence_index, sequence_data, out_buffer); if(ret) { //prepare output if (output.compare("stdout") != 0) { std::ofstream out(output.c_str(), std::ios::binary | std::ios::out); out << out_buffer->str(); out.close(); } else { std::cout << out_buffer->str(); } return 0; } else { std::cerr << "ERROR: Could not compress A3M! ("<< input << ")" << std::endl; return 1; } }
int main(int argn, char **argv) { int mpi_error, mpi_rank, mpi_num_procs; mpi_error = MPI_Init(&argn, &argv); mpi_error = MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); mpi_error = MPI_Comm_size(MPI_COMM_WORLD, &mpi_num_procs); int opt; char *data_filename_out = NULL, *index_filename_out = NULL; while ((opt = getopt(argn, argv, "d:i:")) != -1) { switch (opt) { case 'd': data_filename_out = optarg; break; case 'i': index_filename_out = optarg; break; } } if(argn - optind < 3) { fprintf(stderr, "Not enough arguments %d.\n", optind - argn); fprintf(stderr, "USAGE: %s -d DATA_FILENAME_OUT -i INDEX_FILENAME_OUT DATA_FILENAME INDEX_FILENAME -- PROGRAM [PROGRAM_ARGS]*\n" "\nDesigned and implemented by Andy Hauser <*****@*****.**>.\n", basename(argv[0])); return -1; } read_buffer = malloc(400 * 1024 * 1024); char *data_filename = argv[optind++]; char *index_filename = argv[optind++]; char *program_name = argv[optind]; char **program_argv = argv + optind; FILE *data_file = fopen(data_filename, "r"); FILE *index_file = fopen(index_filename, "r"); if( data_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], data_filename); exit(EXIT_FAILURE); } if(index_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], index_filename); exit(EXIT_FAILURE); } FILE *data_file_out = NULL, *index_file_out = NULL; // Setup one output FFindex for each MPI process if(data_filename_out != NULL && index_filename_out != NULL) { char* data_filename_out_rank = malloc(FILENAME_MAX); char* index_filename_out_rank = malloc(FILENAME_MAX); snprintf( data_filename_out_rank, FILENAME_MAX, "%s.%d", data_filename_out, mpi_rank); snprintf(index_filename_out_rank, FILENAME_MAX, "%s.%d", index_filename_out, mpi_rank); data_file_out = fopen(data_filename_out_rank, "w+"); index_file_out = fopen(index_filename_out_rank, "w+"); if( data_file_out == NULL) { fferror_print(__FILE__, __LINE__, argv[0], data_filename_out); exit(EXIT_FAILURE); } if(index_file_out == NULL) { fferror_print(__FILE__, __LINE__, argv[0], 
index_filename_out); exit(EXIT_FAILURE); } } int capture_stdout = (data_file_out != NULL); size_t data_size; char *data = ffindex_mmap_data(data_file, &data_size); ffindex_index_t* index = ffindex_index_parse(index_file, 0); if(index == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_index_parse", index_filename); MPI_Finalize(); exit(EXIT_FAILURE); } // Ignore SIGPIPE struct sigaction handler; handler.sa_handler = SIG_IGN; sigemptyset(&handler.sa_mask); handler.sa_flags = 0; sigaction(SIGPIPE, &handler, NULL); size_t batch_size, range_start, range_end; if(index->n_entries >= mpi_num_procs) batch_size = index->n_entries / mpi_num_procs; else batch_size = 0; range_start = mpi_rank * batch_size; range_end = range_start + batch_size; size_t offset = 0; // Foreach entry if(batch_size > 0) for(size_t entry_index = range_start; entry_index < range_end; entry_index++) { ffindex_entry_t* entry = ffindex_get_entry_by_index(index, entry_index); if(entry == NULL) { perror(entry->name); return errno; } int error = ffindex_apply_by_entry(data, index, entry, program_name, program_argv, data_file_out, index_file_out, &offset); if(error != 0) { perror(entry->name); break; } } ssize_t left_over = index->n_entries - (batch_size * mpi_num_procs); if(mpi_rank < left_over) { size_t left_over_entry_index = (batch_size * mpi_num_procs) + mpi_rank; ffindex_entry_t* entry = ffindex_get_entry_by_index(index, left_over_entry_index); if(entry == NULL) { perror(entry->name); return errno; } //fprintf(stderr, "handling left over: %ld\n", left_over_entry_index); int error = ffindex_apply_by_entry(data, index, entry, program_name, program_argv, data_file_out, index_file_out, &offset); if(error != 0) perror(entry->name); } if(capture_stdout) fclose(data_file_out); if(index_file_out != NULL) fclose(index_file_out); MPI_Barrier(MPI_COMM_WORLD); // merge FFindexes in master if(data_filename_out != NULL && mpi_rank == 0) { char* merge_command = malloc(FILENAME_MAX * 5); for(int i = 0; i < 
mpi_num_procs; i++) { snprintf( merge_command, FILENAME_MAX, "ffindex_build -as %s %s -d %s.%d -i %s.%d", data_filename_out, index_filename_out, data_filename_out, i, index_filename_out, i); //puts(merge_command); system(merge_command); } } MPI_Finalize(); return EXIT_SUCCESS; }
int main(int argn, char **argv) { int sort = 0, unlink = 0, version = 0; int err = EXIT_SUCCESS; char* list_filenames[MAX_FILENAME_LIST_FILES]; size_t list_filenames_index = 0; static struct option long_options[] = { { "file", required_argument, NULL, 'f' }, { "sort", no_argument, NULL, 's' }, { "unlink", no_argument, NULL, 'u' }, { "version", no_argument, NULL, 'v' }, { NULL, 0, NULL, 0 } }; int opt; while (1) { int option_index = 0; opt = getopt_long(argn, argv, "suvf:", long_options, &option_index); if (opt == -1) break; switch (opt) { case 'f': list_filenames[list_filenames_index++] = optarg; break; case 's': sort = 1; break; case 'u': unlink = 1; break; case 'v': version = 1; break; default: usage(argv[0]); return EXIT_FAILURE; } } if(version == 1) { /* Don't you dare running it on a platform where byte != 8 bits */ printf("%s version %.2f, off_t = %zd bits\n", argv[0], FFINDEX_VERSION, sizeof(off_t) * 8); return EXIT_SUCCESS; } if(optind >= argn) { usage(argv[0]); return EXIT_FAILURE; } char *index_filename = argv[optind++]; FILE *index_file; index_file = fopen(index_filename, "r+"); if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; } size_t entries = ffcount_lines(index_filename); ffindex_index_t* index = ffindex_index_parse(index_file, entries); if(index == NULL) { perror("ffindex_index_parse failed"); return (EXIT_FAILURE); } fclose(index_file); /* Unlink entries */ if(unlink) { /* For each list_file unlink all entries */ for(int i = 0; i < list_filenames_index; i++) { printf("Unlinking entries from '%s'\n", list_filenames[i]); FILE *list_file = fopen(list_filenames[i], "r"); if (list_file == NULL) { perror(list_filenames[i]); return EXIT_FAILURE; } /* unlink entries in file, one per line */ char path[PATH_MAX]; while(fgets(path, PATH_MAX, list_file) != NULL) { index = ffindex_unlink(index, ffnchomp(path, strlen(path))); } } /* unlink entries specified by args */ for(int i = optind; i < argn; i++) { index = ffindex_unlink(index, 
argv[i]); } /* Sort the index entries and write back */ if(sort) { ffindex_sort_index_file(index); index_file = fopen(index_filename, "w"); if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; } err += ffindex_write(index, index_file); } } /* Write index back */ index_file = fopen(index_filename, "w"); if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; } err += ffindex_write(index, index_file); return err; }
int main(int argc, char **argv) { bool iflag, dflag, oflag, qflag; iflag = dflag = oflag = qflag = false; std::string ffindex_header_db_prefix; std::string ffindex_sequence_db_prefix; std::string ffindex_ca3m_db_prefix; std::string ffindex_a3m_db_prefix; int c; while ((c = getopt(argc, argv, "i:d:o:q:h")) != -1) { switch (c) { case 'i': iflag = 1; ffindex_ca3m_db_prefix = optarg; break; case 'd': dflag = 1; ffindex_sequence_db_prefix = optarg; break; case 'o': oflag = 1; ffindex_a3m_db_prefix = optarg; break; case 'q': qflag = 1; ffindex_header_db_prefix = optarg; break; case 'h': usage(); exit(0); case '?': if (optopt == 'c') fprintf(stderr, "Option -%c requires an argument.\n", optopt); else if (isprint(optopt)) fprintf(stderr, "Unknown option `-%c'.\n", optopt); else fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt); return 1; default: abort(); } } if(!iflag || !dflag || !oflag || !qflag) { std::cerr << "Missing arguments!" << std::endl; usage(); exit(0); } //prepare ffindex a3m database std::string a3mDataFile = ffindex_a3m_db_prefix+".ffdata"; std::string a3mIndexFile = ffindex_a3m_db_prefix+".ffindex"; FILE *a3m_data_fh = fopen(a3mDataFile.c_str(), "w"); FILE *a3m_index_fh = fopen(a3mIndexFile.c_str(), "w"); if (a3m_data_fh == NULL) { std::cerr << "ERROR: Could not open ffindex a3m data file! (" << a3mDataFile << ")!" << std::endl; exit(1); } if(a3m_index_fh == NULL) { std::cerr << "ERROR: Could not open ffindex a3m index file! (" << a3mIndexFile << ")!" << std::endl; exit(1); } size_t a3m_offset = 0; //prepare ffindex ca3m database std::string ca3mDataFile = ffindex_ca3m_db_prefix+".ffdata"; std::string ca3mIndexFile = ffindex_ca3m_db_prefix+".ffindex"; FILE *ca3m_data_fh = fopen(ca3mDataFile.c_str(), "r"); FILE *ca3m_index_fh = fopen(ca3mIndexFile.c_str(), "r"); if (ca3m_data_fh == NULL) { std::cerr << "ERROR: Could not open ffindex a3m data file! (" << ca3mDataFile << ")!" 
<< std::endl; exit(1); } if(ca3m_index_fh == NULL) { std::cerr << "ERROR: Could not open ffindex a3m index file! (" << ca3mIndexFile << ")!" << std::endl; exit(1); } size_t ca3m_offset; char* ca3m_data = ffindex_mmap_data(ca3m_data_fh, &ca3m_offset); ffindex_index_t* ca3m_index = ffindex_index_parse(ca3m_index_fh, 0); if(ca3m_index == NULL) { std::cerr << "ERROR: CA3M index (" << ca3mIndexFile << ") could not be loaded!" << std::endl; exit(1); } //prepare ffindex sequence database std::string sequenceDataFile = ffindex_sequence_db_prefix+".ffdata"; std::string sequenceIndexFile = ffindex_sequence_db_prefix+".ffindex"; FILE *sequence_data_fh = fopen(sequenceDataFile.c_str(), "r"); FILE *sequence_index_fh = fopen(sequenceIndexFile.c_str(), "r"); if (sequence_data_fh == NULL) { std::cerr << "ERROR: Could not open ffindex sequence data file! (" << sequenceDataFile << ")!" << std::endl; exit(1); } if(sequence_index_fh == NULL) { std::cerr << "ERROR: Could not open ffindex sequence index file! (" << sequenceIndexFile << ")!" << std::endl; exit(1); } size_t sequence_data_size; char* sequence_data = ffindex_mmap_data(sequence_data_fh, &sequence_data_size); ffindex_index_t* sequence_index = ffindex_index_parse(sequence_index_fh, 80000000); if(sequence_index == NULL) { std::cerr << "ERROR: Sequence index could not be loaded!" << std::endl; exit(1); } //prepare ffindex header database std::string headerDataFile = ffindex_header_db_prefix + ".ffdata"; std::string headerIndexFile = ffindex_header_db_prefix + ".ffindex"; FILE *header_data_fh = fopen(headerDataFile.c_str(), "r"); FILE *header_index_fh = fopen(headerIndexFile.c_str(), "r"); if (header_data_fh == NULL) { std::cerr << "ERROR: Could not open ffindex sequence data file! (" << headerDataFile << ")!" << std::endl; exit(1); } if (header_index_fh == NULL) { std::cerr << "ERROR: Could not open ffindex header index file! (" << headerIndexFile << ")!" 
<< std::endl; exit(1); } size_t header_data_size; char* header_data = ffindex_mmap_data(header_data_fh, &header_data_size); ffindex_index_t* header_index = ffindex_index_parse(header_index_fh, 1E8); if (header_index == NULL) { std::cerr << "ERROR: Header index could not be loaded!" << std::endl; exit(1); } //prepare input stream size_t ca3m_range_start = 0; size_t ca3m_range_end = ca3m_index->n_entries; // Foreach entry #pragma omp parallel for shared(ca3m_index, ca3m_data, a3m_data_fh, a3m_index_fh, a3m_offset) for(size_t entry_index = ca3m_range_start; entry_index < ca3m_range_end; entry_index++) { ffindex_entry_t* entry = ffindex_get_entry_by_index(ca3m_index, entry_index); if(entry == NULL) { perror(entry->name); continue; } char* data = ffindex_get_data_by_entry(ca3m_data, entry); std::stringstream* out_buffer = new std::stringstream(); compressed_a3m::extract_a3m(data, entry->length, sequence_index, sequence_data, header_index, header_data, out_buffer); std::string out_string = out_buffer->str(); #pragma omp critical { ffindex_insert_memory(a3m_data_fh, a3m_index_fh, &a3m_offset, const_cast<char*>(out_string.c_str()), out_string.size(), entry->name); } delete out_buffer; } fclose(a3m_data_fh); fclose(a3m_index_fh); ffsort_index(a3mIndexFile.c_str()); }
int main(int argc, char **argv) { bool iflag, sflag, oflag = false; std::string set_file; std::string ffindex_oa3m_db_prefix; std::string ffindex_a3m_db_prefix; int c; while ((c = getopt(argc, argv, "i:s:o:h")) != -1) { switch (c) { case 'i': iflag = 1; ffindex_a3m_db_prefix = optarg; break; case 's': sflag = 1; set_file = optarg; break; case 'o': oflag = optarg; ffindex_oa3m_db_prefix = optarg; break; case 'h': usage(); exit(0); case '?': if (optopt == 'c') fprintf(stderr, "Option -%c requires an argument.\n", optopt); else if (isprint(optopt)) fprintf(stderr, "Unknown option `-%c'.\n", optopt); else fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt); return 1; default: abort(); } } if(!iflag || !sflag || !oflag) { std::cerr << "Missing input!" << std::endl; usage(); exit(1); } //prepare ffindex a3m output database std::string oa3mDataFile = ffindex_oa3m_db_prefix+".ffdata"; std::string oa3mIndexFile = ffindex_oa3m_db_prefix+".ffindex"; FILE *oa3m_data_fh = fopen(oa3mDataFile.c_str(), "w"); FILE *oa3m_index_fh = fopen(oa3mIndexFile.c_str(), "w"); if (oa3m_data_fh == NULL) { std::cerr << "ERROR: Could not open ffindex ca3m data file! (" << oa3mDataFile << ")!" << std::endl; exit(1); } if(oa3m_index_fh == NULL) { std::cerr << "ERROR: Could not open ffindex ca3m index file! (" << oa3mIndexFile << ")!" << std::endl; exit(1); } size_t oa3m_offset = 0; //prepare ffindex a3m database std::string a3mDataFile = ffindex_a3m_db_prefix+".ffdata"; std::string a3mIndexFile = ffindex_a3m_db_prefix+".ffindex"; FILE *a3m_data_fh = fopen(a3mDataFile.c_str(), "r"); FILE *a3m_index_fh = fopen(a3mIndexFile.c_str(), "r"); if (a3m_data_fh == NULL) { std::cerr << "ERROR: Could not open ffindex a3m data file! (" << a3mDataFile << ")!" << std::endl; exit(1); } if(a3m_index_fh == NULL) { std::cerr << "ERROR: Could not open ffindex a3m index file! (" << a3mIndexFile << ")!" 
<< std::endl; exit(1); } size_t a3m_offset; char* a3m_data = ffindex_mmap_data(a3m_data_fh, &a3m_offset); ffindex_index_t* a3m_index = ffindex_index_parse(a3m_index_fh, 0); if(a3m_index == NULL) { std::cerr << "ERROR: A3M index could not be loaded!" << std::endl; exit(1); } //prepare filter std::set<std::string> filter; std::ifstream infile(set_file.c_str()); std::string line; while (std::getline(infile, line)) { std::string item = line.substr(0, line.length()); filter.insert(item); } infile.close(); //prepare input stream size_t a3m_range_start = 0; size_t a3m_range_end = a3m_index->n_entries; // Foreach entry #pragma omp parallel for shared(a3m_index, a3m_data, oa3m_data_fh, oa3m_index_fh, oa3m_offset) for(size_t entry_index = a3m_range_start; entry_index < a3m_range_end; entry_index++) { //fprintf(stderr, "index %ld\n", entry_index); ffindex_entry_t* entry = ffindex_get_entry_by_index(a3m_index, entry_index); if(entry == NULL) { perror(entry->name); continue; } char* data = ffindex_get_data_by_entry(a3m_data, entry); std::stringstream* out_buffer = new std::stringstream(); size_t nr_sequences = 0; for(size_t index = 0; index < entry->length; index++) { //write annotation line if(data[index] == '#') { while(data[index] != '\n' && index < entry->length) { out_buffer->put(data[index++]); } out_buffer->put('\n'); } else if(data[index] == '>') { size_t start_index = index; while(index < entry->length && data[index] != '\n') { index++; } //copy line without new line std::string header = std::string(&data[start_index], index - start_index); std::string id = getNameFromHeader(header); bool consensus_flag = isConsensus(id); std::string short_id = getShortIdFromHeader(header); while(index < entry->length - 1 && data[index] != '>') { index++; } if(data[index] == '>' || data[index] == '\0') { index--; } bool passedFilter = false; if(filter.find(short_id) != filter.end()) { nr_sequences++; passedFilter = true; } if(passedFilter || consensus_flag || id.compare("ss_dssp") == 0 
|| id.compare("sa_dssp") == 0 || id.compare("ss_pred") == 0 || id.compare("ss_conf") == 0) { std::string seq = std::string(&data[start_index], index - start_index); out_buffer->write(seq.c_str(), seq.size()); out_buffer->put('\n'); } } } if(nr_sequences > 0) { std::string out_string = out_buffer->str(); #pragma omp critical { ffindex_insert_memory(oa3m_data_fh, oa3m_index_fh, &oa3m_offset, const_cast<char*>(out_string.c_str()), out_string.size(), entry->name); } } else { std::cerr << "WARNING: No sequences left for cluster " << entry->name << std::endl; } delete out_buffer; } fclose(oa3m_data_fh); }
int main(int argn, char **argv) { int by_index = 0; static struct option long_options[] = { { "byindex", no_argument, NULL, 'n' }, { NULL, 0, NULL, 0 } }; int opt; while (1) { int option_index = 0; opt = getopt_long(argn, argv, "n", long_options, &option_index); if (opt == -1) break; switch (opt) { case 'n': by_index = 1; break; default: usage(argv[0]); return EXIT_FAILURE; } } if(argn < 3) { usage(argv[0]); return EXIT_FAILURE; } char *data_filename = argv[optind++]; char *index_filename = argv[optind++]; FILE *data_file = fopen(data_filename, "r"); FILE *index_file = fopen(index_filename, "r"); if( data_file == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_get", data_filename); exit(EXIT_FAILURE); } if(index_file == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_get", index_filename); exit(EXIT_FAILURE); } size_t data_size; char *data = ffindex_mmap_data(data_file, &data_size); ffindex_index_t* index = ffindex_index_parse(index_file, 0); if(index == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_index_parse", index_filename); exit(EXIT_FAILURE); } if(by_index) { for(int i = optind; i < argn; i++) { size_t index_n = atol(argv[i]) - 1; // offset from 0 but specify from 1 ffindex_entry_t* entry = ffindex_get_entry_by_index(index, index_n); if(entry == NULL) { errno = ENOENT; fferror_print(__FILE__, __LINE__, "ffindex_get entry index out of range", argv[i]); } else { char *filedata = ffindex_get_data_by_entry(data, entry); if(filedata == NULL) { errno = ENOENT; fferror_print(__FILE__, __LINE__, "ffindex_get entry index out of range", argv[i]); } else fwrite(filedata, entry->length - 1, 1, stdout); } } } else // by name { for(int i = optind; i < argn; i++) { char *filename = argv[i]; ffindex_entry_t* entry = ffindex_get_entry_by_name(index, filename); if(entry == NULL) { errno = ENOENT; fferror_print(__FILE__, __LINE__, "ffindex_get key not found in index", filename); } else { char *filedata = ffindex_get_data_by_entry(data, entry); if(filedata == NULL) { 
errno = ENOENT; fferror_print(__FILE__, __LINE__, "ffindex_get key not found in index", filename); } else fwrite(filedata, entry->length - 1, 1, stdout); } } /* Alternative code using (slower) ffindex_fopen */ /* FILE *file = ffindex_fopen(data, index, filename); if(file == NULL) { errno = ENOENT; fferror_print(__FILE__, __LINE__, "ffindex_fopen file not found in index", filename); } else { char line[LINE_MAX]; while(fgets(line, LINE_MAX, file) != NULL) printf("%s", line); } */ } return 0; }
int main(int argn, char **argv) { int sort = 0, version = 0; int opt, err = EXIT_SUCCESS; while ((opt = getopt(argn, argv, "sv")) != -1) { switch (opt) { case 's': sort = 1; break; case 'v': version = 1; break; default: usage(argv[0]); return EXIT_FAILURE; } } if(version == 1) { /* Don't you dare running it on a platform where byte != 8 bits */ printf("%s version %.2f, off_t = %zd bits\n", argv[0], FFINDEX_VERSION, sizeof(off_t) * 8); return EXIT_SUCCESS; } if(argn - optind < 3) { usage(argv[0]); return EXIT_FAILURE; } char *data_filename = argv[optind++]; char *index_filename = argv[optind++]; char *fasta_filename = argv[optind++]; FILE *data_file, *index_file, *fasta_file; struct stat st; if(stat(data_filename, &st) == 0) { errno = EEXIST; perror(data_filename); return EXIT_FAILURE; } data_file = fopen(data_filename, "w"); if( data_file == NULL) { perror(data_filename); return EXIT_FAILURE; } if(stat(index_filename, &st) == 0) { errno = EEXIST; perror(index_filename); return EXIT_FAILURE; } index_file = fopen(index_filename, "w+"); if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; } fasta_file = fopen(fasta_filename, "r"); if(fasta_file == NULL) { perror(fasta_filename); return EXIT_FAILURE; } size_t fasta_size; char *fasta_data = ffindex_mmap_data(fasta_file, &fasta_size); size_t offset = 0; size_t from_length = 0; char name[FFINDEX_MAX_ENTRY_NAME_LENTH]; int seq_id = 1; for(size_t fasta_offset = 1; fasta_offset < fasta_size; fasta_offset++) // position after first ">" { from_length = 1; while(fasta_offset < fasta_size && !(*(fasta_data + fasta_offset) == '>' && *(fasta_data + fasta_offset - 1) == '\n')) { fasta_offset++; from_length++; } sprintf(name, "%d", seq_id++); ffindex_insert_memory(data_file, index_file, &offset, fasta_data + (fasta_offset - from_length), from_length, name); } fclose(data_file); /* Sort the index entries and write back */ if(sort) { rewind(index_file); ffindex_index_t* index = ffindex_index_parse(index_file, 0); 
if(index == NULL) { perror("ffindex_index_parse failed"); exit(EXIT_FAILURE); } fclose(index_file); ffindex_sort_index_file(index); index_file = fopen(index_filename, "w"); if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; } err += ffindex_write(index, index_file); } return err; }
int main(int argn, char **argv) { int sort = 0, version = 0; int opt, err = EXIT_SUCCESS; int user_selected_field_index = 1; while ((opt = getopt(argn, argv, "svk:")) != -1) { switch (opt) { case 'k': user_selected_field_index = optind; break; case 's': sort = 1; break; case 'v': version = 1; break; default: usage(argv[0]); return EXIT_FAILURE; } } if(version == 1) { /* Don't you dare running it on a platform where byte != 8 bits */ printf("%s version %.2f, off_t = %zd bits\n", argv[0], FFINDEX_VERSION, sizeof(off_t) * 8); return EXIT_SUCCESS; } if(argn - optind < 3) { usage(argv[0]); return EXIT_FAILURE; } char *data_filename = argv[optind++]; char *index_filename = argv[optind++]; char *tsv_filename = argv[optind++]; FILE *data_file, *index_file; size_t offset = 0; /* open ffindex */ err = ffindex_index_open(data_filename, index_filename, "w", &data_file, &index_file, &offset); if(err != EXIT_SUCCESS) return err; FILE* tsv_file = fopen(tsv_filename, "r"); if(tsv_file == NULL) { perror(tsv_filename); return EXIT_FAILURE; } size_t tsv_size; char* tsv_data = ffindex_mmap_data(tsv_file, &tsv_size); char* tsv_current = tsv_data; char* tsv_part_begin = tsv_data; char* tsv_last = tsv_data + tsv_size; char field_current[FFINDEX_MAX_ENTRY_NAME_LENTH + 1]; // + seperator size_t field_current_length = 0; // + seperator char* tsv_selected_field_start = NULL; size_t tsv_selected_field_length = 0; tsv_current = tsv_scan_line_for_field(tsv_current, user_selected_field_index, &tsv_selected_field_start, &tsv_selected_field_length); strncpy(field_current, tsv_selected_field_start, tsv_selected_field_length); //XXX field_current_length = tsv_selected_field_length; field_current[field_current_length] = '\0'; while(tsv_current < tsv_last) { char* tsv_next; tsv_next = tsv_scan_line_for_field(tsv_current, user_selected_field_index, &tsv_selected_field_start, &tsv_selected_field_length); if((tsv_selected_field_length != field_current_length || strncmp(field_current, 
tsv_selected_field_start, tsv_selected_field_length) != 0)) // XXX got a new field value { ffindex_insert_memory(data_file, index_file, &offset, tsv_part_begin, tsv_current - tsv_part_begin, field_current); strncpy(field_current, tsv_selected_field_start, tsv_selected_field_length); //XXX field_current_length = tsv_selected_field_length; field_current[field_current_length] = '\0'; tsv_part_begin = tsv_current; } tsv_current = tsv_next; } ffindex_insert_memory(data_file, index_file, &offset, tsv_part_begin, tsv_current - tsv_part_begin, field_current); strncpy(field_current, tsv_selected_field_start, tsv_selected_field_length); //XXX field_current_length = tsv_selected_field_length; field_current[field_current_length] = '\0'; fclose(data_file); /* Sort the index entries and write back */ if(sort) { rewind(index_file); ffindex_index_t* index = ffindex_index_parse(index_file, 0); if(index == NULL) { perror("ffindex_index_parse failed"); exit(EXIT_FAILURE); } fclose(index_file); ffindex_sort_index_file(index); index_file = fopen(index_filename, "w"); if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; } err += ffindex_write(index, index_file); } return err; }
int main(int argn, char **argv) { int sort = 0, unlink = 0, version = 0, use_tree = 1; int opt, err = EXIT_SUCCESS; char* list_filenames[MAX_FILENAME_LIST_FILES]; size_t list_filenames_index = 0; while ((opt = getopt(argn, argv, "stuvf:")) != -1) { switch (opt) { case 'f': list_filenames[list_filenames_index++] = optarg; break; case 's': sort = 1; break; case 't': use_tree = 1; break; case 'u': unlink = 1; break; case 'v': version = 1; break; default: fprintf(stderr, "Option %c not recognized\n", opt); usage(argv[0]); return EXIT_FAILURE; } } if(version == 1) { /* Don't you dare running it on a platform where byte != 8 bits */ printf("%s version %.2f, off_t = %zd bits\n", argv[0], FFINDEX_VERSION, sizeof(off_t) * 8); return EXIT_SUCCESS; } if(optind >= argn) { usage(argv[0]); return EXIT_FAILURE; } char *index_filename = argv[optind++]; FILE *index_file; index_file = fopen(index_filename, "r+"); if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; } ffindex_index_t* index = ffindex_index_parse(index_file, 0); if(index == NULL) { perror("ffindex_index_parse failed"); return (EXIT_FAILURE); } fclose(index_file); /* Unlink entries */ if(unlink) { if(use_tree) { /* Build tree */ index = ffindex_index_as_tree(index); /* For each list_file unlink all entries */ if(list_filenames_index > 0) for(int i = 0; i < list_filenames_index; i++) { printf("Unlinking entries from '%s'\n", list_filenames[i]); FILE *list_file = fopen(list_filenames[i], "r"); if( list_file == NULL) { perror(list_filenames[i]); return EXIT_FAILURE; } /* unlink entries in file, one per line */ char path[PATH_MAX]; while(fgets(path, PATH_MAX, list_file) != NULL) index = ffindex_unlink(index, ffnchomp(path, strlen(path))); } /* unlink entries specified by args */ for(int i = optind; i < argn; i++) index = ffindex_unlink(index, argv[i]); } else { char** sorted_names_to_unlink = malloc(FFINDEX_MAX_INDEX_ENTRIES_DEFAULT * sizeof(char *)); if(sorted_names_to_unlink == NULL) 
fferror_print(__FILE__, __LINE__, __func__, "malloc failed"); /* For each list_file unlink all entries */ if(list_filenames_index > 0) for(int i = 0; i < list_filenames_index; i++) { printf("Unlinking entries from '%s'\n", list_filenames[i]); FILE *list_file = fopen(list_filenames[i], "r"); if( list_file == NULL) { perror(list_filenames[i]); return EXIT_FAILURE; } /* unlink entries in file, one per line */ char path[PATH_MAX]; while(fgets(path, PATH_MAX, list_file) != NULL) sorted_names_to_unlink[i++] = ffnchomp(strdup(path), strlen(path)); ffindex_unlink_entries(index, sorted_names_to_unlink, i); } /* unlink entries specified by args */ int y = 0; for(int i = optind; i < argn; i++, y++) sorted_names_to_unlink[y] = argv[i]; ffindex_unlink_entries(index, sorted_names_to_unlink, y); /* Sort the index entries and write back */ if(sort) { ffindex_sort_index_file(index); index_file = fopen(index_filename, "w"); if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; } err += ffindex_write(index, index_file); } } } /* Write index back */ index_file = fopen(index_filename, "w"); if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; } err += ffindex_write(index, index_file); return err; }
int main(int argn, char **argv) { if(argn < 4) { fprintf(stderr, "USAGE: %s DATA_FILENAME INDEX_FILENAME PROGRAM [PROGRAM_ARGS]*\n" "\nDesigned and implemented by Andy Hauser <*****@*****.**>.\n", argv[0]); return -1; } char *data_filename = argv[1]; char *index_filename = argv[2]; char *program_name = argv[3]; char **program_argv = argv + 3; FILE *data_file = fopen(data_filename, "r"); FILE *index_file = fopen(index_filename, "r"); if( data_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], data_filename); exit(EXIT_FAILURE); } if(index_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], index_filename); exit(EXIT_FAILURE); } size_t data_size; char *data = ffindex_mmap_data(data_file, &data_size); ffindex_index_t* index = ffindex_index_parse(index_file, 0); if(index == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_index_parse", index_filename); exit(EXIT_FAILURE); } // Ignore SIGPIPE struct sigaction handler; handler.sa_handler = SIG_IGN; sigemptyset(&handler.sa_mask); handler.sa_flags = 0; sigaction(SIGPIPE, &handler, NULL); size_t range_start = 0; size_t range_end = index->n_entries; // Foreach entry //#pragma omp parallel for for(size_t entry_index = range_start; entry_index < range_end; entry_index++) { //fprintf(stderr, "index %ld\n", entry_index); int ret = 0; ffindex_entry_t* entry = ffindex_get_entry_by_index(index, entry_index); if(entry == NULL) { perror(entry->name); continue; } int pipefd[2]; ret = pipe(pipefd); if(ret != 0) { perror(entry->name); continue; } pid_t child_pid = fork(); if(child_pid == 0) { fclose(data_file); fclose(index_file); close(pipefd[1]); // Make pipe from parent our new stdin int newfd = dup2(pipefd[0], fileno(stdin)); if(newfd < 0) { fprintf(stdout, "%d %d\n", pipefd[0], newfd); perror(entry->name); } close(pipefd[0]); // exec program with the pipe as stdin execvp(program_name, program_argv); // never reached } else if(child_pid > 0) { // Read end is for child only close(pipefd[0]); // Write file data to 
child's stdin. char *filedata = ffindex_get_data_by_entry(data, entry); ssize_t written = 0; while(written < entry->length) { int w = write(pipefd[1], filedata + written, entry->length - written); if(w < 0 && errno != EPIPE) { perror(entry->name); break; } else if(w == 0 && errno != 0) { perror(entry->name); break; } else written += w; } close(pipefd[1]); // child gets EOF waitpid(child_pid, NULL, 0); } else { perror(entry->name); exit(errno); } } return 0; }