int main (int argc, char *argv[]) { // Program description std::cerr << "map_back: context of significant kmers\n"; // Do parsing and checking of command line params // If no input options, give quick usage rather than full help boost::program_options::variables_map vm; if (argc == 1) { std::cerr << "Usage: map_back -k seer_output.txt -r references.txt > mappings.txt\n\n" << "For full option details run map_back -h\n"; return 0; } else if (parseCommandLine(argc, argv, vm)) { return 1; } // Set number of threads size_t num_threads = vm["threads"].as<size_t>(); if (num_threads < 1) { num_threads = 1; } // Read all sequences into memory as Fasta objects std::cerr << "Reading reference sequences into memory...\n"; std::vector<Fasta> sequence_cache = readSequences(vm["references"].as<std::string>()); // Loop through significant kmers std::cerr << "Now mapping significant kmers...\n"; std::ifstream kmer_file(vm["kmers"].as<std::string>().c_str()); if (!kmer_file) { throw std::runtime_error("Could not open kmer_file " + vm["kmers"].as<std::string>() + "\n"); } else { // Set up list of threads, and mutex lock on std out std::mutex out_mtx; std::list<std::future<void>> threads; // Read the header std::string header; std::getline(kmer_file, header); int num_covar_fields = parseHeader(header); Significant_kmer sig_kmer(num_covar_fields); while(kmer_file) { kmer_file >> sig_kmer; // Check the read into sig_kmer hasn't reached end of file if (!kmer_file.eof()) { assert(num_threads >= 1); std::cout << sig_kmer.sequence(); // sig_kmer samples and sample cache are sorted in the same order, so // can go through linearly std::vector<std::string> search_names = sig_kmer.samples_found(); std::vector<std::string>::iterator search_names_it = search_names.begin(); for (std::vector<Fasta>::iterator all_names_it = sequence_cache.begin(); all_names_it != sequence_cache.end(); ++all_names_it) { // For each sample we know the kmer is in, print all matches to // the kmer if (all_names_it->get_name() == *search_names_it) { // Thread each search (i.e. per sample per kmer) if (num_threads > 1) { // Check if four searches are running. If so, wait for the // next one to finish. Wait (for a max of 100ms) so this // thread doesn't consume processing waitForThreads(threads, num_threads - 1); // Start a new thread asynchronously, at the back of the // queue threads.push_back(std::async(std::launch::async, &Fasta::printMappings, *all_names_it, std::ref(std::cout), sig_kmer.sequence(), std::ref(out_mtx))); } else { all_names_it->printMappings(std::cout, sig_kmer.sequence(), out_mtx); } ++search_names_it; if (search_names_it == search_names.end()) { break; } } } // Tab between every sample, line break after every kmer if (num_threads > 1) { waitForThreads(threads, 0); } std::cout << std::endl; } } std::cerr << "Done.\n"; } }
int main(int argc, char** argv) { std::string read_file = "./test_reads.txt"; //std::string kmer_file_name = "./test_kmers.txt"; std::string kmer_file_name = "./solid_kmers.txt"; std::ifstream input_file(read_file.c_str()); std::ifstream kmer_file(kmer_file_name.c_str()); std::string read_string; std::string line_id; std::string line_misc; std::string quality_string("IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII00000000000000000000000000000000IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"); std::string kmer_string; int32_t read_length=112; int32_t kmer_length=30; int32_t threshold; uint8_t level0; uint8_t level1; uint8_t level2; uint8_t level3; int num_reads_processed = 0; int num_reads_per_iteration = 512; int num_kmers_per_iteration = 512 * 4; char* kmer_space; uint32_t** correction_space; char* candidate_space; char* read_space; int32_t* index_space; struct correction_item* correction_array; //First program solid k-mers into the bloom-filter if (posix_memalign((void**)&kmer_space, 128, num_kmers_per_iteration * 64) != 0) { std::cout << "ERROR!!!" << std::endl; } open_device((uint64_t) 0) uint32_t val; cxl_mmio_read32(afu_h,CONTROL,&val); std::cout << val << std::endl; cxl_mmio_read32(afu_h,THRESHOLD,&val); std::cout << val << std::endl; cxl_mmio_read32(afu_h,READ_BASE,&val); std::cout << val << std::endl; cxl_mmio_read32(afu_h,WRITE_BASE,&val); std::cout << val << std::endl; cxl_mmio_read32(afu_h,READS_RECEIVED,&val); std::cout << val << std::endl; cxl_mmio_read32(afu_h,READS_WRITTEN,&val); std::cout << val << std::endl; cxl_mmio_read32(afu_h,NUM_ITEMS,&val); std::cout << val << std::endl; cxl_mmio_read32(afu_h,START,&val); std::cout << val << std::endl; cxl_mmio_read32(afu_h,RESET,&val); std::cout << val << std::endl; cxl_mmio_read32(afu_h,STATUS,&val); std::cout << val << std::endl; cxl_mmio_read32(afu_h,DDR3_BASE,&val); std::cout << val << std::endl; wait_for_idle(afu_h); clear_status(afu_h); if (!kmer_file.is_open()) { std::cout << "Cannot open k-mer file!!!" << std::endl; return -1; } int32_t num_kmers = 0; while (std::getline(kmer_file, kmer_string)) { memcpy(kmer_space + (num_kmers % num_kmers_per_iteration) * 64, kmer_string.c_str(), kmer_length); num_kmers++; if (num_kmers % num_kmers_per_iteration == 0) { std::cout << "Completed collecting k-mers" << std::endl; set_kmer_program_mode(afu_h, num_kmers_per_iteration, kmer_length, kmer_space); Start; #ifdef DEBUG FILE* debug = fopen("./debug", "w"); for (int x = 0; x < num_kmers_per_iteration; x++) { char* kmerToPrint = kmer_space + x * 64; for (int y = 63; y > 0; y--) { fprintf(debug,"%02x", kmerToPrint[y]); } fprintf(debug, "\n"); } #endif //Wait for IDLE bool success = wait_for_idle(afu_h); if (!success) { std::cout << "Cannot complete AFU transactions. Exiting!!!" << std::endl; return -1; } clear_status(afu_h); std::cout << "Completed iteration" << std::endl; } } if (num_kmers % num_kmers_per_iteration != 0) { std::cout << "The last set of k-mers going to be tested ... " << std::endl; int32_t num_remaining = num_kmers % num_kmers_per_iteration; set_kmer_program_mode(afu_h, num_remaining, kmer_length, kmer_space); Start; bool success = wait_for_idle(afu_h); if (!success) { std::cout << "Cannot complete AFU transactions. Exiting!!!" << std::endl; return -1; } clear_status(afu_h); std::cout << "Completed last iteration" << std::endl; } ////First convert each read to a correction item // if (!(input_file.is_open())) { // std::cout << "Cannot open input file" << std::endl; // return 1; // } // // //256 bytes per read = 2 cache lines = 2048 bits // if (posix_memalign((void**)&read_space, 128, num_reads_per_iteration * 256) != 0) { // std::cout << "ERROR!!! Cannot allocate aligned space for read_space" << std::endl; // } // // //256 bytes per packed index space = 2 cache lines = 2048 bits = 32 * 2 * (32 indices) // if (posix_memalign((void**)&index_space, 128, num_reads_per_iteration * 256) != 0) { // std::cout << "ERROR!!! Cannot allocate aligned space for read_space" << std::endl; // } // // while (std::getline(input_file, read_string)) { // if (!std::getline(input_file, read_string)) { // std::cout << "Error in file format" << std::endl; // return 1; // } // if (!std::getline(input_file, quality_string)) { // std::cout << "Error in file format" << std::endl; // return 1; // } // if (!std::getline(input_file, quality_string)) { // std::cout << "Error in file format" << std::endl; // return 1; // } // // char* read_item = read_space + 256 * (num_reads_processed % num_reads_per_iteration); // memcpy(read_space + 256 * (num_reads_processed % num_reads_per_iteration), read_string.c_str(), read_length); // read_item[255] = read_length; // num_reads_processed++; // // //Flatten out stuff - in case it is all to go back to C // //correction_array[num_reads_processed].read_string = (uint32_t*) read_space[num_reads_processed*2]; // //correction_array[num_reads_processed].quality_string = (uint32_t*) read_space[num_reads_processed*2+1]; // //correction_array[num_reads_processed].read_length = read_string.length(); // //correction_array[num_reads_processed].island_indices = (int32_t*) index_space[num_reads_processed]; // if (num_reads_processed % num_reads_per_iteration == 0) { // int32_t num_corrections = 0; // uint64_t wed = 0; // // std::cout << "Reads processed, proceeding to procure island information ... " << std::endl; // // //1. Profile the reads // set_read_profile_mode(afu_h, num_reads_per_iteration, kmer_length, index_space, read_space); // Start; // // bool success = wait_for_idle(afu_h); // if (!success) { // std::cout << "ERROR! Read profile doesn't complete!!!" << std::endl; // } // clear_status(afu_h); // // //Print the indices // for (int m = 0; m < num_reads_per_iteration; m++) { // int32_t* index_base = index_space + 32 * 2 * m; // std::cout << "For " << m << "-th read" << std::endl; // for (int n = 0; n < 32; n++) { // int position = index_base[2*n]; // int length = index_base[2*n+1]; // //if ((position != -1) && (length != -1)) { // std::cout << "(" << position << "," << length << ")" << std::endl; // //} else { // // break; // //} // } // } // // ////2. Adjust solid islands // ////adjust_solid_islands(index_space,num_reads_per_iteration); // // ////3. Set correction types for each read and collect reads to be corrected in a particular space // //num_corrections = set_correction_types(correction_array, num_reads_per_iteration, candidate_space, correction_space); // // ////4. Correct errors // //set_read_correct_mode(afu_h, num_reads_per_iteration, threshold, kmer_length, level0, level1, level2, level3, candidate_space, correction_space); // //Start; // // ////5. Post process each correction, and then combine // ////post_process_corrections(correction_array, num_reads_per_iteration); // // // ////TBD 9: Write out results // // //for (int k = 0; k < num_corrections; k++) { // // delete[] correction_space[k]; // // for (int m = 0; m < 32; m++) { // // delete[] candidate_space[32*k+m]; // // } // //} // // //for (int k = 0; k < num_reads_per_iteration; k++) { // // delete[] correction_array[k].candidates; // // delete[] correction_array[k].start_position; // // delete[] correction_array[k].end_position; // //} // } // } // // if (num_reads_processed % num_reads_per_iteration != 0) { // int32_t num_corrections = 0; // uint64_t wed = 0; // // std::cout << "Processing last read batch for island information ... " << std::endl; // // //1. Profile the reads // set_read_profile_mode(afu_h, num_reads_processed % num_reads_per_iteration, kmer_length, index_space, read_space); // Start; // // bool success = wait_for_idle(afu_h); // if (!success) { // std::cout << "ERROR! Read profile doesn't complete!!!" << std::endl; // } // clear_status(afu_h); // // for (int m = 0; m < num_reads_processed % num_reads_per_iteration; m++) { // int32_t* index_base = index_space + 32 * 2 * m; // std::cout << "For " << m << "-th read" << std::endl; // for (int n = 0; n < 32; n++) { // int position = index_base[2*n]; // int length = index_base[2*n+1]; // //if ((position != -1) && (length != -1)) { // std::cout << "(" << position << "," << length << ")" << std::endl; // //} else { // // break; // //} // } // } // } // open_device((uint64_t) 0) // clear_status(afu_h); std::ifstream test_file("./stimulus.txt"); if (!test_file.is_open()) { std::cout << "ERROR!! Cannot open stimulus file" << std::endl; } if (posix_memalign((void**)&read_space, 128, num_reads_per_iteration * 256 *2) != 0) { std::cout << "ERROR!!!" << std::endl; } if (posix_memalign((void**)&candidate_space, 128, num_reads_per_iteration * 256 * 32) != 0) { std::cout << "ERROR!!!" << std::endl; } num_reads_processed = 0; char quality_string_c[113]; //= quality_string.c_str(); memcpy(quality_string_c, quality_string.c_str(), read_length); for (int p = 40; p < 70; p++) { quality_string_c[p] = 0; } while (std::getline(test_file, read_string)) { uint32_t start_position, end_position; char read_string_c[113]; sscanf(read_string.c_str(), "%s %d %d", read_string_c, &start_position, &end_position); read_string_c[read_length] = '\0'; std::cout << "Read : " << read_string_c << " start: " << (char) start_position << " end: " << (char) end_position << std::endl; memcpy(read_space + 512 * (num_reads_processed % num_reads_per_iteration), read_string_c, read_length); memcpy(read_space + 512 * (num_reads_processed % num_reads_per_iteration) + 256, quality_string_c, read_length); char* read_item = read_space + 512 * (num_reads_processed % num_reads_per_iteration); read_item[255] = read_length; read_item[254] = start_position; read_item[253] = end_position; num_reads_processed++; if (num_reads_processed % num_reads_per_iteration == 0) { set_read_correct_mode(afu_h, num_reads_per_iteration, 1, kmer_length, 0, 20, 60, 80, candidate_space, read_space); Start; bool success = wait_for_idle(afu_h); if (!success) { std::cout << "ERROR! Read profile doesn't complete!!!" << std::endl; return -1; } clear_status(afu_h); for (int m = 0; m < num_reads_per_iteration; m++) { char* candidate_local_space = candidate_space + m * 256 * 32; char* read = read_space + m * 512; read[read_length] = '\0'; int32_t num_candidates = (int32_t) candidate_local_space[255]; //The last byte of every read provides us with the number of candidates //std::cout << "Read " << read << " has " << num_candidates << " candidates" << std::endl; printf("Candidate for %s is at %lu\n", read, (uint64_t) candidate_local_space); printf("Read %s has %d candidates\n", read, num_candidates); for (int n = 0; n < num_candidates; n++) { char* candidate = candidate_local_space + n * 256; int32_t num_candidates_to_print = (int32_t) candidate[255]; candidate[read_length] = '\0'; //candidate[read_length] = '\0'; printf("Read:%s:%s:%d\n", read,candidate,num_candidates_to_print); } std::cout << "Completed printing candidates ... " << std::endl; } } } if (num_reads_processed % num_reads_per_iteration != 0) { std::cout << "Entering the final iteration" << std::endl; set_read_correct_mode(afu_h, num_reads_processed % num_reads_per_iteration, 1, kmer_length, 0, 20, 60, 80, candidate_space, read_space); Start; bool success = wait_for_idle(afu_h); if (!success) { std::cout << "ERROR! Read profile doesn't complete!!!" << std::endl; return -1; } clear_status(afu_h); for (int m = 0; m < num_reads_processed % num_reads_per_iteration; m++) { char* candidate_local_space = candidate_space + m * 256 * 32; char* read = read_space + m * 512; read[read_length] = '\0'; int32_t num_candidates = (int32_t) candidate_local_space[255]; //The last byte of every read provides us with the number of candidates printf("Candidate for %s is at %lu\n", read, (uint64_t) candidate_local_space); printf("Read %s has %d candidates\n", read, num_candidates); for (int n = 0; n < num_candidates; n++) { char* candidate = candidate_local_space + n * 256; int32_t num_candidates_to_print = (int32_t) candidate[255]; candidate[read_length] = '\0'; //std::cout << "Read " << read << ":" << candidate << ":" << num_candidates << std::endl; printf("Read:%s:%s:%d\n",read,candidate,num_candidates_to_print); } std::cout << "Completed printing candidates ... " << std::endl; } } close_device std::cout << "Closing program ... " << std::endl; return 0; }