Exemplo n.º 1
0
// Entry point of map_back. Reads the reference sequences and the significant
// kmer file named on the command line, then for every kmer writes its
// sequence to stdout followed by whatever Fasta::printMappings emits for each
// cached reference whose name appears in the kmer's sample list. Each kmer's
// output is terminated with a line break.
//
// Returns 0 after the short usage text (no arguments) and on completion, and
// 1 when parseCommandLine returns non-zero. Throws std::runtime_error when
// the kmer file cannot be opened.
int main (int argc, char *argv[])
{
   // Program description
   std::cerr << "map_back: context of significant kmers\n";

   // Do parsing and checking of command line params
   // If no input options, give quick usage rather than full help
   boost::program_options::variables_map vm;
   if (argc == 1)
   {
      std::cerr << "Usage: map_back -k seer_output.txt -r references.txt > mappings.txt\n\n"
         << "For full option details run map_back -h\n";
      return 0;
   }
   else if (parseCommandLine(argc, argv, vm))
   {
      return 1;
   }

   // Set number of threads, never fewer than one
   size_t num_threads = vm["threads"].as<size_t>();
   if (num_threads < 1)
   {
      num_threads = 1;
   }

   // Read all sequences into memory as Fasta objects
   std::cerr << "Reading reference sequences into memory...\n";
   std::vector<Fasta> sequence_cache = readSequences(vm["references"].as<std::string>());

   // Loop through significant kmers
   std::cerr << "Now mapping significant kmers...\n";
   const std::string kmer_file_name = vm["kmers"].as<std::string>();
   std::ifstream kmer_file(kmer_file_name.c_str());
   if (!kmer_file)
   {
      throw std::runtime_error("Could not open kmer_file " + kmer_file_name + "\n");
   }

   // Set up list of threads, and mutex lock on std out.
   // Declared after sequence_cache so the futures are destroyed (and their
   // tasks finished) before the sequences they point at.
   std::mutex out_mtx;
   std::list<std::future<void>> threads;

   // Read the header
   std::string header;
   std::getline(kmer_file, header);
   int num_covar_fields = parseHeader(header);

   Significant_kmer sig_kmer(num_covar_fields);
   while (kmer_file)
   {
      kmer_file >> sig_kmer;

      // Only use sig_kmer if the read neither hit end of file nor failed;
      // after a failed read it would still hold the previous kmer.
      // NOTE(review): a final record that sets eofbit while being read
      // (e.g. no trailing newline) is skipped here, as before -- whether that
      // can happen depends on Significant_kmer's operator>>; verify.
      if (kmer_file.good())
      {
         assert(num_threads >= 1);

         std::cout << sig_kmer.sequence();

         // sig_kmer samples and sample cache are sorted in the same order, so
         // can go through linearly
         std::vector<std::string> search_names = sig_kmer.samples_found();
         std::vector<std::string>::iterator search_names_it = search_names.begin();

         // Stop as soon as every sample name has been matched. Checking the
         // name iterator in the loop condition also covers a kmer with an
         // empty sample list, which must not be dereferenced.
         for (std::vector<Fasta>::iterator all_names_it = sequence_cache.begin();
               all_names_it != sequence_cache.end() && search_names_it != search_names.end();
               ++all_names_it)
         {
            // For each sample we know the kmer is in, print all matches to
            // the kmer
            if (all_names_it->get_name() == *search_names_it)
            {
               // Thread each search (i.e. per sample per kmer)
               if (num_threads > 1)
               {
                  // Keep room for one more search before launching it.
                  // waitForThreads is defined elsewhere; presumably it blocks
                  // until at most num_threads - 1 searches are still running
                  // -- verify.
                  waitForThreads(threads, num_threads - 1);

                  // Start a new thread asynchronously, at the back of the
                  // queue. The cached Fasta is passed by pointer rather than
                  // copied: the single-threaded branch below already calls
                  // printMappings on the cached object, each sample is
                  // searched at most once per kmer, and all searches are
                  // waited for before the next kmer is read.
                  threads.push_back(std::async(std::launch::async,
                           &Fasta::printMappings, &(*all_names_it), std::ref(std::cout), sig_kmer.sequence(), std::ref(out_mtx)));
               }
               else
               {
                  all_names_it->printMappings(std::cout, sig_kmer.sequence(), out_mtx);
               }

               ++search_names_it;
            }
         }

         // Make sure every search for this kmer has finished before ending
         // its line of output
         if (num_threads > 1)
         {
            waitForThreads(threads, 0);
         }

         // Line break after every kmer
         std::cout << std::endl;
      }
   }

   std::cerr << "Done.\n";

   return 0;
}
Exemplo n.º 2
0
// Prints the candidates stored for the first num_reads reads of a batch.
//
// Layout as used by this file: each read occupies 512 bytes of read_space and
// owns 32 slots of 256 bytes in candidate_space. Byte 255 of a read's first
// slot is taken as its number of candidates; byte 255 of every slot is printed
// next to that candidate. Both the read and each candidate are NUL-terminated
// in place at offset read_length before being printed.
// NOTE(review): the candidate count comes straight from the buffer and is not
// checked against the 32 slots reserved per read -- confirm the device never
// reports more.
static void print_batch_candidates(char* candidate_space, char* read_space, int num_reads, int32_t read_length) {
    for (int m = 0; m < num_reads; m++) {
        char* candidate_local_space = candidate_space + m * 256 * 32;
        char* read = read_space + m * 512;
        read[read_length] = '\0';
        int32_t num_candidates = (int32_t) candidate_local_space[255]; //The last byte of every read provides us with the number of candidates
        printf("Candidate for %s is at %lu\n", read, (uint64_t) candidate_local_space);
        printf("Read %s has %d candidates\n", read, num_candidates);
        for (int n = 0; n < num_candidates; n++) {
            char* candidate = candidate_local_space + n * 256;
            int32_t num_candidates_to_print = (int32_t) candidate[255];
            candidate[read_length] = '\0';
            printf("Read:%s:%s:%d\n", read, candidate, num_candidates_to_print);
        }
        std::cout << "Completed printing candidates ... " << std::endl;
    }
}

// Test driver for the AFU: programs the k-mers from ./solid_kmers.txt into the
// device in batches, then feeds the reads from ./stimulus.txt through
// read-correct mode in batches and prints the candidates found for each read.
//
// Returns 0 on completion and -1 when a buffer cannot be allocated, the k-mer
// file cannot be opened, or the device does not become idle.
// open_device, Start and close_device are macros defined elsewhere; afu_h is
// not declared in this function and presumably comes from open_device.
int main(int argc, char** argv) {

    std::string read_file = "./test_reads.txt";
    //std::string kmer_file_name = "./test_kmers.txt";
    std::string kmer_file_name = "./solid_kmers.txt";
    // Several of the locals below are only referenced by the disabled
    // read-profiling section further down; they are kept so that section can
    // be re-enabled without further changes.
    std::ifstream input_file(read_file.c_str());
    std::ifstream kmer_file(kmer_file_name.c_str());
    std::string read_string;
    std::string line_id;
    std::string line_misc;
    std::string quality_string("IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII00000000000000000000000000000000IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII");
    std::string kmer_string;
    int32_t read_length=112;
    int32_t kmer_length=30;
    int32_t threshold;
    uint8_t level0;
    uint8_t level1;
    uint8_t level2;
    uint8_t level3;
    int num_reads_processed = 0;
    int num_reads_per_iteration = 512;
    int num_kmers_per_iteration = 512 * 4;

    char* kmer_space = NULL;
    uint32_t** correction_space;
    char* candidate_space = NULL;
    char* read_space = NULL;
    int32_t* index_space;
    struct correction_item* correction_array;


//First program solid k-mers into the bloom-filter
    // 64 bytes are reserved per k-mer. On failure posix_memalign leaves the
    // pointer unusable, so stop instead of carrying on.
    if (posix_memalign((void**)&kmer_space, 128, num_kmers_per_iteration * 64) != 0) {
        std::cout << "ERROR!!!" << std::endl;
        return -1;
    }
    // Only kmer_length bytes of each slot are written below; zero the rest so
    // no uninitialised bytes are handed to the device or the debug dump.
    memset(kmer_space, 0, num_kmers_per_iteration * 64);

    open_device((uint64_t) 0)
    // Dump the current value of each register
    uint32_t val;
    cxl_mmio_read32(afu_h,CONTROL,&val);
    std::cout << val << std::endl;
    cxl_mmio_read32(afu_h,THRESHOLD,&val);
    std::cout << val << std::endl;
    cxl_mmio_read32(afu_h,READ_BASE,&val);
    std::cout << val << std::endl;
    cxl_mmio_read32(afu_h,WRITE_BASE,&val);
    std::cout << val << std::endl;
    cxl_mmio_read32(afu_h,READS_RECEIVED,&val);
    std::cout << val << std::endl;
    cxl_mmio_read32(afu_h,READS_WRITTEN,&val);
    std::cout << val << std::endl;
    cxl_mmio_read32(afu_h,NUM_ITEMS,&val);
    std::cout << val << std::endl;
    cxl_mmio_read32(afu_h,START,&val);
    std::cout << val << std::endl;
    cxl_mmio_read32(afu_h,RESET,&val);
    std::cout << val << std::endl;
    cxl_mmio_read32(afu_h,STATUS,&val);
    std::cout << val << std::endl;
    cxl_mmio_read32(afu_h,DDR3_BASE,&val);
    std::cout << val << std::endl;
    wait_for_idle(afu_h);
    clear_status(afu_h);
    if (!kmer_file.is_open()) {
        std::cout << "Cannot open k-mer file!!!" << std::endl;
        return -1;
    }
    int32_t num_kmers = 0;
    while (std::getline(kmer_file, kmer_string)) {
        // Copying kmer_length bytes from a shorter line (e.g. a blank line)
        // would read past the end of the string, so such lines are skipped.
        if (kmer_string.length() < (size_t) kmer_length) {
            std::cout << "Skipping k-mer line shorter than " << kmer_length << " characters" << std::endl;
            continue;
        }
        memcpy(kmer_space + (num_kmers % num_kmers_per_iteration) * 64, kmer_string.c_str(), kmer_length);
        num_kmers++;
        // A full batch has been collected: hand it to the device
        if (num_kmers % num_kmers_per_iteration == 0) {
            std::cout << "Completed collecting k-mers" << std::endl;
            set_kmer_program_mode(afu_h, num_kmers_per_iteration, kmer_length, kmer_space);
            Start;
#ifdef DEBUG
            // Hex dump of every 64-byte k-mer slot, highest byte first.
            // The file is reopened with "w" for each batch, so it holds the
            // most recent batch only.
            FILE* debug = fopen("./debug", "w");
            if (debug != NULL) {
                for (int x = 0; x < num_kmers_per_iteration; x++) {
                    char* kmerToPrint = kmer_space + x * 64;
                    for (int y = 63; y >= 0; y--) {
                        // unsigned char keeps bytes >= 0x80 at two hex digits
                        fprintf(debug,"%02x", (unsigned char) kmerToPrint[y]);
                    }
                    fprintf(debug, "\n");
                }
                fclose(debug);
            }
#endif
            //Wait for IDLE
            bool success = wait_for_idle(afu_h);
            if (!success) {
                std::cout << "Cannot complete AFU transactions. Exiting!!!" << std::endl;
                return -1;
            }
            clear_status(afu_h);
            std::cout << "Completed iteration" << std::endl;
        }
    }

    // Program whatever is left over from the last, partial batch
    if (num_kmers % num_kmers_per_iteration != 0) {
        std::cout << "The last set of k-mers going to be tested ... " << std::endl;
        int32_t num_remaining = num_kmers % num_kmers_per_iteration;
        set_kmer_program_mode(afu_h, num_remaining, kmer_length, kmer_space);
        Start;
        bool success = wait_for_idle(afu_h);
        if (!success) {
            std::cout << "Cannot complete AFU transactions. Exiting!!!" << std::endl;
            return -1;
        }
        clear_status(afu_h);
        std::cout << "Completed last iteration" << std::endl;
    }

// Disabled read-profiling path, kept for reference.
////First convert each read to a correction item
//    if (!(input_file.is_open())) {
//        std::cout << "Cannot open input file" << std::endl;
//        return 1;
//    }
//
//    //256 bytes per read = 2 cache lines = 2048 bits
//    if (posix_memalign((void**)&read_space, 128, num_reads_per_iteration * 256) != 0) {
//        std::cout << "ERROR!!! Cannot allocate aligned space for read_space" << std::endl;
//    }
//
//    //256 bytes per packed index space = 2 cache lines = 2048 bits = 32 * 2 * (32 indices)
//    if (posix_memalign((void**)&index_space, 128, num_reads_per_iteration * 256) != 0) {
//        std::cout << "ERROR!!! Cannot allocate aligned space for read_space" << std::endl;
//    }
//
//    while (std::getline(input_file, read_string)) {
//        if (!std::getline(input_file, read_string)) {
//            std::cout << "Error in file format" << std::endl;
//            return 1;
//        }
//        if (!std::getline(input_file, quality_string)) {
//            std::cout << "Error in file format" << std::endl;
//            return 1;
//        }
//        if (!std::getline(input_file, quality_string)) {
//            std::cout << "Error in file format" << std::endl;
//            return 1;
//        }
//
//        char* read_item = read_space + 256 * (num_reads_processed % num_reads_per_iteration);
//        memcpy(read_space + 256 * (num_reads_processed % num_reads_per_iteration), read_string.c_str(), read_length);
//        read_item[255] = read_length;
//        num_reads_processed++;
//
//        //Flatten out stuff - in case it is all to go back to C
//        //correction_array[num_reads_processed].read_string    = (uint32_t*) read_space[num_reads_processed*2];
//        //correction_array[num_reads_processed].quality_string = (uint32_t*) read_space[num_reads_processed*2+1];
//        //correction_array[num_reads_processed].read_length    = read_string.length();
//        //correction_array[num_reads_processed].island_indices = (int32_t*) index_space[num_reads_processed];
//        if (num_reads_processed % num_reads_per_iteration == 0) {
//            int32_t num_corrections = 0;
//            uint64_t wed = 0;
//
//            std::cout << "Reads processed, proceeding to procure island information ... " << std::endl;
//
//            //1. Profile the reads
//            set_read_profile_mode(afu_h, num_reads_per_iteration, kmer_length, index_space, read_space);
//            Start;
//
//            bool success = wait_for_idle(afu_h);
//            if (!success) {
//                std::cout << "ERROR! Read profile doesn't complete!!!" << std::endl;
//            }
//            clear_status(afu_h);
//
//            //Print the indices
//            for (int m = 0; m < num_reads_per_iteration; m++) {
//                int32_t* index_base = index_space + 32 * 2 * m;
//                std::cout << "For " << m << "-th read" << std::endl;
//                for (int n = 0; n < 32; n++) {
//                    int position = index_base[2*n];
//                    int length = index_base[2*n+1];
//                    //if ((position != -1) && (length != -1)) {
//                        std::cout << "(" << position << "," << length << ")" << std::endl;
//                    //} else {
//                      //  break;
//                    //}
//                }
//            }
//
//            ////2. Adjust solid islands
//            ////adjust_solid_islands(index_space,num_reads_per_iteration);
//
//            ////3. Set correction types for each read and collect reads to be corrected in a particular space
//            //num_corrections = set_correction_types(correction_array, num_reads_per_iteration, candidate_space, correction_space);
//
//            ////4. Correct errors
//            //set_read_correct_mode(afu_h, num_reads_per_iteration, threshold, kmer_length, level0, level1, level2, level3, candidate_space, correction_space);
//            //Start;
//
//            ////5. Post process each correction, and then combine
//            ////post_process_corrections(correction_array, num_reads_per_iteration);
//            //
//            ////TBD 9: Write out results
//
//            //for (int k = 0; k < num_corrections; k++) {
//            //    delete[] correction_space[k];
//            //    for (int m = 0; m < 32; m++) {
//            //        delete[] candidate_space[32*k+m];
//            //    }
//            //}
//
//            //for (int k = 0; k < num_reads_per_iteration; k++) {
//            //    delete[] correction_array[k].candidates;
//            //    delete[] correction_array[k].start_position;
//            //    delete[] correction_array[k].end_position;
//            //}
//        }
//    }
//
//    if (num_reads_processed % num_reads_per_iteration != 0) {
//        int32_t num_corrections = 0;
//        uint64_t wed = 0;
//
//        std::cout << "Processing last read batch for island information ... " << std::endl;
//
//        //1. Profile the reads
//        set_read_profile_mode(afu_h, num_reads_processed % num_reads_per_iteration, kmer_length, index_space, read_space);
//        Start;
//
//        bool success = wait_for_idle(afu_h);
//        if (!success) {
//            std::cout << "ERROR! Read profile doesn't complete!!!" << std::endl;
//        }
//        clear_status(afu_h);
//
//        for (int m = 0; m < num_reads_processed % num_reads_per_iteration; m++) {
//            int32_t* index_base = index_space + 32 * 2 * m;
//            std::cout << "For " << m << "-th read" << std::endl;
//            for (int n = 0; n < 32; n++) {
//                int position = index_base[2*n];
//                int length = index_base[2*n+1];
//                //if ((position != -1) && (length != -1)) {
//                    std::cout << "(" << position << "," << length << ")" << std::endl;
//                //} else {
//                 //   break;
//                //}
//            }
//        }
//    }


//    open_device((uint64_t) 0)
//    clear_status(afu_h);

    // If the stimulus file is missing the read loop below simply never runs
    std::ifstream test_file("./stimulus.txt");
    if (!test_file.is_open()) {
        std::cout << "ERROR!! Cannot open stimulus file" << std::endl;
    }

    // 512 bytes per read: the read at offset 0, its quality string at 256
    if (posix_memalign((void**)&read_space, 128, num_reads_per_iteration * 256 *2) != 0) {
        std::cout << "ERROR!!!" << std::endl;
        return -1;
    }
    memset(read_space, 0, num_reads_per_iteration * 256 * 2);

    // 32 slots of 256 bytes per read
    if (posix_memalign((void**)&candidate_space, 128, num_reads_per_iteration * 256 * 32) != 0) {
        std::cout << "ERROR!!!" << std::endl;
        return -1;
    }
    // Zeroed so that a slot the device never writes reads as "0 candidates"
    memset(candidate_space, 0, num_reads_per_iteration * 256 * 32);

    num_reads_processed = 0;
    // Fixed quality string shared by every read; never copy more than the
    // string actually holds.
    char quality_string_c[113] = {0}; //= quality_string.c_str();
    size_t quality_length = quality_string.length() < (size_t) read_length ? quality_string.length() : (size_t) read_length;
    memcpy(quality_string_c, quality_string.c_str(), quality_length);
    for (int p = 40; p < 70; p++) {
        quality_string_c[p] = 0;
    }
    while (std::getline(test_file, read_string)) {
        // Each stimulus line is "<read> <start> <end>". The %112s width keeps
        // the read inside read_string_c (113 bytes, read_length == 112), and
        // lines that do not yield all three fields are skipped rather than
        // used with uninitialised values.
        unsigned int start_position = 0, end_position = 0;
        char read_string_c[113] = {0};
        if (sscanf(read_string.c_str(), "%112s %u %u", read_string_c, &start_position, &end_position) != 3) {
            std::cout << "Skipping malformed stimulus line" << std::endl;
            continue;
        }
        read_string_c[read_length] = '\0';
        // NOTE(review): the positions are printed as characters, not numbers,
        // exactly as before -- confirm this is intended.
        std::cout << "Read : " << read_string_c << " start: " << (char) start_position << " end: " << (char) end_position << std::endl;

        char* read_item = read_space + 512 * (num_reads_processed % num_reads_per_iteration);
        memcpy(read_item, read_string_c, read_length);
        memcpy(read_item + 256, quality_string_c, read_length);

        // The last three bytes of the read's first 256 bytes carry its
        // length and the start/end positions, each narrowed to one byte
        read_item[255] = read_length;
        read_item[254] = start_position;
        read_item[253] = end_position;

        num_reads_processed++;

        // A full batch has been collected: correct it and print the results
        if (num_reads_processed % num_reads_per_iteration == 0) {
            set_read_correct_mode(afu_h, num_reads_per_iteration, 1, kmer_length, 0, 20, 60, 80, candidate_space, read_space);
            Start;
            bool success = wait_for_idle(afu_h);
            if (!success) {
                std::cout << "ERROR! Read profile doesn't complete!!!" << std::endl;
                return -1;
            }
            clear_status(afu_h);

            print_batch_candidates(candidate_space, read_space, num_reads_per_iteration, read_length);
        }
    }

    // Correct whatever is left over from the last, partial batch
    if (num_reads_processed % num_reads_per_iteration != 0) {
        std::cout << "Entering the final iteration" << std::endl;
        set_read_correct_mode(afu_h, num_reads_processed % num_reads_per_iteration, 1, kmer_length, 0, 20, 60, 80, candidate_space, read_space);
        Start;
        bool success = wait_for_idle(afu_h);
        if (!success) {
            std::cout << "ERROR! Read profile doesn't complete!!!" << std::endl;
            return -1;
        }
        clear_status(afu_h);

        print_batch_candidates(candidate_space, read_space, num_reads_processed % num_reads_per_iteration, read_length);
    }

    close_device

    std::cout << "Closing program ... " << std::endl;

    // Released only after the device has been closed. The early-exit paths
    // above leave the buffers to process teardown.
    free(candidate_space);
    free(read_space);
    free(kmer_space);
    return 0;
}