/** * A function that dump reads' correction into a given file */ void dumpReads(string orgFNm, string crrFNm) { ifstream fastq(orgFNm.c_str()); if (fastq.is_open()) { ofstream outFile(crrFNm.c_str(), ofstream::out); if (outFile.is_open()) { string line; int lnId = 1; int rIdx = -1; while (getline(fastq, line)) { if (lnId == 2) {//sequence rIdx += 1; line = readsPtr[rIdx].seq; } if (lnId == seqLn) { lnId = 0; } lnId++; outFile << line << endl; } fastq.close(); outFile.close(); cout << "Reads are corrected and saved in " << crrFNm << " file!" << endl; } else { cout << "Can't create the corrections file " << crrFNm << endl; } } else { cout << "Can't re-open the data file " << orgFNm << endl; } }
/**
 * Runs the simulation: generates the reads, streams each one to the FASTQ
 * file as it is produced, then writes the accumulated per-base data through
 * BaxH5::writeReads.
 *
 * The number of reads is t.size() * depth / readLen. Progress is reported on
 * stdout each time the read counter reaches the next tenth of that number.
 */
void SimBax::operator()()
{
    // NOTE(review): uniform_int_distribution bounds are inclusive, so this can
    // yield t.size() itself as a start position. Whether makeRead accepts a
    // start equal to the template length is not visible here - confirm.
    uniform_int_distribution<int> readStart(0, t.size());
    ofstream fastq(fastqFilename);
    resetStrings();

    const int numReads = t.size() * depth / readLen;
    int nextOut = numReads / 10; // read index at which the next progress line is printed
    int pct = 0;

    chrono::time_point<chrono::system_clock> start;
    start = chrono::system_clock::now();
    cout << "Inilisation finished, simulating reads.\n";

    for (int readNum = 0; readNum < numReads; readNum++) {
        // The length of the new read is measured as the growth of baseCall
        // across the makeRead call.
        long addedSoFar = baseCall.size();
        string fastqPhreds;
        makeRead(readLen, readStart(gen), fastqPhreds);
        const int lengthRead = baseCall.size() - addedSoFar;
        reads.push_back(lengthRead);
        writeFastq(baseCall.substr(addedSoFar, lengthRead), fastqPhreds, fastq, readNum);

        if (readNum == nextOut) {
            chrono::time_point<chrono::system_clock> current;
            current = chrono::system_clock::now();
            // Elapsed time is "now minus start"; the reverse order reported a
            // non-positive duration.
            chrono::duration<double> elapsed = current - start;
            nextOut += numReads / 10;
            pct += 10;
            cout << pct << "% done. Time ellapsed :" << elapsed.count() << " seconds\n";
        }
    }

    cout << "Wrinting data to file.\n";
    BaxH5 baxh5(baxFilename);
    baxh5.writeReads(baseCall,
                     deletionQV,
                     deletionTag,
                     insertionQV,
                     mergeQV,
                     preBaseFrame,
                     pulseIndex,
                     substitutionQV,
                     subsititutionTag,
                     qualityValue,
                     widthInFrame,
                     reads);
}
/** * A function that loads reads from file into memory * @param String orgFNm The original reads file name * @return int 0 for success & 1 for failure */ int getReads(string orgFNm) { ifstream fastq(orgFNm.c_str()); if (fastq.is_open()) { int lnId = 1; string line; while (getline(fastq, line)) { if (lnId == 2) { //len if (readsCnt == 0) { readsPtr = (struct Read *) malloc((readsCnt + 1) * sizeof (struct Read)); } else { readsPtr = (struct Read *) realloc(readsPtr, (readsCnt + 1) * sizeof (struct Read)); } if (readsPtr == 0) { printf("ERROR in allocating/reallocating readsPtr: Out of memory\n"); return 1; } readsPtr[readsCnt].len = line.length(); readsPtr[readsCnt].seq = (char *) malloc((readsPtr[readsCnt].len + 1) * sizeof (char)); if (readsPtr[readsCnt].seq == 0) { printf("ERROR in allocating a seq: Out of memory\n"); return 1; } strcpy(readsPtr[readsCnt].seq, line.c_str()); }else if(lnId == 4){ readsPtr[readsCnt].qv = (char *) malloc((readsPtr[readsCnt].len + 1) * sizeof (char)); if (readsPtr[readsCnt].qv == 0) { printf("ERROR in allocating a qv: Out of memory\n"); return 1; } strcpy(readsPtr[readsCnt].qv, line.c_str()); } if (lnId == seqLn) { readsCnt += 1; lnId = 0; } lnId++; } fastq.close(); } else { cout << "Can't open the data file " << orgFNm << endl; } return 0; }
int main(int argc, char** argv) { if(argc != 6) { std::cerr << "usage: ./fastq_trimer trim_5p_len trim_3p_len is_rc[0|1] input_file output_file" << std::endl; return 0; } int trim5_len = std::stoi(argv[1]); //3 len barcode int trim3_len = std::stoi(argv[2]); //30 len adapter int is_rc = std::stoi(argv[3]); // is reverse complement std::string infn (argv[4]); //input file name std::string outfn (argv[5]); //output file name std::cout << "trim5_len: " << trim5_len << " trim3_len: " << trim3_len << " is_rc: " << is_rc << std::endl; std::ifstream in(infn); std::ofstream out(outfn); std::string fastq(""); bool is_N(false); std::string line; int fastq_count = 0; while(!in.eof()) { //fastq 1st line std::getline(in, line); if(line == "") break; if(fastq_count == 0) fastq += line; else fastq += "\n"+line; //fastq 2st line std::getline(in, line); line = line.substr(trim5_len, line.size() - trim5_len - trim3_len); if(is_rc) { is_N = cop(line); std::reverse(line.begin(), line.end()); } fastq += "\n"+line; //fastq 3st line std::getline(in, line); fastq += "\n"+line; //fastq 4st line std::getline(in, line); line = line.substr(trim5_len, line.size() - trim5_len - trim3_len); if(is_rc) { std::reverse(line.begin(), line.end()); } fastq += "\n"+line; if(!is_N) { out << fastq; is_N = false; fastq = ""; } ++fastq_count; } std::cout << "fastq count: " << fastq_count << std::endl; in.close(); out.close(); return 0; }