void RecalibrationHandler::ReadRecalibrationFromComments(const SamHeader &samHeader, int max_flows_protect) { // Read comment lines from Sam header // this will grab json files if (samHeader.HasComments()) { // parse the comment lines for (unsigned int i_co=0; i_co<samHeader.Comments.size(); i_co++) { // try printing for now //cout << samHeader.Comments[i_co] << endl; // might be all sorts of comments in the file // therefore must find the unlikely magic code in the line before trying to parse string magic_code = "6d5b9d29ede5f176a4711d415d769108"; // md5hash "This uniquely identifies json comments for recalibration." bool valid_line = false; std::size_t found = samHeader.Comments[i_co].find(magic_code); if (found !=std::string::npos) valid_line = true; if (valid_line) { // very likely to be a properly formatted json object coming from basecaller Json::Value recal_params(Json::objectValue); Json::Reader recal_reader; bool parse_success = recal_reader.parse(samHeader.Comments[i_co], recal_params); if (!parse_success) { cout << "failed to parse comment line" << recal_reader.getFormattedErrorMessages() << endl; } else { // you are a recalibration object waiting to happen // let us parse you // basic ID //cout << my_members[0] << endl; string my_block_key = recal_params["MasterKey"].asCString(); //cout << my_block_key << "\t" << recal_params[my_block_key]["modelParameters"].size() << endl; recalModel.InitializeFromJSON(recal_params, my_block_key, false,max_flows_protect); // don't spam here // add a map to this entry bam_header_recalibration.insert(pair<string,RecalibrationModel>(my_block_key, recalModel)); // parse out important information from the block key // must look like <runid>.block_X%d_Y%d int end_runid = my_block_key.find("."); int bloc_loc = my_block_key.find("block_X")+7; int y_loc = my_block_key.find("_Y"); // glorified assembly language string runid = my_block_key.substr(0,end_runid); int x_coord = atoi(my_block_key.substr(bloc_loc,y_loc-bloc_loc).c_str()); int y_coord = atoi(my_block_key.substr(y_loc+2, my_block_key.size()-y_loc+2).c_str()); //cout << runid << "\t" << x_coord << "\t" << y_coord << endl; block_hash.insert(pair<string, pair<int,int > >(runid,pair<int,int>(x_coord,y_coord))); is_live = true; // found at least one recalibration entry } } } } // okay, now, avoid spamming with possibly large number of lines if (is_live){ cout << "Recalibration was detected from comment lines in bam file(s)" << endl; cout << bam_header_recalibration.size() << " unique blocks of recalibration info detected." << endl; } }
void RecalibrationHandler::ReadRecalibrationFromComments(const SamHeader &samHeader, const map<string, int> &max_flows_by_run_id) { if (not samHeader.HasComments()) return; unsigned int num_parsing_errors = 0; // Read comment lines from Sam header for (unsigned int i_co=0; i_co<samHeader.Comments.size(); i_co++) { // There might be all sorts of comments in the file // therefore must find the unlikely magic code in the line before trying to parse string magic_code = "6d5b9d29ede5f176a4711d415d769108"; // md5hash "This uniquely identifies json comments for recalibration." if (samHeader.Comments[i_co].find(magic_code) == std::string::npos) { //cout << endl << "No magic code found in comment line "<< i_co <<endl; //cout << samHeader.Comments.at(i_co) << endl; continue; } // Parse recalibration Json object Json::Value recal_params(Json::objectValue); Json::Reader recal_reader; if (not recal_reader.parse(samHeader.Comments[i_co], recal_params)) { cerr << "Failed to parse recalibration comment line " << recal_reader.getFormattedErrorMessages() << endl; num_parsing_errors++; continue; } string my_block_key = recal_params["MasterKey"].asString(); // Assumes that the MasterKey is written in the format <run_id>.block_X<x_offset>_Y<y_offset> int end_runid = my_block_key.find("."); int x_loc = my_block_key.find("block_X")+7; int y_loc = my_block_key.find("_Y"); // glorified assembly language string runid = my_block_key.substr(0,end_runid); int x_coord = atoi(my_block_key.substr(x_loc,y_loc-x_loc).c_str()); int y_coord = atoi(my_block_key.substr(y_loc+2, my_block_key.size()-y_loc+2).c_str()); // Protection against not having a flow order for a specified recalibration run id std::map<string, int>::const_iterator n_flows = max_flows_by_run_id.find(runid); if (n_flows == max_flows_by_run_id.end()) { cerr << "TVC ERROR: Recalibration information found for run id " << runid << " but there is no matching read group with this run id in the bam header." << endl; exit(EXIT_FAILURE); } //recalModel.InitializeFromJSON(recal_params, my_block_key, false, max_flows_by_run_id.at(runid)); // void RecalibrationModel::InitializeFromJSON(Json::Value &recal_params, string &my_block_key, bool spam_enabled, int over_flow_protect) { // The calibration comment line contains info about the hp threshold used during base calling, so set to zero here // XXX FIXME: The number of flows in the TVC group can be larger than the one specified in the calibration block. recalModel.InitializeModelFromJson(recal_params, n_flows->second); bam_header_recalibration.insert(pair<string,LinearCalibrationModel>(my_block_key, recalModel)); block_hash.insert(pair<string, pair<int,int > >(runid,pair<int,int>(x_coord,y_coord))); is_live = true; // found at least one recalibration entry } // Verbose output if (is_live){ cout << "Recalibration was detected from comment lines in bam file(s):" << endl; cout << bam_header_recalibration.size() << " unique blocks of recalibration info detected." << endl; } if (num_parsing_errors > 0) { cout << "Failed to parse " << num_parsing_errors << " recalibration comment lines." << endl; } }