void RecalibrationHandler::ReadRecalibrationFromComments(const SamHeader &samHeader, int max_flows_protect) {
    // Read comment lines from Sam header
    // this will grab json files
    if (samHeader.HasComments()) {
        // parse the comment lines
        for (unsigned int i_co=0; i_co<samHeader.Comments.size(); i_co++) {
            // try printing for now
            //cout << samHeader.Comments[i_co] << endl;
            // might be all sorts of comments in the file
            // therefore must find the unlikely magic code in the line before trying to parse
            string magic_code = "6d5b9d29ede5f176a4711d415d769108"; // md5hash "This uniquely identifies json comments for recalibration."
            bool valid_line = false;
            std::size_t found = samHeader.Comments[i_co].find(magic_code);
            if (found !=std::string::npos)
                valid_line = true;

            if (valid_line) {
                // very likely to be a properly formatted json object coming from basecaller
                Json::Value recal_params(Json::objectValue);
                Json::Reader recal_reader;
                bool parse_success = recal_reader.parse(samHeader.Comments[i_co], recal_params);
                if (!parse_success) {
                    cout << "failed to parse comment line" << recal_reader.getFormattedErrorMessages() << endl;
                } else {
                    // you are a recalibration object waiting to happen
                    // let us parse you
                    // basic ID

                    //cout << my_members[0] << endl;
                    string my_block_key = recal_params["MasterKey"].asCString();
                    //cout << my_block_key << "\t" << recal_params[my_block_key]["modelParameters"].size() << endl;
                    recalModel.InitializeFromJSON(recal_params, my_block_key, false,max_flows_protect);  // don't spam here
                    // add a map to this entry
                    bam_header_recalibration.insert(pair<string,RecalibrationModel>(my_block_key, recalModel));
                    // parse out important information from the block key
                    // must look like <runid>.block_X%d_Y%d
                    int end_runid = my_block_key.find(".");
                    int bloc_loc = my_block_key.find("block_X")+7;
                    int y_loc = my_block_key.find("_Y");
                    // glorified assembly language
                    string runid = my_block_key.substr(0,end_runid);
                    int x_coord = atoi(my_block_key.substr(bloc_loc,y_loc-bloc_loc).c_str());
                    int y_coord = atoi(my_block_key.substr(y_loc+2, my_block_key.size()-y_loc+2).c_str());
                    //cout << runid << "\t" << x_coord << "\t" << y_coord << endl;
                    block_hash.insert(pair<string, pair<int,int > >(runid,pair<int,int>(x_coord,y_coord)));
                    is_live = true; // found at least one recalibration entry
                }
            }
        }
    }

    // okay, now, avoid spamming with possibly large number of lines
    if (is_live){
      cout << "Recalibration was detected from comment lines in bam file(s)" << endl;
      cout << bam_header_recalibration.size() << " unique blocks of recalibration info detected." << endl;
    }
}
// Example no. 2
// 0
void RecalibrationHandler::ReadRecalibrationFromComments(const SamHeader &samHeader, const map<string, int> &max_flows_by_run_id) {


  if (not samHeader.HasComments())
    return;

  unsigned int num_parsing_errors = 0;
  // Read comment lines from Sam header
  for (unsigned int i_co=0; i_co<samHeader.Comments.size(); i_co++) {

    // There might be all sorts of comments in the file
    // therefore must find the unlikely magic code in the line before trying to parse
    string magic_code = "6d5b9d29ede5f176a4711d415d769108"; // md5hash "This uniquely identifies json comments for recalibration."

    if (samHeader.Comments[i_co].find(magic_code) == std::string::npos) {
      //cout << endl << "No magic code found in comment line "<< i_co <<endl;
      //cout << samHeader.Comments.at(i_co) << endl;
      continue;
    }

    // Parse recalibration Json object
    Json::Value recal_params(Json::objectValue);
    Json::Reader recal_reader;
    if (not recal_reader.parse(samHeader.Comments[i_co], recal_params)) {
      cerr << "Failed to parse recalibration comment line " << recal_reader.getFormattedErrorMessages() << endl;
      num_parsing_errors++;
      continue;
    }

    string my_block_key = recal_params["MasterKey"].asString();

    // Assumes that the MasterKey is written in the format <run_id>.block_X<x_offset>_Y<y_offset>
    int end_runid = my_block_key.find(".");
    int x_loc     = my_block_key.find("block_X")+7;
    int y_loc     = my_block_key.find("_Y");

    // glorified assembly language
    string runid = my_block_key.substr(0,end_runid);
    int x_coord = atoi(my_block_key.substr(x_loc,y_loc-x_loc).c_str());
    int y_coord = atoi(my_block_key.substr(y_loc+2, my_block_key.size()-y_loc+2).c_str());

    // Protection against not having a flow order for a specified recalibration run id
    std::map<string, int>::const_iterator n_flows = max_flows_by_run_id.find(runid);
    if (n_flows == max_flows_by_run_id.end()) {
      cerr << "TVC ERROR: Recalibration information found for run id " << runid
    	   << " but there is no matching read group with this run id in the bam header." << endl;
      exit(EXIT_FAILURE);
    }

    //recalModel.InitializeFromJSON(recal_params, my_block_key, false, max_flows_by_run_id.at(runid));
    // void RecalibrationModel::InitializeFromJSON(Json::Value &recal_params, string &my_block_key, bool spam_enabled, int over_flow_protect) {
    // The calibration comment line contains  info about the hp threshold used during base calling, so set to zero here
    // XXX FIXME: The number of flows in the TVC group can be larger than the one specified in the calibration block.
    recalModel.InitializeModelFromJson(recal_params, n_flows->second);
    bam_header_recalibration.insert(pair<string,LinearCalibrationModel>(my_block_key, recalModel));
    block_hash.insert(pair<string, pair<int,int > >(runid,pair<int,int>(x_coord,y_coord)));
    is_live = true; // found at least one recalibration entry
  }

  // Verbose output
  if (is_live){
    cout << "Recalibration was detected from comment lines in bam file(s):" << endl;
    cout << bam_header_recalibration.size() << " unique blocks of recalibration info detected." << endl;
  }
  if (num_parsing_errors > 0) {
    cout << "Failed to parse " << num_parsing_errors << " recalibration comment lines." << endl;
  }
}