// Unpacks the per-read data required by later processing steps.
//
// Resets the happy-read flag, records the start flow (0), the alignment start
// position and the strand, then reads the tags via GetUsefulTags(). Only if
// the read is still flagged as happy afterwards are the flow index and the
// alignment information built.
//
// global_context        supplies the flow order used to create the flow index
// local_contig_sequence reference sequence handed to UnpackAlignmentInfo()
// DEBUG                 debug level forwarded to GetUsefulTags()
//
// Returns the value of is_happy_read after unpacking.
bool ExtendedReadInfo::UnpackThisRead(InputStructures &global_context, const string &local_contig_sequence, int DEBUG)
{
  // Disabled upstream check, kept for reference:
  //   is_happy_read = CheckHappyRead(global_context, variant_start_pos, int DEBUG);
  is_happy_read = true;

  // Basic alignment properties of the read
  start_flow        = 0;
  start_pos         = alignment.Position;
  is_forward_strand = !alignment.IsReverseStrand();

  GetUsefulTags(DEBUG);

  // NOTE(review): the flag is re-checked here, presumably because
  // GetUsefulTags() can clear it -- its body is not visible in this chunk.
  if (!is_happy_read)
    return is_happy_read;

  CreateFlowIndex(global_context.flowOrder);
  UnpackAlignmentInfo(local_contig_sequence, alignment.Position);

  return is_happy_read;
}
// Unpacks everything the caller needs from a freshly loaded read.
//
// Does nothing if the read is already filtered. Otherwise it fills, in order:
// strand, well row/column and run id (parsed from the read name), flow order
// index, normalized measurements (ZM tag), phasing parameters (ZP tag),
// read bases, alignment details and start soft clip, start flow (ZF tag),
// read group (RG tag) and the hard-clipped prefix bases / prefix flow.
//
// rai             read to unpack; modified in place
// global_context  run-level lookups (flow orders, number of flows, keys)
//
// The read is marked filtered (warning on stderr) and unpacking stops when
//   - the run id cannot be determined or has no matching flow order,
//   - the read has zero length,
//   - the ZF start flow is zero,
//   - the start soft clip does not index into flow_index (only checked when
//     resolve_clipped_bases is set).
//
// The process is terminated with exit(1) (error on stderr) when
//   - the ZM tag is missing or longer than the number of flows of the run,
//   - the ZP tag is missing, does not hold 3 values, or a value is outside [0,1],
//   - the ZF tag is missing,
//   - the start flow is outside [0, number of flows).
//
// NOTE(review): the .at() lookups presumably throw std::out_of_range for a
// missing run id / index, assuming standard containers -- confirm.
void UnpackOnLoad(Alignment *rai, const InputStructures &global_context)
{
  // No need to waste time if the read is filtered
  if (rai->filtered)
    return;

  rai->is_reverse_strand = rai->alignment.IsReverseStrand();

  // Parse read name, run id & flow order index
  rai->runid.clear();
  if (not rai->alignment.Name.empty()) {
    rai->well_rowcol.resize(2);
    ion_readname_to_rowcol(rai->alignment.Name.c_str(), &rai->well_rowcol[0], &rai->well_rowcol[1]);
    // The run id is everything in front of the first ':'. If there is no ':',
    // find() yields npos and the whole name is taken.
    rai->runid = rai->alignment.Name.substr(0, rai->alignment.Name.find(':'));
  }

  if (rai->runid.empty()) {
    cerr << "WARNING: Unable to determine run id of read " << rai->alignment.Name << endl;
    rai->filtered = true;
    return;
  }

  std::map<string,int>::const_iterator fo_it = global_context.flow_order_index_by_run_id.find(rai->runid);
  if (fo_it == global_context.flow_order_index_by_run_id.end()) {
    cerr << "WARNING: No matching flow order found for read " << rai->alignment.Name << endl;
    rai->filtered = true;
    return;
  }
  rai->flow_order_index = fo_it->second;
  const ion::FlowOrder & flow_order = global_context.flow_order_vector.at(rai->flow_order_index);

  // Retrieve measurements from ZM tag
  vector<int16_t> quantized_measurements;
  if (not rai->alignment.GetTag("ZM", quantized_measurements)) {
    cerr << "ERROR: Normalized measurements ZM:tag is not present in read " << rai->alignment.Name << endl;
    exit(1);
  }

  // Number of flows of this read's run: looked up once and reused below.
  const int num_flows = global_context.num_flows_by_run_id.at(rai->runid);

  if (static_cast<int>(quantized_measurements.size()) > num_flows) {
    cerr << "ERROR: Normalized measurements ZM:tag length " << quantized_measurements.size()
         << " exceeds flow order length " << num_flows
         << " in read " << rai->alignment.Name << endl;
    exit(1);
  }

  // Measurements are stored quantized in units of 1/256; flows beyond the
  // end of the tag stay at zero.
  rai->measurements.assign(num_flows, 0.0);
  for (size_t flow = 0; flow < quantized_measurements.size(); ++flow)
    rai->measurements[flow] = static_cast<float>(quantized_measurements[flow]) / 256;
  rai->measurements_length = quantized_measurements.size();

  // Retrieve phasing parameters from ZP tag
  if (not rai->alignment.GetTag("ZP", rai->phase_params)) {
    cerr << "ERROR: Phasing Parameters ZP:tag is not present in read " << rai->alignment.Name << endl;
    exit(1);
  }
  if (rai->phase_params.size() != 3) {
    cerr << "ERROR: Phasing Parameters ZP:tag does not have 3 phase parameters in read " << rai->alignment.Name << endl;
    exit(1);
  }
  if (rai->phase_params[0] < 0 or rai->phase_params[0] > 1 or
      rai->phase_params[1] < 0 or rai->phase_params[1] > 1 or
      rai->phase_params[2] < 0 or rai->phase_params[2] > 1) {
    cerr << "ERROR: Phasing Parameters ZP:tag outside of [0,1] range in read " << rai->alignment.Name << endl;
    exit(1);
  }
  rai->phase_params[2] = 0.0f;   // ad-hoc corrector: zero droop

  // Populate read_bases: the query bases, reverse-complemented again for
  // reverse-strand reads (presumably restoring the as-sequenced orientation).
  rai->read_bases = rai->alignment.QueryBases;
  if (rai->is_reverse_strand)
    RevComplementInPlace(rai->read_bases);
  if (rai->read_bases.empty()) {
    cerr << "WARNING: Ignoring length zero read " << rai->alignment.Name << endl;
    rai->filtered = true;
    return;
  }

  // Unpack alignment
  rai->pretty_aln.reserve(num_flows);
  UnpackAlignmentInfo(rai);
  // The soft clip at the start of the read is the right one on the reverse strand.
  rai->start_sc = rai->is_reverse_strand ? rai->right_sc : rai->left_sc;

  // Retrieve start flow from ZF tag; fall back to a single-byte encoding of
  // the tag if it cannot be read directly into start_flow.
  rai->start_flow = 0;
  if (not rai->alignment.GetTag("ZF", rai->start_flow)) {
    uint8_t start_flow_byte = 0;
    if (not rai->alignment.GetTag("ZF", start_flow_byte)) {
      cerr << "ERROR: Start Flow ZF:tag not found in read " << rai->alignment.Name << endl;
      exit(1);
    }
    rai->start_flow = static_cast<int>(start_flow_byte);
  }
  if (rai->start_flow == 0) {
    cerr << "WARNING: Start Flow ZF:tag has zero value in read " << rai->alignment.Name << endl;
    rai->filtered = true;
    return;
  }

  // Generate flow index
  CreateFlowIndex(rai, flow_order);

  if (global_context.resolve_clipped_bases) {
    // Guard the lookup below: a soft clip that does not index into flow_index
    // (e.g. one covering the whole read) would otherwise read out of bounds.
    if (rai->start_sc < 0 or static_cast<size_t>(rai->start_sc) >= rai->flow_index.size()) {
      cerr << "WARNING: Start soft clip outside of flow index range in read " << rai->alignment.Name << endl;
      rai->filtered = true;
      return;
    }
    // Increment start flow to first aligned base
    rai->start_flow = rai->flow_index[rai->start_sc];
  }

  // Check validity of input arguments
  if (rai->start_flow < 0 or rai->start_flow >= num_flows) {
    cerr << "ERROR: Start flow outside of [0,num_flows) range in read " << rai->alignment.Name << endl;
    // Report the bound that was actually compared against.
    cerr << "Start flow: " << rai->start_flow << " Number of flows: " << num_flows << endl;
    exit(1);
  }

  // Retrieve read group name & generate prefix flow
  if (not rai->alignment.GetTag("RG", rai->read_group)) {
    cerr << "WARNING: No read group found in read " << rai->alignment.Name << endl;
    // No big problem, we'll just have to solve the prefix like it's 2013!
    rai->read_group.clear();
  }

  // Get read prefix - hard clipped start of the read: [KS][ZT][ZE]
  rai->prefix_flow = -1;
  std::map<string,string>::const_iterator key_it = global_context.key_by_read_group.find(rai->read_group);
  if (key_it != global_context.key_by_read_group.end()) {
    rai->prefix_bases = key_it->second;

    string temp_zt, temp_ze;
    if (rai->alignment.GetTag("ZT", temp_zt))
      rai->prefix_bases += temp_zt;
    if (rai->alignment.GetTag("ZE", temp_ze))
      rai->prefix_bases += temp_ze;

    if (not rai->prefix_bases.empty())
      GetPrefixFlow(rai, rai->prefix_bases, flow_order);
  }

  // Check consistency of prefix_flow and start_flow - maybe we don't have all info about hard clipped bases
  if (rai->prefix_flow >= 0) {
    // Advance from the prefix flow to the first flow matching the first read
    // base; this has to coincide with the start flow.
    int check_start_flow = rai->prefix_flow;
    while (check_start_flow < flow_order.num_flows() and
           flow_order.nuc_at(check_start_flow) != rai->read_bases.at(0))
      check_start_flow++;

    if (check_start_flow != rai->start_flow) {
      // Inconsistent: discard the prefix information
      rai->prefix_flow = -1;
      rai->prefix_bases.clear();
    }
  }
}