Пример #1
0
/**
 * Constructs the flow-data writer.
 *
 * When saveQueueSize is non-zero, allocates the pack and write queues, sizes
 * both to saveQueueSize, pre-fills the pack queue with freshly allocated
 * ChunkFlowData buffers, and caches the HDF5 path, column count and
 * save-as-ushort flag. When it is zero, only the queue size and the two
 * (NULL) queue pointers are set.
 *
 * @param saveQueueSize    number of chunk buffers to pre-allocate; 0 leaves the queues unallocated
 * @param inception_state  source of save_wells_flow and well_convert
 * @param my_image_spec    source of rows / cols
 * @param rawWells         source of the step size and the HDF5 file path
 */
WriteFlowDataClass::WriteFlowDataClass(unsigned int saveQueueSize, CommandLineOpts &inception_state, ImageSpecClass &my_image_spec, const RawWells & rawWells)
:queueSize(saveQueueSize),packQueuePtr(NULL),writeQueuePtr(NULL)
{
  // No queue slots requested: leave everything else untouched.
  if (!(queueSize > 0))
    return;

  const size_t flowDepth = inception_state.bkg_control.signal_chunks.save_wells_flow;
  const size_t spaceSize = my_image_spec.rows * my_image_spec.cols;

  packQueuePtr = new SemQueue();
  writeQueuePtr = new SemQueue();
  assert(packQueuePtr != NULL && writeQueuePtr != NULL);
  packQueuePtr->init(queueSize);
  writeQueuePtr->init(queueSize);

  stepSize = rawWells.GetStepSize();
  const unsigned int bufferSize = stepSize * stepSize * flowDepth;

  // Pre-fill the pack queue with one buffer per slot.
  for (unsigned int slot = 0; slot < queueSize; ++slot)
    packQueuePtr->enQueue(new ChunkFlowData(spaceSize, flowDepth, bufferSize));

  filePath = rawWells.GetHdf5FilePath();
  numCols = my_image_spec.cols;
  saveAsUShort = inception_state.sys_context.well_convert;
}
Пример #2
0
// Sets the global chip id from the "ChipType" entry of the wells file.
// Does nothing when the metadata cannot be opened or the key is absent.
void SetChipTypeFromWells(RawWells &rawWells)
{
  if (!rawWells.OpenMetaData())
    return;

  if (!rawWells.KeyExists("ChipType"))
    return;

  // Use the chip type stored in the wells file.
  string storedChipType;
  rawWells.GetValue("ChipType", storedChipType);
  ChipIdDecoder::SetGlobalChipId(storedChipType.c_str());
}
Пример #3
0
// Writes the region reported by wells.GetRegion() to `out`, one value per
// line in the order rowStart, height, colStart, width. Every line carries
// the same "w:" prefix and is flushed with std::endl.
void output_debug_info(std::ostream &out, RawWells &wells) {
  int rowStart, height, colStart, width;
  wells.GetRegion(rowStart,height, colStart, width);

  const int regionValues[] = { rowStart, height, colStart, width };
  const unsigned int valueCount = sizeof(regionValues) / sizeof(regionValues[0]);
  for (unsigned int idx = 0; idx < valueCount; ++idx)
    out << "w:" << regionValues[idx] << std::endl;
}
Пример #4
0
/**
 * Copies a fixed list of experiment-log parameters, plus the chip type, into
 * the wells-file metadata.
 *
 * Each parameter in the list is looked up in the experiment log resolved from
 * dirExt. Parameters that are found have trailing whitespace (newline,
 * carriage return, space, tab) stripped before being stored under the same
 * key; a value consisting only of whitespace is stored as an empty string.
 * Parameters that are not found are skipped.
 *
 * @param dirExt    passed to getExpLogPath() to locate the experiment log
 * @param rawWells  wells object receiving the key/value pairs
 * @param chipType  value stored under the "ChipType" key
 */
void GetMetaDataForWells(char *dirExt, RawWells &rawWells, const char *chipType)
{
  const char * paramsToKeep[] = {"Project","Sample","Start Time","Experiment Name","User Name","Serial Number","Oversample","Frame Time", "Num Frames", "Cycles", "Flows", "LibraryKeySequence", "ChipTemperature", "PGMTemperature", "PGMPressure","W2pH","W1pH","Cal Chip High/Low/InRange"};
  const size_t numParams = sizeof(paramsToKeep)/sizeof(paramsToKeep[0]);
  const std::string logFile = getExpLogPath(dirExt);

  for (size_t pIx = 0; pIx < numParams; pIx++)
  {
    // NOTE(review): ownership of the returned buffer is not visible here; if
    // GetExpLogParameter heap-allocates it, it is never released - confirm.
    char *paramVal = GetExpLogParameter(logFile.c_str(), paramsToKeep[pIx]);
    if (paramVal == NULL)
      continue;

    std::string value = paramVal;
    // Strip trailing whitespace. When nothing but whitespace is present,
    // find_last_not_of returns npos and npos + 1 wraps to 0, so the whole
    // string is erased instead of being left untrimmed.
    value.erase(value.find_last_not_of("\n\r \t") + 1);
    rawWells.SetValue(paramsToKeep[pIx], value);
  }
  rawWells.SetValue("ChipType", chipType);
}
Пример #5
0
// Restricts the set of wells written by rawWells to those whose mask entry
// matches MaskLive. Well indices run over 0 .. H()*W()-1 of the mask.
void SetWellsToLiveBeadsOnly(RawWells &rawWells, Mask *maskPtr)
{
  const size_t totalWells = maskPtr->H() * maskPtr->W();

  // Collect the indices of live wells only.
  vector<int> liveWells;
  liveWells.reserve(totalWells);

  size_t wellIdx = 0;
  while (wellIdx < totalWells) {
    if (maskPtr->Match(wellIdx, MaskLive))
      liveWells.push_back(wellIdx);
    ++wellIdx;
  }

  rawWells.SetSubsetToWrite(liveWells);
}
Пример #6
0
// Configures rawWells (compression, rows, cols, flows, flow order, live-well
// subset and experiment-log metadata), then opens it for writing, writes the
// ranks and info sections, and closes it again. Memory usage is logged via
// MemUsage() before and after.
//
//   rawWells        - wells object to configure and create
//   maskPtr         - mask used to select the live wells to write
//   inception_state - source of the compression setting, flow order and explog path
//   numFlows        - number of flows stored in the file
//   numRows/numCols - well grid dimensions
//   chipType        - stored in the metadata under "ChipType"
void CreateWellsFileForWriting (RawWells &rawWells, Mask *maskPtr,
                                CommandLineOpts &inception_state,
                                int numFlows,
                                int numRows, int numCols,
                                const char *chipType)
{
  // set up wells data structure
  MemUsage ("BeforeWells");
  //rawWells.SetFlowChunkSize(flowChunk);
  rawWells.SetCompression (inception_state.bkg_control.signal_chunks.wellsCompression);
  rawWells.SetRows (numRows);
  rawWells.SetCols (numCols);
  rawWells.SetFlows (numFlows);
  rawWells.SetFlowOrder (inception_state.flow_context.flowOrder); // original author's note: 6th duplicate of this code
  // Only wells flagged live in the mask are written.
  SetWellsToLiveBeadsOnly (rawWells,maskPtr);
  // any model outputs a wells file of this nature
  // The cast drops const from c_str() because GetMetaDataForWells takes a non-const char*.
  GetMetaDataForWells ((char*)(inception_state.sys_context.explog_path.c_str()),rawWells,chipType);
  
  rawWells.OpenForWrite();
  rawWells.WriteRanks(); // dummy, written for completeness
  rawWells.WriteInfo();  // metadata written, do not need to rewrite
  rawWells.Close(); // just create in this routine
  MemUsage ("AfterWells");
}
Пример #7
0
// Walks a sample of reads from a RawWells file and applies a few simple
// filters to spot problem reads. For each filter the number of reads it
// caught is accumulated in `counts`; reads that pass every filter have their
// percent-positive value appended to `ppf` and their summed fractional part
// appended to `ssq` (the two deques always grow together).
//
// Side effect: writes one line per passing read to "basecaller_ppf_ssq.txt".
void count_sample(filter_counts& counts, deque<float>& ppf, deque<float>& ssq, Mask& mask, RawWells& wells, const vector<int>& key_ionogram)
{
    well_set sampled_wells = sample_lib(mask, counts._nsamp);
    WellData well;
    unsigned int num_flows = wells.NumFlows();
    vector<float> normalized(num_flows);
    int first_flow = mixed_first_flow();
    int last_flow  = mixed_last_flow();
    wells.ResetCurrentRegionWell();

    // Temporary output used to compare against the clonal filter in the background model.
    ofstream debug_out("basecaller_ppf_ssq.txt");
    assert(debug_out);

    while(!wells.ReadNextRegionData(&well)){
        // Only wells that belong to the sample are considered.
        well_coord coord(well.y, well.x);
        if(sampled_wells.find(coord) == sampled_wells.end())
            continue;

        // Wells with non-finite signal are counted and dropped.
        if(!all_finite(well.flowValues, well.flowValues+num_flows)){
            ++counts._ninf;
            continue;
        }

        // Key-normalize every flow value.
        float normalizer = ComputeNormalizerKeyFlows(well.flowValues, &key_ionogram[0], key_ionogram.size());
        for(unsigned int flow = 0; flow < num_flows; ++flow)
            normalized[flow] = static_cast<float>(well.flowValues[flow]) / normalizer;

        // Wells with a bad key are counted and dropped.
        if(!key_is_good(normalized.begin(), key_ionogram.begin(), key_ionogram.end())){
            ++counts._nbad_key;
            continue;
        }

        // Possible super-mixed beads are counted and dropped.
        float perc_pos = percent_positive(normalized.begin()+first_flow, normalized.begin()+last_flow);
        if(perc_pos > mixed_ppf_cutoff()){
            ++counts._nsuper;
            continue;
        }

        // Survivor: record ppf and ssq.
        float sum_frac = sum_fractional_part(normalized.begin()+first_flow, normalized.begin()+last_flow);
        ppf.push_back(perc_pos);
        ssq.push_back(sum_frac);

        // Temporary output used to compare against the clonal filter in the background model.
        debug_out << setw(6) << well.y
                  << setw(6) << well.x
                  << setw(8) << setprecision(2) << fixed << perc_pos
                  << setw(8) << setprecision(2) << fixed << sum_frac
                  << setw(8) << setprecision(2) << fixed << normalizer
                  << endl;
    }
    assert(ppf.size() == ssq.size());
}
Пример #8
0
// Base-calling worker routine.
//
// `input` must point to a BaseCallerContext shared between workers. The
// routine opens its own RawWells reader, then loops: under bc.mutex it claims
// the next unprocessed region from bc.chip_subset, prints progress, and - for
// regions containing usable wells - reads the well signals, base-calls each
// well, computes quality values, applies filters/trimming, fills in the BAM
// record and finally hands the per-region read collections to the writers.
// Returns NULL once GetCurrentRegionAndIncrement reports no more regions.
//
// The void*(void*) signature and the pthread mutex usage suggest this is a
// pthread start routine - confirm against the caller.
void * BasecallerWorker(void *input)
{
    BaseCallerContext& bc = *static_cast<BaseCallerContext*>(input);

    // Each worker has its own reader; opening it is serialized on the shared mutex.
    RawWells wells ("", bc.filename_wells.c_str());
    pthread_mutex_lock(&bc.mutex);
    wells.OpenForIncrementalRead();
    pthread_mutex_unlock(&bc.mutex);

    // Per-worker scratch buffers, allocated once and reused for every read.
    vector<float> residual(bc.flow_order.num_flows(), 0);
    vector<float> scaled_residual(bc.flow_order.num_flows(), 0);
    vector<float> wells_measurements(bc.flow_order.num_flows(), 0);
    vector<float> local_noise(bc.flow_order.num_flows(), 0);
    vector<float> minus_noise_overlap(bc.flow_order.num_flows(), 0);
    vector<float> homopolymer_rank(bc.flow_order.num_flows(), 0);
    vector<float> neighborhood_noise(bc.flow_order.num_flows(), 0);
    vector<float> phasing_parameters(3);
    vector<uint16_t>  flowgram(bc.flow_order.num_flows());
    vector<int16_t>   flowgram2(bc.flow_order.num_flows());
    vector<int16_t> filtering_details(13,0);

    vector<char> abParams;
    abParams.reserve(256);

    vector<uint8_t>   quality(3*bc.flow_order.num_flows());
    vector<int>       base_to_flow (3*bc.flow_order.num_flows());             //!< Flow of in-phase incorporation of each base.

    // Both solvers are set up; bc.dephaser selects which one solves each read below.
    TreephaserSSE treephaser_sse(bc.flow_order, bc.windowSize);
    DPTreephaser  treephaser(bc.flow_order, bc.windowSize);
    treephaser.SetStateProgression(bc.diagonal_state_prog);
    treephaser.SkipRecalDuringNormalization(bc.skip_recal_during_norm);
    treephaser_sse.SkipRecalDuringNormalization(bc.skip_recal_during_norm);


    while (true) {

        //
        // Step 1. Retrieve next unprocessed region
        //

        pthread_mutex_lock(&bc.mutex);

        int current_region, begin_x, begin_y, end_x, end_y;
        // No regions left: close the reader, release the mutex and end this worker.
        if (not bc.chip_subset.GetCurrentRegionAndIncrement(current_region, begin_x, end_x, begin_y, end_y)) {
           wells.Close();
           pthread_mutex_unlock(&bc.mutex);
           return NULL;
        }

        // Count wells in this region whose class is non-negative (i.e. to be processed).
        int num_usable_wells = 0;
        for (int y = begin_y; y < end_y; ++y)
            for (int x = begin_x; x < end_x; ++x)
                if (bc.class_map[x + y * bc.chip_subset.GetChipSizeX()] >= 0)
                    num_usable_wells++;

        // Progress display: a row header at the start of each row of regions,
        // then a two-character glyph whose density reflects the usable-well count.
        if      (begin_x == 0)            printf("\n% 5d/% 5d: ", begin_y, bc.chip_subset.GetChipSizeY());
        if      (num_usable_wells ==   0) printf("  ");
        else if (num_usable_wells <  750) printf(". ");
        else if (num_usable_wells < 1500) printf("o ");
        else if (num_usable_wells < 2250) printf("# ");
        else                              printf("##");
        fflush(NULL);

        // Progress value runs from 10 to 90 as begin_y advances over the chip height.
        if (begin_x == 0)
            SaveBaseCallerProgress(10 + (80*begin_y)/bc.chip_subset.GetChipSizeY(), bc.output_directory);

        pthread_mutex_unlock(&bc.mutex);

        // Process the data
        deque<ProcessedRead> lib_reads;                // Collection of template library reads
        deque<ProcessedRead> tf_reads;                 // Collection of test fragment reads
        deque<ProcessedRead> calib_reads;              // Collection of calibration library reads
        deque<ProcessedRead> unfiltered_reads;         // Random subset of lib_reads
        deque<ProcessedRead> unfiltered_trimmed_reads; // Random subset of lib_reads

        // Empty regions are still reported to every active writer (with empty collections).
        if (num_usable_wells == 0) { // There is nothing in this region. Don't even bother reading it
            bc.lib_writer.WriteRegion(current_region, lib_reads);
            if (bc.have_calibration_panel)
                bc.calib_writer.WriteRegion(current_region, calib_reads);
            if (bc.process_tfs)
                bc.tf_writer.WriteRegion(current_region, tf_reads);
            if (!bc.unfiltered_set.empty()) {
                bc.unfiltered_writer.WriteRegion(current_region,unfiltered_reads);
                bc.unfiltered_trimmed_writer.WriteRegion(current_region,unfiltered_trimmed_reads);
            }
            continue;
        }

        // Load all flows for the wells of this region.
        wells.SetChunk(begin_y, end_y-begin_y, begin_x, end_x-begin_x, 0, bc.flow_order.num_flows());
        wells.ReadWells();

        for (int y = begin_y; y < end_y; ++y)
            for (int x = begin_x; x < end_x; ++x) {   // Loop over wells within current region

                //
                // Step 2. Retrieve additional information needed to process this read
                //

                unsigned int read_index = x + y * bc.chip_subset.GetChipSizeX();
                int read_class = bc.class_map[read_index];
                if (read_class < 0)
                    continue;
                // Class 2 marks a randomly chosen calibration read; it is handled as a library read (class 0).
                bool is_random_calibration_read = false;
                if (read_class == 2){
                  is_random_calibration_read = true;
                  read_class = 0; // Calibration reads are library beads;
                }
                bool is_random_unfiltered  = bc.unfiltered_set.count(read_index) > 0;

                if (not is_random_unfiltered and bc.only_process_unfiltered_set)
                  continue;

                bc.filters->SetValid(read_index); // Presume valid until some filter proves otherwise

                // Class 0 goes to the library collection; every other class is stored as a test fragment.
                if (read_class == 0)
                    lib_reads.push_back(ProcessedRead(bc.barcodes->NoBarcodeReadGroup()));
                else
                    tf_reads.push_back(ProcessedRead(0));
                ProcessedRead& processed_read = (read_class==0) ? lib_reads.back() : tf_reads.back();

                // Respect filter decisions from Background Model
                // NOTE(review): the mask flag names and the setter names do not line up
                // one-to-one (BadResidual -> HighPPF, BadPPF -> Polyclonal) - confirm this mapping is intended.
                if (bc.mask->Match(read_index, MaskFilteredBadResidual))
                    bc.filters->SetBkgmodelHighPPF(read_index, processed_read.filter);

                if (bc.mask->Match(read_index, MaskFilteredBadPPF))
                    bc.filters->SetBkgmodelPolyclonal(read_index, processed_read.filter);

                if (bc.mask->Match(read_index, MaskFilteredBadKey))
                    bc.filters->SetBkgmodelFailedKeypass(read_index, processed_read.filter);

                // Reads in the random unfiltered set are processed even when already filtered.
                if (!is_random_unfiltered and !bc.filters->IsValid(read_index)) // No reason to waste more time
                    continue;

                // Per-well phasing estimates: cf, ie, dr (stored in the ZP tag below).
                float cf = bc.estimator.GetWellCF(x,y);
                float ie = bc.estimator.GetWellIE(x,y);
                float dr = bc.estimator.GetWellDR(x,y);

                for (int flow = 0; flow < bc.flow_order.num_flows(); ++flow)
                    wells_measurements[flow] = wells.At(y,x,flow);

                // Sanity check. If there are NaNs in this read, print warning
                // NaN values are replaced by 0 and the affected flows are listed on stderr.
                vector<int> nanflow;
                for (int flow = 0; flow < bc.flow_order.num_flows(); ++flow) {
                    if (!isnan(wells_measurements[flow]))
                        continue;
                    wells_measurements[flow] = 0;
                    nanflow.push_back(flow);
                }
                if (nanflow.size() > 0) {
                    fprintf(stderr, "ERROR: BaseCaller read NaNs from wells file, x=%d y=%d flow=%d", x, y, nanflow[0]);
                    for (unsigned int flow=1; flow < nanflow.size(); flow++) {
                        fprintf(stderr, ",%d", nanflow[flow]);
                    }
                    fprintf(stderr, "\n");
                    fflush(stderr);
                }

                //
                // Step 3. Perform base calling and quality value calculation
                //

                BasecallerRead read;
                bool key_pass = true;
                // Any keynormalizer value other than "keynorm-new" falls through to the old routine.
                if (bc.keynormalizer == "keynorm-new") {
                  key_pass = read.SetDataAndKeyNormalizeNew(&wells_measurements[0], wells_measurements.size(), bc.keys[read_class].flows(), bc.keys[read_class].flows_length() - 1, false);
                } else { // if (bc.keynormalizer == "keynorm-old") {
                  key_pass = read.SetDataAndKeyNormalize(&wells_measurements[0], wells_measurements.size(), bc.keys[read_class].flows(), bc.keys[read_class].flows_length() - 1);
                }

                // Get rid of outliers quickly
                bc.filters->FilterHighPPFAndPolyclonal (read_index, read_class, processed_read.filter, read.raw_measurements, bc.polyclonal_filter);
                if (not key_pass)
                  bc.filters->FilterFailedKeypass (read_index, read_class, processed_read.filter, read.sequence);
                if (!is_random_unfiltered and !bc.filters->IsValid(read_index)) // No reason to waste more time
                  continue;

                // Check if this read is either from the calibration panel or from the random calibration set
                // During calibration training, any other read is marked filtered and skipped.
                if(bc.calibration_training and bc.have_calibration_panel) {
                  if (!is_random_calibration_read and !bc.calibration_barcodes->MatchesBarcodeSignal(read)) {
                	bc.filters->SetFiltered(read_index, read_class, processed_read.filter); // Set as filtered
                    continue;  // And move on along
                  }
                }

                // Equal recalibration opportunity for everybody! (except TFs!)
                // Null pointers mean "no recalibration model" for this read.
                const vector<vector<vector<float> > > * aPtr = 0;
                const vector<vector<vector<float> > > * bPtr = 0;
                if (bc.recalModel.is_enabled() && read_class == 0) { // do not recalibrate TF reads
                  aPtr = bc.recalModel.getAs(x+bc.chip_subset.GetColOffset(), y+bc.chip_subset.GetRowOffset());
                  bPtr = bc.recalModel.getBs(x+bc.chip_subset.GetColOffset(), y+bc.chip_subset.GetRowOffset());
                }

                // Execute the iterative solving-normalization routine - switch by specified algorithm
                if (bc.dephaser == "treephaser-sse") {
                  treephaser_sse.SetAsBs(aPtr, bPtr);  // Set/delete recalibration model for this read
                  treephaser_sse.SetModelParameters(cf, ie); // sse version has no hookup for droop.
                  treephaser_sse.NormalizeAndSolve(read);
                  treephaser.SetModelParameters(cf, ie); // Adapter trimming uses the cpp treephaser

                } else { // Setup cpp treephaser
                  if (bc.skip_droop)
                    treephaser.SetModelParameters(cf, ie);
                  else
                    treephaser.SetModelParameters(cf, ie, dr);
                  treephaser.SetAsBs(aPtr, bPtr); // Set/delete recalibration model for this read

                  if (bc.dephaser == "dp-treephaser") {
                    // Single parameter gain estimation
                    treephaser.NormalizeAndSolve_GainNorm(read, bc.flow_order.num_flows());
                  } else if (bc.dephaser == "treephaser-adaptive") {
                    // Adaptive normalization - resolving read from start in each iteration
                    treephaser.NormalizeAndSolve_Adaptive(read, bc.flow_order.num_flows());
                  } else { //if (bc.dephaser == "treephaser-swan") {
                    // Default corresponding to (approximately) what the sse version is doing
                	// Adaptive normalization - sliding window without resolving start
                	treephaser.NormalizeAndSolve_SWnorm(read, bc.flow_order.num_flows());
                  }

                  // Need this function to calculate inphase population for cpp version
                  treephaser.ComputeQVmetrics(read);
                }

                // If recalibration is enabled, generate adjusted sequence and normalized_measurements, and recompute QV metrics
                bool calibrate_read = (bc.recalibration.is_enabled() && read_class == 0); //do not recalibrate TF read
                if (calibrate_read) {
                	// Change base sequence for low hps
                    bc.recalibration.CalibrateRead(x+bc.chip_subset.GetColOffset(),y+bc.chip_subset.GetRowOffset(),read.sequence, read.normalized_measurements, read.prediction, read.state_inphase);
                    if (bc.dephaser == "treephaser-sse")
                      treephaser_sse.ComputeQVmetrics(read);
                    else
                      treephaser.ComputeQVmetrics(read);
                } else if (bc.dephaser == "treephaser-sse") {
                  // in case we didn't calibrate low hps, still want to have QV metrics for sse output
                  treephaser_sse.ComputeQVmetrics(read);
                }

                // Misc data management: Generate residual, scaled_residual
                // NOTE(review): no guard against read.state_inphase[flow] == 0 is visible here - confirm the solver guarantees a non-zero value.
                for (int flow = 0; flow < bc.flow_order.num_flows(); ++flow) {
                    residual[flow] = read.normalized_measurements[flow] - read.prediction[flow];
                    scaled_residual[flow] = residual[flow] / read.state_inphase[flow];
                }

                // Misc data management: Put base calls in proper string form
                processed_read.filter.n_bases = read.sequence.size();
                processed_read.filter.is_called = true;

                // Misc data management: Generate base_to_flow
                // One entry per called base; `flow` only moves forward, so entries are non-decreasing.

                base_to_flow.clear();
                base_to_flow.reserve(processed_read.filter.n_bases);
                for (int base = 0, flow = 0; base < processed_read.filter.n_bases; ++base) {
                    while (flow < bc.flow_order.num_flows() and read.sequence[base] != bc.flow_order[flow])
                        flow++;
                    base_to_flow.push_back(flow);
                }


                // Misc data management: Populate some trivial read properties

                // Read name format: <run_id>:<row>:<col>, with chip offsets applied.
                // NOTE(review): sprintf is unbounded; the 256-byte buffer is only safe if bc.run_id is short - confirm.
                char read_name[256];
                sprintf(read_name, "%s:%05d:%05d", bc.run_id.c_str(), bc.chip_subset.GetRowOffset() + y, bc.chip_subset.GetColOffset() + x);
                processed_read.bam.Name = read_name;
                processed_read.bam.SetIsMapped(false);

                phasing_parameters[0] = cf;
                phasing_parameters[1] = ie;
                phasing_parameters[2] = dr;
                processed_read.bam.AddTag("ZP", phasing_parameters);


                // Calculation of quality values
                // Predictor 1 - Treephaser residual penalty
                // Predictor 2 - Local noise/flowalign - 'noise' in the input base's measured val.  Noise is max[abs(val - round(val))] within +-1 BASES
                // Predictor 3 - Read Noise/Overlap - mean & stdev of the 0-mers & 1-mers in the read
                // Predictor 3 (new) - Beverly Events
                // Predictor 4 - Transformed homopolymer length
                // Predictor 5 - Treephaser: Penalty indicating deletion after the called base
                // Predictor 6 - Neighborhood noise - mean of 'noise' +-5 BASES around a base.  Noise is mean{abs(val - round(val))}

                int num_predictor_bases = min(bc.flow_order.num_flows(), processed_read.filter.n_bases);

                PerBaseQual::PredictorLocalNoise(local_noise, num_predictor_bases, base_to_flow, read.normalized_measurements, read.prediction);
                PerBaseQual::PredictorNeighborhoodNoise(neighborhood_noise, num_predictor_bases, base_to_flow, read.normalized_measurements, read.prediction);
                //PerBaseQual::PredictorNoiseOverlap(minus_noise_overlap, num_predictor_bases, read.normalized_measurements, read.prediction);
                // minus_noise_overlap is filled by the Beverly-events predictor ("Predictor 3 (new)" above).
                PerBaseQual::PredictorBeverlyEvents(minus_noise_overlap, num_predictor_bases, base_to_flow, scaled_residual);
                PerBaseQual::PredictorHomopolymerRank(homopolymer_rank, num_predictor_bases, read.sequence);

                quality.clear();
                bc.quality_generator.GenerateBaseQualities(processed_read.bam.Name, processed_read.filter.n_bases, bc.flow_order.num_flows(),
                        read.penalty_residual, local_noise, minus_noise_overlap, // <- predictors 1,2,3
                        homopolymer_rank, read.penalty_mismatch, neighborhood_noise, // <- predictors 4,5,6
                        base_to_flow, quality,
                        read.additive_correction,
                        read.multiplicative_correction,
                        read.state_inphase);

                //
                // Step 4a. Barcode classification of library reads
                //

                // -1 is treated as "not yet set" for n_bases_filtered.
                if (processed_read.filter.n_bases_filtered == -1)
                    processed_read.filter.n_bases_filtered = processed_read.filter.n_bases;

                processed_read.filter.n_bases_key = min(bc.keys[read_class].bases_length(), processed_read.filter.n_bases);
                processed_read.filter.n_bases_prefix = processed_read.filter.n_bases_key;

                processed_read.barcode_n_errors = 0;
                if (read_class == 0)
                {   // Library beads - first separate out calibration barcodes
                	processed_read.read_group_index = -1;
                	if (bc.have_calibration_panel){
                	  bc.calibration_barcodes->ClassifyAndTrimBarcode(read_index, processed_read, read, base_to_flow);
                	  processed_read.is_control_barcode = (processed_read.read_group_index >= 0);
                	}
                    // Not matched by the calibration panel: classify against the regular barcode set.
                    if (processed_read.read_group_index < 0)
                      bc.barcodes->ClassifyAndTrimBarcode(read_index, processed_read, read, base_to_flow);
                }

                //
                // Step 4b. Custom mod: Trim extra bases after key and barcode. Make it look like barcode trimming.
                //

                if (bc.extra_trim_left > 0)
                    processed_read.filter.n_bases_prefix = min(processed_read.filter.n_bases_prefix + bc.extra_trim_left, processed_read.filter.n_bases);


                //
                // Step 4c. Calculate/save read metrics and apply filters
                //

                bc.filters->FilterZeroBases     (read_index, read_class, processed_read.filter);
                bc.filters->FilterShortRead     (read_index, read_class, processed_read.filter);
                bc.filters->FilterFailedKeypass (read_index, read_class, processed_read.filter, read.sequence);
                bc.filters->FilterHighResidual  (read_index, read_class, processed_read.filter, residual);
                bc.filters->FilterBeverly       (read_index, read_class, processed_read.filter, scaled_residual, base_to_flow);
                bc.filters->FilterQuality       (read_index, read_class, processed_read.filter, quality);
                bc.filters->TrimAdapter         (read_index, read_class, processed_read, scaled_residual, base_to_flow, treephaser, read);
                bc.filters->TrimQuality         (read_index, read_class, processed_read.filter, quality);
                bc.filters->TrimAvalanche       (read_index, read_class, processed_read.filter, quality);

                //! New mechanism for dumping potentially useful metrics.
                // The metric saver is shared, so all saves happen under bc.mutex.
                if (bc.metric_saver->save_anything() and (is_random_unfiltered or !bc.metric_saver->save_subset_only())) {
                    pthread_mutex_lock(&bc.mutex);

                    bc.metric_saver->SaveRawMeasurements          (y,x,read.raw_measurements);
                    bc.metric_saver->SaveAdditiveCorrection       (y,x,read.additive_correction);
                    bc.metric_saver->SaveMultiplicativeCorrection (y,x,read.multiplicative_correction);
                    bc.metric_saver->SaveNormalizedMeasurements   (y,x,read.normalized_measurements);
                    bc.metric_saver->SavePrediction               (y,x,read.prediction);
                    bc.metric_saver->SaveStateInphase             (y,x,read.state_inphase);
                    bc.metric_saver->SaveStateTotal               (y,x,read.state_total);
                    bc.metric_saver->SavePenaltyResidual          (y,x,read.penalty_residual);
                    bc.metric_saver->SavePenaltyMismatch          (y,x,read.penalty_mismatch);
                    bc.metric_saver->SaveLocalNoise               (y,x,local_noise);
                    bc.metric_saver->SaveNoiseOverlap             (y,x,minus_noise_overlap);
                    bc.metric_saver->SaveHomopolymerRank          (y,x,homopolymer_rank);
                    bc.metric_saver->SaveNeighborhoodNoise        (y,x,neighborhood_noise);

                    pthread_mutex_unlock(&bc.mutex);
                }


                //
                // Step 4d. Add flow signal information to ZM tag in BAM record.
                //

                // Emit flows up to 16 past the flow of the last retained base (capped at num_flows);
                // when no bases are retained, emit the first 16 flows.
                flowgram2.clear();
                int max_flow = min(bc.flow_order.num_flows(),16);
                if (processed_read.filter.n_bases_filtered > 0)
                    max_flow = min(bc.flow_order.num_flows(), base_to_flow[processed_read.filter.n_bases_filtered-1] + 16);

                // Values are scaled by 128, clamped to +-16383, truncated to int16 and doubled before storing.
                // out_of_boud_flows only feeds the disabled warning below.
                vector<int> out_of_boud_flows;
                for (int flow = 0; flow < max_flow; ++flow){
                    float temp_flowgram = 128*read.normalized_measurements[flow];
                    if (temp_flowgram < -16383.0f or temp_flowgram > 16383.0f) {
                        out_of_boud_flows.push_back(flow);
                        temp_flowgram = min(max(-16383.0f,temp_flowgram), 16383.0f);
                    }
                    //flowgram2.push_back(2*(int16_t)(128*read.normalized_measurements[flow]));
                    flowgram2.push_back(2*(int16_t)temp_flowgram);
                }
                // Do not spam stderr
                /*if (out_of_boud_flows.size() > 0) {
                  cerr << "BaseCaller WARNING: Normalized signal out of bounds in well y="
                       << y << ", x=" << x << ", in flows ";
                  for (unsigned int flow = 0; flow < out_of_boud_flows.size()-1; ++flow)
                    cerr << out_of_boud_flows.at(flow) << ',';
                  cerr << out_of_boud_flows.at(out_of_boud_flows.size()-1) << endl;
                } */
                processed_read.bam.AddTag("ZM", flowgram2);
                //flowgram2.push_back(1*(int16_t)(256*read.normalized_measurements[flow]));
                //flowgram2.push_back(2*(int16_t)(128*read.normalized_measurements[flow]));
                //flowgram2.push_back(4*(int16_t)(64*read.normalized_measurements[flow]));
                //flowgram2.push_back(8*(int16_t)(32*read.normalized_measurements[flow]));

                //
                // Step 4e. Populate FZ tag in BAM record.
                //

                // The signal written to FZ depends on bc.flow_signals_type; for any value not
                // listed below no FZ tag is added here.
                flowgram.clear();
                if (bc.flow_signals_type == "wells") {
                    for (int flow = 0; flow < bc.flow_order.num_flows(); ++flow)
                        flowgram.push_back(max(0,(int)(100.0*wells_measurements[flow]+0.5)));
                    processed_read.bam.AddTag("FZ", flowgram); // Will be phased out soon

                } else if (bc.flow_signals_type == "key-normalized") {
                    for (int flow = 0; flow < bc.flow_order.num_flows(); ++flow)
                        flowgram.push_back(max(0,(int)(100.0*read.raw_measurements[flow]+0.5)));
                    processed_read.bam.AddTag("FZ", flowgram); // Will be phased out soon

                } else if (bc.flow_signals_type == "adaptive-normalized") {
                    for (int flow = 0; flow < bc.flow_order.num_flows(); ++flow)
                        flowgram.push_back(max(0,(int)(100.0*read.normalized_measurements[flow]+0.5)));
                    processed_read.bam.AddTag("FZ", flowgram); // Will be phased out soon

                } else if (bc.flow_signals_type == "residual") {
                    for (int flow = 0; flow < bc.flow_order.num_flows(); ++flow)
                        flowgram.push_back(max(0,(int)(1000 + 100*residual[flow])));
                    processed_read.bam.AddTag("FZ", flowgram); // Will be phased out soon

                } else if (bc.flow_signals_type == "scaled-residual") { // This setting is a necessary part of calibration training
                    for (int flow = 0; flow < bc.flow_order.num_flows(); ++flow) {
                        //between 0 and 98
                        float adjustment = min(0.49f, max(-0.49f, scaled_residual[flow]));
                        flowgram.push_back(max(0,(int)(49.5 + 100*adjustment)));
                    }
                    processed_read.bam.AddTag("FZ", flowgram);
                }

                //
                // Step 5. Pass basecalled reads to appropriate writers
                //

                // Create BAM entries
                // Only bases in [n_bases_prefix, n_bases_filtered) are emitted; qualities are offset by 33.
                if (processed_read.filter.n_bases > 0) {
                    processed_read.bam.QueryBases.reserve(processed_read.filter.n_bases);
                    processed_read.bam.Qualities.reserve(processed_read.filter.n_bases);
                    for (int base = processed_read.filter.n_bases_prefix; base < processed_read.filter.n_bases_filtered; ++base) {
                        processed_read.bam.QueryBases.push_back(read.sequence[base]);
                        processed_read.bam.Qualities.push_back(quality[base] + 33);
                    }
                    // NOTE(review): base_to_flow holds n_bases entries, while n_bases_prefix is
                    // capped at n_bases (not n_bases-1) above; if they are equal this index is one
                    // past the last pushed element - confirm this cannot happen.
                    processed_read.bam.AddTag("ZF","i", base_to_flow[processed_read.filter.n_bases_prefix]);
                } else
                    processed_read.bam.AddTag("ZF","i", 0);

                // Randomly selected library beads - excluding calibration reads
                if (is_random_unfiltered and (not processed_read.is_control_barcode)) {
                    // Two copies are kept: one as-is (trimmed) and one that is extended below (untrimmed).
                    unfiltered_trimmed_reads.push_back(processed_read);
                    unfiltered_reads.push_back(processed_read);

                    ProcessedRead& untrimmed_read = unfiltered_reads.back();

                    processed_read.filter.GenerateZDVector(filtering_details);
                    untrimmed_read.bam.AddTag("ZD", filtering_details);

                    // Append the bases beyond the trimmed end to the untrimmed copy.
                    if (processed_read.filter.n_bases > 0) {
                        untrimmed_read.bam.QueryBases.reserve(processed_read.filter.n_bases);
                        untrimmed_read.bam.Qualities.reserve(processed_read.filter.n_bases);
                        for (int base = max(processed_read.filter.n_bases_filtered,processed_read.filter.n_bases_prefix); base < processed_read.filter.n_bases; ++base) {
                            untrimmed_read.bam.QueryBases.push_back(read.sequence[base]);
                            untrimmed_read.bam.Qualities.push_back(quality[base] + 33);
                        }
                    }

                    // Temporary workaround: provide fake FZ tag for unfiltered.trimmed and unfiltered.untrimmed sets.
                    if (bc.flow_signals_type == "none") {
                        flowgram.assign(1,0);
                        unfiltered_reads.back().bam.AddTag("FZ", flowgram);
                        unfiltered_trimmed_reads.back().bam.AddTag("FZ", flowgram);
                    }


                    // If this read was supposed to have "early filtering", make sure we emulate that here
                    if (processed_read.filter.n_bases_after_bkgmodel_bad_key >= 0 or
                            processed_read.filter.n_bases_after_bkgmodel_high_ppf >= 0 or
                            processed_read.filter.n_bases_after_bkgmodel_polyclonal >= 0 or
                            processed_read.filter.n_bases_after_high_ppf >= 0 or
                            processed_read.filter.n_bases_after_polyclonal >= 0)
                        processed_read.filter.n_bases = -1;
                }

                // Move read from lib_reads stack to calib_reads if necessary
                // This invalidates the processed_read reference and needs to be at the very end
                if (processed_read.is_control_barcode) {
                  calib_reads.push_back(processed_read);
                  lib_reads.pop_back();
                }
            }

        // Hand this region's collections to every active writer.
        bc.lib_writer.WriteRegion(current_region, lib_reads);
        if (bc.have_calibration_panel)
            bc.calib_writer.WriteRegion(current_region, calib_reads);
        if (bc.process_tfs)
            bc.tf_writer.WriteRegion(current_region, tf_reads);
        if (!bc.unfiltered_set.empty()) {
            bc.unfiltered_writer.WriteRegion(current_region,unfiltered_reads);
            bc.unfiltered_trimmed_writer.WriteRegion(current_region,unfiltered_trimmed_reads);
        }
    }
}
Пример #9
0
/**
 * BaseCaller entry point.
 *
 * Runs the pipeline in the five steps marked below: option parsing and module
 * initialization, filter training and phase estimation, opening of the BAM
 * writers, threaded basecalling, and finally closing files and saving
 * statistics (BaseCaller.json, datasets json files, filter mask).
 *
 * @param argc  Number of command line arguments.
 * @param argv  Command line arguments, handed to OptArgs for parsing.
 * @return EXIT_SUCCESS when the run completes; several failure paths
 *         terminate the process directly through exit(EXIT_FAILURE).
 */
int main (int argc, const char *argv[])
{
    BaseCallerSalute();

    time_t analysis_start_time;
    time(&analysis_start_time);

    Json::Value basecaller_json(Json::objectValue);
    DumpStartingStateOfProgram (argc,argv,analysis_start_time, basecaller_json["BaseCaller"]);

    //
    // Step 1. Process Command Line Options & Initialize Modules
    //

    BaseCallerParameters bc_params;
    OptArgs opts, null_opts;
    opts.ParseCmdLine(argc, argv);

    if (opts.GetFirstBoolean('h', "help", false) or argc == 1)
        bc_params.PrintHelp();
    if (opts.GetFirstBoolean('v', "version", false)) {
        fprintf (stdout, "%s", IonVersion::GetFullVersion ("BaseCaller").c_str());
        exit (EXIT_SUCCESS);
    }

    // Command line processing *** Main directories and file locations first
    bc_params.InitializeFilesFromOptArgs(opts);
    bc_params.InitContextVarsFromOptArgs(opts);

    // Command line processing *** Options that have default values retrieved from wells or mask files
    RawWells wells ("", bc_params.GetFiles().filename_wells.c_str());
    if (!wells.OpenMetaData()) {
        fprintf (stderr, "Failed to retrieve metadata from %s\n", bc_params.GetFiles().filename_wells.c_str());
        exit (EXIT_FAILURE);
    }
    Mask mask (1, 1);
    if (mask.SetMask (bc_params.GetFiles().filename_mask.c_str()))
        exit (EXIT_FAILURE);

    // Chip type stays "unknown" unless the wells metadata provides one
    string chip_type = "unknown";
    if (wells.KeyExists("ChipType"))
        wells.GetValue("ChipType", chip_type);

    // Command line processing *** Various general option and opts to classify and sample wells
    BaseCallerContext bc;
    bc.mask = &mask;
    bc.SetKeyAndFlowOrder(opts, wells.FlowOrder(), wells.NumFlows());
    bc.chip_subset.InitializeChipSubsetFromOptArgs(opts, mask.W(), mask.H());

    // Sampling options may reset command line arguments & change context
    bc_params.InitializeSamplingFromOptArgs(opts, bc.chip_subset.NumWells());
    bc_params.SetBaseCallerContextVars(bc);
    ClassifyAndSampleWells(bc, bc_params.GetSamplingOpts());


    // *** Setup for different datasets
    BarcodeDatasets datasets_calibration(bc.run_id, bc_params.GetFiles().calibration_panel_file);
    datasets_calibration.SetIonControl(bc.run_id);
    datasets_calibration.GenerateFilenames("IonControl","basecaller_bam",".basecaller.bam",bc_params.GetFiles().output_directory);

    BarcodeDatasets datasets(bc.run_id, bc_params.GetFiles().lib_datasets_file);
    // Check if any of the template barcodes is equal to a control barcode
    if (datasets_calibration.DatasetInUse())
        datasets.RemoveControlBarcodes(datasets_calibration.json());
    datasets.GenerateFilenames("Library","basecaller_bam",".basecaller.bam",bc_params.GetFiles().output_directory);

    BarcodeDatasets datasets_tf(bc.run_id);
    datasets_tf.SetTF(bc.process_tfs);
    datasets_tf.GenerateFilenames("TF","basecaller_bam",".basecaller.bam",bc_params.GetFiles().output_directory);

    // The unfiltered sets start out as copies of the library datasets
    BarcodeDatasets datasets_unfiltered_untrimmed(datasets);
    BarcodeDatasets datasets_unfiltered_trimmed(datasets);


    // *** Initialize remaining modules of BaseCallerContext
    vector<string> bam_comments;
    BaseCallerFilters filters(opts, bam_comments, bc.run_id, bc.flow_order, bc.keys, mask);
    bc.filters = &filters;

    BaseCallerMetricSaver metric_saver(opts, bc.chip_subset.GetChipSizeX(), bc.chip_subset.GetChipSizeY(), bc.flow_order.num_flows(),
                                bc.chip_subset.GetRegionSizeX(), bc.chip_subset.GetRegionSizeY(), bc_params.GetFiles().output_directory);
    bc.metric_saver = &metric_saver;

    // Calibration modules
    bc.recalibration.Initialize(opts, bc.flow_order);
    bc.recalModel.Initialize(opts, bam_comments, bc.run_id, bc.chip_subset);
    // initialize the per base quality score generator - dependent on calibration
    bc.quality_generator.Init(opts, chip_type, bc_params.GetFiles().input_directory, bc_params.GetFiles().output_directory, bc.recalibration.is_enabled());

    // Phase estimator
    bc.estimator.InitializeFromOptArgs(opts, bc.chip_subset, bc.keynormalizer);
    // Barcode classification
    BarcodeClassifier barcodes(opts, datasets, bc.flow_order, bc.keys, bc_params.GetFiles().output_directory,
                               bc.chip_subset.GetChipSizeX(), bc.chip_subset.GetChipSizeY());
    bc.barcodes = &barcodes;
    // Make sure calibration barcodes are initialized with default parameters
    BarcodeClassifier calibration_barcodes(null_opts, datasets_calibration, bc.flow_order, bc.keys,
                          bc_params.GetFiles().output_directory, bc.chip_subset.GetChipSizeX(), bc.chip_subset.GetChipSizeY());
    bc.calibration_barcodes = &calibration_barcodes;

    // Command line parsing officially over. Detect unknown options.
    opts.CheckNoLeftovers();

    // Save some run info into our handy json file
    bc_params.SaveParamsToJson(basecaller_json, bc, chip_type);
    SaveBaseCallerProgress(0, bc_params.GetFiles().output_directory);

    MemUsage("RawWellsBasecalling");


    //
    // Step 2. Filter training and phase estimation
    //

    // Find distribution of clonal reads for use in read filtering:
    filters.TrainClonalFilter(bc_params.GetFiles().output_directory, wells, mask, bc.polyclonal_filter);
    MemUsage("ClonalPopulation");
    ReportState(analysis_start_time,"Polyclonal Filter Training Complete");

    // Library phasing parameter estimation (skipped when estimates are already available)
    MemUsage("BeforePhaseEstimation");
    if (not bc.estimator.HaveEstimates()) {
        wells.OpenForIncrementalRead();
        bc.estimator.DoPhaseEstimation(&wells, &mask, bc.flow_order, bc.keys, (bc_params.NumThreads() == 1));
        wells.Close();
    }
    bc.estimator.ExportResultsToJson(basecaller_json["Phasing"]);
    bc.estimator.ExportTrainSubsetToJson(basecaller_json["TrainSubset"]);

    SaveJson(basecaller_json, bc_params.GetFiles().filename_json);
    SaveBaseCallerProgress(10, bc_params.GetFiles().output_directory);  // Phase estimation assumed to be 10% of the work

    // Initialize Barcode Classifier(s) - dependent on phase estimates
    bc.barcodes->BuildPredictedSignals(bc.estimator.GetAverageCF(), bc.estimator.GetAverageIE(), bc.estimator.GetAverageDR());
    bc.calibration_barcodes->BuildPredictedSignals(bc.estimator.GetAverageCF(), bc.estimator.GetAverageIE(), bc.estimator.GetAverageDR());

    MemUsage("AfterPhaseEstimation");
    ReportState(analysis_start_time,"Phase Parameter Estimation Complete");
    MemUsage("BeforeBasecalling");


    //
    // Step 3. Open wells and output BAM files & initialize writers
    //

    // Library data set writer - always
    bc.lib_writer.Open(bc_params.GetFiles().output_directory, datasets, 0, bc.chip_subset.NumRegions(),
                 bc.flow_order, bc.keys[0].bases(), filters.GetLibBeadAdapters(),
                 bc_params.NumBamWriterThreads(), basecaller_json, bam_comments);

    // Calibration reads data set writer - if applicable
    if (bc.have_calibration_panel)
        bc.calib_writer.Open(bc_params.GetFiles().output_directory, datasets_calibration, 0, bc.chip_subset.NumRegions(),
                     bc.flow_order, bc.keys[0].bases(), filters.GetLibBeadAdapters(),
                     bc_params.NumBamWriterThreads(), basecaller_json, bam_comments);

    // Test fragments data set writer - if applicable
    if (bc.process_tfs)
        bc.tf_writer.Open(bc_params.GetFiles().output_directory, datasets_tf, 1, bc.chip_subset.NumRegions(),
                  bc.flow_order, bc.keys[1].bases(), filters.GetTFBeadAdapters(),
                  bc_params.NumBamWriterThreads(), basecaller_json, bam_comments);

    // Unfiltered / unfiltered untrimmed data set writers - if applicable
    if (!bc.unfiltered_set.empty()) {
        bc.unfiltered_writer.Open(bc_params.GetFiles().unfiltered_untrimmed_directory, datasets_unfiltered_untrimmed, -1,
                      bc.chip_subset.NumRegions(), bc.flow_order, bc.keys[0].bases(), filters.GetLibBeadAdapters(),
                      bc_params.NumBamWriterThreads(), basecaller_json, bam_comments);

        bc.unfiltered_trimmed_writer.Open(bc_params.GetFiles().unfiltered_trimmed_directory, datasets_unfiltered_trimmed, -1,
                              bc.chip_subset.NumRegions(), bc.flow_order, bc.keys[0].bases(), filters.GetLibBeadAdapters(),
                              bc_params.NumBamWriterThreads(), basecaller_json, bam_comments);
    }

    //
    // Step 4. Execute threaded basecalling
    //

    time_t basecall_start_time;
    time(&basecall_start_time);

    pthread_mutex_init(&bc.mutex, NULL);

    // Thread handles live in a std::vector: a runtime-sized C array is a
    // compiler extension, not standard C++. The count is read once so the
    // create and join loops agree, and clamped so a non-positive value yields
    // an empty vector.
    const int num_workers = bc_params.NumThreads();
    std::vector<pthread_t> worker_id(num_workers > 0 ? num_workers : 0);
    for (int worker = 0; worker < num_workers; worker++)
        if (pthread_create(&worker_id[worker], NULL, BasecallerWorker, &bc)) {
            printf("*Error* - problem starting thread\n");
            exit (EXIT_FAILURE);
        }

    for (int worker = 0; worker < num_workers; worker++)
        pthread_join(worker_id[worker], NULL);

    pthread_mutex_destroy(&bc.mutex);

    time_t basecall_end_time;
    time(&basecall_end_time);


    //
    // Step 5. Close files and print out some statistics
    //

    printf("\n\nBASECALLING: called %d of %u wells in %1.0lf seconds with %d threads\n\n",
           filters.NumWellsCalled(), bc.chip_subset.NumWells(),
           difftime(basecall_end_time,basecall_start_time), bc_params.NumThreads());

    bc.lib_writer.Close(datasets, "Library");
    if (bc.have_calibration_panel)
        bc.calib_writer.Close(datasets_calibration, "IonControl");
    if (bc.process_tfs)
        bc.tf_writer.Close(datasets_tf, "Test Fragments");

    filters.TransferFilteringResultsToMask(mask);

    if (!bc.unfiltered_set.empty()) {

        // Must happen after filters transferred to mask
        bc.WriteUnfilteredFilterStatus(bc_params.GetFiles());

        bc.unfiltered_writer.Close(datasets_unfiltered_untrimmed);
        bc.unfiltered_trimmed_writer.Close(datasets_unfiltered_trimmed);

        datasets_unfiltered_untrimmed.SaveJson(bc_params.GetFiles().unfiltered_untrimmed_directory+"/datasets_basecaller.json");
        datasets_unfiltered_trimmed.SaveJson(bc_params.GetFiles().unfiltered_trimmed_directory+"/datasets_basecaller.json");
    }

    metric_saver.Close();
    barcodes.Close(datasets);
    calibration_barcodes.Close(datasets_calibration);
    // Fold the calibration panel datasets into the library datasets json before saving it
    if (bc.have_calibration_panel) {
        datasets.json()["IonControl"]["datasets"] = datasets_calibration.json()["datasets"];
        datasets.json()["IonControl"]["read_groups"] = datasets_calibration.read_groups();
    }
    datasets.SaveJson(bc_params.GetFiles().output_directory+"/datasets_basecaller.json");
    if (bc.process_tfs)
        datasets_tf.SaveJson(bc_params.GetFiles().output_directory+"/datasets_tf.json");

    // Generate BaseCaller.json

    bc.lib_writer.SaveFilteringStats(basecaller_json, "lib", true);
    if (bc.have_calibration_panel)
        bc.calib_writer.SaveFilteringStats(basecaller_json, "control", false);
    if (bc.process_tfs)
        bc.tf_writer.SaveFilteringStats(basecaller_json, "tf", false);

    time_t analysis_end_time;
    time(&analysis_end_time);

    basecaller_json["BaseCaller"]["end_time"] = get_time_iso_string(analysis_end_time);
    basecaller_json["BaseCaller"]["total_duration"] = (int)difftime(analysis_end_time,analysis_start_time);
    basecaller_json["BaseCaller"]["basecalling_duration"] = (int)difftime(basecall_end_time,basecall_start_time);

    // Only the first 50 bins of the library writer's QV histogram are exported
    basecaller_json["Filtering"]["qv_histogram"] = Json::arrayValue;
    for (int qv = 0; qv < 50; ++qv)
        basecaller_json["Filtering"]["qv_histogram"][qv] = (Json::UInt64)bc.lib_writer.qv_histogram()[qv];

    SaveJson(basecaller_json, bc_params.GetFiles().filename_json);
    SaveBaseCallerProgress(100, bc_params.GetFiles().output_directory);

    mask.WriteRaw (bc_params.GetFiles().filename_filter_mask.c_str());
    mask.validateMask();

    MemUsage("AfterBasecalling");
    ReportState(analysis_start_time,"Basecalling Complete");

    return EXIT_SUCCESS;
}
Пример #10
0
/**
 * BaseCallerLite entry point.
 *
 * Reads bfmask.bin and 1.wells from the input directory, basecalls every
 * region with a pool of worker threads and writes the library reads to
 * rawlib.sff in the output directory.
 *
 * Options read from the command line: --cf, --ie, --input-dir, --output-dir
 * and --singlecorecafie (forces a single worker thread).
 *
 * @param argc  Number of command line arguments; with no arguments only the
 *              usage text is printed.
 * @param argv  Command line arguments, handed to OptArgs for parsing.
 * @return 0 on success, 1 when only the usage text was printed. Failure to
 *         load the mask or to start a worker thread terminates the process
 *         through exit(EXIT_FAILURE).
 */
int main (int argc, const char *argv[])
{

  if (argc == 1) {
    printf ("BaseCallerLite - Bare bone basecaller\n");
    printf ("\n");
    printf ("Usage:\n");
    printf ("BaseCallerLite [options]\n");
    printf ("\tOptions:\n");
    printf ("\t\tComing soon\n");
    printf ("\n");
    return 1;
  }

  string libKey = "TCAG";
  string inputDirectory = ".";
  string outputDirectory = ".";
  bool singleCoreCafie = false;

  BaseCallerLite basecaller;
  basecaller.regionXSize = 50;
  basecaller.regionYSize = 50;
  basecaller.runId = "BCLTE";
  basecaller.CF = 0.0;
  basecaller.IE = 0.0;
  basecaller.numWellsCalled = 0;
  basecaller.nextRegionX = 0;
  basecaller.nextRegionY = 0;


  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  opts.GetOption(basecaller.CF, "0.0", '-',  "cf");
  opts.GetOption(basecaller.IE, "0.0", '-',  "ie");
  opts.GetOption(inputDirectory, ".", '-',  "input-dir");
  opts.GetOption(outputDirectory, ".", '-',  "output-dir");
  opts.GetOption(singleCoreCafie, "false", '-',  "singlecorecafie");

  // Two workers per core unless the single-core switch was given
  int numWorkers = 2*numCores();
  if (singleCoreCafie)
    numWorkers = 1;


  Mask mask (1, 1);
  if (mask.SetMask ((inputDirectory + "/bfmask.bin").c_str()))
    exit (EXIT_FAILURE);
  RawWells wells (inputDirectory.c_str(),"1.wells");
  //SetWellsToLiveBeadsOnly(wells,&mask);
  wells.OpenForIncrementalRead();

  basecaller.maskPtr = &mask;
  basecaller.wellsPtr = &wells;
  basecaller.rows = mask.H();
  basecaller.cols = mask.W();
  basecaller.flowOrder.SetFlowOrder(wells.FlowOrder(), wells.NumFlows());
  basecaller.numFlows = wells.NumFlows();


  // Number of regions per axis, rounded up so partial edge regions are covered
  basecaller.numRegionsX = (basecaller.cols +  basecaller.regionXSize - 1) / basecaller.regionXSize;
  basecaller.numRegionsY = (basecaller.rows +  basecaller.regionYSize - 1) / basecaller.regionYSize;
  basecaller.numRegions = basecaller.numRegionsX * basecaller.numRegionsY;

  basecaller.libKeyFlows.assign(basecaller.numFlows,0);
  basecaller.libNumKeyFlows = basecaller.flowOrder.BasesToFlows(libKey, &basecaller.libKeyFlows[0], basecaller.numFlows);

  basecaller.libSFF.Open(outputDirectory+"/rawlib.sff", basecaller.numRegions,
      basecaller.flowOrder, libKey);


  time_t startBasecall;
  time(&startBasecall);

  pthread_mutex_init(&basecaller.wellsAccessMutex, NULL);

  // Thread handles live in a std::vector: a runtime-sized C array is a
  // compiler extension, not standard C++. A non-positive worker count yields
  // an empty vector.
  std::vector<pthread_t> worker_id(numWorkers > 0 ? numWorkers : 0);
  for (int iWorker = 0; iWorker < numWorkers; iWorker++)
    if (pthread_create(&worker_id[iWorker], NULL, BasecallerWorkerWrapper, &basecaller)) {
      printf("*Error* - problem starting thread\n");
      // Terminate with exit() rather than returning: a return from main would
      // destroy basecaller, wells and mask while workers that were already
      // started still hold pointers to them.
      exit (EXIT_FAILURE);
    }

  for (int iWorker = 0; iWorker < numWorkers; iWorker++)
    pthread_join(worker_id[iWorker], NULL);

  pthread_mutex_destroy(&basecaller.wellsAccessMutex);

  time_t endBasecall;
  time(&endBasecall);

  basecaller.libSFF.Close();

  printf("\nBASECALLING: called %d of %d wells in %1.1f seconds with %d threads\n",
      basecaller.numWellsCalled, basecaller.rows*basecaller.cols, difftime(endBasecall,startBasecall), numWorkers);
  printf("Generated library SFF with %d reads\n", basecaller.libSFF.num_reads());

  return 0;
}
Пример #11
0
void BaseCallerLite::BasecallerWorker()
{

  while (true) {

    deque<int> wellX;
    deque<int> wellY;
    deque<vector<float> > wellMeasurements;

    pthread_mutex_lock(&wellsAccessMutex);

    if (nextRegionY >= numRegionsY) {
      pthread_mutex_unlock(&wellsAccessMutex);
      return;
    }

    int currentRegionX = nextRegionX;
    int currentRegionY = nextRegionY;
    int currentRegion = currentRegionX + numRegionsX * currentRegionY;


    int beginY = currentRegionY * regionYSize;
    int beginX = currentRegionX * regionXSize;
    int endY = min((currentRegionY+1) * regionYSize,rows);
    int endX = min((currentRegionX+1) * regionXSize,cols);
    wellsPtr->SetChunk(beginY, endY-beginY, beginX, endX-beginX, 0, numFlows);
    wellsPtr->ReadWells();
    for (int y = beginY; y < endY; y++) {
      for (int x = beginX; x < endX; x++) {
        if (!maskPtr->Match(x, y, MaskLib))
          continue;

        wellX.push_back(x);
        wellY.push_back(y);
        wellMeasurements.push_back(vector<float>());
        wellMeasurements.back().resize(numFlows);

        const WellData *w = wellsPtr->ReadXY(x, y);
        copy(w->flowValues, w->flowValues + numFlows, wellMeasurements.back().begin());
      }
    }

    if (currentRegionX == 0)
      printf("% 5d/% 5d: ", currentRegionY*regionYSize, rows);
    if (wellX.size() == 0)
      printf("  ");
    else if (wellX.size() < 750)
      printf(". ");
    else if (wellX.size() < 1500)
      printf("o ");
    else if (wellX.size() < 2250)
      printf("# ");
    else
      printf("$ ");

    nextRegionX++;
    if (nextRegionX == numRegionsX) {
      nextRegionX = 0;
      nextRegionY++;
      printf("\n");
    }
    fflush(NULL);

    pthread_mutex_unlock(&wellsAccessMutex);


    BasecallerRead currentRead;
    DPTreephaser dpTreephaser(flowOrder);
    dpTreephaser.SetModelParameters(CF, IE, 0);

    // Process the data
    deque<SFFEntry> libReads;

    deque<int>::iterator x = wellX.begin();
    deque<int>::iterator y = wellY.begin();
    deque<std::vector<float> >::iterator measurements = wellMeasurements.begin();

    for (; x != wellX.end() ; x++, y++, measurements++) {

      if (!maskPtr->Match(*x, *y, (MaskType)(MaskLib|MaskKeypass), MATCH_ALL))
        continue;

      libReads.push_back(SFFEntry());
      SFFEntry& readResults = libReads.back();
      stringstream wellNameStream;
      wellNameStream << runId << ":" << (*y) << ":" << (*x);
      readResults.name = wellNameStream.str();
      readResults.clip_qual_left = 4; // TODO
      readResults.clip_qual_right = 0;
      readResults.clip_adapter_left = 0;
      readResults.clip_adapter_right = 0;
      readResults.flowgram.resize(numFlows);

      int minReadLength = 8; // TODO

      currentRead.SetDataAndKeyNormalize(&(measurements->at(0)), numFlows, &libKeyFlows[0], libNumKeyFlows - 1);

      dpTreephaser.NormalizeAndSolve5(currentRead, numFlows); // sliding window adaptive normalization

      readResults.n_bases = 0;
      for (int iFlow = 0; iFlow < numFlows; iFlow++) {
        readResults.flowgram[iFlow] = 100 * currentRead.solution[iFlow];
        readResults.n_bases += currentRead.solution[iFlow];
      }

      if(readResults.n_bases < minReadLength) {
        libReads.pop_back();
        continue;
      }

      bool isFailKeypass = false;
      for (int iFlow = 0; iFlow < (libNumKeyFlows-1); iFlow++)
        if (libKeyFlows[iFlow] != currentRead.solution[iFlow])
          isFailKeypass = true;

      if(isFailKeypass) {
        libReads.pop_back();
        continue;
      }

      readResults.flow_index.reserve(readResults.n_bases);
      readResults.bases.reserve(readResults.n_bases);
      readResults.quality.reserve(readResults.n_bases);

      unsigned int prev_used_flow = 0;
      for (int iFlow = 0; iFlow < numFlows; iFlow++) {
        for (hpLen_t hp = 0; hp < currentRead.solution[iFlow]; hp++) {
          readResults.flow_index.push_back(1 + iFlow - prev_used_flow);
          readResults.bases.push_back(flowOrder[iFlow]);
          readResults.quality.push_back(20); // BaseCallerLite is stripped of QV generator
          prev_used_flow = iFlow + 1;
        }
      }

    }

    libSFF.WriteRegion(currentRegion,libReads);
  }
}
Пример #12
0
/**
 * Runs background-model signal processing over all requested flows.
 *
 * Sets up (or, on restart, copies in) the global fitter, creates the wells
 * file when not restarting, starts the image loader threads, then fits every
 * flow from startingFlow up to endingFlow, writing the wells file in chunks.
 * When bkg_control.restart_next is set, the state is serialized to a restart
 * archive after the flow loop.
 *
 * @param inception_state     Command line options and contexts driving the run.
 * @param from_beadfind_mask  Bead-find mask (and pinned-well tracking) used and updated here.
 * @param chipType            Chip type string passed on to the defaults and wells file setup.
 * @param my_image_spec       Image dimensions.
 * @param my_prequel_setup    Region list and timing information.
 * @param my_keys             Key sequences passed to the threaded initialization.
 * @param pass_tau            Not referenced in this function.
 * @param bkg_fitter_tracker  Source fitter state; only dereferenced when restarting.
 */
void DoThreadedSignalProcessing ( CommandLineOpts &inception_state, ComplexMask &from_beadfind_mask,  char *chipType,
                                  ImageSpecClass &my_image_spec, SlicedPrequel &my_prequel_setup,SeqListClass &my_keys, bool pass_tau,
				  BkgFitterTracker *bkg_fitter_tracker)
{

  MemUsage ( "StartingBackground" );
  time_t init_start;
  time ( &init_start );

  bool restart = not inception_state.bkg_control.restart_from.empty();

  BkgFitterTracker GlobalFitter ( my_prequel_setup.num_regions );
  const std::string wellsFile = string(inception_state.sys_context.wellsFilePath) + "/" + inception_state.sys_context.wellsFileName;

  MakeDecisionOnGpuMultiFlowFit(inception_state);

  if( restart ){
    // Restart: take over the previously saved fitter state
    GlobalFitter = *bkg_fitter_tracker;
  }
  else {
    GlobalFitter.global_defaults.flow_global.SetFlowOrder ( inception_state.flow_context.flowOrder ); // @TODO: 2nd duplicated code instance
    // Build everything
    SetBkgModelGlobalDefaults ( GlobalFitter.global_defaults, inception_state.bkg_control,chipType,inception_state.sys_context.GetResultsFolder() );
    // >does not open wells file<
    fprintf(stdout, "Opening wells file %s ... ", wellsFile.c_str());
    RawWells preWells ( inception_state.sys_context.wellsFilePath, inception_state.sys_context.wellsFileName );
    fprintf(stdout, "done\n");
    CreateWellsFileForWriting ( preWells,from_beadfind_mask.my_mask, inception_state, NUMFB,
                              inception_state.flow_context.GetNumFlows(), my_image_spec.rows, my_image_spec.cols, chipType );
    // build trace tracking
    GlobalFitter.SetUpTraceTracking ( my_prequel_setup, inception_state, my_image_spec, from_beadfind_mask );
    GlobalFitter.AllocateRegionData(my_prequel_setup.region_list.size());


  }

  TinyInitializeUglyStaticForSignalProcessing ( GlobalFitter.global_defaults , inception_state);

  // plan (this happens whether we're from-disk or not):
  GlobalFitter.PlanComputation ( inception_state.bkg_control );

  // do we have a wells file?
  ION_ASSERT( isFile(wellsFile.c_str()), "Wells file "+ wellsFile + " does not exist" );

  RawWells rawWells ( inception_state.sys_context.wellsFilePath, inception_state.sys_context.wellsFileName );
  // plan (this happens whether we're from-disk or not):
  GlobalFitter.ThreadedInitialization ( rawWells, inception_state, from_beadfind_mask, inception_state.sys_context.GetResultsFolder(), my_image_spec,
					my_prequel_setup.smooth_t0_est,my_prequel_setup.region_list, my_prequel_setup.region_timing, my_keys, restart);

  MemUsage ( "AfterBgInitialization" );
  time_t init_end;
  time ( &init_end );

  fprintf ( stdout, "InitModel: %0.3lf sec.\n", difftime ( init_end,init_start ) );

  // Image Loading thread setup to grab flows in the background

  // ImageTracker constructed to load flows
  // must contact the GlobalFitter data that it will be associated with

  // from thin air each time:
  ImageTracker my_img_set ( inception_state.flow_context.getFlowSpan(),inception_state.img_control.ignoreChecksumErrors,inception_state.img_control.doSdat,inception_state.img_control.total_timeout );
  my_img_set.SetUpImageLoaderInfo ( inception_state, from_beadfind_mask, my_image_spec );
  my_img_set.DecideOnRawDatsToBufferForThisFlowBlock();
  my_img_set.FireUpThreads();


  // Now do threaded solving, going through all the flows

  GlobalFitter.SpinUp();
  // need to have initialized the regions for this
  GlobalFitter.SetRegionProcessOrder ();

  // determine maximum beads in a region for gpu memory allocations
  GlobalFitter.DetermineMaxLiveBeadsAndFramesAcrossAllRegionsForGpu();

  // ideally these are part of the rawWells object itself
  int write_well_flow_interval = inception_state.bkg_control.saveWellsFrequency*NUMFB; // goes with rawWells
  int flow_to_write_wells = -1000; // never happens unless we set it to happen

  // first flow processed by this call; used to bound the reported flow-block range
  const int starting_flow = inception_state.flow_context.startingFlow;

  // process all flows...
  // using actual flow values
  Timer flow_block_timer;
  Timer signal_proc_timer;
  for ( int flow = inception_state.flow_context.startingFlow; flow < (int)inception_state.flow_context.endingFlow; flow++ )
  {
    // the block timer is restarted at the first flow of every NUMFB-sized block
    if ((flow % NUMFB) == 0)
      flow_block_timer.restart();

    // coordinate with the ImageLoader threads for this flow to be read in
    // WaitForFlowToLoad guarantees all flows up this one have been read in
    my_img_set.WaitForFlowToLoad ( flow );

    // ----- handle set up for processing this flow before we do anything needing
    bool last_flow = ( ( flow ) == ( inception_state.flow_context.GetNumFlows()- 1 ) ); // actually the literal >last< flow, not just the flow in a chunk, so we can handle not having a full chunk.

    // always write intervals starting at wherever we are starting
    // logic here:  open wells file at startingFlow, tell at what flow we need to write things out.
    if (NeedToOpenWellChunk(flow-inception_state.flow_context.startingFlow, write_well_flow_interval))
    {
      // chunk size is flow interval unless we run out of things to do in this interval
      int chunk_depth = FigureChunkDepth(flow,inception_state.flow_context.endingFlow,write_well_flow_interval);
      OpenExistingWellsForOneChunk(rawWells,flow,chunk_depth); // start
      flow_to_write_wells = flow+chunk_depth-1;
    }

    // done with set up for anything this flow needs
    signal_proc_timer.restart();

    // computation that modifies data
    GlobalFitter.ExecuteFitForFlow ( flow,my_img_set,last_flow ); // isolate this object so it can carry out actions in any order it chooses.
    ApplyClonalFilter ( *from_beadfind_mask.my_mask, inception_state.sys_context.GetResultsFolder(), GlobalFitter.sliced_chip,inception_state.bkg_control.enableBkgModelClonalFilter, flow );

    // no more computation

    // NOTE(review): the result of this call is discarded; kept as-is because Timer::elapsed() is not visible here - confirm it can be dropped.
    signal_proc_timer.elapsed();
    fprintf ( stdout, "SigProc: pure compute time for flow %d: %.1f sec.\n", flow, signal_proc_timer.elapsed());
    MemUsage ( "Memory_Flow: " + ToStr ( flow ) );

    // capture the regional parameters every 20 flows, plus one bead per region at "random"
    // @TODO replace with clean hdf5 interface for sampling beads and region parameters
    GlobalFitter.DumpBkgModelRegionInfo ( inception_state.sys_context.GetResultsFolder(),flow,last_flow );
    GlobalFitter.DumpBkgModelBeadInfo ( inception_state.sys_context.GetResultsFolder(),flow,last_flow, inception_state.bkg_control.debug_bead_only>0 );
    WriteSampleRegion(inception_state.sys_context.GetResultsFolder(), GlobalFitter, flow, inception_state.bkg_control.region_vfrc_debug);


    // variables should be >captured< at the end of fitting
    //  and then the hdf5 dump happens across all threads as we synchronize
    GlobalFitter.all_params_hdf.IncrementalWrite (  flow,  last_flow );

    // done capturing parameters, close out this flow

    // logic here: wells file knows when it needs to write something out
    if (flow==flow_to_write_wells)
      WriteOneChunkAndClose(rawWells);

    // Needed for 318 chips. Decide how many DATs to read ahead for every block of NUMFB flows
    // also report timing for block of 20 flows from reading dat to writing 1.wells for this block
    if ((flow % NUMFB) == (NUMFB - 1))
      my_img_set.DecideOnRawDatsToBufferForThisFlowBlock();

    // report timing for block of 20 flows from reading dat to writing 1.wells for this block
    if (((flow % NUMFB) == (NUMFB - 1)) || last_flow) {
      // The timer was restarted at the last multiple of NUMFB, so that is the
      // first flow of the block being reported. This matters when the final
      // block is partial. Never report a flow before the one we started at.
      int block_first_flow = flow - (flow % NUMFB);
      if (block_first_flow < starting_flow)
        block_first_flow = starting_flow;
      fprintf ( stdout, "Flow Block compute time for flow %d to %d: %.1f sec.\n",
              block_first_flow, flow, flow_block_timer.elapsed());
    }

    // coordinate with the ImageLoader threads that this flow is done with
    // and release resources associated with this image
    // my_img_set knows what buffer is associated with the absolute flow
    my_img_set.FinishFlow ( flow );

    // stop GPU thread computing doing fitting of first block of flows
    if (flow == (NUMFB - 1))
      GlobalFitter.UnSpinMultiFlowFitGpuThreads();
  }

  if ( not inception_state.bkg_control.restart_next.empty() ){
    string filePath = inception_state.sys_context.analysisLocation + inception_state.bkg_control.restart_next;
    // binary archives must be written to a stream opened in binary mode
    ofstream outStream(filePath.c_str(), ios_base::trunc | ios_base::binary);
    assert(outStream.good());
    //boost::archive::text_oarchive outArchive(outStream);
    boost::archive::binary_oarchive outArchive(outStream);

    // get region associated objects on disk first

    time_t begin_save_time;
    time ( &begin_save_time );

    ComplexMask *from_beadfind_mask_ptr = &from_beadfind_mask;
    BkgFitterTracker *GlobalFitter_ptr = &GlobalFitter;
    string svn_rev = IonVersion::GetSvnRev();

    outArchive
      << svn_rev
      << my_prequel_setup
      << from_beadfind_mask_ptr
      << GlobalFitter_ptr;
    outStream.close();

    time_t finish_save_time;
    time ( &finish_save_time );
    // trailing newline added so the next log line starts on its own line
    fprintf ( stdout, "Writing restart state to archive %s took %0.1f secs\n",
	      filePath.c_str(), difftime ( finish_save_time, begin_save_time ));
  }
  rawWells.Close();

  GlobalFitter.UnSpinSingleFlowFitGpuThreads ();

  TinyDestroyUglyStaticForSignalProcessing();

  if ( inception_state.bkg_control.updateMaskAfterBkgModel )
    from_beadfind_mask.pinnedInFlow->UpdateMaskWithPinned ( from_beadfind_mask.my_mask ); //update maskPtr

  from_beadfind_mask.pinnedInFlow->DumpSummaryPinsPerFlow ( inception_state.sys_context.GetResultsFolder() );
}