Example #1
0
int main (int argc, const char *argv[])
{
    BaseCallerSalute();

    time_t analysis_start_time;
    time(&analysis_start_time);

    Json::Value basecaller_json(Json::objectValue);
    DumpStartingStateOfProgram (argc,argv,analysis_start_time, basecaller_json["BaseCaller"]);

    //
    // Step 1. Process Command Line Options & Initialize Modules
    //

    BaseCallerParameters bc_params;
    OptArgs opts, null_opts;
    opts.ParseCmdLine(argc, argv);

    if (opts.GetFirstBoolean('h', "help", false) or argc == 1)
    	bc_params.PrintHelp();
    if (opts.GetFirstBoolean('v', "version", false)) {
        fprintf (stdout, "%s", IonVersion::GetFullVersion ("BaseCaller").c_str());
        exit (EXIT_SUCCESS);
    }

    // Command line processing *** Main directories and file locations first
    bc_params.InitializeFilesFromOptArgs(opts);
    bc_params.InitContextVarsFromOptArgs(opts);

    // Command line processing *** Options that have default values retrieved from wells or mask files
    RawWells wells ("", bc_params.GetFiles().filename_wells.c_str());
    if (!wells.OpenMetaData()) {
        fprintf (stderr, "Failed to retrieve metadata from %s\n", bc_params.GetFiles().filename_wells.c_str());
        exit (EXIT_FAILURE);
    }
    Mask mask (1, 1);
    if (mask.SetMask (bc_params.GetFiles().filename_mask.c_str()))
        exit (EXIT_FAILURE);

    string chip_type = "unknown";
    if (wells.KeyExists("ChipType"))
        wells.GetValue("ChipType", chip_type);

    // Command line processing *** Various general option and opts to classify and sample wells
    BaseCallerContext bc;
    bc.mask = &mask;
    bc.SetKeyAndFlowOrder(opts, wells.FlowOrder(), wells.NumFlows());
    bc.chip_subset.InitializeChipSubsetFromOptArgs(opts, mask.W(), mask.H());

    // Sampling options may reset command line arguments & change context
    bc_params.InitializeSamplingFromOptArgs(opts, bc.chip_subset.NumWells());
    bc_params.SetBaseCallerContextVars(bc);
    ClassifyAndSampleWells(bc, bc_params.GetSamplingOpts());


    // *** Setup for different datasets
    BarcodeDatasets datasets_calibration(bc.run_id, bc_params.GetFiles().calibration_panel_file);
    datasets_calibration.SetIonControl(bc.run_id);
    datasets_calibration.GenerateFilenames("IonControl","basecaller_bam",".basecaller.bam",bc_params.GetFiles().output_directory);

    BarcodeDatasets datasets(bc.run_id, bc_params.GetFiles().lib_datasets_file);
    // Check if any of the template barcodes is equal to a control barcode
    if (datasets_calibration.DatasetInUse())
      datasets.RemoveControlBarcodes(datasets_calibration.json());
    datasets.GenerateFilenames("Library","basecaller_bam",".basecaller.bam",bc_params.GetFiles().output_directory);

    BarcodeDatasets datasets_tf(bc.run_id);
    datasets_tf.SetTF(bc.process_tfs);
    datasets_tf.GenerateFilenames("TF","basecaller_bam",".basecaller.bam",bc_params.GetFiles().output_directory);

    BarcodeDatasets datasets_unfiltered_untrimmed(datasets);
    BarcodeDatasets datasets_unfiltered_trimmed(datasets);


    // *** Initialize remaining modules of BaseCallerContext
    vector<string> bam_comments;
    BaseCallerFilters filters(opts, bam_comments, bc.run_id, bc.flow_order, bc.keys, mask);
    bc.filters = &filters;

    BaseCallerMetricSaver metric_saver(opts, bc.chip_subset.GetChipSizeX(), bc.chip_subset.GetChipSizeY(), bc.flow_order.num_flows(),
                                bc.chip_subset.GetRegionSizeX(), bc.chip_subset.GetRegionSizeY(), bc_params.GetFiles().output_directory);
    bc.metric_saver = &metric_saver;

    // Calibration modules
    bc.recalibration.Initialize(opts, bc.flow_order);
    bc.recalModel.Initialize(opts, bam_comments, bc.run_id, bc.chip_subset);
    // initialize the per base quality score generator - dependent on calibration
    bc.quality_generator.Init(opts, chip_type, bc_params.GetFiles().input_directory, bc_params.GetFiles().output_directory, bc.recalibration.is_enabled());

    // Phase estimator
    bc.estimator.InitializeFromOptArgs(opts, bc.chip_subset, bc.keynormalizer);
    // Barcode classification
    BarcodeClassifier barcodes(opts, datasets, bc.flow_order, bc.keys, bc_params.GetFiles().output_directory,
    		                   bc.chip_subset.GetChipSizeX(), bc.chip_subset.GetChipSizeY());
    bc.barcodes = &barcodes;
    // Make sure calibration barcodes are initialized with default parameters
    BarcodeClassifier calibration_barcodes(null_opts, datasets_calibration, bc.flow_order, bc.keys,
                          bc_params.GetFiles().output_directory, bc.chip_subset.GetChipSizeX(), bc.chip_subset.GetChipSizeY());
    bc.calibration_barcodes = &calibration_barcodes;

    // Command line parsing officially over. Detect unknown options.
    opts.CheckNoLeftovers();

    // Save some run info into our handy json file
    bc_params.SaveParamsToJson(basecaller_json, bc, chip_type);
    SaveBaseCallerProgress(0, bc_params.GetFiles().output_directory);

    MemUsage("RawWellsBasecalling");


    //
    // Step 2. Filter training and phase estimation
    //

    // Find distribution of clonal reads for use in read filtering:
    filters.TrainClonalFilter(bc_params.GetFiles().output_directory, wells, mask, bc.polyclonal_filter);
    MemUsage("ClonalPopulation");
    ReportState(analysis_start_time,"Polyclonal Filter Training Complete");

    // Library phasing parameter estimation
    MemUsage("BeforePhaseEstimation");
    if (not bc.estimator.HaveEstimates()) {
      wells.OpenForIncrementalRead();
      bc.estimator.DoPhaseEstimation(&wells, &mask, bc.flow_order, bc.keys, (bc_params.NumThreads() == 1));
      wells.Close();
    }
    bc.estimator.ExportResultsToJson(basecaller_json["Phasing"]);
    bc.estimator.ExportTrainSubsetToJson(basecaller_json["TrainSubset"]);

    SaveJson(basecaller_json, bc_params.GetFiles().filename_json);
    SaveBaseCallerProgress(10, bc_params.GetFiles().output_directory);  // Phase estimation assumed to be 10% of the work

    // Initialize Barcode Classifier(s) - dependent on phase estimates
    bc.barcodes->BuildPredictedSignals(bc.estimator.GetAverageCF(), bc.estimator.GetAverageIE(), bc.estimator.GetAverageDR());
    bc.calibration_barcodes->BuildPredictedSignals(bc.estimator.GetAverageCF(), bc.estimator.GetAverageIE(), bc.estimator.GetAverageDR());

    MemUsage("AfterPhaseEstimation");
    ReportState(analysis_start_time,"Phase Parameter Estimation Complete");
    MemUsage("BeforeBasecalling");


    //
    // Step 3. Open wells and output BAM files & initialize writers
    //

    // Library data set writer - always
    bc.lib_writer.Open(bc_params.GetFiles().output_directory, datasets, 0, bc.chip_subset.NumRegions(),
                 bc.flow_order, bc.keys[0].bases(), filters.GetLibBeadAdapters(),
                 bc_params.NumBamWriterThreads(), basecaller_json, bam_comments);

    // Calibration reads data set writer - if applicable
    if (bc.have_calibration_panel)
      bc.calib_writer.Open(bc_params.GetFiles().output_directory, datasets_calibration, 0, bc.chip_subset.NumRegions(),
                     bc.flow_order, bc.keys[0].bases(), filters.GetLibBeadAdapters(),
                     bc_params.NumBamWriterThreads(), basecaller_json, bam_comments);

    // Test fragments data set writer - if applicable
    if (bc.process_tfs)
      bc.tf_writer.Open(bc_params.GetFiles().output_directory, datasets_tf, 1, bc.chip_subset.NumRegions(),
                  bc.flow_order, bc.keys[1].bases(), filters.GetTFBeadAdapters(),
                  bc_params.NumBamWriterThreads(), basecaller_json, bam_comments);

    // Unfiltered / unfiltered untrimmed data set writers - if applicable
    if (!bc.unfiltered_set.empty()) {
    	bc.unfiltered_writer.Open(bc_params.GetFiles().unfiltered_untrimmed_directory, datasets_unfiltered_untrimmed, -1,
                      bc.chip_subset.NumRegions(), bc.flow_order, bc.keys[0].bases(), filters.GetLibBeadAdapters(),
                      bc_params.NumBamWriterThreads(), basecaller_json, bam_comments);

        bc.unfiltered_trimmed_writer.Open(bc_params.GetFiles().unfiltered_trimmed_directory, datasets_unfiltered_trimmed, -1,
                              bc.chip_subset.NumRegions(), bc.flow_order, bc.keys[0].bases(), filters.GetLibBeadAdapters(),
                              bc_params.NumBamWriterThreads(), basecaller_json, bam_comments);
    }

    //
    // Step 4. Execute threaded basecalling
    //

    time_t basecall_start_time;
    time(&basecall_start_time);

    pthread_mutex_init(&bc.mutex, NULL);

    pthread_t worker_id[bc_params.NumThreads()];
    for (int worker = 0; worker < bc_params.NumThreads(); worker++)
        if (pthread_create(&worker_id[worker], NULL, BasecallerWorker, &bc)) {
            printf("*Error* - problem starting thread\n");
            exit (EXIT_FAILURE);
        }

    for (int worker = 0; worker < bc_params.NumThreads(); worker++)
        pthread_join(worker_id[worker], NULL);

    pthread_mutex_destroy(&bc.mutex);

    time_t basecall_end_time;
    time(&basecall_end_time);


    //
    // Step 5. Close files and print out some statistics
    //

    printf("\n\nBASECALLING: called %d of %u wells in %1.0lf seconds with %d threads\n\n",
           filters.NumWellsCalled(), bc.chip_subset.NumWells(),
           difftime(basecall_end_time,basecall_start_time), bc_params.NumThreads());

    bc.lib_writer.Close(datasets, "Library");
    if (bc.have_calibration_panel)
    	bc.calib_writer.Close(datasets_calibration, "IonControl");
    if (bc.process_tfs)
        bc.tf_writer.Close(datasets_tf, "Test Fragments");

    filters.TransferFilteringResultsToMask(mask);

    if (!bc.unfiltered_set.empty()) {

        // Must happen after filters transferred to mask
        bc.WriteUnfilteredFilterStatus(bc_params.GetFiles());

        bc.unfiltered_writer.Close(datasets_unfiltered_untrimmed);
        bc.unfiltered_trimmed_writer.Close(datasets_unfiltered_trimmed);

        datasets_unfiltered_untrimmed.SaveJson(bc_params.GetFiles().unfiltered_untrimmed_directory+"/datasets_basecaller.json");
        datasets_unfiltered_trimmed.SaveJson(bc_params.GetFiles().unfiltered_trimmed_directory+"/datasets_basecaller.json");
    }

    metric_saver.Close();
    barcodes.Close(datasets);
    calibration_barcodes.Close(datasets_calibration);
    if (bc.have_calibration_panel) {
      datasets.json()["IonControl"]["datasets"] = datasets_calibration.json()["datasets"];
      datasets.json()["IonControl"]["read_groups"] = datasets_calibration.read_groups();
    }
    datasets.SaveJson(bc_params.GetFiles().output_directory+"/datasets_basecaller.json");
    if (bc.process_tfs)
        datasets_tf.SaveJson(bc_params.GetFiles().output_directory+"/datasets_tf.json");

    // Generate BaseCaller.json

    bc.lib_writer.SaveFilteringStats(basecaller_json, "lib", true);
    if (bc.have_calibration_panel)
      bc.calib_writer.SaveFilteringStats(basecaller_json, "control", false);
    if (bc.process_tfs)
      bc.tf_writer.SaveFilteringStats(basecaller_json, "tf", false);

    time_t analysis_end_time;
    time(&analysis_end_time);

    basecaller_json["BaseCaller"]["end_time"] = get_time_iso_string(analysis_end_time);
    basecaller_json["BaseCaller"]["total_duration"] = (int)difftime(analysis_end_time,analysis_start_time);
    basecaller_json["BaseCaller"]["basecalling_duration"] = (int)difftime(basecall_end_time,basecall_start_time);

    basecaller_json["Filtering"]["qv_histogram"] = Json::arrayValue;
    for (int qv = 0; qv < 50; ++qv)
        basecaller_json["Filtering"]["qv_histogram"][qv] = (Json::UInt64)bc.lib_writer.qv_histogram()[qv];

    SaveJson(basecaller_json, bc_params.GetFiles().filename_json);
    SaveBaseCallerProgress(100, bc_params.GetFiles().output_directory);

    mask.WriteRaw (bc_params.GetFiles().filename_filter_mask.c_str());
    mask.validateMask();

    MemUsage("AfterBasecalling");
    ReportState(analysis_start_time,"Basecalling Complete");

    return EXIT_SUCCESS;
}
Example #2
0
void SetExcludeMask (SpatialContext &loc_context, Mask *maskPtr, char *chipType, int rows, int cols)
{

  bool applyExclusionMask = true;

  /*
   *  Determine if this is a cropped dataset
   *  3 types:
   *    wholechip image dataset - rows,cols should be == to chip_len_x,chip_len_y
   *    cropped image dataset - above test is false AND chip_offset_x == -1
   *    blocked image dataset - above test is false AND chip_offset_x != -1
   */
  if ( (rows == loc_context.chip_len_y) && (cols == loc_context.chip_len_x))
  {
    //This is a wholechip dataset
    applyExclusionMask = true;
    fprintf (stderr, "This is a wholechip dataset so the exclusion mask will be applied Chip %s\n",chipType);
  }
  else
  {
    if (loc_context.chip_offset_x == -1)
    {
      applyExclusionMask = false;
      fprintf (stderr, "This is a cropped dataset so the exclusion mask will not be applied\n");
      fprintf (stderr, "rows=%d, chip_rows=%d,cols=%d, chip_cols=%d  for chip %s\n",rows, loc_context.chip_len_y,cols, loc_context.chip_len_x, chipType);
    }
    else
    {
      applyExclusionMask = false;
      fprintf (stderr, "This is a block dataset so the exclusion mask will not be applied\n");

    }
  }
  /*
   *  If we get a cropped region definition from the command line, we want the whole chip to be MaskExclude
   *  except for the defined crop region(s) which are marked MaskEmpty.  If no cropRegion defined on command line,
   *  then we proceed with marking the entire chip MaskEmpty
   */
  if (loc_context.numCropRegions == 0)
  {
    maskPtr->Init (cols, rows, MaskEmpty);
  }
  else
  {
    maskPtr->Init (cols, rows, MaskExclude);

    // apply one or more cropRegions, mark them MaskEmpty
    for (int q = 0; q < loc_context.numCropRegions; q++)
    {
      maskPtr->MarkRegion (loc_context.cropRegions[q], MaskEmpty);
    }
  }

  /*
   * Apply exclude mask from file
   */
  loc_context.exclusionMaskSet = false;
  if (chipType && applyExclusionMask)
  {
    char *exclusionMaskFileName = NULL;
    char filename[64] = { 0 };
    
    sprintf (filename, "exclusionMask_%s.bin", chipType);

    exclusionMaskFileName = GetIonConfigFile (filename);
    fprintf (stderr, "Exclusion Mask File = '%s'\n", exclusionMaskFileName);
    if (exclusionMaskFileName)
    {
      loc_context.exclusionMaskSet = true;

      Mask excludeMask (1, 1);
      excludeMask.SetMask (exclusionMaskFileName);
      free (exclusionMaskFileName);
      //--- Mark beadfind masks with MaskExclude bits from exclusionMaskFile
      maskPtr->SetThese (&excludeMask, MaskExclude);

    }
    else
    {
      fprintf (stderr, "WARNING: Exclusion Mask %s not applied\n", filename);
    }
  }
}
Example #3
0
int main (int argc, const char *argv[])
{

  if (argc == 1) {
    printf ("BaseCallerLite - Bare bone basecaller\n");
    printf ("\n");
    printf ("Usage:\n");
    printf ("BaseCallerLite [options]\n");
    printf ("\tOptions:\n");
    printf ("\t\tComing soon\n");
    printf ("\n");
    return 1;
  }

  string libKey = "TCAG";
  string inputDirectory = ".";
  string outputDirectory = ".";
  bool singleCoreCafie = false;

  BaseCallerLite basecaller;
  basecaller.regionXSize = 50;
  basecaller.regionYSize = 50;
  basecaller.runId = "BCLTE";
  basecaller.CF = 0.0;
  basecaller.IE = 0.0;
  basecaller.numWellsCalled = 0;
  basecaller.nextRegionX = 0;
  basecaller.nextRegionY = 0;


  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  opts.GetOption(basecaller.CF, "0.0", '-',  "cf");
  opts.GetOption(basecaller.IE, "0.0", '-',  "ie");
  opts.GetOption(inputDirectory, ".", '-',  "input-dir");
  opts.GetOption(outputDirectory, ".", '-',  "output-dir");
  opts.GetOption(singleCoreCafie, "false", '-',  "singlecorecafie");

  int numWorkers = 2*numCores();
  if (singleCoreCafie)
    numWorkers = 1;


  Mask mask (1, 1);
  if (mask.SetMask ((inputDirectory + "/bfmask.bin").c_str()))
    exit (EXIT_FAILURE);
  RawWells wells (inputDirectory.c_str(),"1.wells");
  //SetWellsToLiveBeadsOnly(wells,&mask);
  wells.OpenForIncrementalRead();

  basecaller.maskPtr = &mask;
  basecaller.wellsPtr = &wells;
  basecaller.rows = mask.H();
  basecaller.cols = mask.W();
  basecaller.flowOrder.SetFlowOrder(wells.FlowOrder(), wells.NumFlows());
  basecaller.numFlows = wells.NumFlows();


  basecaller.numRegionsX = (basecaller.cols +  basecaller.regionXSize - 1) / basecaller.regionXSize;
  basecaller.numRegionsY = (basecaller.rows +  basecaller.regionYSize - 1) / basecaller.regionYSize;
  basecaller.numRegions = basecaller.numRegionsX * basecaller.numRegionsY;

  basecaller.libKeyFlows.assign(basecaller.numFlows,0);
  basecaller.libNumKeyFlows = basecaller.flowOrder.BasesToFlows(libKey, &basecaller.libKeyFlows[0], basecaller.numFlows);

  basecaller.libSFF.Open(outputDirectory+"/rawlib.sff", basecaller.numRegions,
      basecaller.flowOrder, libKey);


  time_t startBasecall;
  time(&startBasecall);

  pthread_mutex_init(&basecaller.wellsAccessMutex, NULL);

  pthread_t worker_id[numWorkers];
  for (int iWorker = 0; iWorker < numWorkers; iWorker++)
    if (pthread_create(&worker_id[iWorker], NULL, BasecallerWorkerWrapper, &basecaller)) {
      printf("*Error* - problem starting thread\n");
      return 1;
    }

  for (int iWorker = 0; iWorker < numWorkers; iWorker++)
    pthread_join(worker_id[iWorker], NULL);

  pthread_mutex_destroy(&basecaller.wellsAccessMutex);

  time_t endBasecall;
  time(&endBasecall);

  basecaller.libSFF.Close();

  printf("\nBASECALLING: called %d of %d wells in %1.1f seconds with %d threads\n",
      basecaller.numWellsCalled, basecaller.rows*basecaller.cols, difftime(endBasecall,startBasecall), numWorkers);
  printf("Generated library SFF with %d reads\n", basecaller.libSFF.num_reads());

  return 0;
}