int main (int argc, const char *argv[]) { BaseCallerSalute(); time_t analysis_start_time; time(&analysis_start_time); Json::Value basecaller_json(Json::objectValue); DumpStartingStateOfProgram (argc,argv,analysis_start_time, basecaller_json["BaseCaller"]); // // Step 1. Process Command Line Options & Initialize Modules // BaseCallerParameters bc_params; OptArgs opts, null_opts; opts.ParseCmdLine(argc, argv); if (opts.GetFirstBoolean('h', "help", false) or argc == 1) bc_params.PrintHelp(); if (opts.GetFirstBoolean('v', "version", false)) { fprintf (stdout, "%s", IonVersion::GetFullVersion ("BaseCaller").c_str()); exit (EXIT_SUCCESS); } // Command line processing *** Main directories and file locations first bc_params.InitializeFilesFromOptArgs(opts); bc_params.InitContextVarsFromOptArgs(opts); // Command line processing *** Options that have default values retrieved from wells or mask files RawWells wells ("", bc_params.GetFiles().filename_wells.c_str()); if (!wells.OpenMetaData()) { fprintf (stderr, "Failed to retrieve metadata from %s\n", bc_params.GetFiles().filename_wells.c_str()); exit (EXIT_FAILURE); } Mask mask (1, 1); if (mask.SetMask (bc_params.GetFiles().filename_mask.c_str())) exit (EXIT_FAILURE); string chip_type = "unknown"; if (wells.KeyExists("ChipType")) wells.GetValue("ChipType", chip_type); // Command line processing *** Various general option and opts to classify and sample wells BaseCallerContext bc; bc.mask = &mask; bc.SetKeyAndFlowOrder(opts, wells.FlowOrder(), wells.NumFlows()); bc.chip_subset.InitializeChipSubsetFromOptArgs(opts, mask.W(), mask.H()); // Sampling options may reset command line arguments & change context bc_params.InitializeSamplingFromOptArgs(opts, bc.chip_subset.NumWells()); bc_params.SetBaseCallerContextVars(bc); ClassifyAndSampleWells(bc, bc_params.GetSamplingOpts()); // *** Setup for different datasets BarcodeDatasets datasets_calibration(bc.run_id, bc_params.GetFiles().calibration_panel_file); datasets_calibration.SetIonControl(bc.run_id); datasets_calibration.GenerateFilenames("IonControl","basecaller_bam",".basecaller.bam",bc_params.GetFiles().output_directory); BarcodeDatasets datasets(bc.run_id, bc_params.GetFiles().lib_datasets_file); // Check if any of the template barcodes is equal to a control barcode if (datasets_calibration.DatasetInUse()) datasets.RemoveControlBarcodes(datasets_calibration.json()); datasets.GenerateFilenames("Library","basecaller_bam",".basecaller.bam",bc_params.GetFiles().output_directory); BarcodeDatasets datasets_tf(bc.run_id); datasets_tf.SetTF(bc.process_tfs); datasets_tf.GenerateFilenames("TF","basecaller_bam",".basecaller.bam",bc_params.GetFiles().output_directory); BarcodeDatasets datasets_unfiltered_untrimmed(datasets); BarcodeDatasets datasets_unfiltered_trimmed(datasets); // *** Initialize remaining modules of BaseCallerContext vector<string> bam_comments; BaseCallerFilters filters(opts, bam_comments, bc.run_id, bc.flow_order, bc.keys, mask); bc.filters = &filters; BaseCallerMetricSaver metric_saver(opts, bc.chip_subset.GetChipSizeX(), bc.chip_subset.GetChipSizeY(), bc.flow_order.num_flows(), bc.chip_subset.GetRegionSizeX(), bc.chip_subset.GetRegionSizeY(), bc_params.GetFiles().output_directory); bc.metric_saver = &metric_saver; // Calibration modules bc.recalibration.Initialize(opts, bc.flow_order); bc.recalModel.Initialize(opts, bam_comments, bc.run_id, bc.chip_subset); // initialize the per base quality score generator - dependent on calibration bc.quality_generator.Init(opts, chip_type, bc_params.GetFiles().input_directory, bc_params.GetFiles().output_directory, bc.recalibration.is_enabled()); // Phase estimator bc.estimator.InitializeFromOptArgs(opts, bc.chip_subset, bc.keynormalizer); // Barcode classification BarcodeClassifier barcodes(opts, datasets, bc.flow_order, bc.keys, bc_params.GetFiles().output_directory, bc.chip_subset.GetChipSizeX(), bc.chip_subset.GetChipSizeY()); bc.barcodes = &barcodes; // Make sure calibration barcodes are initialized with default parameters BarcodeClassifier calibration_barcodes(null_opts, datasets_calibration, bc.flow_order, bc.keys, bc_params.GetFiles().output_directory, bc.chip_subset.GetChipSizeX(), bc.chip_subset.GetChipSizeY()); bc.calibration_barcodes = &calibration_barcodes; // Command line parsing officially over. Detect unknown options. opts.CheckNoLeftovers(); // Save some run info into our handy json file bc_params.SaveParamsToJson(basecaller_json, bc, chip_type); SaveBaseCallerProgress(0, bc_params.GetFiles().output_directory); MemUsage("RawWellsBasecalling"); // // Step 2. Filter training and phase estimation // // Find distribution of clonal reads for use in read filtering: filters.TrainClonalFilter(bc_params.GetFiles().output_directory, wells, mask, bc.polyclonal_filter); MemUsage("ClonalPopulation"); ReportState(analysis_start_time,"Polyclonal Filter Training Complete"); // Library phasing parameter estimation MemUsage("BeforePhaseEstimation"); if (not bc.estimator.HaveEstimates()) { wells.OpenForIncrementalRead(); bc.estimator.DoPhaseEstimation(&wells, &mask, bc.flow_order, bc.keys, (bc_params.NumThreads() == 1)); wells.Close(); } bc.estimator.ExportResultsToJson(basecaller_json["Phasing"]); bc.estimator.ExportTrainSubsetToJson(basecaller_json["TrainSubset"]); SaveJson(basecaller_json, bc_params.GetFiles().filename_json); SaveBaseCallerProgress(10, bc_params.GetFiles().output_directory); // Phase estimation assumed to be 10% of the work // Initialize Barcode Classifier(s) - dependent on phase estimates bc.barcodes->BuildPredictedSignals(bc.estimator.GetAverageCF(), bc.estimator.GetAverageIE(), bc.estimator.GetAverageDR()); bc.calibration_barcodes->BuildPredictedSignals(bc.estimator.GetAverageCF(), bc.estimator.GetAverageIE(), bc.estimator.GetAverageDR()); MemUsage("AfterPhaseEstimation"); ReportState(analysis_start_time,"Phase Parameter Estimation Complete"); MemUsage("BeforeBasecalling"); // // Step 3. Open wells and output BAM files & initialize writers // // Library data set writer - always bc.lib_writer.Open(bc_params.GetFiles().output_directory, datasets, 0, bc.chip_subset.NumRegions(), bc.flow_order, bc.keys[0].bases(), filters.GetLibBeadAdapters(), bc_params.NumBamWriterThreads(), basecaller_json, bam_comments); // Calibration reads data set writer - if applicable if (bc.have_calibration_panel) bc.calib_writer.Open(bc_params.GetFiles().output_directory, datasets_calibration, 0, bc.chip_subset.NumRegions(), bc.flow_order, bc.keys[0].bases(), filters.GetLibBeadAdapters(), bc_params.NumBamWriterThreads(), basecaller_json, bam_comments); // Test fragments data set writer - if applicable if (bc.process_tfs) bc.tf_writer.Open(bc_params.GetFiles().output_directory, datasets_tf, 1, bc.chip_subset.NumRegions(), bc.flow_order, bc.keys[1].bases(), filters.GetTFBeadAdapters(), bc_params.NumBamWriterThreads(), basecaller_json, bam_comments); // Unfiltered / unfiltered untrimmed data set writers - if applicable if (!bc.unfiltered_set.empty()) { bc.unfiltered_writer.Open(bc_params.GetFiles().unfiltered_untrimmed_directory, datasets_unfiltered_untrimmed, -1, bc.chip_subset.NumRegions(), bc.flow_order, bc.keys[0].bases(), filters.GetLibBeadAdapters(), bc_params.NumBamWriterThreads(), basecaller_json, bam_comments); bc.unfiltered_trimmed_writer.Open(bc_params.GetFiles().unfiltered_trimmed_directory, datasets_unfiltered_trimmed, -1, bc.chip_subset.NumRegions(), bc.flow_order, bc.keys[0].bases(), filters.GetLibBeadAdapters(), bc_params.NumBamWriterThreads(), basecaller_json, bam_comments); } // // Step 4. Execute threaded basecalling // time_t basecall_start_time; time(&basecall_start_time); pthread_mutex_init(&bc.mutex, NULL); pthread_t worker_id[bc_params.NumThreads()]; for (int worker = 0; worker < bc_params.NumThreads(); worker++) if (pthread_create(&worker_id[worker], NULL, BasecallerWorker, &bc)) { printf("*Error* - problem starting thread\n"); exit (EXIT_FAILURE); } for (int worker = 0; worker < bc_params.NumThreads(); worker++) pthread_join(worker_id[worker], NULL); pthread_mutex_destroy(&bc.mutex); time_t basecall_end_time; time(&basecall_end_time); // // Step 5. Close files and print out some statistics // printf("\n\nBASECALLING: called %d of %u wells in %1.0lf seconds with %d threads\n\n", filters.NumWellsCalled(), bc.chip_subset.NumWells(), difftime(basecall_end_time,basecall_start_time), bc_params.NumThreads()); bc.lib_writer.Close(datasets, "Library"); if (bc.have_calibration_panel) bc.calib_writer.Close(datasets_calibration, "IonControl"); if (bc.process_tfs) bc.tf_writer.Close(datasets_tf, "Test Fragments"); filters.TransferFilteringResultsToMask(mask); if (!bc.unfiltered_set.empty()) { // Must happen after filters transferred to mask bc.WriteUnfilteredFilterStatus(bc_params.GetFiles()); bc.unfiltered_writer.Close(datasets_unfiltered_untrimmed); bc.unfiltered_trimmed_writer.Close(datasets_unfiltered_trimmed); datasets_unfiltered_untrimmed.SaveJson(bc_params.GetFiles().unfiltered_untrimmed_directory+"/datasets_basecaller.json"); datasets_unfiltered_trimmed.SaveJson(bc_params.GetFiles().unfiltered_trimmed_directory+"/datasets_basecaller.json"); } metric_saver.Close(); barcodes.Close(datasets); calibration_barcodes.Close(datasets_calibration); if (bc.have_calibration_panel) { datasets.json()["IonControl"]["datasets"] = datasets_calibration.json()["datasets"]; datasets.json()["IonControl"]["read_groups"] = datasets_calibration.read_groups(); } datasets.SaveJson(bc_params.GetFiles().output_directory+"/datasets_basecaller.json"); if (bc.process_tfs) datasets_tf.SaveJson(bc_params.GetFiles().output_directory+"/datasets_tf.json"); // Generate BaseCaller.json bc.lib_writer.SaveFilteringStats(basecaller_json, "lib", true); if (bc.have_calibration_panel) bc.calib_writer.SaveFilteringStats(basecaller_json, "control", false); if (bc.process_tfs) bc.tf_writer.SaveFilteringStats(basecaller_json, "tf", false); time_t analysis_end_time; time(&analysis_end_time); basecaller_json["BaseCaller"]["end_time"] = get_time_iso_string(analysis_end_time); basecaller_json["BaseCaller"]["total_duration"] = (int)difftime(analysis_end_time,analysis_start_time); basecaller_json["BaseCaller"]["basecalling_duration"] = (int)difftime(basecall_end_time,basecall_start_time); basecaller_json["Filtering"]["qv_histogram"] = Json::arrayValue; for (int qv = 0; qv < 50; ++qv) basecaller_json["Filtering"]["qv_histogram"][qv] = (Json::UInt64)bc.lib_writer.qv_histogram()[qv]; SaveJson(basecaller_json, bc_params.GetFiles().filename_json); SaveBaseCallerProgress(100, bc_params.GetFiles().output_directory); mask.WriteRaw (bc_params.GetFiles().filename_filter_mask.c_str()); mask.validateMask(); MemUsage("AfterBasecalling"); ReportState(analysis_start_time,"Basecalling Complete"); return EXIT_SUCCESS; }
void count_sample(filter_counts& counts, deque<float>& ppf, deque<float>& ssq, Mask& mask, RawWells& wells, const vector<int>& key_ionogram) { // Take sample of reads from a RawWells file, and apply some simple // filters to identify problem reads. // Record number of reads in sample, and number of reads caught by // each filter. well_set sample = sample_lib(mask, counts._nsamp); WellData data; unsigned int nflows = wells.NumFlows(); vector<float> nrm(nflows); int flow0 = mixed_first_flow(); int flow1 = mixed_last_flow(); wells.ResetCurrentRegionWell(); // Some temporary code for comparing clonal filter in background model: ofstream out("basecaller_ppf_ssq.txt"); assert(out); while(!wells.ReadNextRegionData(&data)){ // Skip if this is not in the sample: well_coord wc(data.y, data.x); if(sample.find(wc) == sample.end()) continue; // Skip wells with infinite signal: bool finite = all_finite(data.flowValues, data.flowValues+nflows); if(not finite){ ++counts._ninf; continue; } // Key-normalize: float normalizer = ComputeNormalizerKeyFlows(data.flowValues, &key_ionogram[0], key_ionogram.size()); transform(data.flowValues, data.flowValues+nflows, nrm.begin(), bind2nd(divides<float>(),normalizer)); // Skip wells with bad key: bool good_key = key_is_good(nrm.begin(), key_ionogram.begin(), key_ionogram.end()); if(not good_key){ ++counts._nbad_key; continue; } // Skip possible super-mixed beads: float perc_pos = percent_positive(nrm.begin()+flow0, nrm.begin()+flow1);; if(perc_pos > mixed_ppf_cutoff()){ ++counts._nsuper; continue; } // Record ppf and ssq: float sum_frac = sum_fractional_part(nrm.begin()+flow0, nrm.begin()+flow1); ppf.push_back(perc_pos); ssq.push_back(sum_frac); // Some temporary code for comparing clonal filter in background model: out << setw(6) << data.y << setw(6) << data.x << setw(8) << setprecision(2) << fixed << perc_pos << setw(8) << setprecision(2) << fixed << sum_frac << setw(8) << setprecision(2) << fixed << normalizer << endl; } assert(ppf.size() == ssq.size()); }
int main (int argc, const char *argv[]) { if (argc == 1) { printf ("BaseCallerLite - Bare bone basecaller\n"); printf ("\n"); printf ("Usage:\n"); printf ("BaseCallerLite [options]\n"); printf ("\tOptions:\n"); printf ("\t\tComing soon\n"); printf ("\n"); return 1; } string libKey = "TCAG"; string inputDirectory = "."; string outputDirectory = "."; bool singleCoreCafie = false; BaseCallerLite basecaller; basecaller.regionXSize = 50; basecaller.regionYSize = 50; basecaller.runId = "BCLTE"; basecaller.CF = 0.0; basecaller.IE = 0.0; basecaller.numWellsCalled = 0; basecaller.nextRegionX = 0; basecaller.nextRegionY = 0; OptArgs opts; opts.ParseCmdLine(argc, argv); opts.GetOption(basecaller.CF, "0.0", '-', "cf"); opts.GetOption(basecaller.IE, "0.0", '-', "ie"); opts.GetOption(inputDirectory, ".", '-', "input-dir"); opts.GetOption(outputDirectory, ".", '-', "output-dir"); opts.GetOption(singleCoreCafie, "false", '-', "singlecorecafie"); int numWorkers = 2*numCores(); if (singleCoreCafie) numWorkers = 1; Mask mask (1, 1); if (mask.SetMask ((inputDirectory + "/bfmask.bin").c_str())) exit (EXIT_FAILURE); RawWells wells (inputDirectory.c_str(),"1.wells"); //SetWellsToLiveBeadsOnly(wells,&mask); wells.OpenForIncrementalRead(); basecaller.maskPtr = &mask; basecaller.wellsPtr = &wells; basecaller.rows = mask.H(); basecaller.cols = mask.W(); basecaller.flowOrder.SetFlowOrder(wells.FlowOrder(), wells.NumFlows()); basecaller.numFlows = wells.NumFlows(); basecaller.numRegionsX = (basecaller.cols + basecaller.regionXSize - 1) / basecaller.regionXSize; basecaller.numRegionsY = (basecaller.rows + basecaller.regionYSize - 1) / basecaller.regionYSize; basecaller.numRegions = basecaller.numRegionsX * basecaller.numRegionsY; basecaller.libKeyFlows.assign(basecaller.numFlows,0); basecaller.libNumKeyFlows = basecaller.flowOrder.BasesToFlows(libKey, &basecaller.libKeyFlows[0], basecaller.numFlows); basecaller.libSFF.Open(outputDirectory+"/rawlib.sff", basecaller.numRegions, basecaller.flowOrder, libKey); time_t startBasecall; time(&startBasecall); pthread_mutex_init(&basecaller.wellsAccessMutex, NULL); pthread_t worker_id[numWorkers]; for (int iWorker = 0; iWorker < numWorkers; iWorker++) if (pthread_create(&worker_id[iWorker], NULL, BasecallerWorkerWrapper, &basecaller)) { printf("*Error* - problem starting thread\n"); return 1; } for (int iWorker = 0; iWorker < numWorkers; iWorker++) pthread_join(worker_id[iWorker], NULL); pthread_mutex_destroy(&basecaller.wellsAccessMutex); time_t endBasecall; time(&endBasecall); basecaller.libSFF.Close(); printf("\nBASECALLING: called %d of %d wells in %1.1f seconds with %d threads\n", basecaller.numWellsCalled, basecaller.rows*basecaller.cols, difftime(endBasecall,startBasecall), numWorkers); printf("Generated library SFF with %d reads\n", basecaller.libSFF.num_reads()); return 0; }