void BundlerMatcher::open(const std::string& inputPath, const std::string& inputFilename, const std::string& outMatchFilename) { mInputPath = inputPath; if (!mIsInitialized) { std::cout << "Error : can not initialize opengl context for SiftGPU" <<std::endl; return; } if (!parseListFile(inputFilename)) { std::cout << "Error : can not open file : " <<inputFilename.c_str() <<std::endl; return; } //Sift Feature Extraction for (unsigned int i=0; i<mFilenames.size(); ++i) { int percent = (int)(((i+1)*100.0f) / (1.0f*mFilenames.size())); int nbFeature = extractSiftFeature(i); clearScreen(); std::cout << "[Extracting Sift Feature : " << percent << "%] - ("<<i+1<<"/"<<mFilenames.size()<<", #"<< nbFeature <<" features)"; } clearScreen(); std::cout << "[Sift Feature extracted]"<<std::endl; for (unsigned int i=0; i<mFilenames.size(); ++i) { int percent = (int)(((i+1)*100.0f) / (1.0f*mFilenames.size())); saveAsciiKeyFile(i); if (mBinaryKeyFileWritingEnabled) saveBinaryKeyFile(i); clearScreen(); std::cout << "[Saving Sift Key files: " << percent << "%] - ("<<i+1<<"/"<<mFilenames.size()<<")"; } saveVector(); clearScreen(); std::cout << "[Sift Key files saved]"<<std::endl; delete mSift; mSift = NULL; mMatcher->VerifyContextGL(); //Sift Matching int currentIteration = 0; if (mSequenceMatchingEnabled) //sequence matching (video input) { std::cout << "[Sequence matching enabled: length " << mSequenceMatchingLength << "]" << std::endl; int maxIterations = (int) (mFilenames.size()-mSequenceMatchingLength)*mSequenceMatchingLength + mSequenceMatchingLength*(mSequenceMatchingLength-1)/2; // (N-m).m + m(m-1)/2 for (unsigned int i=0; i<mFilenames.size()-1; ++i) { for (int j=1; j<=mSequenceMatchingLength; ++j) { int indexA = i; int indexB = i+j; if (indexB >= mFilenames.size()) continue; else { clearScreen(); int percent = (int) (currentIteration*100.0f / maxIterations*1.0f); std::cout << "[Matching Sift Feature : " << percent << "%] - (" << indexA << "/" << indexB << ")"; matchSiftFeature(indexA, indexB); currentIteration++; } } } } else //classic quadratic matching { int maxIterations = (int) mFilenames.size()*((int) mFilenames.size()-1)/2; // Sum(1 -> n) = n(n-1)/2 for (unsigned int i=0; i<mFilenames.size(); ++i) { for (unsigned int j=i+1; j<mFilenames.size(); ++j) { clearScreen(); int percent = (int) (currentIteration*100.0f / maxIterations*1.0f); std::cout << "[Matching Sift Feature : " << percent << "%] - (" << i << "/" << j << ")"; matchSiftFeature(i, j); currentIteration++; } } } clearScreen(); std::cout << "[Sift Feature matched]"<<std::endl; delete mMatcher; mMatcher = NULL; saveMatches(outMatchFilename); saveMatrix(); }
// main function int MergeBam::execute(int argc, char ** argv) { static struct option getopt_long_options[] = { // Input options { "list", required_argument, NULL, 'l'}, { "in", required_argument, NULL, 'i'}, { "out", required_argument, NULL, 'o'}, { "verbose", no_argument, NULL, 'v'}, { "log", required_argument, NULL, 'L'}, { NULL, 0, NULL, 0 }, }; // Adjust the arguments since it is called as ./bam mergeBam instead of // just mergeBam. --argc; ++argv; int n_option_index = 0; char c; bool b_verbose = false; vector<std::string> vs_in_bam_files; // input BAM files std::string s_list, s_out, s_logger; while ( ( c = getopt_long(argc, argv, "l:i:o:vL:", getopt_long_options, &n_option_index) ) != -1 ) { switch(c) { case 'i': vs_in_bam_files.push_back(optarg); break; case 'l': s_list = optarg; break; case 'o': s_out = optarg; break; case 'v': b_verbose = true; break; case 'L': s_logger = optarg; break; default: fprintf(stderr,"Unrecognized option %s",getopt_long_options[n_option_index].name); abort(); } } if ( s_logger.empty() ) { if(s_out.empty()) { s_logger = "-"; } else { s_logger = s_out + ".log"; } } // create a logger object, now possible to write logs/warnings/errors Logger::gLogger = new Logger(s_logger.c_str(), b_verbose); // every argument must correspond to an option if ( optind < argc ) { usage(); Logger::gLogger->error("non-option argument exist"); } // check the required arguments are nonempty if ( (vs_in_bam_files.empty() && s_list.empty()) || s_out.empty() ) { usage(); Logger::gLogger->error("At least one of the required argument is missing"); } if(!vs_in_bam_files.empty() && !s_list.empty()) { Logger::gLogger->error("Cannot specify both --in/-i and --list/-l"); } if(!s_list.empty()) { Logger::gLogger->writeLog("Input list file : %s",s_list.c_str()); } else { std::string bamList = ""; for(unsigned int i = 0; i < vs_in_bam_files.size(); i++) { if(i != 0) { bamList += ", "; } bamList += vs_in_bam_files[i]; } Logger::gLogger->writeLog("Input list file : %s", bamList.c_str()); } Logger::gLogger->writeLog("Output BAM file : %s",s_out.c_str()); Logger::gLogger->writeLog("Output log file : %s",s_logger.c_str()); Logger::gLogger->writeLog("Verbose mode : %s",b_verbose ? "On" : "Off"); vector<ReadGroup> v_readgroups; // readGroups corresponding to BAM file vector<ReadGroup> v_uniq_readgroups; // unique readGroups written to header // If the list file is being used instead of the individual bams, parse it. if(!s_list.empty()) { // parse the list file and fill the vectors above if ( parseListFile(s_list, vs_in_bam_files, v_readgroups, v_uniq_readgroups) == false ) { Logger::gLogger->error("Error in parsing the list file %s",s_list.c_str()); } if ( vs_in_bam_files.size() != v_readgroups.size() ) { Logger::gLogger->error("parseListFile gave different size for vs_in_bam_files, v_readgroups: %d, %d", vs_in_bam_files.size(), v_readgroups.size()); } } // sanity check uint32_t n_bams = vs_in_bam_files.size(); Logger::gLogger->writeLog("Total of %d BAM files are being merged",n_bams); if ( n_bams < 2 ) { Logger::gLogger->error("At least two BAM files must be specified for merging"); } // create SamFile and SamFileHeader object for each BAM file SamFile *p_in_bams = new SamFile[n_bams]; SamFileHeader *p_headers = new SamFileHeader[n_bams]; // read each BAM file and its header, // making sure that the headers are identical std::string firstHeaderNoRGPG = ""; std::string headerNoRGPG = ""; SamFileHeader newHeader; std::string firstHeaderString = ""; for(uint32_t i=0; i < n_bams; ++i) { if ( ! p_in_bams[i].OpenForRead(vs_in_bam_files[i].c_str()) ) { Logger::gLogger->error("Cannot open BAM file %s for reading",vs_in_bam_files[i].c_str()); } p_in_bams[i].setSortedValidation(SamFile::COORDINATE); p_in_bams[i].ReadHeader(p_headers[i]); // Extract the RGs from this header. if(i == 0) { // First header, so store it as the first header newHeader = p_headers[i]; // Determine the header without RG. parseOutRG(p_headers[i], firstHeaderNoRGPG, NULL); } else { parseOutRG(p_headers[i], headerNoRGPG, &newHeader); if(firstHeaderNoRGPG != headerNoRGPG) { Logger::gLogger->error("The headers are not identical at index %d",i); } if(newHeader.getReferenceInfo() != p_headers[i].getReferenceInfo()) { Logger::gLogger->error("The headers are not identical at index %d",i); } } } // first header will be the new header to be written to output // adding all possible readGroups to the new header for(uint32_t i=0; i < v_uniq_readgroups.size(); ++i) { addReadGroupToHeader(newHeader, v_uniq_readgroups[i]); } // Write an output file with new headers SamFile bam_out; if ( !bam_out.OpenForWrite(s_out.c_str()) ) { Logger::gLogger->error("Cannot open BAM file %s for writing",s_out.c_str()); } bam_out.setSortedValidation(SamFile::COORDINATE); bam_out.WriteHeader(newHeader); // create SamRecords and GenomicCoordinates for each input BAM file SamRecord* p_records = new SamRecord[n_bams]; uint64_t* p_gcoordinates = new uint64_t[n_bams]; // read the first record for every input BAM file for(uint32_t i=0; i < n_bams; ++i) { if ( p_in_bams[i].ReadRecord(p_headers[i],p_records[i]) ) { if ( p_records[i].isValid(p_headers[i]) ) { p_gcoordinates[i] = getGenomicCoordinate(p_records[i]); } else { Logger::gLogger->error("Invalid record found at the first line of file %u. Failure code is %d", i, static_cast<int>(p_in_bams[i].GetFailure())); } } else { if ( p_in_bams[i].GetFailure() == SamStatus::NO_MORE_RECS ) { // the BAM file has no record p_gcoordinates[i] = MAX_GENOMIC_COORDINATE; } else { Logger::gLogger->error("Invalid record found at the first line of file %u. Failure code is %d", i, static_cast<int>(p_in_bams[i].GetFailure())); } } } // Routine for writing output BAM file uint32_t nWrittenRecords = 0; // number of written BAM records while(true) { // scan the minimum index of genomic coordinate int min_idx = -1; uint64_t min_gcoordinate = MAX_GENOMIC_COORDINATE; for(uint32_t i=0; i < n_bams; ++i) { if ( min_gcoordinate > p_gcoordinates[i] ) { min_gcoordinate = p_gcoordinates[i]; min_idx = static_cast<int>(i); } } // If every file reached EOF, exit the loop if ( min_idx < 0 ) break; // If adding read groups, add the tag. if(!v_readgroups.empty()) { // add readGroup tag to the record to write and write to output BAM file //Logger::gLogger->writeLog("%d",min_idx); addReadGroupTag(p_records[min_idx], v_readgroups[min_idx]); } bam_out.WriteRecord(newHeader, p_records[min_idx]); ++nWrittenRecords; if ( nWrittenRecords % 1000000 == 0 ) { Logger::gLogger->writeLog("Writing %u records to the output file",nWrittenRecords); } // Read a record from the input BAM file if ( p_in_bams[min_idx].ReadRecord(p_headers[min_idx], p_records[min_idx]) ) { if ( p_records[min_idx].isValid(p_headers[min_idx]) ) { p_gcoordinates[min_idx] = getGenomicCoordinate(p_records[min_idx]); } else { // if invalid record found Logger::gLogger->error("Invalid record found at recordCount %d of file %d. Failure code is %d", p_in_bams[min_idx].GetCurrentRecordCount(), min_idx, static_cast<int>(p_in_bams[min_idx].GetFailure())); } } else { if ( p_in_bams[min_idx].GetFailure() == SamStatus::NO_MORE_RECS ) { p_gcoordinates[min_idx] = MAX_GENOMIC_COORDINATE; // Mark that all record has been read } else { Logger::gLogger->error("Cannot read record at recordCount %d of file %d. Failure code is %d", p_in_bams[min_idx].GetCurrentRecordCount(), min_idx, static_cast<int>(p_in_bams[min_idx].GetFailure())); } } } // close files and free allocated memory Logger::gLogger->writeLog("Finished writing %d records into the output BAM file",bam_out.GetCurrentRecordCount()); bam_out.Close(); for(uint32_t i=0; i < n_bams; ++i) { p_in_bams[i].Close(); } delete[] p_records; delete[] p_in_bams; delete[] p_headers; delete[] p_gcoordinates; delete Logger::gLogger; return 0; }