void ProcessNonDaemonCommands(int argc, const char **argv) { if (strcmp(argv[1], "index") == 0) { if (CommandPipe == NULL) { GenomeIndex::runIndexer(argc - 2, argv + 2); } else { // // The error cases in index build don't really free memory properly, so we just don't allows it in daemon mode. // WriteErrorMessage("The index command is not available in daemon mode. Please run 'snap index' directly.\n"); } } else if (strcmp(argv[1], "single") == 0 || strcmp(argv[1], "paired") == 0) { for (int i = 1; i < argc; /* i is increased below */) { unsigned nArgsConsumed; if (strcmp(argv[i], "single") == 0) { SingleAlignerContext single; single.runAlignment(argc - i, argv + i, SNAP_VERSION, &nArgsConsumed); } else if (strcmp(argv[i], "paired") == 0) { PairedAlignerContext paired; paired.runAlignment(argc - i, argv + i, SNAP_VERSION, &nArgsConsumed); } else { fprintf(stderr, "Invalid command: %s\n\n", argv[i]); usage(); return; } _ASSERT(nArgsConsumed > 0); i += nArgsConsumed; } } else { WriteErrorMessage("Invalid command: %s\n\n", argv[1]); usage(); } }
bool Genome::openFileAndGetSizes(const char *filename, GenericFile **file, GenomeDistance *nBases, unsigned *nContigs, bool map) { if (map) { *file = GenericFile_map::open(filename); } else { *file = GenericFile::open(filename, GenericFile::ReadOnly); } if (*file == NULL) { WriteErrorMessage("Genome::openFileAndGetSizes: unable to open file '%s'\n",filename); return false; } char linebuf[2000]; char *retval = (*file)->gets(linebuf, sizeof(linebuf)); if (NULL == retval || 2 != sscanf(linebuf,"%lld %d\n", nBases, nContigs)) { (*file)->close(); delete *file; *file = NULL; WriteErrorMessage("Genome::openFileAndGetSizes: unable to read header\n"); return false; } return true; }
bool Genome::saveToFile(const char *fileName) const { // // Save file format is (in binary) the number of bases, the number of contigs, followed by // the contigs themselves, rounded up to 4K, followed by the bases. // FILE *saveFile = fopen(fileName,"wb"); if (saveFile == NULL) { WriteErrorMessage("Genome::saveToFile: unable to open file '%s'\n",fileName); return false; } fprintf(saveFile,"%d %d\n",nBases,nContigs); char *curChar = NULL; for (int i = 0; i < nContigs; i++) { for (int n = 0; n < strlen(contigs[i].name); n++){ curChar = contigs[i].name + n; if (*curChar == ' '){ *curChar = '_'; } } fprintf(saveFile,"%d %s\n",contigs[i].beginningOffset,contigs[i].name); } if (nBases != fwrite(bases,1,nBases,saveFile)) { WriteErrorMessage("Genome::saveToFile: fwrite failed\n"); fclose(saveFile); return false; } fclose(saveFile); return true; }
bool Genome::saveToFile(const char *fileName) const { // // Save file format is (in binary) the number of bases, the number of contigs, followed by // the contigs themselves, rounded up to 4K, followed by the bases. // FILE *saveFile = fopen(fileName,"wb"); if (saveFile == NULL) { WriteErrorMessage("Genome::saveToFile: unable to open file '%s'\n",fileName); return false; } fprintf(saveFile,"%lld %d\n",nBases, nContigs); char *curChar = NULL; for (int i = 0; i < nContigs; i++) { for (int n = 0; n < strlen(contigs[i].name); n++){ curChar = contigs[i].name + n; if (*curChar == ' '){ *curChar = '_'; } } fprintf(saveFile,"%lld %s\n",contigs[i].beginningLocation, contigs[i].name); } // // Write it out in (big) chunks. For whatever reason, fwrite with really big sizes seems not to // work as well as one would like. // const size_t max_chunk_size = 1 * 1024 * 1024 * 1024; // 1 GB (or GiB for the obsessively precise) size_t bases_to_write = nBases; size_t bases_written = 0; while (bases_to_write > 0) { size_t bases_this_write = __min(bases_to_write, max_chunk_size); if (bases_this_write != fwrite(bases + bases_written, 1, bases_this_write, saveFile)) { WriteErrorMessage("Genome::saveToFile: fwrite failed\n"); fclose(saveFile); return false; } bases_to_write -= bases_this_write; bases_written += bases_this_write; } _ASSERT(bases_written == nBases); fclose(saveFile); return true; }
void Genome::startContig(const char *contigName) { if (nContigs == maxContigs) { // // Reallocate (maybe we're sequencing a tree that's got lots of chromosomes). // int newMaxContigs = maxContigs * 2; Contig *newContigs = new Contig[newMaxContigs]; if (NULL == newContigs) { WriteErrorMessage("Genome: unable to reallocate contig array to size %d\n",newMaxContigs); soft_exit(1); } for (int i = 0; i < nContigs; i++) { newContigs[i] = contigs[i]; } delete [] contigs; contigs = newContigs; maxContigs = newMaxContigs; } contigs[nContigs].beginningOffset = nBases; size_t len = strlen(contigName) + 1; contigs[nContigs].name = new char[len]; contigs[nContigs].nameLength = (unsigned)len-1; strncpy(contigs[nContigs].name,contigName,len); contigs[nContigs].name[len-1] = '\0'; nContigs++; }
//
// Write the top-level command summary (index / single / paired / daemon) through WriteErrorMessage.
// The whole text goes out in a single call; this function itself does not exit.
//
static void usage() { WriteErrorMessage( "Usage: snap <command> [<options>]\n" "Commands:\n" " index build a genome index\n" " single align single-end reads\n" " paired align paired-end reads\n" " daemon run in daemon mode--accept commands remotely\n" "Type a command without arguments to see its help.\n"); }
//
// Append len bases from data to the end of the genome.  Exits with an error message
// if the result would not fit within maxBases.
//
void Genome::addData(const char *data, size_t len)
{
    const size_t basesAfterAppend = (size_t)nBases + len;
    if (basesAfterAppend > maxBases) {
        WriteErrorMessage("Tried to write beyond allocated genome size (or tried to write into a genome that was loaded from a file).\n"
                          "Size = %lld\n",(_int64)maxBases);
        soft_exit(1);
    }

    // New bases go immediately after the ones already stored.
    char *appendAt = bases + nBases;
    memcpy(appendAt, data, len);

    nBases += (unsigned)len;
}
void Genome::addData(const char *data, GenomeDistance len) { if (nBases + len > GenomeLocationAsInt64(maxBases)) { WriteErrorMessage("Tried to write beyond allocated genome size (or tried to write into a genome that was loaded from a file).\n" "Size = %lld\n", GenomeLocationAsInt64(maxBases)); soft_exit(1); } memcpy(bases + nBases,data,len); nBases += (unsigned)len; }
SimpleReadWriter(const FileFormat* i_format, DataWriter* i_writer, const Genome* i_genome, bool i_killIfTooSlow, bool i_emitInternalScore, char *i_internalScoreTag, bool i_ignoreAlignmentAdjustmentsForOm) : format(i_format), writer(i_writer), genome(i_genome), killIfTooSlow(i_killIfTooSlow), lastTooSlowCheck(0), emitInternalScore(i_emitInternalScore), ignoreAlignmentAdjustmentsForOm(i_ignoreAlignmentAdjustmentsForOm) { if (emitInternalScore) { if (strlen(i_internalScoreTag) != 2) { WriteErrorMessage("SimpleReadWriter: bogus internal score tag\n"); soft_exit(1); } strcpy(internalScoreTag, i_internalScoreTag); } else { internalScoreTag[0] = '\0'; } }
//
// Save the hash table to a newly created file called saveFileName.  *bytesWritten gets the
// number of bytes the FILE*-based overload reports.  Returns true only if the file was
// opened, written and closed without error.
//
bool SNAPHashTable::saveToFile(const char *saveFileName, size_t *bytesWritten)
{
    FILE *saveFile = fopen(saveFileName,"wb");
    if (saveFile == NULL) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable(%s) fopen failed\n",saveFileName);
        return false;
    }

    bool worked = saveToFile(saveFile, bytesWritten);

    //
    // stdio buffers writes, so the last part of the table may only reach the disk (or fail
    // to) during fclose.  Don't report success if the close failed.
    //
    if (0 != fclose(saveFile)) {
        if (worked) {
            // If the write itself failed, an error has already been printed.
            WriteErrorMessage("SNAPHashTable::saveToFile(%s) fclose failed\n", saveFileName);
        }
        worked = false;
    }

    return worked;
}
//
// Create an empty genome with room to store nBasesStored bases (plus N_PADDING bytes of 'n'
// on each side) and a contig table of i_maxContigs entries.  i_maxBases is recorded as the
// capacity limit and as maxLocation.
//
Genome::Genome(GenomeDistance i_maxBases, GenomeDistance nBasesStored, unsigned i_chromosomePadding, unsigned i_maxContigs)
    : maxBases(i_maxBases), minLocation(0), maxLocation(i_maxBases), chromosomePadding(i_chromosomePadding), maxContigs(i_maxContigs),
      mappedFile(NULL)
{
    //
    // Check the raw allocation before offsetting it: once N_PADDING has been added, a failed
    // allocation no longer compares equal to NULL.
    //
    char *allocation = (char *) BigAlloc(nBasesStored + 2 * N_PADDING);
    if (NULL == allocation) {
        WriteErrorMessage("Genome: unable to allocate memory for %llu bases\n", GenomeLocationAsInt64(maxBases));
        soft_exit(1);
    }
    bases = allocation + N_PADDING;

    // Add N's for the N_PADDING bases before and after the genome itself
    memset(bases - N_PADDING, 'n', N_PADDING);
    memset(bases + nBasesStored, 'n', N_PADDING);

    nBases = 0;

    nContigs = 0;
    contigs = new Contig[maxContigs];
    contigsByName = NULL;
}
void SimpleReadWriter::checkIfTooSlow() { const _int64 tooSlowCheckPeriod = 5 * 60 * 1000; // 5 min in ms const _int64 tooSlowCheckMinReadsPerCheckPeriod = 5 * 60 * 1000; // One read/ms (or 1000 reads/s, but just on this thread). if (killIfTooSlow) { _int64 now = timeInMillis(); if (lastTooSlowCheck + tooSlowCheckPeriod <= now) { if (lastTooSlowCheck != 0 && writesSinceLastTooSlowCheck < tooSlowCheckMinReadsPerCheckPeriod) { WriteErrorMessage("Only wrote %lld writes during a %lld minute check period; we're probably out of memory and are giving up because of -kts\n", writesSinceLastTooSlowCheck, tooSlowCheckPeriod / (60 * 1000)); soft_exit(1); } lastTooSlowCheck = now; writesSinceLastTooSlowCheck = 0; } writesSinceLastTooSlowCheck++; } // if (killIfTooSlow) }
//
// Create an empty genome with room to store nBasesStored bases (plus N_PADDING bytes of 'n'
// on each side).  i_maxBases is recorded as the capacity limit and as maxOffset.
//
Genome::Genome(unsigned i_maxBases, unsigned nBasesStored, unsigned i_chromosomePadding)
    : maxBases(i_maxBases), minOffset(0), maxOffset(i_maxBases), chromosomePadding(i_chromosomePadding)
{
    //
    // Check the raw allocation before offsetting it: once N_PADDING has been added, a failed
    // allocation no longer compares equal to NULL.
    //
    char *allocation = (char *) BigAlloc(nBasesStored + 2 * N_PADDING);
    if (NULL == allocation) {
        WriteErrorMessage("Genome: unable to allocate memory for %llu bases\n",(_int64)maxBases);
        soft_exit(1);
    }
    bases = allocation + N_PADDING;

    // Add N's for the N_PADDING bases before and after the genome itself
    memset(bases - N_PADDING, 'n', N_PADDING);
    memset(bases + nBasesStored, 'n', N_PADDING);

    nBases = 0;

    maxContigs = 32;    // A power of two that's bigger than the usual number of chromosomes, so we don't have to
                        // reallocate in practice.

    nContigs = 0;
    contigs = new Contig[maxContigs];
    contigsByName = NULL;
}
//
// Format and commit every alignment for one read pair: first the nResults paired alignments
// (two records each), then the per-read single alignments.
//
// All records are formatted into one writer buffer and only committed (writer->advance) once
// every one of them fit.  If they do not fit in the current buffer, the writer is moved to its
// next batch and the whole thing is tried once more; a second failure is fatal.
//
// Returns true once everything has been committed, false if the writer could not supply a
// buffer or a next batch.  result[] and singleResults[] may be modified: an alignment whose
// front-clipping adjustment would move it across a contig boundary is turned into NotFound.
// Additional front clipping on both reads is reset to 0 before returning.
//
bool SimpleReadWriter::writePairs(
    const ReaderContext& context,
    Read **reads /* array of size NUM_READS_PER_PAIR */,
    PairedAlignmentResult *result,
    int nResults,
    SingleAlignmentResult **singleResults /* array of size NUM_READS_PER_PAIR*/,
    int *nSingleResults /* array of size NUM_READS_PER_PAIR*/,
    bool firstIsPrimary)
{
    bool retVal = false;
    //
    // We need to write all alignments for the pair into the same buffer, so that a write from
    // some other thread doesn't separate them.  We make two passes, trying to write into the
    // existing buffer, and then into a clean one.  If that doesn't work, abort the alignment
    // run and ask for a bigger write buffer.
    //
    // usedBuffer[x][i] holds the formatted size of each record and finalLocations[x][i] the
    // location it is committed at.  Stack arrays are used when both reads' counts fit in
    // staticUsedBufferSize; otherwise a single heap array per kind is split between the two reads.
    //
    const int staticUsedBufferSize = 2000;
    size_t staticUsedBuffer[NUM_READS_PER_PAIR][staticUsedBufferSize];
    GenomeLocation staticLocationBuffer[NUM_READS_PER_PAIR][staticUsedBufferSize];

    GenomeLocation *finalLocations[NUM_READS_PER_PAIR];
    size_t *usedBuffer[NUM_READS_PER_PAIR];
    if (nResults + nSingleResults[0] <= staticUsedBufferSize && nResults + nSingleResults[1] <= staticUsedBufferSize) {
        usedBuffer[0] = staticUsedBuffer[0];
        usedBuffer[1] = staticUsedBuffer[1];
        finalLocations[0] = staticLocationBuffer[0];
        finalLocations[1] = staticLocationBuffer[1];
    } else {
        usedBuffer[0] = new size_t[nResults * NUM_READS_PER_PAIR + nSingleResults[0] + nSingleResults[1]];
        usedBuffer[1] = usedBuffer[0] + nResults + nSingleResults[0];
        finalLocations[0] = new GenomeLocation[nResults * NUM_READS_PER_PAIR + nSingleResults[0] + nSingleResults[1]];
        finalLocations[1] = finalLocations[0] + nResults + nSingleResults[0];
    }

    //
    // For paired reads, we need to have the same QNAME for both of them, and it needs to be unique among all other
    // reads in the dataset.  For now, all we do is see if the read names end in /1 and /2, and if so truncate them.
    //
    size_t idLengths[NUM_READS_PER_PAIR];
    idLengths[0] = reads[0]->getIdLength();
    idLengths[1] = reads[1]->getIdLength();
    if (idLengths[0] == idLengths[1] && idLengths[0] > 2 && reads[0]->getId()[idLengths[0]-2] == '/' && reads[1]->getId()[idLengths[0]-2] == '/') {
        char lastChar0, lastChar1;
        lastChar0 = reads[0]->getId()[idLengths[0] - 1];
        lastChar1 = reads[1]->getId()[idLengths[1] - 1];
        if ((lastChar0 == '1' || lastChar0 == '2') && (lastChar1 == '1' || lastChar1 == '2') && lastChar0 != lastChar1) {
            idLengths[0] -= 2;
            idLengths[1] -= 2;
        }
    }

    for (int pass = 0; pass < 2; pass++) {
        char* buffer;
        size_t size;
        size_t used = 0;
        bool fitInBuffer = true;    // NOTE(review): never read anywhere in this function.

        if (!writer->getBuffer(&buffer, &size)) {
            goto done;
        }

        //
        // Write all of the pair alignments into the buffer.
        //
        for (int whichAlignmentPair = 0; whichAlignmentPair < nResults; whichAlignmentPair++) {
            reads[0]->setAdditionalFrontClipping(0);
            reads[1]->setAdditionalFrontClipping(0);

            GenomeLocation locations[2];
            locations[0] = result[whichAlignmentPair].status[0] != NotFound ? result[whichAlignmentPair].location[0] : InvalidGenomeLocation;
            locations[1] = result[whichAlignmentPair].status[1] != NotFound ? result[whichAlignmentPair].location[1] : InvalidGenomeLocation;
            int writeOrder[2];  // The order in which we write the reads, which is just numerical by genome location.  SO writeOrder[0] gets written first, and writeOrder[1] second.

            if (locations[0] <= locations[1]) {
                writeOrder[0] = 0;
                writeOrder[1] = 1;
            } else {
                writeOrder[0] = 1;
                writeOrder[1] = 0;
            }

            bool secondReadLocationChanged;
            int cumulativePositiveAddFrontClipping[NUM_READS_PER_PAIR] = { 0, 0 };

            do {
                // Bytes formatted for this pair so far in this attempt; not added to 'used' until the pair is settled.
                size_t tentativeUsed = 0;
                secondReadLocationChanged = false;

                for (int firstOrSecond = 0; firstOrSecond < NUM_READS_PER_PAIR; firstOrSecond++) {  // looping over the order in which the reads are written, not the order in which they arrived
                    int whichRead = writeOrder[firstOrSecond];
                    //
                    // Loop until we get a write with no additional front clipping.
                    //
                    int addFrontClipping = 0;

                    // Note that usedBuffer is indexed by write order (firstOrSecond), while the locations are indexed by read (whichRead).
                    while (!format->writeRead(context, &lvc, buffer + used + tentativeUsed, size - used - tentativeUsed, &usedBuffer[firstOrSecond][whichAlignmentPair],
                        idLengths[whichRead], reads[whichRead], result[whichAlignmentPair].status[whichRead],
                        result[whichAlignmentPair].mapq[whichRead], locations[whichRead], result[whichAlignmentPair].direction[whichRead],
                        whichAlignmentPair != 0 || !firstIsPrimary, &addFrontClipping, true, writeOrder[firstOrSecond] == 0,
                        reads[1 - whichRead], result[whichAlignmentPair].status[1 - whichRead],
                        locations[1 - whichRead], result[whichAlignmentPair].direction[1 - whichRead],
                        result[whichAlignmentPair].alignedAsPair)) {

                        if (0 == addFrontClipping || locations[whichRead] == InvalidGenomeLocation) {
                            //
                            // We failed because we ran out of buffer.
                            //
                            goto blownBuffer;
                        }

                        if (1 == firstOrSecond) {
                            //
                            // If the location of the second read changed, we need to redo the first one as well, because it includes an offset to the second read
                            //
                            secondReadLocationChanged = true;
                        }

                        const Genome::Contig *originalContig = genome->getContigAtLocation(locations[whichRead]);
                        const Genome::Contig *newContig = genome->getContigAtLocation(locations[whichRead] + addFrontClipping);
                        if (newContig != originalContig || NULL == newContig || locations[whichRead] + addFrontClipping > originalContig->beginningLocation + originalContig->length - genome->getChromosomePadding()) {
                            //
                            // Altering this would push us over a contig boundary.  Just give up on the read.
                            //
                            result[whichAlignmentPair].status[whichRead] = NotFound;
                            result[whichAlignmentPair].location[whichRead] = InvalidGenomeLocation;
                            locations[whichRead] = InvalidGenomeLocation;
                        } else {
                            // Only positive adjustments are accumulated into the read's clipping; the location moves either way.
                            if (addFrontClipping > 0) {
                                cumulativePositiveAddFrontClipping[firstOrSecond] += addFrontClipping;
                                reads[whichRead]->setAdditionalFrontClipping(cumulativePositiveAddFrontClipping[firstOrSecond]);
                            }
                            locations[whichRead] += addFrontClipping;
                        }
                    } // While formatting didn't work

                    tentativeUsed += usedBuffer[firstOrSecond][whichAlignmentPair];
                } // for first or second read

            } while (secondReadLocationChanged);
            used += usedBuffer[0][whichAlignmentPair] + usedBuffer[1][whichAlignmentPair];

            //
            // Both reads are written into the buffer.  Save the final locations we used for when we commit.
            //
            for (int whichRead = 0; whichRead < NUM_READS_PER_PAIR; whichRead++) {
                finalLocations[whichRead][whichAlignmentPair] = locations[whichRead];
            }
        } // for each pair.

        //
        // Now write the single alignments.  Their entries follow the nResults pair entries in usedBuffer/finalLocations.
        //
        for (int whichRead = 0; whichRead < NUM_READS_PER_PAIR; whichRead++) {
            for (int whichAlignment = 0; whichAlignment < nSingleResults[whichRead]; whichAlignment++) {
                // NOTE(review): not initialized here (unlike in the pair loop above); this relies on writeRead setting it before it is tested below.
                int addFrontClipping;
                reads[whichRead]->setAdditionalFrontClipping(0);
                GenomeLocation location = singleResults[whichRead][whichAlignment].status != NotFound ? singleResults[whichRead][whichAlignment].location : InvalidGenomeLocation;
                int cumulativePositiveAddFrontClipping = 0;

                while (!format->writeRead(context, &lvc, buffer + used, size - used, &usedBuffer[whichRead][nResults + whichAlignment], reads[whichRead]->getIdLength(),
                    reads[whichRead], singleResults[whichRead][whichAlignment].status, singleResults[whichRead][whichAlignment].mapq, location, singleResults[whichRead][whichAlignment].direction,
                    true, &addFrontClipping)) {

                    if (0 == addFrontClipping) {
                        goto blownBuffer;
                    }

                    const Genome::Contig *originalContig = genome->getContigAtLocation(location);
                    const Genome::Contig *newContig = genome->getContigAtLocation(location + addFrontClipping);
                    if (newContig != originalContig || NULL == newContig || location + addFrontClipping > originalContig->beginningLocation + originalContig->length - genome->getChromosomePadding()) {
                        //
                        // Altering this would push us over a contig boundary.  Just give up on the read.
                        //
                        singleResults[whichRead][whichAlignment].status = NotFound;
                        location = InvalidGenomeLocation;
                    } else {
                        if (addFrontClipping > 0) {
                            cumulativePositiveAddFrontClipping += addFrontClipping;
                            reads[whichRead]->setAdditionalFrontClipping(cumulativePositiveAddFrontClipping);
                        }
                        location += addFrontClipping;
                    }
                }

                finalLocations[whichRead][nResults + whichAlignment] = location;
                used += usedBuffer[whichRead][nResults + whichAlignment];
            } // For each single alignment of a read
        } // For each read

        //
        // They all fit into the buffer.
        //

        //
        // Commit the updates for the pairs.
        //
        for (int whichReadPair = 0; whichReadPair < nResults; whichReadPair++) {
            for (int firstOrSecond = 0; firstOrSecond < NUM_READS_PER_PAIR; firstOrSecond++) {
                // adjust for write order
                int writeFirstOrSecond = (!!firstOrSecond) ^ (finalLocations[0][whichReadPair] > finalLocations[1][whichReadPair]); // goofy looking !! converts int to bool
                // A record whose own location is invalid is committed at its mate's location.
                writer->advance((unsigned)usedBuffer[firstOrSecond][whichReadPair],
                    finalLocations[writeFirstOrSecond][whichReadPair] == InvalidGenomeLocation ? finalLocations[1 - writeFirstOrSecond][whichReadPair] : finalLocations[writeFirstOrSecond][whichReadPair]);
            }
        }

        //
        // Now commit the updates for the single reads.
        //
        for (int whichRead = 0; whichRead < NUM_READS_PER_PAIR; whichRead++) {
            for (int whichAlignment = 0; whichAlignment < nSingleResults[whichRead]; whichAlignment++) {
                writer->advance((unsigned)usedBuffer[whichRead][nResults + whichAlignment], finalLocations[whichRead][nResults + whichAlignment]);
            }
        }

        retVal = true;
        break;

blownBuffer:
        // Reached only by goto when a record did not fit.  On the second pass the buffer was fresh, so there is nothing left to try.
        if (pass > 0) {
            WriteErrorMessage("Unable to fit all alignments for one read pair into a single write buffer. Increase the size of the write buffer with -wbs, or reduce the number of alignments with -om or -omax\n");
            WriteErrorMessage("Read id: '%.*s'\n", reads[0]->getIdLength(), reads[0]->getId());
            soft_exit(1);
        }

        if (!writer->nextBatch()) {
            goto done;
        }
    } // For each buffer full pass

done:
    // Free the bookkeeping arrays only if they were heap allocated above.
    if (usedBuffer[0] != staticUsedBuffer[0]) {
        delete[] usedBuffer[0];
        usedBuffer[0] = usedBuffer[1] = NULL;

        delete[] finalLocations[0];
        finalLocations[0] = finalLocations[1] = NULL;
    }

    reads[0]->setAdditionalFrontClipping(0);
    reads[1]->setAdditionalFrontClipping(0);

    return retVal;
}
// DumpErrorInfo queries SQLOLEDB error interfaces, retrieving available // status or error information. inline void ComSession::DumpErrorInfo( std::wostream* pOstr, IUnknown* pObjectWithError, REFIID rErrorInterface) const { // Interfaces used in the example. CComPtr<IErrorInfo> pIErrorInfoAll; CComPtr<IErrorRecords> pIErrorRecords; CComPtr<ISupportErrorInfo> pISupportErrorInfo; // Only ask for error information if the interface supports // it. if (pObjectWithError == NULL || FAILED(pObjectWithError->QueryInterface(IID_ISupportErrorInfo, reinterpret_cast<void**>(&pISupportErrorInfo)))) { *pOstr << L"SupportErrorErrorInfo interface not supported" << std::endl; return; } if (FAILED(pISupportErrorInfo->InterfaceSupportsErrorInfo(rErrorInterface))) { *pOstr << L"InterfaceWithError interface not supported" << std::endl; return; } // Do not test the return of GetErrorInfo. It can succeed and return // a NULL pointer in pIErrorInfoAll. Simply test the pointer. HRESULT r = GetErrorInfo(0, &pIErrorInfoAll); if ((pIErrorInfoAll == NULL) || FAILED(r)) { *pOstr << L"GetErrorInfo failed." << std::endl; return; } // Test to see if it's a valid OLE DB IErrorInfo interface // exposing a list of records. if (FAILED(pIErrorInfoAll->QueryInterface(IID_IErrorRecords, reinterpret_cast<void**>(&pIErrorRecords))) ) { // IErrorInfo is valid; get the source and // description to see what it is. WriteErrorMessage(pIErrorInfoAll, pOstr); return; } // Basic error information from GetBasicErrorInfo. ERRORINFO errorinfo; // Number of error records. ULONG nRecs; ULONG nRec; // ISQLErrorInfo parameters. CComBSTR bstrSQLSTATE; LONG lNativeError; pIErrorRecords->GetRecordCount(&nRecs); // Within each record, retrieve information from each // of the defined interfaces. for (nRec = 0; nRec < nRecs; ++nRec) { // From IErrorRecords, get the HRESULT and a reference // to the ISQLErrorInfo interface. 
pIErrorRecords->GetBasicErrorInfo(nRec, &errorinfo); CComPtr<ISQLErrorInfo> pISQLErrorInfo; CComPtr<IErrorInfo> pIErrorInfoRecord; pIErrorRecords->GetCustomErrorObject( nRec, IID_ISQLErrorInfo, reinterpret_cast<IUnknown**>(&pISQLErrorInfo)); if (pISQLErrorInfo != NULL) { pISQLErrorInfo->GetSQLInfo(&bstrSQLSTATE, &lNativeError); // Display the SQLSTATE and native error values. *pOstr << L"SQLSTATE:\t" << bstrSQLSTATE.m_str << std::endl; } if (SUCCEEDED(pIErrorRecords->GetErrorInfo(nRec, ::GetSystemDefaultLCID(), &pIErrorInfoRecord))) { WriteErrorMessage(pIErrorInfoRecord, pOstr); } } }
void RunDaemonMode(int argc, const char **argv) { if (argc < 2 || argc > 3) { daemonUsage(); } printf("SNAP in daemon mode, waiting for commands to execute\n"); const char *pipeName = argc == 3 ? argv[2] : DEFAULT_NAMED_PIPE_NAME; CommandPipe = OpenNamedPipe(pipeName, true); if (NULL == CommandPipe) { WriteErrorMessage("Unable to open named pipe for command IO.\n"); soft_exit(1); } const size_t commandBufferSize = 10000; // Yes, this is fixed size, no it's not a buffer overflow. The named pipe reader just quits if it's too long. char commandBuffer[commandBufferSize]; // // Format of commands is argc (in ascii) followed by argc arguments, each in one line. // for (;;) { if (!ReadFromNamedPipe(CommandPipe, commandBuffer, commandBufferSize)) { CloseNamedPipe(CommandPipe); CommandPipe = NULL; WriteStatusMessage("Named pipe closed. Exiting\n"); soft_exit_no_print(0); } int argc = atoi(commandBuffer); if (0 == argc) { WriteErrorMessage("Expected argument count on named pipe, got '%s'; ignoring.\n", commandBuffer); } else { char **argv = new char*[argc]; for (int i = 0; i < argc; i++) { argv[i] = new char[commandBufferSize]; if (!ReadFromNamedPipe(CommandPipe, argv[i], commandBufferSize)) { CloseNamedPipe(CommandPipe); CommandPipe = NULL; WriteStatusMessage("Error reading argument #%d from named pipe.\n", i); soft_exit(1); } } // for each arg if (argc > 1 && strcmp(argv[1], "exit") == 0) { WriteStatusMessage("SNAP server exiting by request\n"); WriteToNamedPipe(CommandPipe, CommandExecutedString); soft_exit_no_print(1); } printf("Executing command: "); for (int i = 1; i < argc; i++) { printf("%s ", argv[i]); } printf("\n"); ProcessNonDaemonCommands(argc, (const char **) argv); printf("\n"); for (int i = 0; i < argc; i++) { delete[] argv[i]; argv[i] = NULL; } delete[] argv; argv = NULL; } WriteToNamedPipe(CommandPipe, CommandExecutedString); } }
//
// Format and commit all nResults alignments of a single read.
//
// Every record is formatted into one writer buffer and only committed (writer->advance) once
// all of them fit.  If they don't fit in what's left of the current buffer, the writer is moved
// to its next batch and everything is formatted again; failing in the fresh buffer is fatal.
//
// Returns true once everything has been committed, false if the writer could not supply a
// buffer or a next batch.  results[] may be modified: NotFound results get an invalid location,
// and an alignment whose front-clipping adjustment would cross a contig boundary is turned
// into NotFound.  The read's additional front clipping is reset to 0 before returning.
//
bool SimpleReadWriter::writeReads(
    const ReaderContext& context,
    Read *read,
    SingleAlignmentResult *results,
    int nResults,
    bool firstIsPrimary)
{
    char* buffer;
    size_t size;
    size_t used;
    bool result = false;

    for (int i = 0; i < nResults; i++) {
        if (results[i].status == NotFound) {
            results[i].location = InvalidGenomeLocation;
        }
    }

    //
    // We need to keep track of the offsets of all of the alignments in the output buffer so we can commit them.  However,
    // we want to avoid dynamic memory allocation as much as possible.  So, we have a static buffer on the stack that's big enough
    // for the great majority of cases, and then allocate dynamically if that's too small.  Makes for annoying, but efficient
    // code.
    //
    const int staticUsedBufferSize = 2000;
    size_t staticUsedBuffer[staticUsedBufferSize];
    GenomeLocation staticFinalLocationsBuffer[staticUsedBufferSize];

    size_t *usedBuffer;
    GenomeLocation *finalLocations;
    if (nResults <= staticUsedBufferSize) {
        usedBuffer = staticUsedBuffer;
        finalLocations = staticFinalLocationsBuffer;
    } else {
        usedBuffer = new size_t[nResults];
        finalLocations = new GenomeLocation[nResults];
    }

    for (int pass = 0; pass < 2; pass++) { // Make two passes, one with whatever buffer space is left and one with a clean buffer.
        bool blewBuffer = false;

        if (!writer->getBuffer(&buffer, &size)) {
            goto done;
        }

        used = 0;

        for (int whichResult = 0; whichResult < nResults; whichResult++) {
            int addFrontClipping = 0;
            read->setAdditionalFrontClipping(0);
            int cumulativeAddFrontClipping = 0;
            finalLocations[whichResult] = results[whichResult].location;

            while (!format->writeRead(context, &lvc, buffer + used, size - used, &usedBuffer[whichResult], read->getIdLength(), read, results[whichResult].status,
                results[whichResult].mapq, finalLocations[whichResult], results[whichResult].direction, (whichResult > 0) || !firstIsPrimary, &addFrontClipping)) {

                // A failure with no requested adjustment means the record did not fit.
                if (0 == addFrontClipping) {
                    blewBuffer = true;
                    break;
                }

                // redo if read modified (e.g. to add soft clipping, or move alignment for a leading I.
                // NOTE(review): the contig lookups use the original results[].location plus this round's
                // addFrontClipping, while the bound check and the final location use the running
                // finalLocations/cumulative values -- confirm this mix is intended.
                const Genome::Contig *originalContig = results[whichResult].status == NotFound ? NULL
                    : genome->getContigAtLocation(results[whichResult].location);
                const Genome::Contig *newContig = results[whichResult].status == NotFound ? NULL
                    : genome->getContigAtLocation(results[whichResult].location + addFrontClipping);
                if (newContig == NULL || newContig != originalContig || finalLocations[whichResult] + addFrontClipping > originalContig->beginningLocation + originalContig->length - genome->getChromosomePadding()) {
                    //
                    // Altering this would push us over a contig boundary.  Just give up on the read.
                    //
                    results[whichResult].status = NotFound;
                    results[whichResult].location = InvalidGenomeLocation;
                    finalLocations[whichResult] = InvalidGenomeLocation;
                } else {
                    // The location follows every adjustment; the read's clipping is only updated for positive ones.
                    cumulativeAddFrontClipping += addFrontClipping;
                    if (addFrontClipping > 0) {
                        read->setAdditionalFrontClipping(cumulativeAddFrontClipping);
                    }
                    finalLocations[whichResult] = results[whichResult].location + cumulativeAddFrontClipping;
                }
            } // while formatting doesn't work

            if (blewBuffer) {
                break;
            }

            used += usedBuffer[whichResult];
            _ASSERT(used <= size);

            // The sizes are handed to writer->advance as unsigned below, so they have to fit in 32 bits.
            if (used > 0xffffffff) {
                WriteErrorMessage("SimpleReadWriter:writeReads: used too big\n");
                soft_exit(1);
            }
        } // for each result.

        if (!blewBuffer) {
            //
            // Everything worked OK.
            //
            for (int whichResult = 0; whichResult < nResults; whichResult++) {
                writer->advance((unsigned)usedBuffer[whichResult], finalLocations[whichResult]);
            }
            result = true;
            goto done;
        }

        if (pass == 1) {
            WriteErrorMessage("Failed to write into fresh buffer; trying providing the -wbs switch with a larger value\n");
            soft_exit(1);
        }

        if (!writer->nextBatch()) {
            goto done;
        }
    } // for each pass (i.e., not empty, empty buffer)

done:
    // Free the bookkeeping arrays only if they were heap allocated above.
    if (usedBuffer != staticUsedBuffer) {
        delete[] usedBuffer;
        usedBuffer = NULL;

        delete[] finalLocations;
        finalLocations = NULL;
    }

    read->setAdditionalFrontClipping(0);

    return result;
}
bool SNAPHashTable::saveToFile(FILE *saveFile, size_t *bytesWritten) { *bytesWritten = 0; if (1 != fwrite(&magic,sizeof(magic), 1, saveFile)) { WriteErrorMessage("SNAPHashTable::SNAPHashTable fwrite magic number failed\n"); return false; } (*bytesWritten) += sizeof(magic); if (1 != fwrite(&tableSize,sizeof(tableSize), 1, saveFile)) { WriteErrorMessage("SNAPHashTable::SNAPHashTable fwrite table size failed\n"); return false; } (*bytesWritten) += sizeof(tableSize); if (1 != fwrite(&usedElementCount,sizeof(usedElementCount), 1, saveFile)) { WriteErrorMessage("SNAPHashTable::SNAPHashTable fwrite used element count size failed\n"); return false; } (*bytesWritten) += sizeof(usedElementCount); if (1 != fwrite(&keySizeInBytes, sizeof(keySizeInBytes), 1, saveFile)) { WriteErrorMessage("SNAPHashTable::SNAPHashTable fwrite key size failed\n"); return false; } (*bytesWritten) += sizeof(keySizeInBytes); if (1 != fwrite(&valueSizeInBytes, sizeof(valueSizeInBytes), 1, saveFile)) { WriteErrorMessage("SNAPHashTable::SNAPHashTable fwrite data size failed\n"); return false; } (*bytesWritten) += sizeof(valueSizeInBytes); if (1 != fwrite(&valueCount, sizeof(valueCount), 1, saveFile)) { WriteErrorMessage("SNAPHashTable: fwrite value count failed\n"); return false; } (*bytesWritten) += sizeof(valueCount); if (1 != fwrite(&invalidValueValue, valueSizeInBytes, 1, saveFile)) { WriteErrorMessage("SNAPHashTable: fwrite invalid value value failed\n"); return false; } (*bytesWritten) += valueSizeInBytes; size_t maxWriteSize = 100 * 1024 * 1024; size_t writeOffset = 0; while (writeOffset < tableSize * elementSize) { size_t amountToWrite = __min(maxWriteSize,tableSize * elementSize - writeOffset); size_t thisWrite = fwrite((char*)Table + writeOffset, 1, amountToWrite, saveFile); if (thisWrite < amountToWrite) { WriteErrorMessage("SNAPHashTable::saveToFile: fwrite failed, %d\n" "handle %p, addr %p, atr: %lu, &bw %p\n",errno, saveFile,(char*)Table + writeOffset, amountToWrite, &bytesWritten); 
return false; } writeOffset += thisWrite; (*bytesWritten) += thisWrite; } return true; }
//
// Build a hash table that lives inside an in-memory blob.  The header fields are read
// and validated in the order saveToFile writes them; the table itself is not copied but
// mapped in place (ownsMemoryForTable is false).  Any read or validation failure prints an
// error and exits.
//
SNAPHashTable *SNAPHashTable::loadFromBlob(GenericFile_Blob *loadFile)
{
    SNAPHashTable *table = new SNAPHashTable();

    unsigned fileMagic = 0;
    if (sizeof(magic) != loadFile->read(&fileMagic, sizeof(magic))) {
        //
        // This is a short read, not a mismatch: nothing meaningful was read into fileMagic,
        // so don't print it.
        //
        WriteErrorMessage("SNAPHashTable: unable to read magic number on hash table load.\n");
        soft_exit(1);
    }

    if (fileMagic != magic) {
        WriteErrorMessage("SNAPHashTable: magic number mismatch. Perhaps you have a corruped index. %d != %d\n", fileMagic, magic);
        soft_exit(1);
    }

    if (sizeof(table->tableSize) != loadFile->read(&table->tableSize, sizeof(table->tableSize))) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable fread table size failed\n");
        soft_exit(1);
    }

    if (sizeof(table->usedElementCount) != loadFile->read(&table->usedElementCount, sizeof(table->usedElementCount))) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable fread used element count failed\n");
        soft_exit(1);
    }

    if (sizeof(table->keySizeInBytes) != loadFile->read(&table->keySizeInBytes, sizeof(table->keySizeInBytes))) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable fread keySizeInBytes size failed. Perhaps this is an old format hash table and needs to be rebuilt.\n");
        soft_exit(1);
    }

    if (table->keySizeInBytes < 4 || table->keySizeInBytes > 8) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable Key size must be between 4 and 8 inclusive. Perhaps this is an old format hash table and needs to be rebuilt.\n");
        soft_exit(1);
    }

    if (sizeof(table->valueSizeInBytes) != loadFile->read(&table->valueSizeInBytes, sizeof(table->valueSizeInBytes))) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable fread dataSizeInBytes size failed. Perhaps this is an old format hash table and needs to be rebuilt.\n");
        soft_exit(1);
    }

    if (table->valueSizeInBytes == 0 || table->valueSizeInBytes > sizeof(_uint64)) {
        //
        // It must be at least one byte, because we need that much for the unused value value.  The code stuffs
        // values into _uint64, so it can't be bigger than that.
        //
        WriteErrorMessage(
            "SNAPHashTable::SNAPHashTable value size in bytes (%d) must be between 1 and 8. Perhaps you have a hash table from a future version of SNAP? Or else it's corrupt.\n",
            table->valueSizeInBytes);
        soft_exit(1);
    }

    if (sizeof(table->valueCount) != loadFile->read(&table->valueCount, sizeof(table->valueCount))) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable: value count failed to read.\n");
        soft_exit(1);
    }

    if (table->valueCount == 0 || table->valueCount > 2) {
        // Technically, > 2 would work fine with the code, but SNAP doesn't use it, so the check is here to detect corruption.
        WriteErrorMessage("SNAPHashTable::SNAPHashTable: invalid value count (%d), possible corruption or bad file format.\n", table->valueCount);
        soft_exit(1);
    }

    table->invalidValueValue = 0;   // Need this in case valueSizeInBytes < sizeof(ValueType)
    if (table->valueSizeInBytes != loadFile->read(&table->invalidValueValue, table->valueSizeInBytes)) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable: unable to read invalid value value\n");
        soft_exit(1);
    }

    if (table->tableSize <= 0) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable Zero or negative hash table size\n");
        soft_exit(1);
    }

    // Each element is one key followed by valueCount values.
    table->elementSize = table->keySizeInBytes + table->valueSizeInBytes * table->valueCount;

    size_t bytesMapped;
    table->Table = loadFile->mapAndAdvance(table->tableSize * table->elementSize, &bytesMapped);
    if (bytesMapped != table->tableSize * table->elementSize) {
        WriteErrorMessage("SNAPHashTable: unable to map table\n");
        soft_exit(1);
    }
    table->ownsMemoryForTable = false;

    return table;
}
//
// Loads a genome (or a window of one) from a file written in the saved-genome format: a header
// line with the base and contig counts, one "<start> <name>\n" line per contig, then the bases.
//
//  fileName            file to load
//  chromosomePadding   passed through to the Genome constructor
//  minLocation         first location to load; must be less than the number of bases in the file
//  length              number of bases to load; 0 means "through the end", and larger values are
//                      clamped so the window never runs past the end of the genome
//  map                 when true the bases are mapped (mapAndAdvance) instead of read into memory
//
// Returns the loaded genome, or NULL if the file cannot be opened, a contig line is missing or
// malformed, or fewer than length bases are available.  An out-of-range minLocation, an
// unparseable contig start and a failed seek call soft_exit().
//
const Genome *
Genome::loadFromFile(const char *fileName, unsigned chromosomePadding, GenomeLocation minLocation, GenomeDistance length, bool map)
{
    GenericFile *loadFile;
    GenomeDistance nBases;
    unsigned nContigs;

    if (!openFileAndGetSizes(fileName, &loadFile, &nBases, &nContigs, map)) {
        //
        // It already printed an error.  Just fail.
        //
        return NULL;
    }

    //
    // Check the window start before sizing anything from it: with minLocation at or past nBases the
    // length computed below is zero or negative, and it is handed to the Genome constructor.
    //
    if (GenomeLocationAsInt64(minLocation) >= nBases) {
        WriteErrorMessage("Genome::loadFromFile: specified minOffset %lld >= nBases %lld\n",
            (long long)GenomeLocationAsInt64(minLocation), (long long)nBases);
        soft_exit(-1);
    }

    GenomeLocation maxLocation(nBases);

    if (0 == length) {
        length = maxLocation - minLocation;
    } else {
        //
        // Don't let length go beyond nBases.
        //
        length = __min(length, maxLocation - minLocation);
        maxLocation = minLocation + length;
    }

    Genome *genome = new Genome(nBases, length, chromosomePadding);

    genome->nBases = nBases;
    genome->nContigs = genome->maxContigs = nContigs;
    genome->contigs = new Contig[nContigs];
    genome->minLocation = minLocation;
    genome->maxLocation = maxLocation;

    static const unsigned contigNameBufferSize = 512;
    char contigNameBuffer[contigNameBufferSize];

    for (unsigned i = 0; i < nContigs; i++) {
        if (NULL == loadFile->gets(contigNameBuffer, contigNameBufferSize)) {
            WriteErrorMessage("Unable to read contig description\n");
            loadFile->close();
            delete loadFile;
            delete genome;
            return NULL;
        }

        //
        // The line is "<start> <name>\n".  strchr stops at the terminator, so a line without a
        // separator is reported instead of matching a stale space left over from a previous line
        // (or running off the end of the buffer).
        //
        char *separator = strchr(contigNameBuffer, ' ');
        if (NULL == separator) {
            WriteErrorMessage("Malformed contig description in genome file '%s', '%s'\n", fileName, contigNameBuffer);
            loadFile->close();
            delete loadFile;
            delete genome;
            return NULL;
        }
        *separator = '\0';

        _int64 contigStart;
        if (1 != sscanf(contigNameBuffer, "%lld", &contigStart)) {
            WriteErrorMessage("Unable to parse contig start in genome file '%s', '%s'\n", fileName, contigNameBuffer);
            soft_exit(1);
        }
        genome->contigs[i].beginningLocation = GenomeLocation(contigStart);

        //
        // The name is everything after the separator, minus the trailing newline when there is one
        // (a truncated or final unterminated line has none, and an empty name must not underflow).
        //
        const char *nameStart = separator + 1;
        size_t contigSize = strlen(nameStart);
        if (contigSize > 0 && nameStart[contigSize - 1] == '\n') {
            contigSize--;
        }

        char *contigName = new char[contigSize + 1];
        memcpy(contigName, nameStart, contigSize);
        contigName[contigSize] = '\0';

        genome->contigs[i].name = contigName;
        genome->contigs[i].nameLength = (unsigned)contigSize;
    }

    if (0 != loadFile->advance(GenomeLocationAsInt64(minLocation))) {
        WriteErrorMessage("Genome::loadFromFile: _fseek64bit failed\n");
        soft_exit(1);
    }

    size_t readSize;
    if (map) {
        GenericFile_map *mappedFile = (GenericFile_map *)loadFile;
        genome->bases = (char *)mappedFile->mapAndAdvance(length, &readSize);
        genome->mappedFile = mappedFile;
        mappedFile->prefetch();
    } else {
        readSize = loadFile->read(genome->bases, length);
        loadFile->close();
        delete loadFile;
        loadFile = NULL;
    }

    if (length != readSize) {
        WriteErrorMessage("Genome::loadFromFile: fread of bases failed; wanted %lld, got %lld\n",
            (long long)length, (long long)readSize);
        //
        // In the non-mapped case loadFile is already NULL here, so this delete is a no-op.
        // NOTE(review): in the mapped case genome->mappedFile aliases loadFile; if ~Genome also
        // releases mappedFile this is a double delete -- confirm against the destructor.
        //
        delete loadFile;
        delete genome;
        return NULL;
    }

    genome->fillInContigLengths();
    genome->sortContigsByName();
    return genome;
}
// // Makes a copy of a Genome, but with only one of the sex chromosomes. // // The fate of the mitochondrion is that of the X chromosome. // Genome * Genome::copy(bool copyX, bool copyY, bool copyM) const { Genome *newCopy = new Genome(getCountOfBases(),getCountOfBases(), chromosomePadding); if (NULL == newCopy) { WriteErrorMessage("Genome::copy: failed to allocate space for copy.\n"); return NULL; } const Genome::Contig *currentContig = NULL; const Genome::Contig *nextContig = getContigAtLocation(0); unsigned offsetInReference = 0; while (offsetInReference < getCountOfBases()) { if (NULL != nextContig && offsetInReference >= nextContig->beginningOffset) { // // Start of a new contig. See if we want to skip it. // currentContig = nextContig; nextContig = getNextContigAfterLocation(offsetInReference + 1); if ((!copyX && !strcmp(currentContig->name,"chrX")) || (!copyY && !strcmp(currentContig->name,"chrY")) || (!copyM && !strcmp(currentContig->name,"chrM"))) { // // Yes, skip over this contig. // nextContig = getNextContigAfterLocation(offsetInReference + 1); if (NULL == nextContig) { // // The chromosome that we're skipping was the last one, so we're done. // break; } else { offsetInReference = nextContig->beginningOffset; continue; } } // If skipping this chromosome newCopy->startContig(currentContig->name); } // If new contig beginning const size_t maxCopySize = 10000; char dataBuffer[maxCopySize + 1]; unsigned amountToCopy = maxCopySize; if (nextContig && nextContig->beginningOffset < offsetInReference + amountToCopy) { amountToCopy = nextContig->beginningOffset - offsetInReference; } if (getCountOfBases() < offsetInReference + amountToCopy) { amountToCopy = getCountOfBases() - offsetInReference; } memcpy(dataBuffer,getSubstring(offsetInReference,amountToCopy), amountToCopy); dataBuffer[amountToCopy] = '\0'; newCopy->addData(dataBuffer); offsetInReference += amountToCopy; } newCopy->fillInContigLengths(); newCopy->sortContigsByName(); return newCopy; }
//
// Loads a genome (or a window of one) from a file written in the saved-genome format: a header
// line with the base and contig counts, one "<offset> <name>\n" line per contig, then the bases.
//
//  fileName            file to load
//  chromosomePadding   passed through to the Genome constructor
//  i_minOffset         offset of the first base to load; must be less than the number of bases
//  length              number of bases to load; 0 means "through the end", and larger values are
//                      clamped so the window never runs past the end of the genome
//
// Returns the loaded genome, or NULL if the file cannot be opened, i_minOffset is out of range,
// a contig line is missing or malformed, or fewer than length bases could be read.  A failed
// seek calls soft_exit(1).
//
const Genome *
Genome::loadFromFile(const char *fileName, unsigned chromosomePadding, unsigned i_minOffset, unsigned length)
{
    GenericFile *loadFile;
    unsigned nBases,nContigs;

    if (!openFileAndGetSizes(fileName,&loadFile,&nBases,&nContigs)) {
        //
        // It already printed an error.  Just fail.
        //
        return NULL;
    }

    //
    // Reject an out-of-range start before using it: nBases - i_minOffset below is unsigned and
    // would wrap around to a huge length.
    //
    if (i_minOffset >= nBases) {
        WriteErrorMessage("Genome::loadFromFile: specified minOffset %u >= nBases %u\n",i_minOffset,nBases);
        loadFile->close();
        delete loadFile;
        return NULL;
    }

    if (0 == length) {
        length = nBases - i_minOffset;
    } else {
        //
        // Don't let length go beyond nBases.
        //
        length = __min(length,nBases - i_minOffset);
    }

    Genome *genome = new Genome(nBases,length, chromosomePadding);

    genome->nBases = nBases;
    genome->nContigs = genome->maxContigs = nContigs;
    genome->contigs = new Contig[nContigs];
    genome->minOffset = i_minOffset;
    genome->maxOffset = i_minOffset + length;

    static const unsigned contigNameBufferSize = 512;
    char contigNameBuffer[contigNameBufferSize];

    for (unsigned i = 0; i < nContigs; i++) {
        if (NULL == loadFile->gets(contigNameBuffer, contigNameBufferSize)) {
            WriteErrorMessage("Unable to read contig description\n");
            loadFile->close();
            delete loadFile;
            delete genome;
            return NULL;
        }

        //
        // The line is "<offset> <name>\n".  strchr stops at the terminator, so a line without a
        // separator is reported instead of matching a stale space left over from a previous line
        // (or running off the end of the buffer).
        //
        char *separator = strchr(contigNameBuffer, ' ');
        if (NULL == separator) {
            WriteErrorMessage("Malformed contig description in genome file '%s', '%s'\n", fileName, contigNameBuffer);
            loadFile->close();
            delete loadFile;
            delete genome;
            return NULL;
        }
        *separator = '\0';

        //
        // strtoul rather than atoi: offsets are treated as unsigned elsewhere in this function
        // and may exceed INT_MAX.
        //
        genome->contigs[i].beginningOffset = (unsigned)strtoul(contigNameBuffer, NULL, 10);

        //
        // The name is everything after the separator, minus the trailing newline when there is one
        // (a truncated or final unterminated line has none, and an empty name must not underflow).
        //
        const char *nameStart = separator + 1;
        size_t contigSize = strlen(nameStart);
        if (contigSize > 0 && nameStart[contigSize - 1] == '\n') {
            contigSize--;
        }

        char *contigName = new char[contigSize + 1];
        memcpy(contigName, nameStart, contigSize);
        contigName[contigSize] = '\0';

        genome->contigs[i].name = contigName;
        genome->contigs[i].nameLength = (unsigned)contigSize;
    }

    if (0 != loadFile->advance(i_minOffset)) {
        WriteErrorMessage("Genome::loadFromFile: _fseek64bit failed\n");
        soft_exit(1);
    }

    //
    // The file is no longer needed once the bases have been read, whether or not the read was complete.
    //
    size_t retval = loadFile->read(genome->bases,length);
    loadFile->close();
    delete loadFile;

    if (length != retval) {
        WriteErrorMessage("Genome::loadFromFile: fread of bases failed; wanted %u, got %llu\n", length, (unsigned long long)retval);
        delete genome;
        return NULL;
    }

    genome->fillInContigLengths();
    genome->sortContigsByName();
    return genome;
}