// Dispatches one non-daemon SNAP command line.  argv[1] selects the command:
//   "index"            -> GenomeIndex::runIndexer, but only when CommandPipe is NULL
//   "single"/"paired"  -> one or more alignment runs chained on the same command line
//   anything else      -> "Invalid command" error
// argv[1] is read without checking argc, so callers must pass at least two arguments.
// NOTE(review): this excerpt has lost its closing braces (and possibly other short lines), so
// the block structure below is inferred from the indentation -- confirm against the full source.
void ProcessNonDaemonCommands(int argc, const char **argv) {
	if (strcmp(argv[1], "index") == 0) {
		if (CommandPipe == NULL) {
			// Skip the program name and the "index" word; the indexer sees only its own options.
			GenomeIndex::runIndexer(argc - 2, argv + 2);
		} else {
			// The error cases in index build don't really free memory properly, so we just don't allow it in daemon mode.
			WriteErrorMessage("The index command is not available in daemon mode.  Please run 'snap index' directly.\n");
	} else if (strcmp(argv[1], "single") == 0 || strcmp(argv[1], "paired") == 0) {
		// Several "single"/"paired" runs may follow each other; each run advances i past its own arguments.
		for (int i = 1; i < argc; /* i is increased below */) {
			// Filled in by runAlignment through the out-pointer; presumably the number of argv
			// entries that run used -- TODO confirm against AlignerContext::runAlignment.
			unsigned nArgsConsumed;
			if (strcmp(argv[i], "single") == 0) {
				SingleAlignerContext single;
				single.runAlignment(argc - i, argv + i, SNAP_VERSION, &nArgsConsumed);
			} else if (strcmp(argv[i], "paired") == 0) {
				PairedAlignerContext paired;
				paired.runAlignment(argc - i, argv + i, SNAP_VERSION, &nArgsConsumed);
			} else {
				// NOTE(review): nArgsConsumed is not assigned on this path in the visible code;
				// presumably the original leaves the loop here before the assert -- confirm.
				fprintf(stderr, "Invalid command: %s\n\n", argv[i]);
			// A run that consumed nothing would make this loop spin forever.
			_ASSERT(nArgsConsumed > 0);
			i += nArgsConsumed;
	} else {
		WriteErrorMessage("Invalid command: %s\n\n", argv[1]);
// File: Genome.cpp  Project: CoREse/snap
// Opens a saved genome file and parses its first text line, "<nBases> <nContigs>".
//   filename  path of the genome file
//   file      out: the opened GenericFile; set to NULL when the open or the header read fails
//   nBases    out: first number on the header line
//   nContigs  out: second number on the header line
//   map       when true the file is opened with GenericFile_map::open, otherwise with
//             GenericFile::open in ReadOnly mode
// Returns true on success (the file is left open for the caller), false after reporting an error.
// NOTE(review): closing braces are missing from this excerpt; structure is inferred from indentation.
Genome::openFileAndGetSizes(const char *filename, GenericFile **file, GenomeDistance *nBases, unsigned *nContigs, bool map)
	if (map) {
		*file = GenericFile_map::open(filename);
	} else {
		*file = GenericFile::open(filename, GenericFile::ReadOnly);

    if (*file == NULL) {
        WriteErrorMessage("Genome::openFileAndGetSizes: unable to open file '%s'\n",filename);
        return false;

    // The header is a single text line; 2000 characters is far more than two numbers need.
    char linebuf[2000];
    char *retval = (*file)->gets(linebuf, sizeof(linebuf));

    // NOTE(review): "%d" is paired with an unsigned* (nContigs); "%u" would match the declared type.
    // "%lld" assumes GenomeDistance is a long long-sized signed integer -- confirm.
    if (NULL == retval || 2 != sscanf(linebuf,"%lld %d\n", nBases, nContigs)) {
        // Release the file so the caller never sees a handle to a file with an unreadable header.
        delete *file;
        *file = NULL;
        WriteErrorMessage("Genome::openFileAndGetSizes: unable to read header\n");
        return false;
    return true;
// File: Genome.cpp  Project: gdtm86/snapr
// Writes the genome to fileName: a text header line "<nBases> <nContigs>", one text line
// "<beginningOffset> <name>" per contig, then the raw bases in a single fwrite.
// Returns false (after reporting an error) if the file cannot be opened or the bases cannot be
// fully written; true otherwise.
// Side effect: spaces inside contig names are replaced by '_' in memory, through the name
// pointer, even though this method is declared const.
// NOTE(review): no fclose(saveFile) is visible in this excerpt, on either the success or the
// fwrite-failure path; closing braces are missing as well.  Confirm against the full source.
Genome::saveToFile(const char *fileName) const
    // Save file format is (in binary) the number of bases, the number of contigs, followed by
    //  the contigs themselves, rounded up to 4K, followed by the bases.
    // NOTE(review): the header and contig lines below are written as text with fprintf, and no
    // rounding to 4K is visible in this excerpt -- the comment above may be out of date.

    FILE *saveFile = fopen(fileName,"wb");
    if (saveFile == NULL) {
        WriteErrorMessage("Genome::saveToFile: unable to open file '%s'\n",fileName);
        return false;

    // NOTE(review): the return values of the fprintf calls are not checked.
    fprintf(saveFile,"%d %d\n",nBases,nContigs);
    char *curChar = NULL;

    for (int i = 0; i < nContigs; i++) {
        // Spaces would break the space-separated "<offset> <name>" line, so they are replaced.
        // strlen is re-evaluated on every iteration of this inner loop.
        for (int n = 0; n < strlen(contigs[i].name); n++){
         curChar = contigs[i].name + n;
         if (*curChar == ' '){ *curChar = '_'; }
        fprintf(saveFile,"%d %s\n",contigs[i].beginningOffset,contigs[i].name);

    // A short write counts as failure.
    if (nBases != fwrite(bases,1,nBases,saveFile)) {
        WriteErrorMessage("Genome::saveToFile: fwrite failed\n");
        return false;

    return true;
// File: Genome.cpp  Project: CoREse/snap
// Writes the genome to fileName: a text header line "<nBases> <nContigs>", one text line
// "<beginningLocation> <name>" per contig, then the raw bases in chunks of at most 1 GiB.
// Returns false (after reporting an error) if the file cannot be opened or any chunk is written
// short; true otherwise.
// Side effect: spaces inside contig names are replaced by '_' in memory, through the name
// pointer, even though this method is declared const.
// NOTE(review): no fclose(saveFile) is visible in this excerpt, on either the success or the
// fwrite-failure path; closing braces are missing as well.  Confirm against the full source.
Genome::saveToFile(const char *fileName) const
    // Save file format is (in binary) the number of bases, the number of contigs, followed by
    //  the contigs themselves, rounded up to 4K, followed by the bases.
    // NOTE(review): the header and contig lines below are written as text with fprintf, and no
    // rounding to 4K is visible in this excerpt -- the comment above may be out of date.

    FILE *saveFile = fopen(fileName,"wb");
    if (saveFile == NULL) {
        WriteErrorMessage("Genome::saveToFile: unable to open file '%s'\n",fileName);
        return false;

    // NOTE(review): the return values of the fprintf calls are not checked.
    fprintf(saveFile,"%lld %d\n",nBases, nContigs);
    char *curChar = NULL;

    for (int i = 0; i < nContigs; i++) {
        // Spaces would break the space-separated "<location> <name>" line, so they are replaced.
        // strlen is re-evaluated on every iteration of this inner loop.
        for (int n = 0; n < strlen(contigs[i].name); n++){
         curChar = contigs[i].name + n;
         if (*curChar == ' '){ *curChar = '_'; }
        fprintf(saveFile,"%lld %s\n",contigs[i].beginningLocation, contigs[i].name);

	// Write it out in (big) chunks.  For whatever reason, fwrite with really big sizes seems not to
	// work as well as one would like.
	const size_t max_chunk_size = 1 * 1024 * 1024 * 1024;	// 1 GB (or GiB for the obsessively precise)

	size_t bases_to_write = nBases;
	size_t bases_written = 0;
	while (bases_to_write > 0) {
		size_t bases_this_write = __min(bases_to_write, max_chunk_size);
		// A short write of any chunk aborts the save.
		if (bases_this_write != fwrite(bases + bases_written, 1, bases_this_write, saveFile)) {
			WriteErrorMessage("Genome::saveToFile: fwrite failed\n");
			return false;
		bases_to_write -= bases_this_write;
		bases_written += bases_this_write;

	_ASSERT(bases_written == nBases);

    return true;
// File: Genome.cpp  Project: gdtm86/snapr
// Begins a new contig named contigName at the current end of the genome (offset nBases).
// When the contig array is full it is first replaced by one twice as large.
// NOTE(review): in this excerpt the characters of contigName are never copied into the newly
// allocated name buffer and nContigs is never incremented; those lines (and the closing braces)
// appear to have been lost from the excerpt -- confirm against the full source.
Genome::startContig(const char *contigName)
    if (nContigs == maxContigs) {
        // Reallocate (maybe we're sequencing a tree that's got lots of chromosomes).
        int newMaxContigs = maxContigs * 2;
        Contig *newContigs = new Contig[newMaxContigs];
        // Plain new[] reports failure by throwing std::bad_alloc, so this check only matters if
        // the build disables exceptions or replaces operator new.
        if (NULL == newContigs) {
            WriteErrorMessage("Genome: unable to reallocate contig array to size %d\n",newMaxContigs);
        // Element-wise copy of the existing entries into the larger array.
        for (int i = 0; i < nContigs; i++) {
            newContigs[i] = contigs[i];

        delete [] contigs;
        contigs = newContigs;
        maxContigs = newMaxContigs;

    contigs[nContigs].beginningOffset = nBases;
    // len counts the terminating NUL; nameLength does not.
    size_t len = strlen(contigName) + 1;
    contigs[nContigs].name = new char[len];
    contigs[nContigs].nameLength = (unsigned)len-1;

    contigs[nContigs].name[len-1] = '\0';

// Top-level help text listing the available commands (index, single, paired, daemon).
// NOTE(review): the line that opens the call these string literals are arguments of (closed by
// the trailing ");") is missing from this excerpt, as are the function's braces.
static void usage()
		"Usage: snap <command> [<options>]\n"
		"   index    build a genome index\n"
		"   single   align single-end reads\n"
		"   paired   align paired-end reads\n"
		"   daemon   run in daemon mode--accept commands remotely\n"
		"Type a command without arguments to see its help.\n");
// File: Genome.cpp  Project: gdtm86/snapr
// Appends len bytes from data to the end of the genome and advances nBases by len.
// Reports an error when the append would exceed maxBases.
// NOTE(review): in this excerpt nothing stops execution after the error message, so the memcpy
// below would still run past the allocation; presumably the original exits at that point --
// confirm against the full source.
Genome::addData(const char *data, size_t len)
    if ((size_t)nBases + len > maxBases) {
        WriteErrorMessage("Tried to write beyond allocated genome size (or tried to write into a genome that was loaded from a file).\n"
                          "Size = %lld\n",(_int64)maxBases);

    memcpy(bases + nBases,data,len);
    // len is narrowed to unsigned here.
    nBases += (unsigned)len;
// File: Genome.cpp  Project: CoREse/snap
// Appends len bytes from data to the end of the genome and advances nBases.
// Reports an error when the append would exceed maxBases.
// NOTE(review): in this excerpt nothing stops execution after the error message, so the memcpy
// below would still run past the allocation; presumably the original exits at that point --
// confirm against the full source.
Genome::addData(const char *data, GenomeDistance len)
    if (nBases + len > GenomeLocationAsInt64(maxBases)) {
        WriteErrorMessage("Tried to write beyond allocated genome size (or tried to write into a genome that was loaded from a file).\n"
                          "Size = %lld\n", GenomeLocationAsInt64(maxBases));

    memcpy(bases + nBases,data,len);
    // NOTE(review): memcpy copies the full len, but nBases advances by (unsigned)len.  If
    // GenomeDistance is wider than unsigned, a len above UINT_MAX would leave nBases out of step
    // with the bytes actually copied -- confirm the type and whether callers can pass such a len.
    nBases += (unsigned)len;
 // Stores the output format, data writer, genome and option flags, and starts the too-slow
 // timer at 0.  When emitInternalScore is set, i_internalScoreTag must be exactly two characters
 // long and is copied into internalScoreTag; otherwise internalScoreTag is the empty string.
 // NOTE(review): the strcpy writes three bytes (two characters plus NUL), so internalScoreTag is
 // presumably at least char[3] -- confirm.  In this excerpt nothing stops execution after the
 // "bogus internal score tag" message, so a longer tag would still be copied; the original
 // presumably exits there.
 SimpleReadWriter(const FileFormat* i_format, DataWriter* i_writer, const Genome* i_genome, bool i_killIfTooSlow, bool i_emitInternalScore, char *i_internalScoreTag, bool i_ignoreAlignmentAdjustmentsForOm)
     : format(i_format), writer(i_writer), genome(i_genome), killIfTooSlow(i_killIfTooSlow), lastTooSlowCheck(0), emitInternalScore(i_emitInternalScore), ignoreAlignmentAdjustmentsForOm(i_ignoreAlignmentAdjustmentsForOm)
     if (emitInternalScore) {
         if (strlen(i_internalScoreTag) != 2) {
             WriteErrorMessage("SimpleReadWriter: bogus internal score tag\n");
         strcpy(internalScoreTag, i_internalScoreTag);
     } else  {
         internalScoreTag[0] = '\0';
// Convenience overload: opens saveFileName for binary writing and delegates to the FILE*
// overload of saveToFile.  Returns false if the file cannot be opened, otherwise whatever the
// FILE* overload returns; *bytesWritten is filled in by that overload.
// NOTE(review): no fclose(saveFile) is visible in this excerpt -- confirm against the full source.
SNAPHashTable::saveToFile(const char *saveFileName, size_t *bytesWritten)
    FILE *saveFile = fopen(saveFileName,"wb");
    if (saveFile == NULL) {
        // The message names the constructor although this is saveToFile.
        WriteErrorMessage("SNAPHashTable::SNAPHashTable(%s) fopen failed\n",saveFileName);
        return false;

    bool worked = saveToFile(saveFile, bytesWritten);

    return worked;
// File: Genome.cpp  Project: CoREse/snap
// Constructs an empty genome with room for nBasesStored bases, surrounded on both sides by
// N_PADDING bytes of 'n', and an empty contig table with maxContigs slots.
//   i_maxBases           upper bound recorded in maxBases / maxLocation
//   nBasesStored         number of base bytes actually allocated
//   i_chromosomePadding  stored as-is in chromosomePadding
//   i_maxContigs         initial capacity of the contigs array
// NOTE(review): the initializer list ends in a trailing comma, so at least one continuation line
// (and the braces) is missing from this excerpt.
Genome::Genome(GenomeDistance i_maxBases, GenomeDistance nBasesStored, unsigned i_chromosomePadding, unsigned i_maxContigs)
: maxBases(i_maxBases), minLocation(0), maxLocation(i_maxBases), chromosomePadding(i_chromosomePadding), maxContigs(i_maxContigs),
    // bases points N_PADDING bytes into the allocation so that bases[-1] .. bases[-N_PADDING] are valid.
    bases = ((char *) BigAlloc(nBasesStored + 2 * N_PADDING)) + N_PADDING;
    // NOTE(review): N_PADDING has already been added, so a NULL return from BigAlloc would not
    // compare equal to NULL here (unless N_PADDING is 0).  Whether BigAlloc can return NULL at
    // all is not visible in this excerpt.
    if (NULL == bases) {
        WriteErrorMessage("Genome: unable to allocate memory for %llu bases\n", GenomeLocationAsInt64(maxBases));

    // Add N's for the N_PADDING bases before and after the genome itself
    memset(bases - N_PADDING, 'n', N_PADDING);
    memset(bases + nBasesStored, 'n', N_PADDING);

    nBases = 0;

    nContigs = 0;
    contigs = new Contig[maxContigs];
    contigsByName = NULL;
    // Fragment of a throughput watchdog; the enclosing function header and the closing braces
    // are not part of this excerpt.  When killIfTooSlow is set, at most once per
    // tooSlowCheckPeriod it compares the writes counted since the previous check against a
    // minimum and reports an error when too few were made.  The very first check
    // (lastTooSlowCheck == 0) only starts the timer.
    const _int64 tooSlowCheckPeriod = 5 * 60 * 1000;    // 5 min in ms
    const _int64 tooSlowCheckMinReadsPerCheckPeriod = 5 * 60 * 1000;    // One read/ms (or 1000 reads/s, but just on this thread).

    if (killIfTooSlow) {
        _int64 now = timeInMillis();
        if (lastTooSlowCheck + tooSlowCheckPeriod <= now) {
            if (lastTooSlowCheck != 0 && writesSinceLastTooSlowCheck < tooSlowCheckMinReadsPerCheckPeriod) {
                // NOTE(review): the message says "giving up", but no exit call is visible in this excerpt.
                WriteErrorMessage("Only wrote %lld writes during a %lld minute check period; we're probably out of memory and are giving up because of -kts\n", writesSinceLastTooSlowCheck, tooSlowCheckPeriod / (60 * 1000));

            // Start a new measurement window.
            lastTooSlowCheck = now;
            writesSinceLastTooSlowCheck = 0;

    } // if (killIfTooSlow)
// File: Genome.cpp  Project: gdtm86/snapr
// Constructs an empty genome with room for nBasesStored bases, surrounded on both sides by
// N_PADDING bytes of 'n', and an empty contig table with 32 slots.
//   i_maxBases           upper bound recorded in maxBases / maxOffset
//   nBasesStored         number of base bytes actually allocated
//   i_chromosomePadding  stored as-is in chromosomePadding
// NOTE(review): the braces of this constructor are missing from the excerpt.
Genome::Genome(unsigned i_maxBases, unsigned nBasesStored, unsigned i_chromosomePadding)
    : maxBases(i_maxBases), minOffset(0), maxOffset(i_maxBases), chromosomePadding(i_chromosomePadding)
    // bases points N_PADDING bytes into the allocation so that bases[-1] .. bases[-N_PADDING] are valid.
    bases = ((char *) BigAlloc(nBasesStored + 2 * N_PADDING)) + N_PADDING;
    // NOTE(review): N_PADDING has already been added, so a NULL return from BigAlloc would not
    // compare equal to NULL here (unless N_PADDING is 0).  Whether BigAlloc can return NULL at
    // all is not visible in this excerpt.
    if (NULL == bases) {
        WriteErrorMessage("Genome: unable to allocate memory for %llu bases\n",(_int64)maxBases);

    // Add N's for the N_PADDING bases before and after the genome itself
    memset(bases - N_PADDING, 'n', N_PADDING);
    memset(bases + nBasesStored, 'n', N_PADDING);

    nBases = 0;

    maxContigs = 32; // A power of two that's bigger than the usual number of chromosomes, so we don't have to
                    // reallocate in practice.

    nContigs = 0;
    contigs = new Contig[maxContigs];
    contigsByName = NULL;
    // Fragment: parameter list and body of the routine that writes every alignment of one read
    // pair.  The first line of the signature, the braces, and the "done" / "blownBuffer" labels
    // targeted by the gotos below are not part of this excerpt.
    //
    // Writes nResults paired alignments plus nSingleResults[r] single alignments for each read r
    // into one writer buffer, then commits them with writer->advance.  Returns retVal, which is
    // set to true once everything has been committed.
    // `lvc`, `format`, `writer` and `genome` are not declared here; presumably members of the
    // enclosing class -- confirm.
    const ReaderContext& context, 
    Read **reads /* array of size NUM_READS_PER_PAIR */, 
    PairedAlignmentResult *result, 
    int nResults,
    SingleAlignmentResult **singleResults /* array of size NUM_READS_PER_PAIR*/, 
    int *nSingleResults /* array of size NUM_READS_PER_PAIR*/, 
    bool firstIsPrimary)
    bool retVal = false;
    // We need to write all alignments for the pair into the same buffer, so that a write from
    // some other thread doesn't separate them.  We make two passes, trying to write into the 
    // existing buffer, and then into a clean one.  If that doesn't work, abort the alignment
    // run and ask for a bigger write buffer.
    const int staticUsedBufferSize = 2000;
    size_t staticUsedBuffer[NUM_READS_PER_PAIR][staticUsedBufferSize];
    GenomeLocation staticLocationBuffer[NUM_READS_PER_PAIR][staticUsedBufferSize];

    // Per-read arrays of (bytes used, final location), indexed first by pair alignment
    // [0, nResults) and then by single alignment [nResults, nResults + nSingleResults[r]).
    GenomeLocation *finalLocations[NUM_READS_PER_PAIR];
    size_t *usedBuffer[NUM_READS_PER_PAIR];
    if (nResults + nSingleResults[0] <= staticUsedBufferSize && nResults + nSingleResults[1] <= staticUsedBufferSize) {
        // Common case: everything fits in the stack arrays.
        usedBuffer[0] = staticUsedBuffer[0];
        usedBuffer[1] = staticUsedBuffer[1];
        finalLocations[0] = staticLocationBuffer[0];
        finalLocations[1] = staticLocationBuffer[1];
    } else {
        // One heap array per kind, shared by both reads: read 1's slice starts right after read 0's.
        usedBuffer[0] = new size_t[nResults * NUM_READS_PER_PAIR + nSingleResults[0] + nSingleResults[1]];
        usedBuffer[1] = usedBuffer[0] + nResults + nSingleResults[0];
        finalLocations[0] = new GenomeLocation[nResults * NUM_READS_PER_PAIR + nSingleResults[0] + nSingleResults[1]];
        finalLocations[1] = finalLocations[0] + nResults + nSingleResults[0];

    // For paired reads, we need to have the same QNAME for both of them, and it needs to be unique among all other
    // reads in the dataset.  For now, all we do is see if the read names end in /1 and /2, and if so truncate them.
    size_t idLengths[NUM_READS_PER_PAIR];
    idLengths[0] = reads[0]->getIdLength();
    idLengths[1] = reads[1]->getIdLength();
    if (idLengths[0] == idLengths[1] && idLengths[0] > 2 && reads[0]->getId()[idLengths[0]-2] == '/' && reads[1]->getId()[idLengths[0]-2] == '/') {
        char lastChar0, lastChar1;
        lastChar0 = reads[0]->getId()[idLengths[0] - 1];
        lastChar1 = reads[1]->getId()[idLengths[1] - 1];
        // Only strip when one ID ends in 1 and the other in 2.
        if ((lastChar0 == '1' || lastChar0 == '2') && (lastChar1 == '1' || lastChar1 == '2') && 
            lastChar0 != lastChar1) {
                idLengths[0] -= 2;
                idLengths[1] -= 2;

    for (int pass = 0; pass < 2; pass++) {

        char* buffer;
        size_t size;
        size_t used = 0;

        bool fitInBuffer = true;

        if (!writer->getBuffer(&buffer, &size)) {
            goto done;

        // Write all of the pair alignments into the buffer.
        for (int whichAlignmentPair = 0; whichAlignmentPair < nResults; whichAlignmentPair++) {

            // Working copies of the two locations; these may be moved forward by front clipping.
            GenomeLocation locations[2];
            locations[0] = result[whichAlignmentPair].status[0] != NotFound ? result[whichAlignmentPair].location[0] : InvalidGenomeLocation;
            locations[1] = result[whichAlignmentPair].status[1] != NotFound ? result[whichAlignmentPair].location[1] : InvalidGenomeLocation;

            int writeOrder[2];  // The order in which we write the reads, which is just numerical by genome location.  SO writeOrder[0] gets written first, and writeOrder[1] second.

            if (locations[0] <= locations[1]) {
                writeOrder[0] = 0;
                writeOrder[1] = 1;
            } else {
                writeOrder[0] = 1;
                writeOrder[1] = 0;

            bool secondReadLocationChanged;
            int cumulativePositiveAddFrontClipping[NUM_READS_PER_PAIR] = { 0, 0 };

            // Format both reads of the pair; start over whenever the second-written read moved,
            // because the first read's record refers to its mate's location.
            do {
                size_t tentativeUsed = 0;
                secondReadLocationChanged = false;

                for (int firstOrSecond = 0; firstOrSecond < NUM_READS_PER_PAIR; firstOrSecond++) {  // looping over the order in which the reads are written, not the order in which they arrived
                    int whichRead = writeOrder[firstOrSecond];
                    // Loop until we get a write with no additional front clipping.
                    int addFrontClipping = 0;

                    while (!format->writeRead(context, &lvc, buffer + used + tentativeUsed, size - used - tentativeUsed, &usedBuffer[firstOrSecond][whichAlignmentPair],
                        idLengths[whichRead], reads[whichRead], result[whichAlignmentPair].status[whichRead], result[whichAlignmentPair].mapq[whichRead], locations[whichRead], result[whichAlignmentPair].direction[whichRead],
                        whichAlignmentPair != 0 || !firstIsPrimary, &addFrontClipping, true, writeOrder[firstOrSecond] == 0,
                        reads[1 - whichRead], result[whichAlignmentPair].status[1 - whichRead], locations[1 - whichRead], result[whichAlignmentPair].direction[1 - whichRead],
                        result[whichAlignmentPair].alignedAsPair)) {

                        if (0 == addFrontClipping || locations[whichRead] == InvalidGenomeLocation) {
                            // We failed because we ran out of buffer.
                            goto blownBuffer;

                        if (1 == firstOrSecond) {
                            // If the location of the second read changed, we need to redo the first one as well, because it includes an offset to the second read
                            secondReadLocationChanged = true;

                        const Genome::Contig *originalContig = genome->getContigAtLocation(locations[whichRead]);
                        const Genome::Contig *newContig = genome->getContigAtLocation(locations[whichRead] + addFrontClipping);
                        // originalContig is only dereferenced after the first two tests have
                        // established that it equals newContig and that newContig is non-NULL.
                        if (newContig != originalContig || NULL == newContig || locations[whichRead] + addFrontClipping > originalContig->beginningLocation + originalContig->length - genome->getChromosomePadding()) {
                            // Altering this would push us over a contig boundary.  Just give up on the read.
                            result[whichAlignmentPair].status[whichRead] = NotFound;
                            result[whichAlignmentPair].location[whichRead] = InvalidGenomeLocation;
                            locations[whichRead] = InvalidGenomeLocation;
                        } else {
                            // The accumulated total is not read anywhere in this excerpt.
                            if (addFrontClipping > 0) {
                                cumulativePositiveAddFrontClipping[firstOrSecond] += addFrontClipping;
                            locations[whichRead] += addFrontClipping;
                    } // While formatting didn't work
                    tentativeUsed += usedBuffer[firstOrSecond][whichAlignmentPair];
                } // for first or second read

            } while (secondReadLocationChanged);
            used += usedBuffer[0][whichAlignmentPair] + usedBuffer[1][whichAlignmentPair];

            // Both reads are written into the buffer.  Save the final locations we used for when we commit.
            for (int whichRead = 0; whichRead < NUM_READS_PER_PAIR; whichRead++) {
                finalLocations[whichRead][whichAlignmentPair] = locations[whichRead];
        } // for each pair.

        // Now write the single alignments.
        for (int whichRead = 0; whichRead < NUM_READS_PER_PAIR; whichRead++) {
            for (int whichAlignment = 0; whichAlignment < nSingleResults[whichRead]; whichAlignment++) {
                // NOTE(review): unlike the pair loop above, addFrontClipping is not initialized
                // here; the test against 0 below relies on writeRead always storing a value
                // before returning false -- confirm.
                int addFrontClipping;
                GenomeLocation location = singleResults[whichRead][whichAlignment].status != NotFound ? singleResults[whichRead][whichAlignment].location : InvalidGenomeLocation;
                int cumulativePositiveAddFrontClipping = 0;

                // NOTE(review): this call passes the full getIdLength(), whereas the pair loop
                // passes idLengths[] with any "/1" or "/2" suffix removed -- confirm that the
                // difference is intended.
                while (!format->writeRead(context, &lvc, buffer + used, size - used, &usedBuffer[whichRead][nResults + whichAlignment], reads[whichRead]->getIdLength(),
                    reads[whichRead], singleResults[whichRead][whichAlignment].status, singleResults[whichRead][whichAlignment].mapq, location, singleResults[whichRead][whichAlignment].direction,
                    true, &addFrontClipping)) {

                    // Failure without a clipping request is treated as running out of buffer.
                    if (0 == addFrontClipping) {
                        goto blownBuffer;

                    const Genome::Contig *originalContig = genome->getContigAtLocation(location);
                    const Genome::Contig *newContig = genome->getContigAtLocation(location + addFrontClipping);
                    if (newContig != originalContig || NULL == newContig || location + addFrontClipping > originalContig->beginningLocation + originalContig->length - genome->getChromosomePadding()) {
                        // Altering this would push us over a contig boundary.  Just give up on the read.
                        singleResults[whichRead][whichAlignment].status = NotFound;
                        location = InvalidGenomeLocation;
                    } else {
                        if (addFrontClipping > 0) {
                            cumulativePositiveAddFrontClipping += addFrontClipping;
                        location += addFrontClipping;

                finalLocations[whichRead][nResults + whichAlignment] = location;
                used += usedBuffer[whichRead][nResults + whichAlignment];
            } // For each single alignment of a read
        } // For each read

        // They all fit into the buffer.

        // Commit the updates for the pairs.
        for (int whichReadPair = 0; whichReadPair < nResults; whichReadPair++) {
            for (int firstOrSecond = 0; firstOrSecond < NUM_READS_PER_PAIR; firstOrSecond++) {
                // adjust for write order
                int writeFirstOrSecond = (!!firstOrSecond) ^ (finalLocations[0][whichReadPair] > finalLocations[1][whichReadPair]); // goofy looking !! converts int to bool
                // NOTE(review): the line that opens the call the following argument belongs to
                // (presumably writer->advance(...)) is missing from this excerpt.  The argument
                // picks the mate's location when this read's own location is invalid.
                    finalLocations[writeFirstOrSecond][whichReadPair] == InvalidGenomeLocation ? finalLocations[1 - writeFirstOrSecond][whichReadPair] : finalLocations[writeFirstOrSecond][whichReadPair]);

        // Now commit the updates for the single reads.
        for (int whichRead = 0; whichRead < NUM_READS_PER_PAIR; whichRead++) {
            for (int whichAlignment = 0; whichAlignment < nSingleResults[whichRead]; whichAlignment++) {
                writer->advance((unsigned)usedBuffer[whichRead][nResults + whichAlignment], finalLocations[whichRead][nResults + whichAlignment]);

        retVal = true;

        // NOTE(review): presumably the blownBuffer label sits just above this test, so that it
        // is only reached when a pass ran out of buffer -- the label is not visible here.
        if (pass > 0) {
            WriteErrorMessage("Unable to fit all alignments for one read pair into a single write buffer.  Increase the size of the write buffer with -wbs, or reduce the number of alignments with -om or -omax\n");
            WriteErrorMessage("Read id: '%.*s'\n", reads[0]->getIdLength(), reads[0]->getId());

        // Ask the writer for a new batch before the second pass.
        if (!writer->nextBatch()) {
            goto done;
    } // For each buffer full pass

    // Heap arrays were used only when the stack ones were too small.  Index [1] points into the
    // same allocation as index [0], so only [0] is deleted.
    if (usedBuffer[0] != staticUsedBuffer[0]) {
        delete[] usedBuffer[0];
        usedBuffer[0] = usedBuffer[1] = NULL;

        delete[] finalLocations[0];
        finalLocations[0] = finalLocations[1] = NULL;


    return retVal;
// DumpErrorInfo queries SQLOLEDB error interfaces, retrieving available
// status or error information.
//   pOstr             wide output stream that receives the diagnostic text
//   pObjectWithError  COM object whose call failed; may be NULL
//   rErrorInterface   IID of the interface on which the failing call was made
// NOTE(review): this excerpt has lost its braces and a number of statements.  In the visible
// code: nothing returns after the "not supported" / "failed" messages; nRecs is never assigned
// before the record loop; bstrSQLSTATE is used but never declared; and pISQLErrorInfo is never
// filled in.  Confirm all of these against the full source.
inline void ComSession::DumpErrorInfo(
    std::wostream* pOstr,
    IUnknown*      pObjectWithError,
    REFIID         rErrorInterface) const
    // Interfaces used in the example.
    CComPtr<IErrorInfo> pIErrorInfoAll;
    CComPtr<IErrorRecords> pIErrorRecords;
    CComPtr<ISupportErrorInfo> pISupportErrorInfo;

    // Only ask for error information if the interface supports
    // it.
    if (pObjectWithError == NULL || FAILED(pObjectWithError->QueryInterface(IID_ISupportErrorInfo,
                                                reinterpret_cast<void**>(&pISupportErrorInfo)))) {
        *pOstr << L"SupportErrorErrorInfo interface not supported" << std::endl;
    if (FAILED(pISupportErrorInfo->InterfaceSupportsErrorInfo(rErrorInterface))) {
        *pOstr << L"InterfaceWithError interface not supported" << std::endl;

    // Do not test the return of GetErrorInfo. It can succeed and return
    // a NULL pointer in pIErrorInfoAll. Simply test the pointer.
    HRESULT r = GetErrorInfo(0, &pIErrorInfoAll);

    if ((pIErrorInfoAll == NULL) || FAILED(r)) {
        *pOstr << L"GetErrorInfo failed." << std::endl;
    // Test to see if it's a valid OLE DB IErrorInfo interface
    // exposing a list of records.
    if (FAILED(pIErrorInfoAll->QueryInterface(IID_IErrorRecords,
                                              reinterpret_cast<void**>(&pIErrorRecords))) ) {
        // IErrorInfo is valid; get the source and
        // description to see what it is.
        WriteErrorMessage(pIErrorInfoAll, pOstr);

    // Basic error information from GetBasicErrorInfo.
    ERRORINFO errorinfo;

    // Number of error records.
    ULONG nRecs;
    ULONG nRec;

    // ISQLErrorInfo parameters.
    LONG lNativeError;


    // Within each record, retrieve information from each
    // of the defined interfaces.
    for (nRec = 0; nRec < nRecs; ++nRec) {
        // From IErrorRecords, get the HRESULT and a reference
        // to the ISQLErrorInfo interface.
        pIErrorRecords->GetBasicErrorInfo(nRec, &errorinfo);

        CComPtr<ISQLErrorInfo> pISQLErrorInfo;
        CComPtr<IErrorInfo> pIErrorInfoRecord;


        if (pISQLErrorInfo != NULL) {
            pISQLErrorInfo->GetSQLInfo(&bstrSQLSTATE, &lNativeError);

            // Display the SQLSTATE and native error values.
            *pOstr << L"SQLSTATE:\t" << bstrSQLSTATE.m_str << std::endl;

        // Print the record's own error description, requested for the system default locale.
        if (SUCCEEDED(pIErrorRecords->GetErrorInfo(nRec, ::GetSystemDefaultLCID(), &pIErrorInfoRecord))) {
            WriteErrorMessage(pIErrorInfoRecord, pOstr);
// Runs SNAP as a command server.  Opens a named pipe (argv[2] if given, otherwise
// DEFAULT_NAMED_PIPE_NAME), then loops: reads an argument count, reads that many arguments,
// executes them through ProcessNonDaemonCommands, and writes CommandExecutedString back on the
// pipe.  A command whose argv[1] is "exit" is handled separately, before dispatch.
// NOTE(review): this excerpt has lost its braces and the statements that leave the function or
// the loop (after the argc range check, a failed pipe open, a closed pipe, a failed argument
// read, and the "exit" command).  Confirm control flow against the full source.
void RunDaemonMode(int argc, const char **argv)
	// Accepts "snap daemon" or "snap daemon <pipeName>" only.
	if (argc < 2 || argc > 3) {

	printf("SNAP in daemon mode, waiting for commands to execute\n");

	const char *pipeName = argc == 3 ? argv[2] : DEFAULT_NAMED_PIPE_NAME;
	CommandPipe = OpenNamedPipe(pipeName, true);

	if (NULL == CommandPipe) {
		WriteErrorMessage("Unable to open named pipe for command IO.\n");

	const size_t commandBufferSize = 10000;	// Yes, this is fixed size, no it's not a buffer overflow.  The named pipe reader just quits if it's too long.
	char commandBuffer[commandBufferSize];

	// Format of commands is argc (in ascii) followed by argc arguments, each in one line.
	for (;;) {
		if (!ReadFromNamedPipe(CommandPipe, commandBuffer, commandBufferSize)) {
			CommandPipe = NULL;
			WriteStatusMessage("Named pipe closed.  Exiting\n");

		// From here on, argc and argv name the command received on the pipe; they shadow the
		// function parameters.
		int argc = atoi(commandBuffer);
		// NOTE(review): only zero is rejected; a negative count read from the pipe would reach
		// new char*[argc] below.
		if (0 == argc) {
			WriteErrorMessage("Expected argument count on named pipe, got '%s'; ignoring.\n", commandBuffer);
		} else {
			char **argv = new char*[argc];
			for (int i = 0; i < argc; i++) {
				// Every argument gets a full commandBufferSize buffer, whatever its real length.
				argv[i] = new char[commandBufferSize];
				if (!ReadFromNamedPipe(CommandPipe, argv[i], commandBufferSize)) {
					CommandPipe = NULL;
					WriteStatusMessage("Error reading argument #%d from named pipe.\n", i);
			} // for each arg

			if (argc > 1 && strcmp(argv[1], "exit") == 0) {
				WriteStatusMessage("SNAP server exiting by request\n");
				WriteToNamedPipe(CommandPipe, CommandExecutedString);

			// Echo the command (without argv[0]) to stdout before running it.
			printf("Executing command: ");
			for (int i = 1; i < argc; i++) {
				printf("%s ", argv[i]);

			ProcessNonDaemonCommands(argc, (const char **) argv);


			// Release the per-command argument storage.
			for (int i = 0; i < argc; i++) {
				delete[] argv[i];
				argv[i] = NULL;
			delete[] argv;
			argv = NULL;
		// Tell the client that the command has finished.
		WriteToNamedPipe(CommandPipe, CommandExecutedString);
    // Fragment: parameter list and body of the routine that writes every alignment of a single
    // read.  The first line of the signature, the braces, the "done" label targeted by the gotos,
    // and some loop-exit statements are not part of this excerpt.
    //
    // Formats nResults alignments of `read` into one writer buffer and, if they all fit, commits
    // them with writer->advance.  Returns `result`, which is set to true only after a successful
    // commit.  Entries whose status is NotFound have their location overwritten with
    // InvalidGenomeLocation.
    // `lvc`, `format`, `writer` and `genome` are not declared here; presumably members of the
    // enclosing class -- confirm.
    const ReaderContext& context, 
    Read *read, 
    SingleAlignmentResult *results, 
    int nResults,
    bool firstIsPrimary)
    char* buffer;
    size_t size;
    size_t used;
    bool result = false;

    // Normalize unaligned results so later code can rely on the location alone.
    for (int i = 0; i < nResults; i++) {
        if (results[i].status == NotFound) {
            results[i].location = InvalidGenomeLocation;

    // We need to keep track of the offsets of all of the alignments in the output buffer so we can commit them.  However,
    // we want to avoid dynamic memory allocation as much as possible.  So, we have a static buffer on the stack that's big enough
    // for the great majority of cases, and then allocate dynamically if that's too small.  Makes for annoying, but efficient
    // code.

    const int staticUsedBufferSize = 2000;
    size_t staticUsedBuffer[staticUsedBufferSize];

    GenomeLocation staticFinalLocationsBuffer[staticUsedBufferSize];

    size_t *usedBuffer;
    GenomeLocation *finalLocations;
    if (nResults <= staticUsedBufferSize) {
        usedBuffer = staticUsedBuffer;
        finalLocations = staticFinalLocationsBuffer;
    } else {
        usedBuffer = new size_t[nResults];
        finalLocations = new GenomeLocation[nResults];

    for (int pass = 0; pass < 2; pass++) { // Make two passes, one with whatever buffer space is left and one with a clean buffer.
        bool blewBuffer = false;

        if (!writer->getBuffer(&buffer, &size)) {
            goto done;

        used = 0;

        for (int whichResult = 0; whichResult < nResults; whichResult++) {
            int addFrontClipping = 0;
            int cumulativeAddFrontClipping = 0;
            finalLocations[whichResult] = results[whichResult].location;

            // Only the first result can be written as primary, and only when firstIsPrimary is set.
            while (!format->writeRead(context, &lvc, buffer + used, size - used, &usedBuffer[whichResult], read->getIdLength(), read, results[whichResult].status,
                results[whichResult].mapq, finalLocations[whichResult], results[whichResult].direction, (whichResult > 0) || !firstIsPrimary, &addFrontClipping)) {

                // Failure without a clipping request is treated as running out of buffer.
                // NOTE(review): a statement leaving the while loop presumably followed here.
                if (0 == addFrontClipping) {
                    blewBuffer = true;

                // redo if read modified (e.g. to add soft clipping, or move alignment for a leading I.
                // Both contig lookups start from the original results[].location, while the
                // bounds test below uses the already-adjusted finalLocations[] value.
                const Genome::Contig *originalContig = results[whichResult].status == NotFound ? NULL
                    : genome->getContigAtLocation(results[whichResult].location);
                const Genome::Contig *newContig = results[whichResult].status == NotFound ? NULL
                    : genome->getContigAtLocation(results[whichResult].location + addFrontClipping);
                if (newContig == NULL || newContig != originalContig || finalLocations[whichResult] + addFrontClipping > originalContig->beginningLocation + originalContig->length - genome->getChromosomePadding()) {
                    // Altering this would push us over a contig boundary.  Just give up on the read.
                    results[whichResult].status = NotFound;
                    results[whichResult].location = InvalidGenomeLocation;
                    finalLocations[whichResult] = InvalidGenomeLocation;
                } else {
                    cumulativeAddFrontClipping += addFrontClipping;
                    // NOTE(review): the body of this if is not visible in the excerpt.
                    if (addFrontClipping > 0) {
                    finalLocations[whichResult] = results[whichResult].location + cumulativeAddFrontClipping;
            } // while formatting doesn't work

            // NOTE(review): the body of this if (presumably leaving the result loop) is not visible.
            if (blewBuffer) {

            used += usedBuffer[whichResult];
            _ASSERT(used <= size);

            // The commit below casts each size to unsigned, so larger totals cannot be represented.
            if (used > 0xffffffff) {
                 WriteErrorMessage("SimpleReadWriter:writeReads: used too big\n");
        } // for each result.

        if (!blewBuffer) {
            // Everything worked OK.
            for (int whichResult = 0; whichResult < nResults; whichResult++) {
                writer->advance((unsigned)usedBuffer[whichResult], finalLocations[whichResult]);
            result = true;
            goto done;

        // Running out of space even in a fresh buffer means the buffer is simply too small.
        if (pass == 1) {
            WriteErrorMessage("Failed to write into fresh buffer; trying providing the -wbs switch with a larger value\n");

        if (!writer->nextBatch()) {
            goto done;
    } // for each pass (i.e., not empty, empty buffer)
    // Free the heap arrays if the stack ones were too small.
    if (usedBuffer != staticUsedBuffer) {
        delete[] usedBuffer;
        usedBuffer = NULL;

        delete[] finalLocations;
        finalLocations = NULL;


    return result;
// Serializes the hash table to an already-open FILE.  Written in order: magic, tableSize,
// usedElementCount, keySizeInBytes, valueSizeInBytes, valueCount, the first valueSizeInBytes
// bytes of invalidValueValue, and then tableSize * elementSize bytes of Table in chunks of at
// most 100 MB.
//   saveFile      destination stream; it is not closed here
//   bytesWritten  out: reset to 0, then increased by the size of each successful write
// Returns false (after reporting an error) on the first failed or short write, true otherwise.
// NOTE(review): closing braces are missing from this excerpt; structure is inferred from indentation.
SNAPHashTable::saveToFile(FILE *saveFile, size_t *bytesWritten) 
    *bytesWritten = 0;
    if (1 != fwrite(&magic,sizeof(magic), 1, saveFile)) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable fwrite magic number failed\n");
        return false;
    (*bytesWritten) += sizeof(magic);
    if (1 != fwrite(&tableSize,sizeof(tableSize), 1, saveFile)) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable fwrite table size failed\n");
        return false;
    (*bytesWritten) += sizeof(tableSize);

    if (1 != fwrite(&usedElementCount,sizeof(usedElementCount), 1, saveFile)) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable fwrite used element count size failed\n");
        return false;
    (*bytesWritten) += sizeof(usedElementCount);

    if (1 != fwrite(&keySizeInBytes, sizeof(keySizeInBytes), 1, saveFile)) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable fwrite key size failed\n");
        return false;
    (*bytesWritten) += sizeof(keySizeInBytes);

    if (1 != fwrite(&valueSizeInBytes, sizeof(valueSizeInBytes), 1, saveFile)) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable fwrite data size failed\n");
        return false;
    (*bytesWritten) += sizeof(valueSizeInBytes);

    if (1 != fwrite(&valueCount, sizeof(valueCount), 1, saveFile)) {
        WriteErrorMessage("SNAPHashTable: fwrite value count failed\n");
        return false;
    (*bytesWritten) += sizeof(valueCount);

    // Only valueSizeInBytes bytes of invalidValueValue are stored, not sizeof(invalidValueValue).
    if (1 != fwrite(&invalidValueValue, valueSizeInBytes, 1, saveFile)) {
        WriteErrorMessage("SNAPHashTable: fwrite invalid value value failed\n");
        return false;
    (*bytesWritten) += valueSizeInBytes;

    // Write the table body in bounded chunks rather than in one huge fwrite.
    size_t maxWriteSize = 100 * 1024 * 1024;
    size_t writeOffset = 0;
    while (writeOffset < tableSize * elementSize) {
        size_t amountToWrite = __min(maxWriteSize,tableSize * elementSize - writeOffset);
        size_t thisWrite = fwrite((char*)Table + writeOffset, 1, amountToWrite, saveFile);
        if (thisWrite < amountToWrite) {
            // NOTE(review): "&bw" prints &bytesWritten, the address of the pointer parameter
            // itself, not the caller's counter; and "%lu" is paired with a size_t argument,
            // which differs in width from unsigned long on some 64-bit platforms.
            WriteErrorMessage("SNAPHashTable::saveToFile: fwrite failed, %d\n"
                              "handle %p, addr %p, atr: %lu, &bw %p\n",errno, saveFile,(char*)Table + writeOffset, amountToWrite, &bytesWritten);
            return false;
        writeOffset += thisWrite;
        (*bytesWritten) += thisWrite;

    return true;
//
// Creates a SNAPHashTable from a hash table image held in a blob.
//
// The fields are consumed from loadFile in this order: magic number, tableSize, usedElementCount,
// keySizeInBytes, valueSizeInBytes, valueCount, the invalid-value marker (valueSizeInBytes bytes),
// and then tableSize * elementSize bytes of table data.  The table data is obtained with
// mapAndAdvance() rather than read into a buffer allocated here, so the returned object is marked
// as not owning it.
//
// Returns the new table.  If any field can't be read or fails validation, an error message is
// written and NULL is returned rather than a table built from unvalidated data.
//
SNAPHashTable *SNAPHashTable::loadFromBlob(GenericFile_Blob *loadFile)
{
    SNAPHashTable *table = new SNAPHashTable();

    //
    // This function never allocates the table storage, so record that before anything can fail.
    // NOTE(review): the error paths below delete the table; that presumes the destructor only
    // releases Table when ownsMemoryForTable is set -- confirm.
    //
    table->ownsMemoryForTable = false;

    unsigned fileMagic = 0;
    if (sizeof(magic) != loadFile->read(&fileMagic, sizeof(magic))) {
        // Nothing was read, so there's no file value worth printing here.
        WriteErrorMessage("SNAPHashTable: unable to read magic number on hash table load.\n");
        delete table;
        return NULL;
    }

    if (fileMagic != magic) {
        WriteErrorMessage("SNAPHashTable: magic number mismatch.  Perhaps you have a corruped index.  %d != %d\n", fileMagic, magic);
        delete table;
        return NULL;
    }

    if (sizeof(table->tableSize) != loadFile->read(&table->tableSize, sizeof(table->tableSize))) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable fread table size failed\n");
        delete table;
        return NULL;
    }

    if (sizeof(table->usedElementCount) != loadFile->read(&table->usedElementCount, sizeof(table->usedElementCount))) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable fread used element count failed\n");
        delete table;
        return NULL;
    }

    if (sizeof(table->keySizeInBytes) != loadFile->read(&table->keySizeInBytes, sizeof(table->keySizeInBytes))) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable fread keySizeInBytes size failed.  Perhaps this is an old format hash table and needs to be rebuilt.\n");
        delete table;
        return NULL;
    }

    if (table->keySizeInBytes < 4 || table->keySizeInBytes > 8) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable Key size must be between 4 and 8 inclusive.  Perhaps this is an old format hash table and needs to be rebuilt.\n");
        delete table;
        return NULL;
    }

    if (sizeof(table->valueSizeInBytes) != loadFile->read(&table->valueSizeInBytes, sizeof(table->valueSizeInBytes))) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable fread dataSizeInBytes size failed.  Perhaps this is an old format hash table and needs to be rebuilt.\n");
        delete table;
        return NULL;
    }

    if (table->valueSizeInBytes == 0 || table->valueSizeInBytes > sizeof(_uint64)) {
        //
        // It must be at least one byte, because we need that much for the unused value value. The code stuffs
        // values into _uint64, so it can't be bigger than that.
        //
        WriteErrorMessage(
            "SNAPHashTable::SNAPHashTable value size in bytes (%d) must be between 1 and 8.  Perhaps you have a hash table from a future version of SNAP?  Or else it's corrupt.\n", table->valueSizeInBytes);
        delete table;
        return NULL;
    }

    if (sizeof(table->valueCount) != loadFile->read(&table->valueCount, sizeof(table->valueCount))) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable: value count failed to read.\n");
        delete table;
        return NULL;
    }

    if (table->valueCount == 0 || table->valueCount > 2) {
        //
        // Technically, > 2 would work fine with the code, but SNAP doesn't use it, so the check is here to detect corruption.
        //
        WriteErrorMessage("SNAPHashTable::SNAPHashTable: invalid value count (%d), possible corruption or bad file format.\n", table->valueCount);
        delete table;
        return NULL;
    }

    table->invalidValueValue = 0;   // Need this in case valueSizeInBytes < sizeof(ValueType)
    if (table->valueSizeInBytes != loadFile->read(&table->invalidValueValue, table->valueSizeInBytes)) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable: unable to read invalid value value\n");
        delete table;
        return NULL;
    }

    if (table->tableSize <= 0) {
        WriteErrorMessage("SNAPHashTable::SNAPHashTable Zero or negative hash table size\n");
        delete table;
        return NULL;
    }

    // Each element is one key followed by valueCount values.
    table->elementSize = table->keySizeInBytes + table->valueSizeInBytes * table->valueCount;

    size_t bytesMapped;
    table->Table = loadFile->mapAndAdvance(table->tableSize * table->elementSize, &bytesMapped);
    if (bytesMapped != table->tableSize * table->elementSize) {
        WriteErrorMessage("SNAPHashTable: unable to map table\n");
        delete table;
        return NULL;
    }

    return table;
}
// File: Genome.cpp  Project: CoREse/snap
    //
    // Loads a genome, or a window of it, from a genome file.
    //
    //  fileName          - path of the genome file.
    //  chromosomePadding - passed through to the Genome constructor.
    //  minLocation       - location of the first base to load; must be less than the number of
    //                      bases recorded in the file.
    //  length            - number of bases to load.  0 means "everything from minLocation on";
    //                      any other value is clipped so it doesn't run past the end.
    //  map               - if true the bases are obtained with mapAndAdvance() and the file object
    //                      is kept in genome->mappedFile; otherwise they're read into
    //                      genome->bases and the file object is deleted.
    //
    // Returns the genome, or NULL (after writing an error message) if anything goes wrong.
    //
    const Genome *
Genome::loadFromFile(const char *fileName, unsigned chromosomePadding, GenomeLocation minLocation, GenomeDistance length, bool map)
{
    GenericFile *loadFile;
    GenomeDistance nBases;
    unsigned nContigs;

    if (!openFileAndGetSizes(fileName, &loadFile, &nBases, &nContigs, map)) {
        //
        // It already printed an error.  Just fail.
        //
        return NULL;
    }

    //
    // Reject a bad starting location before it's used to compute a (negative) length and before
    // anything has been allocated.
    //
    if (GenomeLocationAsInt64(minLocation) >= nBases) {
        WriteErrorMessage("Genome::loadFromFile: specified minOffset %lld >= nBases %lld\n",
            (long long)GenomeLocationAsInt64(minLocation), (long long)nBases);
        delete loadFile;
        return NULL;
    }

    GenomeLocation maxLocation(nBases);

    if (0 == length) {
        length = maxLocation - minLocation;
    } else {
        //
        // Don't let length go beyond nBases.
        //
        length = __min(length, maxLocation - minLocation);
        maxLocation = minLocation + length;
    }

    Genome *genome = new Genome(nBases, length, chromosomePadding);
    genome->nBases = nBases;
    genome->nContigs = genome->maxContigs = nContigs;
    genome->contigs = new Contig[nContigs];
    genome->minLocation = minLocation;
    genome->maxLocation = maxLocation;

    static const unsigned contigNameBufferSize = 512;
    char contigNameBuffer[contigNameBufferSize];

    for (unsigned i = 0; i < nContigs; i++) {
        if (NULL == loadFile->gets(contigNameBuffer, contigNameBufferSize)) {
            WriteErrorMessage("Unable to read contig description\n");
            delete loadFile;
            delete genome;
            return NULL;
        }

        //
        // A contig line is "<start location> <name>\n".  Split it at the first space.  strchr stops
        // at the terminating NUL, so we never look at stale bytes beyond the line, and a line with
        // no space is rejected rather than indexing one past the end of the buffer.
        //
        char *separator = strchr(contigNameBuffer, ' ');
        if (NULL == separator) {
            WriteErrorMessage("Genome::loadFromFile: malformed contig description in genome file '%s'\n", fileName);
            delete loadFile;
            delete genome;
            return NULL;
        }
        *separator = '\0';

        _int64 contigStart;
        if (1 != sscanf(contigNameBuffer, "%lld", &contigStart)) {
            WriteErrorMessage("Unable to parse contig start in genome file '%s', '%s'\n", fileName, contigNameBuffer);
            delete loadFile;
            delete genome;
            return NULL;
        }
        genome->contigs[i].beginningLocation = GenomeLocation(contigStart);

        const char *nameInBuffer = separator + 1;   // the name starts right after the space
        size_t contigSize = strlen(nameInBuffer);
        if (contigSize > 0 && '\n' == nameInBuffer[contigSize - 1]) {
            contigSize--;   // don't include the final \n
        }

        genome->contigs[i].name = new char[contigSize + 1];
        genome->contigs[i].nameLength = (unsigned)contigSize;
        memcpy(genome->contigs[i].name, nameInBuffer, contigSize);
        genome->contigs[i].name[contigSize] = '\0';
    }

    if (0 != loadFile->advance(GenomeLocationAsInt64(minLocation))) {
        WriteErrorMessage("Genome::loadFromFile: _fseek64bit failed\n");
        delete loadFile;
        delete genome;
        return NULL;
    }

    size_t readSize;
    if (map) {
        GenericFile_map *mappedFile = (GenericFile_map *)loadFile;
        genome->bases = (char *)mappedFile->mapAndAdvance(length, &readSize);
        genome->mappedFile = mappedFile;
    } else {
        readSize = loadFile->read(genome->bases, length);

        delete loadFile;
        loadFile = NULL;
    }

    if (length != readSize) {
        WriteErrorMessage("Genome::loadFromFile: fread of bases failed; wanted %lld, got %llu\n",
            (long long)length, (unsigned long long)readSize);
        //
        // NOTE(review): in the mapped case loadFile is also stored in genome->mappedFile.  If the
        // Genome destructor deletes mappedFile this is a double delete -- confirm.
        //
        delete loadFile;
        delete genome;
        return NULL;
    }

    return genome;
}
// File: Genome.cpp  Project: gdtm86/snapr
//
// Makes a copy of a Genome, optionally leaving out the sex chromosomes and/or the mitochondrion.
//
//  copyX, copyY, copyM - when false, the contig named "chrX", "chrY" or "chrM" respectively is
//                        skipped instead of being walked.
//
// The genome is walked in chunks of at most 10000 bases, and a chunk never crosses into the next
// contig.  Returns the new Genome, or NULL if it couldn't be allocated.
//
    Genome *
Genome::copy(bool copyX, bool copyY, bool copyM) const
{
    Genome *newCopy = new Genome(getCountOfBases(),getCountOfBases(), chromosomePadding);

    if (NULL == newCopy) {
        WriteErrorMessage("Genome::copy: failed to allocate space for copy.\n");
        return NULL;
    }

    const Genome::Contig *currentContig = NULL;
    const Genome::Contig *nextContig = getContigAtLocation(0);

    unsigned offsetInReference = 0;
    while (offsetInReference < getCountOfBases()) {
        if (NULL != nextContig && offsetInReference >= nextContig->beginningOffset) {
            //
            // Start of a new contig.  See if we want to skip it.
            //
            currentContig = nextContig;
            nextContig = getNextContigAfterLocation(offsetInReference + 1);
            if ((!copyX && !strcmp(currentContig->name,"chrX")) ||
                (!copyY && !strcmp(currentContig->name,"chrY")) ||
                (!copyM && !strcmp(currentContig->name,"chrM"))) {
                //
                // Yes, skip over this contig.
                //
                if (NULL == nextContig) {
                    //
                    // The chromosome that we're skipping was the last one, so we're done.  Leave the
                    // loop here; otherwise the bases of the skipped contig would still be walked below.
                    //
                    break;
                }

                //
                // Jump to the next contig.  The chunk computed below is then empty, and the next
                // trip around the loop treats that contig as the new current one.
                //
                offsetInReference = nextContig->beginningOffset;
            } // If skipping this chromosome

        } // If new contig beginning

        const size_t maxCopySize = 10000;
        char dataBuffer[maxCopySize + 1];

        //
        // Take up to maxCopySize bases, but stop at the next contig boundary and at the end of the genome.
        //
        unsigned amountToCopy = maxCopySize;
        if (nextContig && nextContig->beginningOffset < offsetInReference + amountToCopy) {
            amountToCopy = nextContig->beginningOffset - offsetInReference;
        }

        if (getCountOfBases() < offsetInReference + amountToCopy) {
            amountToCopy = getCountOfBases() - offsetInReference;
        }

        memcpy(dataBuffer,getSubstring(offsetInReference,amountToCopy), amountToCopy);
        dataBuffer[amountToCopy] = '\0';

        //
        // NOTE(review): nothing visible here hands dataBuffer to newCopy, so as written the chunk
        // is staged and then dropped.  Presumably a call that appends it to the copy belongs
        // here -- confirm against the Genome interface.
        //

        offsetInReference += amountToCopy;
    }

    return newCopy;
}
// File: Genome.cpp  Project: gdtm86/snapr
    //
    // Loads a genome, or a window of it, from a genome file.
    //
    //  fileName          - path of the genome file.
    //  chromosomePadding - passed through to the Genome constructor.
    //  i_minOffset       - offset of the first base to load; must be less than the number of bases
    //                      recorded in the file.
    //  length            - number of bases to load.  0 means "everything from i_minOffset on";
    //                      any other value is clipped so it doesn't run past the end.
    //
    // Returns the genome, or NULL (after writing an error message) if anything goes wrong.
    //
    const Genome *
Genome::loadFromFile(const char *fileName, unsigned chromosomePadding, unsigned i_minOffset, unsigned length)
{
    GenericFile *loadFile;
    unsigned nBases,nContigs;

    if (!openFileAndGetSizes(fileName,&loadFile,&nBases,&nContigs)) {
        //
        // It already printed an error.  Just fail.
        //
        return NULL;
    }

    //
    // Check the starting offset first: nBases - i_minOffset is unsigned and would wrap around to a
    // huge length if the offset were out of range.
    //
    if (i_minOffset >= nBases) {
        WriteErrorMessage("Genome::loadFromFile: specified minOffset %u >= nBases %u\n",i_minOffset,nBases);
        delete loadFile;
        return NULL;
    }

    if (0 == length) {
        length = nBases - i_minOffset;
    } else {
        //
        // Don't let length go beyond nBases.
        //
        length = __min(length,nBases - i_minOffset);
    }

    Genome *genome = new Genome(nBases,length, chromosomePadding);
    genome->nBases = nBases;
    genome->nContigs = genome->maxContigs = nContigs;
    genome->contigs = new Contig[nContigs];
    genome->minOffset = i_minOffset;
    genome->maxOffset = i_minOffset + length;

    static const unsigned contigNameBufferSize = 512;
    char contigNameBuffer[contigNameBufferSize];

    for (unsigned i = 0; i < nContigs; i++) {
        if (NULL == loadFile->gets(contigNameBuffer, contigNameBufferSize)) {
            WriteErrorMessage("Unable to read contig description\n");
            delete loadFile;
            delete genome;
            return NULL;
        }

        //
        // A contig line is "<beginning offset> <name>\n".  Split it at the first space.  strchr
        // stops at the terminating NUL, so we never look at stale bytes beyond the line, and a line
        // with no space is rejected rather than indexing one past the end of the buffer.
        //
        char *separator = strchr(contigNameBuffer, ' ');
        if (NULL == separator) {
            WriteErrorMessage("Genome::loadFromFile: malformed contig description in genome file '%s'\n", fileName);
            delete loadFile;
            delete genome;
            return NULL;
        }
        *separator = '\0';

        //
        // Offsets are unsigned and can be 2^31 or more, which is out of range for atoi, so parse
        // them as unsigned.
        //
        genome->contigs[i].beginningOffset = (unsigned)strtoul(contigNameBuffer, NULL, 10);

        const char *nameInBuffer = separator + 1;   // the name starts right after the space
        size_t contigSize = strlen(nameInBuffer);
        if (contigSize > 0 && '\n' == nameInBuffer[contigSize - 1]) {
            contigSize--;   // don't include the final \n
        }

        genome->contigs[i].name = new char[contigSize + 1];
        genome->contigs[i].nameLength = (unsigned)contigSize;
        memcpy(genome->contigs[i].name, nameInBuffer, contigSize);
        genome->contigs[i].name[contigSize] = '\0';
    }

    if (0 != loadFile->advance(i_minOffset)) {
        WriteErrorMessage("Genome::loadFromFile: _fseek64bit failed\n");
        delete loadFile;
        delete genome;
        return NULL;
    }

    size_t retval;
    if (length != (retval = loadFile->read(genome->bases,length))) {
        WriteErrorMessage("Genome::loadFromFile: fread of bases failed; wanted %u, got %llu\n", length, (unsigned long long)retval);
        delete loadFile;
        delete genome;
        return NULL;
    }

    delete loadFile;
    return genome;
}