Esempio n. 1
0
void BaseFile::CopyReadAt(uint32_t readIndex, SMRTSequence &read) {
    assert(holeNumbers.size() > readIndex);
    read.HoleNumber(holeNumbers[readIndex]);
    if (holeXY.size() > 0) {
        assert(holeXY.size() > readIndex);
        read.HoleXY(holeXY[readIndex].xy[0],
                    holeXY[readIndex].xy[1]);
    }

    DSLength startPos = readStartPositions[readIndex];
    DNALength readLength = readLengths[readIndex];
    read.length = readLength;
    read.Allocate(readLength);
    if (baseCalls.size() > 0) {
        assert(baseCalls.size() >= readLength + startPos);
        CopyArray(baseCalls, startPos, readLength, read.seq);
    }
    if (qualityValues.size() > 0) {
        assert(qualityValues.size() >= readLength + startPos);
        CopyArray(qualityValues, startPos, readLength, read.qual.data);
    }
    if (basWidthInFrames.size() > 0) {
        assert(basWidthInFrames.size() >= readLength + startPos);
        CopyArray(basWidthInFrames, startPos, readLength, read.widthInFrames);
    }
    if (deletionQV.size() > 0) {
        assert(deletionQV.size() >= readLength + startPos);
        CopyArray(deletionQV, startPos, readLength, read.deletionQV.data);
    }
    if (deletionTag.size() > 0) {
        assert(deletionTag.size() >= readLength + startPos);
        CopyArray(deletionTag, startPos, readLength, read.deletionTag);
    }
    if (insertionQV.size() > 0) {
        assert(insertionQV.size()  >= readLength + startPos);
        CopyArray(insertionQV, startPos, readLength, read.insertionQV.data);
    }
    if (substitutionQV.size() > 0) {
        assert(substitutionQV.size() >= readLength + startPos);
        CopyArray(substitutionQV, startPos, readLength, read.substitutionQV.data);
    }
    if (mergeQV.size() > 0) {
        assert(mergeQV.size() >= readLength + startPos);
        CopyArray(mergeQV, startPos, readLength, read.mergeQV.data);
    }
    if (substitutionTag.size() > 0) {
        assert(substitutionTag.size() >= readLength + startPos);
        CopyArray(substitutionTag, startPos, readLength, read.substitutionTag);
    }
    if (preBaseFrames.size() > 0) {
        assert(preBaseFrames.size() >= readLength + startPos);
        CopyArray(preBaseFrames, startPos, readLength, read.preBaseFrames);
    }

}
Esempio n. 2
0
//
// Assemble one read in 'smrtRead' out of the given 'subreads'.
//
// The resulting read is allocated with as many bases as the largest
// SubreadEnd() among the subreads, is filled with 'N', and then receives
// each subread's bases at that subread's SubreadStart() offset.  The
// smallest SubreadStart() becomes the low quality prefix.  The high quality
// region score and the hole number are taken from the first subread, and the
// title is set to "<movie name>/<hole number>" derived from the first
// subread's title.
//
// 'subreads' must not be empty.  Any previous content of 'smrtRead' is freed.
//
void MakeVirtualRead(SMRTSequence & smrtRead,
                     const vector<SMRTSequence> & subreads)
{
    assert(subreads.size() > 0);

    // Seed the running minimum with the first subread's start.  Seeding it
    // with 0 would pin the minimum at 0 regardless of the subreads.
    DNALength hqStart = DNALength(subreads[0].SubreadStart());
    DNALength hqEnd   = 0;
    // Iterate by reference: copying a whole sequence per iteration is
    // unnecessary work.
    for (const auto & subread : subreads) {
        hqStart = min(DNALength(subread.SubreadStart()), hqStart);
        hqEnd   = max(DNALength(subread.SubreadEnd()),   hqEnd);
    }

    smrtRead.Free();
    smrtRead.Allocate(hqEnd);
    // Positions not covered by any subread stay 'N'.
    memset(smrtRead.seq, 'N', sizeof(char) * hqEnd);
    smrtRead.lowQualityPrefix = hqStart;
    smrtRead.lowQualitySuffix = smrtRead.length - hqEnd;
    smrtRead.highQualityRegionScore = subreads[0].highQualityRegionScore;
    smrtRead.HoleNumber(subreads[0].HoleNumber());

    stringstream ss;
    ss << SMRTTitle(subreads[0].GetTitle()).MovieName() << "/" << subreads[0].HoleNumber();
    smrtRead.CopyTitle(ss.str());

    for (const auto & subread : subreads) {
        memcpy(&smrtRead.seq[subread.SubreadStart()],
               &subread.seq[0], sizeof(char) * subread.length);
    }
}
Esempio n. 3
0
    void ImportReadFromCmpH5(int alignmentIndex, SMRTSequence &read) {
        CmpAlignment cmpAlignment;
        alnInfoGroup.ReadCmpAlignment(alignmentIndex, cmpAlignment);

        //
        // Cache some stats about the read, and where it was aligned to.
        //
        int queryStart = cmpAlignment.GetQueryStart();
        int queryEnd   = cmpAlignment.GetQueryEnd();
        read.holeNumber = cmpAlignment.GetHoleNumber();
        int refGroupId = cmpAlignment.GetRefGroupId();
        int alnGroupId = cmpAlignment.GetAlnGroupId();
        int refGroupIndex  = refGroupIdToArrayIndex[refGroupId];
        if (alnGroupIdToReadGroupName.find(alnGroupId) == alnGroupIdToReadGroupName.end()) {
            cout << "INTERNAL ERROR! Could not find read group name for alignment "
                << "group with Id " << alnGroupId << "." << endl;
            assert(0);
        }
        string readGroupName = alnGroupIdToReadGroupName[alnGroupId];

        if (refAlignGroups[refGroupIndex]->experimentNameToIndex.find(readGroupName) ==
                refAlignGroups[refGroupIndex]->experimentNameToIndex.end()) {
            cout << "Internal ERROR! The read group name " << readGroupName << " is specified as part of "
                << " the path in alignment " << alignmentIndex
                << " though it does not exist in the ref align group specified for this alignment." << endl;
            assert(0);
        }

        int readGroupIndex = refAlignGroups[refGroupIndex]->experimentNameToIndex[readGroupName];
        HDFCmpExperimentGroup* expGroup = refAlignGroups[refGroupIndex]->readGroups[readGroupIndex];

        int offsetBegin = cmpAlignment.GetOffsetBegin();
        int offsetEnd   = cmpAlignment.GetOffsetEnd();

        int alignedSequenceLength = offsetEnd - offsetBegin;
        string   alignedSequence;
        string   readSequence;
        vector<unsigned char> byteAlignment;

        if (alignedSequenceLength >= 0) {
            alignedSequence.resize(alignedSequenceLength);
            byteAlignment.resize(alignedSequenceLength);
        }

        //
        // Read the alignment string.  All alignments 
        //
        refAlignGroups[refGroupIndex]->readGroups[readGroupIndex]->alignmentArray.Read(offsetBegin, 
                offsetEnd, 
                &byteAlignment[0]);

        //
        // Convert to something we can compare easily.
        //
        ByteAlignmentToQueryString(&byteAlignment[0], byteAlignment.size(), &alignedSequence[0]);


        // 
        // Initialize the sequence of the read.
        //
        RemoveGaps(alignedSequence, readSequence);

        //
        // Make space for the sequence and all fields.
        //
        read.length = readSequence.size();
        read.Allocate(read.length);
        memcpy(read.seq, readSequence.c_str(), readSequence.size() * sizeof(char));

        vector<int> baseToAlignmentMap;
        CreateSequenceToAlignmentMap(byteAlignment, baseToAlignmentMap);

        //
        // Read in the quality values
        //


        vector<unsigned char> storedQVArray;

        vector<UChar> qvValues;
        vector<HalfWord> frameValues;
        int length = offsetEnd - offsetBegin;
        qvValues.resize(length);
        frameValues.resize(length);
        int i;


        if (expGroup->experimentGroup.ContainsObject("QualityValue")) {
            expGroup->qualityValue.Read(offsetBegin, offsetEnd, &qvValues[0]);
            StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.qual.data[0]);
            int i;
            for (i= 0; i < read.length; i++) {
                assert(read.qual[i] < 100);
            }
        }

        if (expGroup->experimentGroup.ContainsObject("InsertionQV")) {
            expGroup->insertionQV.Read(offsetBegin, offsetEnd, &qvValues[0]);
            StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.insertionQV.data[0]);
        }

        if (expGroup->experimentGroup.ContainsObject("SubstitutionQV")) {
            expGroup->substitutionQV.Read(offsetBegin, offsetEnd, &qvValues[0]);
            StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.substitutionQV.data[0]);
        }

        if (expGroup->experimentGroup.ContainsObject("DeletionQV")) {
            expGroup->deletionQV.Read(offsetBegin, offsetEnd, &qvValues[0]);
            StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.deletionQV.data[0]);
        }

        if (expGroup->experimentGroup.ContainsObject("DeletionTag")) {
            vector<char> deletionTagValues;
            deletionTagValues.resize(offsetEnd-offsetBegin);
            expGroup->deletionTag.Read(offsetBegin, offsetEnd, &deletionTagValues[0]);
            StoreQualityValueFromAlignment(deletionTagValues, baseToAlignmentMap, read.deletionTag);
        }

        if (expGroup->experimentGroup.ContainsObject("SubstitutionTag")) {
            vector<char> substitutionTagValues;
            substitutionTagValues.resize(offsetEnd-offsetBegin);
            expGroup->substitutionTag.Read(offsetBegin, offsetEnd, &substitutionTagValues[0]);
            StoreQualityValueFromAlignment(substitutionTagValues, baseToAlignmentMap, read.substitutionTag);
        }

        if (expGroup->experimentGroup.ContainsObject("PulseIndex")) {
            vector<uint32_t> pulseIndexValues;
            pulseIndexValues.resize(offsetEnd-offsetBegin);
            expGroup->pulseIndex.Read(offsetBegin, offsetEnd, &pulseIndexValues[0]);
            StoreQualityValueFromAlignment(pulseIndexValues, baseToAlignmentMap, read.pulseIndex);
        }

        if (expGroup->experimentGroup.ContainsObject("PreBaseFrames")) {
            expGroup->preBaseFrames.Read(offsetBegin, offsetEnd, &frameValues[0]);
            StoreQualityValueFromAlignment(frameValues, baseToAlignmentMap, read.preBaseFrames);
        }

        if (expGroup->experimentGroup.ContainsObject("WidthInFrames")) {
            expGroup->widthInFrames.Read(offsetBegin, offsetEnd, &frameValues[0]);
            StoreQualityValueFromAlignment(frameValues, baseToAlignmentMap, read.widthInFrames);
        }

    }
Esempio n. 4
0
//
// Entry point of the "alchemy" simulator.
//
// Reads an output model (the first positional argument) and writes
// simulated bas.h5 files.  The sequence each simulated read is derived from
// is either sampled at a random position of a reference genome (-genome) or
// taken from a file of source reads (-sourceReads).  Read lengths come from
// the length model built from the output model, from -fixedLength, or from
// the source reads themselves.
//
// For every position of the sampled sequence the surrounding context of
// outputModel.keyLength bases is upper-cased and passed to the output model,
// which returns the bases, quality values, tags and frame values to emit.
//
// When -posMap is given, the origin of every read is written to that file
// instead of being stored in the bas.h5 file as SimulatedSequenceIndex /
// SimulatedCoordinate.
//
// Exits with status 0 after printing the help text, and with status 1 on
// inconsistent options.
//
int main(int argc, char* argv[]) {
    string refGenomeFileName = "";
    string lengthModelFileName = "";
    string outputModelFileName = "";
    DNALength numBasesPerFile = 0;
    string sourceReadsFileName = "";
    string titleTableFileName = "";
    string basH5BaseFileName = "simulated";
    string movieName = "m101211_092754_00114_cSIM_s1_p0";
    bool   doRandGenInit = true;
    bool   usePosMap     = false;
    bool   printPercentRepeat = false;
    string posMapFileName = "";
    bool useLengthModel = false;
    ofstream posMapFile;
    int scaledLength = 0;
    int fixedLength = 0;
    int nBasFiles = 1;
    bool printHelp = false;

    CommandLineParser clp;
    string commandLine;
    string helpString;
    SetHelp(helpString);
    vector<string> fns;

    clp.RegisterStringOption("genome", &refGenomeFileName, "");
    clp.RegisterIntOption("numBasesPerFile", (int*)&numBasesPerFile, "",
                          CommandLineParser::PositiveInteger);
    clp.RegisterStringOption("sourceReads", &sourceReadsFileName, "");
    clp.RegisterStringOption("lengthModel", &lengthModelFileName, "");
    clp.RegisterIntOption("fixedLength", &fixedLength, "",
                          CommandLineParser::PositiveInteger);
    // NOTE(review): "lengthModel" is registered both as a string option and
    // as a flag option; which one the parser honours is not visible here.
    clp.RegisterFlagOption("lengthModel", &useLengthModel, "");
    clp.RegisterStringOption("movieName", &movieName, "");
    clp.RegisterStringOption("titleTable", &titleTableFileName, "");
    clp.RegisterStringOption("baseFileName", &basH5BaseFileName, "");
    clp.RegisterIntOption("nFiles", &nBasFiles, "",
                          CommandLineParser::PositiveInteger);
    clp.RegisterIntOption("meanLength", &scaledLength, "",
                          CommandLineParser::PositiveInteger);
    clp.RegisterStringOption("posMap", &posMapFileName, "");
    clp.RegisterFlagOption("printPercentRepeat", &printPercentRepeat, "");
    clp.RegisterFlagOption("h", &printHelp, "");

    clp.SetHelp(helpString);
    clp.ParseCommandLine(argc, argv, fns);
    clp.CommandLineToString(argc, argv, commandLine);

    clp.SetProgramName("alchemy");

    // Only look at the first positional argument when there is one;
    // otherwise fall through to the help text below.
    if (!fns.empty()) {
        outputModelFileName = fns[0];
    }
    if (argc <= 1 || printHelp || outputModelFileName == "") {
        cout << helpString << endl;
        exit(0);
    }

    // A position map is written exactly when a file name was given for it.
    usePosMap = (posMapFileName != "");
    if (usePosMap) {
        CrucialOpen(posMapFileName, posMapFile, std::ios::out);
    }

    if (sourceReadsFileName == "" && fixedLength == 0) {
        useLengthModel = true;
    }

    if (useLengthModel && fixedLength != 0) {
        cout << "ERROR! You must either use a length model or a fixed length." << endl;
        exit(1);
    }

    if (sourceReadsFileName == "" && numBasesPerFile == 0) {
        cout << "ERROR! You must specify either a set of read to use as " << endl
             << "original reads for simulation or the total number of bases " << endl
             << "to simulate in each bas.h5 file." << endl;
        exit(1);
    }

    if (sourceReadsFileName == "" && refGenomeFileName == "") {
        cout << "ERROR! You must specify a genome to sample reads from or a set of read "<<endl
             << "to use as original reads for simulation." << endl;
        exit(1);
    }

    if (fixedLength != 0 && refGenomeFileName == "") {
        cout << "ERROR! You must specify a genome file if using a fixed length." << endl;
        exit(1);
    }

    if ((fixedLength != 0 || scaledLength != 0) && sourceReadsFileName != "") {
        cout << "ERROR! You cannot specify a fixed length nor mean length with a source " << endl
             << "reads file.  The read lengths are taken from the source reads or the length model." << endl;
        exit(1);
    }

    LengthHistogram   lengthHistogram;
    OutputSampleListSet   outputModel(0);
    TitleTable titleTable;

    if (doRandGenInit) {
        InitializeRandomGeneratorWithTime();
    }

    //
    // Read models.
    //
    if (titleTableFileName != "") {
        titleTable.Read(titleTableFileName);
    }

    outputModel.Read(outputModelFileName);

    if (useLengthModel) {
        lengthHistogram.BuildFromAlignmentLengths(outputModel.lengths);
    }

    if (scaledLength != 0 && useLengthModel) {
        //
        // Scale the histogram so that the average length is 'scaledLength'.
        //

        // 1. Integrate histogram
        long totalLength = 0;
        long totalSamples = 0;
        int hi;
        for (hi = 0; hi < lengthHistogram.lengthHistogram.cdf.size()-1; hi++) {
            int ni;
            ni = lengthHistogram.lengthHistogram.cdf[hi+1] - lengthHistogram.lengthHistogram.cdf[hi];
            totalLength += ni * lengthHistogram.lengthHistogram.data[hi];
        }
        totalSamples = lengthHistogram.lengthHistogram.cdf[lengthHistogram.lengthHistogram.cdf.size()-1];

        // 2. Rescale every length by the ratio of requested to observed mean.
        float meanSampleLength = totalLength / (1.0*totalSamples);
        float fractionIncrease = scaledLength / meanSampleLength;

        for (hi = 0; hi < lengthHistogram.lengthHistogram.cdf.size(); hi++) {
            lengthHistogram.lengthHistogram.data[hi] *= fractionIncrease;
        }
    }

    FASTAReader inReader, seqReader;
    vector<FASTASequence> reference;
    DNALength refLength = 0;
    if (refGenomeFileName != "") {
        inReader.Init(refGenomeFileName);
        inReader.ReadAllSequences(reference);

        for (size_t i = 0; i < reference.size(); i++) {
            refLength += reference[i].length;
        }
    }

    if (sourceReadsFileName !=  "") {
        seqReader.Init(sourceReadsFileName);
    }

    //
    // Create and simulate bas.h5 files.
    //
    // Without source reads exactly nBasFiles files are produced; with
    // source reads, files are produced until the source reads run out.
    //
    bool readsRemain = true;
    for (int baseFileIndex = 0;
            (sourceReadsFileName == "" && baseFileIndex < nBasFiles)
            || (sourceReadsFileName != "" && readsRemain);
            baseFileIndex++) {
        //
        // Prep the base file for writing.
        //
        stringstream fileNameStrm, movieNameStrm;
        movieNameStrm << movieName << baseFileIndex << "_p0";
        string fullMovieName = movieNameStrm.str();
        fileNameStrm  << fullMovieName <<  ".bas.h5";

        HDFBasWriter basWriter;
        HDFRegionTableWriter regionWriter;
        //
        // This is mainly used to create the attributes.
        //
        RegionTable regionTable;
        regionTable.CreateDefaultAttributes();

        basWriter.SetPlatform(Springfield);
        //
        // Use a fixed set of fields for now.
        //

        // These are all pulled from the outputModel.
        basWriter.IncludeField("Basecall");
        basWriter.IncludeField("QualityValue");
        basWriter.IncludeField("SubstitutionQV");
        basWriter.IncludeField("SubstitutionTag");
        basWriter.IncludeField("InsertionQV");
        basWriter.IncludeField("DeletionQV");
        basWriter.IncludeField("DeletionTag");
        basWriter.IncludeField("WidthInFrames");
        basWriter.IncludeField("PreBaseFrames");
        basWriter.IncludeField("PulseIndex");

        vector<unsigned char> qualityValue, substitutionQV, substitutionTag, insertionQV, deletionQV, deletionTag;
        vector<HalfWord> widthInFrames, preBaseFrames;

        // Just go from 0 .. hole Number
        basWriter.IncludeField("HoleNumber");
        // Fixed to 0.
        basWriter.IncludeField("HoleXY");
        if (usePosMap == false) {
            basWriter.IncludeField("SimulatedSequenceIndex");
            basWriter.IncludeField("SimulatedCoordinate");
        }
        basWriter.SetChangeListID("1.3.0.50.104380");

        DNALength numSimulatedBases  = 0;
        FASTASequence sampleSeq;
        int numReads = 0;
        int readLength = 0;
        // Tracks whether the writers were initialized for this file.  The
        // base count cannot be used for that: it returns to 0 whenever the
        // first read of a file is discarded.
        bool basFileInitialized = false;

        while (numBasesPerFile == 0 || numSimulatedBases < numBasesPerFile) {
            // Origin of the read; stays (0, 0) when it cannot be determined.
            DNALength seqIndex = 0, seqPos = 0;
            if (useLengthModel) {
                lengthHistogram.GetRandomLength(readLength);
            }
            else if (fixedLength) {
                readLength = fixedLength;
            }

            if (refGenomeFileName != "") {
                // The sample points into the reference; it is not owned.
                FindRandomPos(reference, seqIndex, seqPos, readLength + (outputModel.keyLength - 1));
                sampleSeq.seq    = &reference[seqIndex].seq[seqPos];
                sampleSeq.length = readLength + (outputModel.keyLength - 1);
                assert(reference[seqIndex].length >= sampleSeq.length);
            }
            else if (sourceReadsFileName != "") {
                if (seqReader.GetNext(sampleSeq) == false) {
                    readsRemain = false;
                    break;
                }
                if (sampleSeq.length < outputModel.keyLength) {
                    continue;
                }

                if (useLengthModel) {
                    //
                    // Draw lengths until one is at least keyLength, keeping
                    // the last five draws for the error message.
                    //
                    int tryNumber = 0;
                    readLength = 0;
                    int maxNTries = 1000;
                    int tryBuffer[5] = {-1,-1,-1,-1,-1};
                    while (tryNumber < maxNTries && readLength < outputModel.keyLength) {
                        lengthHistogram.GetRandomLength(readLength);
                        readLength = sampleSeq.length = min(sampleSeq.length, static_cast<unsigned int>(readLength));
                        tryBuffer[tryNumber%5] = readLength;
                        tryNumber++;
                    }
                    if (tryNumber >= maxNTries) {
                        cout << "ERROR. Could not generate a read length greater than the " << outputModel.keyLength << " requried " <<endl
                             << "minimum number of bases using the length model specified in the alchemy." <<endl
                             << "model.  Something is either wrong with the model or the context length is too large." <<endl;
                        cout << "The last few tries were: " << tryBuffer[0] << " " << tryBuffer[1] << " " << tryBuffer[2] << " " << tryBuffer[3] << " " << tryBuffer[4] << endl;
                        exit(1);
                    }
                }

                readLength = sampleSeq.length;

                //
                // Now attempt to parse the position from the fasta title.
                //
                vector<string> tokens;
                Tokenize(sampleSeq.title, "|", tokens);
                if (tokens.size() == 4) {
                    seqPos = atoi(tokens[2].c_str());
                    if (titleTableFileName == "") {
                        seqIndex = 0;
                    }
                    else {
                        int index = 0;
                        titleTable.Lookup(tokens[1], index);
                        seqIndex = index;
                    }
                }
                else {
                    seqPos   = 0;
                }
            }

            //
            // If this is the first read printed to the base file, initialize it.
            //
            if (!basFileInitialized) {
                basWriter.Initialize(fileNameStrm.str(), movieNameStrm.str(), Springfield);
                regionWriter.Initialize(basWriter.pulseDataGroup);
                basFileInitialized = true;
            }

            numSimulatedBases += readLength;

            // create the sample sequence
            int contextLength = outputModel.keyLength;
            int contextMiddle = contextLength / 2;
            string outputString;

            //
            // Simulate to beyond the sample length.
            //
            qualityValue.clear();
            substitutionQV.clear();
            substitutionTag.clear();
            insertionQV.clear();
            deletionQV.clear();
            deletionTag.clear();
            widthInFrames.clear();
            preBaseFrames.clear();
            assert(sampleSeq.length > contextMiddle + 1);
            for (int p = contextMiddle;
                    p < sampleSeq.length - contextMiddle - 1; p++) {
                string refContext;
                refContext.assign((const char*) &sampleSeq.seq[p-contextMiddle], contextLength);
                for (size_t ci = 0; ci < refContext.size(); ci++) {
                    refContext[ci] = toupper(refContext[ci]);
                }

                OutputSample sample;
                outputModel.SampleRandomSample(refContext, sample);

                if (sample.type == OutputSample::Deletion ) {
                    //
                    // There was a deletion.  Advance in reference, then output
                    // the base after the deletion.
                    //
                    p++;
                }

                //
                // Add the sampled context, possibly multiple characters because of an insertion.
                //
                for (size_t si = 0; si < sample.nucleotides.size(); si++) {
                    outputString.push_back(sample.nucleotides[si]);
                    qualityValue.push_back(sample.qualities[si].qv[0]);
                    deletionQV.push_back(sample.qualities[si].qv[1]);
                    insertionQV.push_back(sample.qualities[si].qv[2]);
                    substitutionQV.push_back(sample.qualities[si].qv[3]);
                    deletionTag.push_back(sample.qualities[si].tags[0]);
                    substitutionTag.push_back(sample.qualities[si].tags[1]);
                    preBaseFrames.push_back(sample.qualities[si].frameValues[1]);
                    widthInFrames.push_back(sample.qualities[si].frameValues[2]);
                }
            }
            if (outputString.find('N') != outputString.npos ||
                    outputString.find('n') != outputString.npos) {
                cout << "WARNING!  The sampled string " << endl << outputString << endl
                     << "should not contain N's, but it seems to.  This is being ignored "<<endl
                     << "for now so that simulation may continue, but this shouldn't happen"<<endl
                     << "and is really a bug." << endl;
                // The read is discarded, so do not count its bases.
                numSimulatedBases -= readLength;
                continue;
            }
            //
            // Ok, done creating the read, now time to create some quality values!!!!!
            //
            SMRTSequence read;
            read.length = outputString.size();
            read.Allocate(read.length);
            memcpy(read.seq, outputString.c_str(), read.length * sizeof(unsigned char));
            assert(qualityValue.size() == read.length * sizeof(unsigned char));
            memcpy(read.qual.data, &qualityValue[0], read.length * sizeof(unsigned char));
            memcpy(read.deletionQV.data, &deletionQV[0], read.length * sizeof(unsigned char));
            memcpy(read.insertionQV.data, &insertionQV[0], read.length * sizeof(unsigned char));
            memcpy(read.substitutionQV.data, &substitutionQV[0], read.length * sizeof(unsigned char));
            memcpy(read.deletionTag, &deletionTag[0], read.length * sizeof(unsigned char));
            memcpy(read.substitutionTag, &substitutionTag[0], read.length * sizeof(unsigned char));
            memcpy(read.preBaseFrames, &preBaseFrames[0], read.length * sizeof(HalfWord));
            memcpy(read.widthInFrames, &widthInFrames[0], read.length * sizeof(HalfWord));

            //
            // The pulse index for now is just fake data.
            //
            for (int i = 0; i < read.length; i++) {
                read.pulseIndex[i] = 1;
            }
            read.xy[0] = seqIndex;
            read.xy[1] = seqPos;
            read.zmwData.holeNumber = numReads;

            basWriter.Write(read);
            // Record where this was simulated from.
            if (usePosMap == false) {
                basWriter.WriteSimulatedCoordinate(seqPos);
                basWriter.WriteSimulatedSequenceIndex(seqIndex);
            }
            else {
                posMapFile << fullMovieName << "/" << numReads << "/0_" << read.length << " " << seqIndex << " "<< seqPos;
                if (printPercentRepeat) {
                    DNALength nRepeat = sampleSeq.GetRepeatContent();
                    posMapFile << " " << nRepeat*1.0/sampleSeq.length;
                }
                posMapFile << endl;
            }
            RegionAnnotation region;
            region.row[0] = read.zmwData.holeNumber;
            region.row[1] = 1;
            region.row[2] = 0;
            region.row[3] = read.length;
            region.row[4] = 1000; // Should be enough.
            regionWriter.Write(region);
            region.row[1] = 2; // Rewrite for hq region encompassing everything.
            regionWriter.Write(region);
            // Only reads taken from the source file are freed here; genome
            // samples point into 'reference'.
            if (sourceReadsFileName != "") {
                sampleSeq.Free();
            }
            read.Free();
            ++numReads;
        }

        //
        // Finish the file only if it was actually started.  The bas writer
        // should automatically flush on closing.
        //
        if (basFileInitialized) {
            regionWriter.Finalize(regionTable.columnNames,
                                  regionTable.regionTypes,
                                  regionTable.regionDescriptions,
                                  regionTable.regionSources);
            basWriter.Close();
        }
    }
    if (usePosMap) {
        posMapFile.close();
    }

    for (size_t i = 0; i < reference.size(); i++) {
        reference[i].Free();
    }
}