//
// Store a single alignment (and optionally its quality values) in a cmp.h5
// file and append the matching 22-column row to the alignment index.
//
//   alignment      - the alignment to store; trailing gaps are removed and
//                    alignment statistics are recomputed in place.
//   alnSegment     - written to the "StrobeNumber" column.
//   cmpFile        - destination cmp.h5 file object.
//   moleculeNumber - written to the "MoleculeID" column; when -1 it is
//                    derived from the movie id and the hole number.
//   copyQVs        - when true, the optional QVs returned by
//                    alignment.CopyQVs() are stored as well.
//
// The process is terminated with exit(1) when the read name cannot be parsed
// by ParsePBIReadName, when the reference name is not present in
// refNameToRefInfoIndex, or when the reference group id is not indexed in
// cmpFile.refGroupIdToArrayIndex.
//
void AlignmentSetToCmpH5Adapter<T_CmpFile>::StoreAlignmentCandidate(
    AlignmentCandidate<> &alignment,
    int alnSegment,
    T_CmpFile &cmpFile,
    int moleculeNumber,
    bool copyQVs)
{
    //
    // Find out where the movie is going to get stored.
    //
    std::string movieName;
    int holeNumber = 0;
    const bool nameParsedProperly =
        ParsePBIReadName(alignment.qName, movieName, holeNumber);
    if (!nameParsedProperly) {
        std::cout << "ERROR. Attempting to store a read with name "
                  << alignment.qName << " that does not " << std::endl
                  << "appear to be a PacBio read." << std::endl;
        exit(1);
    }

    unsigned int movieId = StoreMovieInfo(movieName, cmpFile);

    // Check whether the reference is in /RefInfo.
    std::map<std::string, int>::iterator mapIt =
        refNameToRefInfoIndex.find(alignment.tName);
    if (mapIt == refNameToRefInfoIndex.end()) {
        std::cout << "ERROR. The reference name " << alignment.tName
                  << " was not found in the list of references." << std::endl;
        std::cout << "Perhaps a different reference file was aligned to than "
                  << std::endl
                  << "what was provided for SAM conversion. " << std::endl;
        exit(1);
    }

    // Store refGroup
    unsigned int refGroupId = StoreRefGroup(alignment.tName, cmpFile);
    std::string refGroupName = refNameToRefGroupNameandId[alignment.tName].name;
    assert(refGroupId == refNameToRefGroupNameandId[alignment.tName].id);

    if (cmpFile.refGroupIdToArrayIndex.find(refGroupId) ==
        cmpFile.refGroupIdToArrayIndex.end()) {
        std::cout << "ERROR. The reference ID is not indexed. "
                  << "This is an internal inconsistency." << std::endl;
        exit(1);
    }

    size_t refGroupIndex = cmpFile.refGroupIdToArrayIndex[refGroupId];
    assert(refGroupIndex + 1 == refGroupId);

    std::string path = "/" + refGroupName + "/" + movieName;
    unsigned int pathId = StorePath(path, cmpFile);

    // One row of the alignment index; see the column list further below.
    std::vector<unsigned int> alnIndex(22);

    RemoveGapsAtEndOfAlignment(alignment);

    /*
     * Store the alignment string
     */
    std::vector<unsigned char> byteAlignment;
    AlignmentToByteAlignment(alignment,
                             alignment.qAlignedSeq, alignment.tAlignedSeq,
                             byteAlignment);

    unsigned int offsetBegin, offsetEnd;
    cmpFile.StoreAlnArray(byteAlignment, alignment.tName, movieName,
                          offsetBegin, offsetEnd);

    // Copy QVs into cmp.h5
    if (copyQVs) {
        std::vector<std::string> optionalQVs;
        alignment.CopyQVs(&optionalQVs);
        for (size_t qv_i = 0; qv_i < optionalQVs.size(); qv_i++) {
            std::string &qvName = alignment.optionalQVNames[qv_i];
            std::string &qvString = optionalQVs[qv_i];

            // If the qvString is empty, then the alignment is missing the
            // quality value
            if (qvString.size() == 0) {
                continue;
            }

            // Names ending in "Tag" are stored as tags, everything else as
            // QVs. The length guard keeps size() - 3 from wrapping around
            // (which would make compare() throw std::out_of_range) when a
            // name is shorter than the suffix.
            const bool isTag =
                qvName.size() >= 3 &&
                qvName.compare(qvName.size() - 3, 3, "Tag") == 0;

            unsigned int qvOffsetBegin, qvOffsetEnd;
            if (isTag) {
                std::vector<char> qvVector;
                QVsToCmpH5QVs(qvString, byteAlignment, true, &qvVector);
                cmpFile.StoreTags(qvVector, alignment.tName, qvName,
                                  movieName, &qvOffsetBegin, &qvOffsetEnd);
            } else {
                std::vector<UChar> qvVector;
                QVsToCmpH5QVs(qvString, byteAlignment, false, &qvVector);
                cmpFile.StoreQVs(qvVector, alignment.tName, qvName,
                                 movieName, &qvOffsetBegin, &qvOffsetEnd);
            }
            // Each QV array is expected to occupy the same offsets as the
            // alignment array stored above.
            assert(qvOffsetBegin == offsetBegin);
            assert(qvOffsetEnd == offsetEnd);
        }
    }

    numAlignments++;

    DistanceMatrixScoreFunction<DNASequence, DNASequence> distScoreFn;
    // distScoreFn does not matter since the score is not stored.
    ComputeAlignmentStats(alignment,
                          alignment.qAlignedSeq.seq, alignment.tAlignedSeq.seq,
                          distScoreFn);

    /*
      The current AlnIndex column names:
      (0): "AlnID", "AlnGroupID", "MovieID", "RefGroupID", "tStart",
      (5): "tEnd", "RCRefStrand", "HoleNumber", "SetNumber",
      (9): "StrobeNumber", "MoleculeID", "rStart", "rEnd", "MapQV", "nM",
      (15): "nMM", "nIns", "nDel", "Offset_begin", "Offset_end",
      (20): "nBackRead", "nReadOverlap"
    */
    if (moleculeNumber == -1) {
        moleculeNumber = numZMWsPerMovieSpringField * (movieId - 1) + holeNumber;
    }

    alnIndex[0]  = numAlignments;                                      // AlnId
    alnIndex[1]  = pathId;                                             // AlnGroupID
    alnIndex[2]  = movieId;                                            // MovieID
    alnIndex[3]  = refGroupId;                                         // RefGroupID
    alnIndex[4]  = alignment.tAlignedSeqPos;                           // tStart
    alnIndex[5]  = alignment.tAlignedSeqPos + alignment.tAlignedSeqLength; // tEnd
    alnIndex[6]  = alignment.tStrand;                                  // RCRefStrand
    alnIndex[7]  = holeNumber;                                         // HoleNumber
    alnIndex[8]  = 0;              // SET NUMBER -- parse later!!!!
    alnIndex[9]  = alnSegment;                                         // strobenumber
    alnIndex[10] = moleculeNumber;                                     // MoleculeID
    alnIndex[11] = alignment.qAlignedSeqPos;                           // rStart
    alnIndex[12] = alignment.qAlignedSeqPos + alignment.qAlignedSeqLength; // rEnd
    alnIndex[13] = alignment.mapQV;                                    // MapQV
    alnIndex[14] = alignment.nMatch;                                   // nM
    alnIndex[15] = alignment.nMismatch;                                // nMM
    alnIndex[16] = alignment.nIns;                                     // nIns
    alnIndex[17] = alignment.nDel;                                     // nDel
    alnIndex[18] = offsetBegin;                                        // Offset_begin
    alnIndex[19] = offsetEnd;                                          // Offset_end
    alnIndex[20] = 0;                                                  // nBackRead
    alnIndex[21] = 0;                                                  // nReadOverlap

    cmpFile.alnInfoGroup.WriteAlnIndex(alnIndex);
}
// // Initialize instance from a VMware 4.x descriptor file // VDKSTAT VDiskVmdk::Initialize(PCHAR pPath) { VDiskFile file; CHAR buf[MAX_PATH + 40]; CHAR path[MAX_PATH]; PVOID cbparams[2]; PCHAR current; ULONG signature; ULONG_PTR len; VDKSTAT ret; // // parameter check // if (!pPath || !*pPath) { return VDK_PARAM; } // // store path // if ((ret = StorePath(pPath)) != VDK_OK) { return ret; } // // open file // if ((ret = file.Open(pPath)) != VDK_OK) { return ret; } // // initialize members // cbparams[0] = pPath; // // read first 4 bytes // signature = 0; ret = file.ReadByte((PUCHAR)&signature, sizeof(signature), &len); if (ret != VDK_OK) { return ret; } if (len != sizeof(signature)) { return VDK_EOF; } if (signature == VMDK_SIGNATURE) { // // it's a monolithic sparse file // -- descriptor offset is stored in header // VMDK_HEADER vmdk; ret = file.ReadByte((PUCHAR)&vmdk + sizeof(signature), sizeof(vmdk) - sizeof(signature), &len); if (ret != VDK_OK) { return ret; } if (len != sizeof(vmdk) - sizeof(signature)) { return VDK_DATA; } if (vmdk.DescOffsetLow == 0 || vmdk.DescOffsetHigh != 0 || vmdk.DescSizeLow == 0 || vmdk.DescSizeHigh != 0) { VDiskCallBack(VDISK_CB_VMDK_NODESC, cbparams); return VDK_DATA; } ret = VdkSeekFile(file.Handle(), vmdk.DescOffsetLow << VDK_BYTE_SHIFT_TO_SECTOR); if (ret != VDK_OK) { return ret; } } else { ret = VdkSeekFile(file.Handle(), 0); if (ret != VDK_OK) { return ret; } } while ((ret = file.ReadText(buf, sizeof(buf), NULL)) == VDK_OK) { // // replace tabs with blanks // current = buf; while (*current) { if (*current == '\t' || *current == '\n') { *current = ' '; } current++; } // // remove trailing blanks // while (current > buf && *(--current) == ' ') { *current = '\0'; } // // skip leading blanks // current = buf; while (*current == ' ') { current++; } // // blank line? 
// if (!*current) { continue; } cbparams[1] = current; // // parse current line // if (!VdkCmpNoCaseN(current, "RW ", 3) || !VdkCmpNoCaseN(current, "RDONLY", 6)) { VDiskExt *ext; PCHAR top, tail, p; ULONG capacity; CHAR delim; BOOL sparse; ULONG offset; HANDLE hFile; // // search capacity field // p = current + 3; while (*p == ' ') { p++; } capacity = atol(p); if (!capacity) { capacity = VDiskCallBack( VDISK_CB_DESC_CAPACITY, cbparams); if (!capacity) { return VDK_DATA; } SetFlag(VDISK_FLAG_DIRTY); } // // search extent type field // while (isdigit(*p)) { p++; } while (*p == ' ') { p++; } if (!VdkCmpNoCaseN(p, "flat ", 5)) { sparse = FALSE; } else if (!VdkCmpNoCaseN(p, "sparse ", 7)) { sparse = TRUE; } else { ULONG type = VDiskCallBack( VDISK_CB_DESC_FILETYPE, cbparams); if (type == VDK_FILETYPE_FLAT) { sparse = FALSE; } else if (type == VDK_FILETYPE_VMDK) { sparse = TRUE; } else { return VDK_DATA; } SetFlag(VDISK_FLAG_DIRTY); } // // search extent path field // top = p; while (*top && *top != ' ') { top++; } while (*top == ' ') { top++; } if (*top == '\"') { delim = '\"'; top++; } else { delim = ' '; } tail = top; while (*tail && *tail != delim) { tail++; } if (!*tail || tail - top >= MAX_PATH) { if (!VDiskCallBack(VDISK_CB_DESC_BADENTRY, cbparams)) { return VDK_CANCEL; } SetFlag(VDISK_FLAG_DIRTY); continue; } if ((isalpha(*top) && *(top + 1) == ':') || *top == PATH_SEPARATOR_CHAR || *top == ALT_SEPARATOR_CHAR) { SetFlag(VDISK_FLAG_ABSPATH); } else { ClrFlag(VDISK_FLAG_ABSPATH); } VdkCopyMem(path, top, tail - top); path[tail - top] = '\0'; if (sparse) { offset = 0; } else { // // search "backing offset" field // p = tail + 1; while (*p == ' ') { p++; } if (isdigit(*p)) { offset = atol(p); } else { offset = 0; } } // open the extent file *tail = '\0'; ret = VDiskSearchFile(&hFile, path, m_pPath); if (ret != VDK_OK) { return ret; } // // create an extent object // if (sparse) { ext = new VDiskExtVmdk; } else { ext = new VDiskExtRaw; } if (ext == NULL) { ret = 
VdkLastError(); VdkCloseFile(hFile); return ret; } ret = AddExtent(ext); if (ret != VDK_OK) { VdkCloseFile(hFile); delete ext; return ret; } ret = ext->SetPath(path); if (ret != VDK_OK) { VdkCloseFile(hFile); return ret; } ret = ext->Load(hFile); VdkCloseFile(hFile); if (ret != VDK_OK) { return ret; } ext->SetCapacity(capacity); if (!sparse) { ((VDiskExtRaw *)ext)->SetBackOffset(offset); } } else if (!VdkCmpNoCaseN(current, "ddb.geometry.sectors", 20)) { PCHAR p = current + 20; while (*p == ' ' || *p == '=' || *p == '\"') { p++; } m_nSectors = atol(p); if (!m_nSectors) { m_nSectors = VDiskCallBack( VDISK_CB_DESC_GEOMETRY, cbparams); if (!m_nSectors) { return VDK_DATA; } SetFlag(VDISK_FLAG_DIRTY); } } else if (!VdkCmpNoCaseN(current, "ddb.geometry.heads", 18)) { PCHAR p = current + 18; while (*p == ' ' || *p == '=' || *p == '\"') { p++; } m_nTracks = atol(p); if (!m_nTracks) { m_nTracks = VDiskCallBack( VDISK_CB_DESC_GEOMETRY, cbparams); if (!m_nTracks) { return VDK_DATA; } SetFlag(VDISK_FLAG_DIRTY); } } else if (!VdkCmpNoCaseN(current, "ddb.geometry.cylinders", 22)) { PCHAR p = current + 22; while (*p == ' ' || *p == '=' || *p == '\"') { p++; } m_nCylinders = atol(p); if (!m_nCylinders) { m_nCylinders = VDiskCallBack( VDISK_CB_DESC_GEOMETRY, cbparams); if (!m_nCylinders) { return VDK_DATA; } SetFlag(VDISK_FLAG_DIRTY); } } else if (!VdkCmpNoCaseN(current, "CID", 3)) { PCHAR p = current + 3; while (*p == ' ' || *p == '=' || *p == '\"') { p++; } if (!sscanf(p, "%lx", &m_nTimeStamp)) { if (!VDiskCallBack(VDISK_CB_DESC_TIMESTAMP, cbparams)) { return VDK_CANCEL; } m_nTimeStamp = (ULONG)-1; SetFlag(VDISK_FLAG_DIRTY); } } else if (!VdkCmpNoCaseN(current, "parentCID", 9)) { PCHAR p = current + 9; while (*p == ' ' || *p == '=' || *p == '\"') { p++; } if (!sscanf(p, "%lx", &m_nParentTS)) { if (!VDiskCallBack(VDISK_CB_DESC_TIMESTAMP, cbparams)) { return VDK_CANCEL; } m_nParentTS = (ULONG)-1; SetFlag(VDISK_FLAG_DIRTY); } if (m_nParentTS != (ULONG)-1) { 
SetFlag(VDISK_FLAG_CHILD); } } else if (!VdkCmpNoCaseN(current, "ddb.virtualHWVersion", 20)) { PCHAR p = current + 20; while (*p == ' ' || *p == '=' || *p == '\"') { p++; } m_nHardwareVer = atol(p); } else if (!VdkCmpNoCaseN(current, "ddb.adapterType", 15)) { PCHAR p = current + 15; while (*p == ' ' || *p == '=' || *p == '\"') { p++; } if (!VdkCmpNoCaseN(p, "ide", 3)) { m_nController = VDISK_CONTROLLER_IDE; } else if (!VdkCmpNoCaseN(p, "buslogic", 8) || !VdkCmpNoCaseN(p, "lsilogic", 8)) { m_nController = VDISK_CONTROLLER_SCSI; } else { m_nController = VDiskCallBack( VDISK_CB_CONTROLLER, cbparams); if (m_nController != VDISK_CONTROLLER_SCSI && m_nController != VDISK_CONTROLLER_IDE) { return VDK_CANCEL; } SetFlag(VDISK_FLAG_DIRTY); } } else if (!VdkCmpNoCaseN(current, "createType", 10)) { PCHAR p = current + 10; ULONG type; while (*p == ' ' || *p == '=' || *p == '\"') { p++; } if (!VdkCmpNoCaseN(p, "twoGbMaxExtentSparse", 20)) { type = VDISK_VMDK_SPLIT_SPARSE; } else if (!VdkCmpNoCaseN(p, "monolithicSparse", 16)) { type = VDISK_VMDK_MONO_SPARSE; } else if (!VdkCmpNoCaseN(p, "twoGbMaxExtentFlat", 18)) { type = VDISK_VMDK_SPLIT_FLAT; } else if (!VdkCmpNoCaseN(p, "monolithicFlat", 14)) { type = VDISK_VMDK_MONO_FLAT; } else { type = VDiskCallBack( VDISK_CB_DESC_DISKTYPE, cbparams); SetFlag(VDISK_FLAG_DIRTY); } switch (type) { case VDISK_VMDK_SPLIT_FLAT: ClrFlag(VDISK_FLAG_SINGLE); ClrFlag(VDISK_FLAG_CHILD); ClrFlag(VDISK_FLAG_SPARSE); break; case VDISK_VMDK_MONO_FLAT: SetFlag(VDISK_FLAG_SINGLE); ClrFlag(VDISK_FLAG_CHILD); ClrFlag(VDISK_FLAG_SPARSE); break; case VDISK_VMDK_SPLIT_SPARSE: ClrFlag(VDISK_FLAG_SINGLE); SetFlag(VDISK_FLAG_SPARSE); break; case VDISK_VMDK_MONO_SPARSE: SetFlag(VDISK_FLAG_SINGLE); SetFlag(VDISK_FLAG_SPARSE); break; default: return VDK_DATA; } } else if (!VdkCmpNoCaseN(current, "version", 7)) { /* PCHAR p = current + 7; while (*p == ' ' || *p == '=' || *p == '\"') { p++; } m_nVmdkVersion = atol(p); */ } else if (!VdkCmpNoCaseN(current, 
"parentFileNameHint", 18)) { PCHAR top = current + 18; PCHAR tail; while (*top == ' ' || *top == '=' || *top == '\"') { top++; } tail = top; while (*tail && *tail != '\"') { tail++; } *tail = '\0'; ret = StoreParentPath(top); if (ret != VDK_OK) { return ret; } SetFlag(VDISK_FLAG_CHILD); } else if (!VdkCmpNoCaseN(current, "ddb.toolsVersion", 16)) { PCHAR p = current + 16; while (*p == ' ' || *p == '=' || *p == '\"') { p++; } m_nToolsFlag = atol(p); } else if (*current && *current != '#') { if (!VDiskCallBack(VDISK_CB_DESC_BADENTRY, cbparams)) { return VDK_CANCEL; } SetFlag(VDISK_FLAG_DIRTY); } } if (ret == VDK_EOF) { return Check(); } else { return ret; } }