//
// Writes one alignment into the cmp.h5 file.
//
// Registers the movie, the reference group and the alignment-group path,
// stores the byte-encoded alignment array (plus the optional quality values
// when requested) and finally appends one 22-column row to the alignment
// index.
//
//   alignment      - alignment to store. It is modified: gaps at its ends are
//                    removed and its statistics are recomputed.
//   alnSegment     - value written to the "StrobeNumber" column.
//   cmpFile        - destination file object.
//   moleculeNumber - value written to the "MoleculeID" column; when it is -1
//                    a value is derived from the movie id and hole number.
//   copyQVs        - when true, the alignment's optional QVs are stored too.
//
// Prints a message and calls exit(1) when the read name cannot be parsed by
// ParsePBIReadName, when the reference name is not in refNameToRefInfoIndex,
// or when the reference group id is not indexed in cmpFile.
//
void AlignmentSetToCmpH5Adapter<T_CmpFile>::StoreAlignmentCandidate(
    AlignmentCandidate<> &alignment, 
    int alnSegment,
    T_CmpFile &cmpFile,
    int moleculeNumber,
    bool copyQVs) {
  //
  // Find out where the movie is going to get stored.
  //
  std::string movieName;
  int holeNumber = 0;

  const bool nameParsedProperly =
      ParsePBIReadName(alignment.qName, movieName, holeNumber);
  if (!nameParsedProperly) {
    std::cout <<"ERROR. Attempting to store a read with name " 
          << alignment.qName << " that does not " << std::endl
          << "appear to be a PacBio read." << std::endl;
    exit(1);
  }

  unsigned int movieId = StoreMovieInfo(movieName, cmpFile);

  // Check whether the reference is in /RefInfo.
  if (refNameToRefInfoIndex.find(alignment.tName) == refNameToRefInfoIndex.end()) {
    std::cout << "ERROR. The reference name " << alignment.tName 
          << " was not found in the list of references." << std::endl;
    std::cout << "Perhaps a different reference file was aligned to than " << std::endl
          << "what was provided for SAM conversion. " << std::endl;
    exit(1);
  } 

  // Store refGroup
  unsigned int refGroupId = StoreRefGroup(alignment.tName, cmpFile);
  std::string refGroupName = refNameToRefGroupNameandId[alignment.tName].name; 
  assert(refGroupId  == refNameToRefGroupNameandId[alignment.tName].id);

  if (cmpFile.refGroupIdToArrayIndex.find(refGroupId) == cmpFile.refGroupIdToArrayIndex.end()) {
    std::cout << "ERROR. The reference ID is not indexed. " 
          << "This is an internal inconsistency." << std::endl;
    exit(1);
  }

  size_t refGroupIndex= cmpFile.refGroupIdToArrayIndex[refGroupId];
  assert(refGroupIndex + 1 == refGroupId);

  // Alignments are grouped under "/<refGroupName>/<movieName>".
  std::string path = "/" + refGroupName + "/" + movieName;
  unsigned int pathId = StorePath(path, cmpFile);

  // One row of the alignment index; see the column list further down.
  std::vector<unsigned int> alnIndex(22);

  RemoveGapsAtEndOfAlignment(alignment);

  /*
    * Store the alignment string
    */
  std::vector<unsigned char> byteAlignment;
  AlignmentToByteAlignment(alignment, 
                            alignment.qAlignedSeq, alignment.tAlignedSeq,
                            byteAlignment);

  unsigned int offsetBegin = 0, offsetEnd = 0;
  cmpFile.StoreAlnArray(byteAlignment, alignment.tName, movieName, offsetBegin, offsetEnd);
  // Copy QVs into cmp.h5
  if (copyQVs) {
    std::vector<std::string> optionalQVs;
    alignment.CopyQVs(&optionalQVs);
    for (size_t qv_i=0; qv_i<optionalQVs.size(); qv_i++) {
      std::string *qvName = &alignment.optionalQVNames[qv_i];
      std::string *qvString = &optionalQVs[qv_i];
      
      // If the qvString is empty, then the alignment is missing the quality
      // value
      if (qvString->size() == 0) {
        continue;
      }

      // A name ending in "Tag" is stored through StoreTags, anything else
      // through StoreQVs. The length guard keeps size() - 3 from wrapping
      // around (and compare() from throwing) for names shorter than 3 chars.
      const bool isTag = qvName->size() >= 3 &&
          qvName->compare(qvName->size() - 3, 3, "Tag") == 0;

      unsigned int qvOffsetBegin = 0, qvOffsetEnd = 0;
      if (isTag) {
        std::vector<char> qvVector;
        QVsToCmpH5QVs(*qvString, byteAlignment, true, &qvVector);
        cmpFile.StoreTags(qvVector, alignment.tName, *qvName,
                          movieName, &qvOffsetBegin, &qvOffsetEnd);
      } else {
        std::vector<UChar> qvVector;
        QVsToCmpH5QVs(*qvString, byteAlignment, false, &qvVector);
        cmpFile.StoreQVs(qvVector, alignment.tName, *qvName,
                         movieName, &qvOffsetBegin, &qvOffsetEnd);
      }
      // Every QV array is expected to occupy the same offsets as the
      // alignment array stored above.
      assert(qvOffsetBegin == offsetBegin);
      assert(qvOffsetEnd == offsetEnd);
    }
  }

  // Incremented before use, so the AlnID written below starts at the
  // post-increment value.
  numAlignments++;

  DistanceMatrixScoreFunction<DNASequence, DNASequence> distScoreFn;
  //distScoreFn does not matter since the score is not stored.
  ComputeAlignmentStats(alignment, alignment.qAlignedSeq.seq, alignment.tAlignedSeq.seq, distScoreFn);

  /*
    The current AlnIndex column names:
    (0): "AlnID", "AlnGroupID", "MovieID", "RefGroupID", "tStart",
    (5): "tEnd", "RCRefStrand", "HoleNumber", "SetNumber",
    (9): "StrobeNumber", "MoleculeID", "rStart", "rEnd", "MapQV", "nM",
    (15): "nMM", "nIns", "nDel", "Offset_begin", "Offset_end",
    (20): "nBackRead", "nReadOverlap"
  */
  if (moleculeNumber == -1) {
    // No molecule number supplied: derive one from the movie and the hole.
    moleculeNumber =  numZMWsPerMovieSpringField * (movieId - 1) + holeNumber;
  }
  alnIndex[0]  = numAlignments;  // AlnId
  alnIndex[1]  = pathId;        // AlnGroupID
  alnIndex[2]  = movieId;    // MovieID
  alnIndex[3]  = refGroupId; // RefGroupID
  alnIndex[4]  = alignment.tAlignedSeqPos; // tStart
  alnIndex[5]  = alignment.tAlignedSeqPos +  alignment.tAlignedSeqLength; // tEnd
  alnIndex[6]  = alignment.tStrand; // RCRefStrand
  alnIndex[7]  = holeNumber;   // HoleNumber
  alnIndex[8]  = 0; // SET NUMBER -- parse later!!!!
  alnIndex[9]  = alnSegment; // strobenumber
  alnIndex[10] = moleculeNumber; // MoleculeID
  alnIndex[11] = alignment.qAlignedSeqPos; // rStart
  alnIndex[12] = alignment.qAlignedSeqPos + alignment.qAlignedSeqLength; // rEnd
  alnIndex[13] = alignment.mapQV;     // MapQV
  alnIndex[14] = alignment.nMatch;    // nM
  alnIndex[15] = alignment.nMismatch; // nMM
  alnIndex[16] = alignment.nIns;      // nIns
  alnIndex[17] = alignment.nDel;      // nDel
  alnIndex[18] = offsetBegin;         // Offset_begin
  alnIndex[19] = offsetEnd;           // Offset_end
  alnIndex[20] = 0;                   // nBackRead (not computed here)
  alnIndex[21] = 0;                   // nReadOverlap (not computed here)
  cmpFile.alnInfoGroup.WriteAlnIndex(alnIndex);
}
// Example #2
// 0
//
//	Initialize instance from a VMware 4.x descriptor file
//
//	pPath	path of the file to read; must be a non-empty string.
//		The file is either a text descriptor or a file that starts
//		with VMDK_SIGNATURE, in which case the descriptor position
//		is taken from the header.
//
//	The descriptor is read line by line. Extent lines ("RW ..." /
//	"RDONLY ...") create and load extent objects; the other recognized
//	keys fill in geometry, time stamps, controller type, disk type,
//	parent path and version members. Whenever a value cannot be parsed
//	the VDiskCallBack is consulted and VDISK_FLAG_DIRTY is set.
//
//	Returns the result of Check() once the descriptor has been read up
//	to VDK_EOF. Otherwise returns VDK_PARAM for an empty path, VDK_DATA
//	for unusable descriptor data, VDK_CANCEL when the callback declines
//	to continue, or the error status of the failing file/extent call.
//
VDKSTAT VDiskVmdk::Initialize(PCHAR pPath)
{
	VDiskFile file;
	CHAR	buf[MAX_PATH + 40];
	CHAR	path[MAX_PATH];
	PVOID	cbparams[2];
	PCHAR	current;
	ULONG	signature;
	ULONG_PTR len;
	VDKSTAT	ret;

	//
	//	parameter check
	//
	if (!pPath || !*pPath) {
		return VDK_PARAM;
	}

	//
	//	store path
	//
	if ((ret = StorePath(pPath)) != VDK_OK) {
		return ret;
	}

	//
	//	open file
	//
	if ((ret = file.Open(pPath)) != VDK_OK) {
		return ret;
	}

	//
	//	initialize callback parameters
	//	[0] is the file path, [1] is the descriptor line being parsed;
	//	[1] starts as NULL so that a callback issued before any line
	//	has been read does not receive an uninitialized pointer
	//
	cbparams[0] = pPath;
	cbparams[1] = NULL;

	//
	//	read first 4 bytes
	//
	signature = 0;

	ret = file.ReadByte((PUCHAR)&signature, sizeof(signature), &len);

	if (ret != VDK_OK) {
		return ret;
	}

	if (len != sizeof(signature)) {
		return VDK_EOF;
	}

	if (signature == VMDK_SIGNATURE) {
		//
		//	it's a monolithic sparse file
		//	-- descriptor offset is stored in header
		//
		VMDK_HEADER vmdk;

		//	the signature has already been consumed, so read only
		//	the rest of the header, into the bytes that follow it
		ret = file.ReadByte((PUCHAR)&vmdk + sizeof(signature), 
			sizeof(vmdk) - sizeof(signature), &len);

		if (ret != VDK_OK) {
			return ret;
		}

		if (len != sizeof(vmdk) - sizeof(signature)) {
			return VDK_DATA;
		}

		//	a usable descriptor needs a non-zero offset and size,
		//	both of which must fit in the low 32 bits
		if (vmdk.DescOffsetLow == 0		||
			vmdk.DescOffsetHigh != 0	||
			vmdk.DescSizeLow == 0		||
			vmdk.DescSizeHigh != 0) {

			VDiskCallBack(VDISK_CB_VMDK_NODESC, cbparams);

			return VDK_DATA;
		}

		ret = VdkSeekFile(file.Handle(),
			vmdk.DescOffsetLow << VDK_BYTE_SHIFT_TO_SECTOR);

		if (ret != VDK_OK) {
			return ret;
		}
	}
	else {
		//	plain descriptor file: rewind and parse from the start
		ret = VdkSeekFile(file.Handle(), 0);

		if (ret != VDK_OK) {
			return ret;
		}
	}

	while ((ret = file.ReadText(buf, sizeof(buf), NULL)) == VDK_OK) {

		//
		//	replace tabs and newlines with blanks
		//
		current = buf;

		while (*current) {
			if (*current == '\t' || *current == '\n') {
				*current = ' ';
			}
			current++;
		}

		//
		//	remove trailing blanks
		//
		while (current > buf && *(--current) == ' ') {
			*current = '\0';
		}

		//
		//	skip leading blanks
		//
		current = buf;

		while (*current == ' ') {
			current++;
		}

		//
		//	blank line?
		//
		if (!*current) {
			continue;
		}

		cbparams[1] = current;

		//
		//	parse current line
		//
		if (!VdkCmpNoCaseN(current, "RW ", 3) ||
			!VdkCmpNoCaseN(current, "RDONLY", 6)) {
			VDiskExt	*ext;
			PCHAR		top, tail, p;
			ULONG		capacity;
			CHAR		delim;
			BOOL		sparse;
			ULONG		offset;
			HANDLE		hFile;

			//
			//	search capacity field
			//	-- skip the whole access keyword first; it is
			//	either "RW" or "RDONLY", so a fixed offset of 3
			//	would land in the middle of "RDONLY"
			//
			p = current;

			while (*p && *p != ' ') {
				p++;
			}

			while (*p == ' ') {
				p++;
			}

			capacity = atol(p);

			if (!capacity) {

				capacity = VDiskCallBack(
					VDISK_CB_DESC_CAPACITY, cbparams);

				if (!capacity) {
					return VDK_DATA;
				}

				SetFlag(VDISK_FLAG_DIRTY);
			}

			//
			//	search extent type field
			//
			while (isdigit(static_cast<unsigned char>(*p))) {
				p++;
			}

			while (*p == ' ') {
				p++;
			}

			if (!VdkCmpNoCaseN(p, "flat ", 5)) {
				sparse = FALSE;
			}
			else if (!VdkCmpNoCaseN(p, "sparse ", 7)) {
				sparse = TRUE;
			}
			else {

				ULONG type = VDiskCallBack(
					VDISK_CB_DESC_FILETYPE, cbparams);

				if (type == VDK_FILETYPE_FLAT) {
					sparse = FALSE;
				}
				else if (type == VDK_FILETYPE_VMDK) {
					sparse = TRUE;
				}
				else {
					return VDK_DATA;
				}

				SetFlag(VDISK_FLAG_DIRTY);
			}

			//
			//	search extent path field
			//	-- step over the type word and the blanks after it
			//
			top = p;

			while (*top && *top != ' ') {
				top++;
			}

			while (*top == ' ') {
				top++;
			}

			//	the path is either quoted or blank-terminated
			if (*top == '\"') {
				delim = '\"';
				top++;
			}
			else {
				delim = ' ';
			}

			tail = top;

			while (*tail && *tail != delim) {
				tail++;
			}

			//	a path without its closing delimiter, or one that
			//	does not fit in the path buffer, is a bad entry
			//	(trailing blanks were stripped above, so an
			//	unquoted path that ends the line is rejected too)
			if (!*tail || tail - top >= MAX_PATH) {

				if (!VDiskCallBack(VDISK_CB_DESC_BADENTRY, cbparams)) {
					return VDK_CANCEL;
				}

				SetFlag(VDISK_FLAG_DIRTY);
				continue;
			}

			//	drive letter or leading separator => absolute path
			if ((isalpha(static_cast<unsigned char>(*top)) &&
					*(top + 1) == ':') ||
				*top == PATH_SEPARATOR_CHAR ||
				*top == ALT_SEPARATOR_CHAR) {

				SetFlag(VDISK_FLAG_ABSPATH);
			}
			else {
				ClrFlag(VDISK_FLAG_ABSPATH);
			}

			VdkCopyMem(path, top, tail - top);
			path[tail - top] = '\0';

			if (sparse) {
				offset = 0;
			}
			else {
				//
				//	search "backing offset" field
				//
				p = tail + 1;

				while (*p == ' ') {
					p++;
				}

				if (isdigit(static_cast<unsigned char>(*p))) {
					offset = atol(p);
				}
				else {
					offset = 0;
				}
			}

			//	open the extent file
			*tail = '\0';

			ret = VDiskSearchFile(&hFile, path, m_pPath);

			if (ret != VDK_OK) {
				return ret;
			}

			//
			//	create an extent object
			//
			if (sparse) {
				ext = new VDiskExtVmdk;
			}
			else {
				ext = new VDiskExtRaw;
			}

			if (ext == NULL) {
				ret = VdkLastError();
				VdkCloseFile(hFile);
				return ret;
			}

			ret = AddExtent(ext);

			if (ret != VDK_OK) {
				//	the extent was not added, so it is still
				//	ours to release
				VdkCloseFile(hFile);
				delete ext;
				return ret;
			}

			ret = ext->SetPath(path);

			if (ret != VDK_OK) {
				VdkCloseFile(hFile);
				return ret;
			}

			ret = ext->Load(hFile);

			VdkCloseFile(hFile);

			if (ret != VDK_OK) {
				return ret;
			}

			ext->SetCapacity(capacity);

			if (!sparse) {
				((VDiskExtRaw *)ext)->SetBackOffset(offset);
			}
		}
		else if (!VdkCmpNoCaseN(current, "ddb.geometry.sectors", 20)) {
			PCHAR p = current + 20;

			//	skip the separator between key and value
			while (*p == ' ' || *p == '=' || *p == '\"') {
				p++;
			}

			m_nSectors = atol(p);

			if (!m_nSectors) {

				m_nSectors = VDiskCallBack(
					VDISK_CB_DESC_GEOMETRY, cbparams);

				if (!m_nSectors) {
					return VDK_DATA;
				}

				SetFlag(VDISK_FLAG_DIRTY);
			}
		}
		else if (!VdkCmpNoCaseN(current, "ddb.geometry.heads", 18)) {
			PCHAR p = current + 18;

			while (*p == ' ' || *p == '=' || *p == '\"') {
				p++;
			}

			m_nTracks = atol(p);

			if (!m_nTracks) {

				m_nTracks = VDiskCallBack(
					VDISK_CB_DESC_GEOMETRY, cbparams);

				if (!m_nTracks) {
					return VDK_DATA;
				}

				SetFlag(VDISK_FLAG_DIRTY);
			}
		}
		else if (!VdkCmpNoCaseN(current, "ddb.geometry.cylinders", 22)) {
			PCHAR p = current + 22;

			while (*p == ' ' || *p == '=' || *p == '\"') {
				p++;
			}

			m_nCylinders = atol(p);

			if (!m_nCylinders) {

				m_nCylinders = VDiskCallBack(
					VDISK_CB_DESC_GEOMETRY, cbparams);

				if (!m_nCylinders) {
					return VDK_DATA;
				}

				SetFlag(VDISK_FLAG_DIRTY);
			}
		}
		else if (!VdkCmpNoCaseN(current, "CID", 3)) {
			PCHAR p = current + 3;

			while (*p == ' ' || *p == '=' || *p == '\"') {
				p++;
			}

			//	sscanf returns EOF (non-zero) for an empty value,
			//	so success must be tested as "exactly one item"
			if (sscanf(p, "%lx", &m_nTimeStamp) != 1) {

				if (!VDiskCallBack(VDISK_CB_DESC_TIMESTAMP, cbparams)) {
					return VDK_CANCEL;
				}

				m_nTimeStamp = (ULONG)-1;
				SetFlag(VDISK_FLAG_DIRTY);
			}
		}
		else if (!VdkCmpNoCaseN(current, "parentCID", 9)) {
			PCHAR p = current + 9;

			while (*p == ' ' || *p == '=' || *p == '\"') {
				p++;
			}

			//	see the CID branch for why the result is
			//	compared against 1
			if (sscanf(p, "%lx", &m_nParentTS) != 1) {

				if (!VDiskCallBack(VDISK_CB_DESC_TIMESTAMP, cbparams)) {
					return VDK_CANCEL;
				}

				m_nParentTS = (ULONG)-1;
				SetFlag(VDISK_FLAG_DIRTY);
			}

			//	any parent time stamp other than -1 marks a child
			if (m_nParentTS != (ULONG)-1) {
				SetFlag(VDISK_FLAG_CHILD);
			}
		}
		else if (!VdkCmpNoCaseN(current, "ddb.virtualHWVersion", 20)) {
			PCHAR p = current + 20;

			while (*p == ' ' || *p == '=' || *p == '\"') {
				p++;
			}

			m_nHardwareVer = atol(p);
		}
		else if (!VdkCmpNoCaseN(current, "ddb.adapterType", 15)) {
			PCHAR p = current + 15;

			while (*p == ' ' || *p == '=' || *p == '\"') {
				p++;
			}

			if (!VdkCmpNoCaseN(p, "ide", 3)) {
				m_nController = VDISK_CONTROLLER_IDE;
			}
			else if (!VdkCmpNoCaseN(p, "buslogic", 8) ||
				!VdkCmpNoCaseN(p, "lsilogic", 8)) {
				m_nController = VDISK_CONTROLLER_SCSI;
			}
			else {

				m_nController = VDiskCallBack(
					VDISK_CB_CONTROLLER, cbparams);

				if (m_nController != VDISK_CONTROLLER_SCSI &&
					m_nController != VDISK_CONTROLLER_IDE) {
					return VDK_CANCEL;
				}

				SetFlag(VDISK_FLAG_DIRTY);
			}
		}
		else if (!VdkCmpNoCaseN(current, "createType", 10)) {
			PCHAR p = current + 10;
			ULONG type;

			while (*p == ' ' || *p == '=' || *p == '\"') {
				p++;
			}

			if (!VdkCmpNoCaseN(p, "twoGbMaxExtentSparse", 20)) {
				type = VDISK_VMDK_SPLIT_SPARSE;
			}
			else if (!VdkCmpNoCaseN(p, "monolithicSparse", 16)) {
				type = VDISK_VMDK_MONO_SPARSE;
			}
			else if (!VdkCmpNoCaseN(p, "twoGbMaxExtentFlat", 18)) {
				type = VDISK_VMDK_SPLIT_FLAT;
			}
			else if (!VdkCmpNoCaseN(p, "monolithicFlat", 14)) {
				type = VDISK_VMDK_MONO_FLAT;
			}
			else {

				type = VDiskCallBack(
					VDISK_CB_DESC_DISKTYPE, cbparams);

				SetFlag(VDISK_FLAG_DIRTY);
			}

			//	translate the disk type into the SINGLE / SPARSE /
			//	CHILD flags; flat types also clear CHILD
			switch (type) {
			case VDISK_VMDK_SPLIT_FLAT:
				ClrFlag(VDISK_FLAG_SINGLE);
				ClrFlag(VDISK_FLAG_CHILD);
				ClrFlag(VDISK_FLAG_SPARSE);
				break;

			case VDISK_VMDK_MONO_FLAT:
				SetFlag(VDISK_FLAG_SINGLE);
				ClrFlag(VDISK_FLAG_CHILD);
				ClrFlag(VDISK_FLAG_SPARSE);
				break;

			case VDISK_VMDK_SPLIT_SPARSE:
				ClrFlag(VDISK_FLAG_SINGLE);
				SetFlag(VDISK_FLAG_SPARSE);
				break;

			case VDISK_VMDK_MONO_SPARSE:
				SetFlag(VDISK_FLAG_SINGLE);
				SetFlag(VDISK_FLAG_SPARSE);
				break;

			default:
				return VDK_DATA;
			}
		}
		else if (!VdkCmpNoCaseN(current, "version", 7)) {
			//	recognized so that it is not reported as a bad
			//	entry; the value itself is currently ignored
/*
			PCHAR p = current + 7;

			while (*p == ' ' || *p == '=' || *p == '\"') {
				p++;
			}

			m_nVmdkVersion = atol(p);
*/
		}
		else if (!VdkCmpNoCaseN(current, "parentFileNameHint", 18)) {
			PCHAR top = current + 18;
			PCHAR tail;

			while (*top == ' ' || *top == '=' || *top == '\"') {
				top++;
			}

			tail = top;

			//	the value runs up to the closing quote (or the
			//	end of the line); terminate it in place
			while (*tail && *tail != '\"') {
				tail++;
			}

			*tail = '\0';

			ret = StoreParentPath(top);

			if (ret != VDK_OK) {
				return ret;
			}

			SetFlag(VDISK_FLAG_CHILD);
		}
		else if (!VdkCmpNoCaseN(current, "ddb.toolsVersion", 16)) {
			PCHAR p = current + 16;

			while (*p == ' ' || *p == '=' || *p == '\"') {
				p++;
			}

			m_nToolsFlag = atol(p);
		}
		else if (*current && *current != '#') {

			//	neither a known key nor a '#' comment line
			if (!VDiskCallBack(VDISK_CB_DESC_BADENTRY, cbparams)) {
				return VDK_CANCEL;
			}

			SetFlag(VDISK_FLAG_DIRTY);
		}
	}

	//
	//	reaching the end of the descriptor is the normal exit;
	//	any other status is a read error
	//
	if (ret == VDK_EOF) {
		return Check();
	}
	else {
		return ret;
	}
}