Пример #1
0
void DiagList::Copy(const DiagList &DL)
	{
	Clear();
	unsigned uCount = DL.GetCount();
	for (unsigned i = 0; i < uCount; ++i)
		Add(DL.Get(i));
	}
Пример #2
0
// Converts the diagonals in DL into a sequence of DP regions appended to RL.
//
// For every diagonal a rectangle is emitted that spans from the end of the
// previous diagonal (or from position 0) up to the trimmed start of this
// diagonal, followed by a diagonal region for the trimmed diagonal itself.
// A final rectangle covers the remainder up to (uLengthA, uLengthB).
//
// Each diagonal is trimmed by g_uDiagMargin positions at both ends, so the
// emitted diagonal region has length d.m_uLength - 2*g_uDiagMargin; when
// nothing is left after trimming, only the rectangle is emitted.
//
// DL is walked in index order; start positions are carried from one diagonal
// to the next, so the asserts below expect non-overlapping, ordered input.
void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
  unsigned uLengthA, unsigned uLengthB)
	{
// The margin is removed from both ends of a diagonal, hence the limit of
// half the minimum diagonal length.
	if (g_uDiagMargin > g_uMinDiagLength/2)
		Quit("Invalid parameters, diagmargin=%u must be <= diaglength/2, diaglength=%u",
		  g_uDiagMargin, g_uMinDiagLength);

	unsigned uStartPosA = 0;
	unsigned uStartPosB = 0;
	const unsigned uDiagCount = DL.GetCount();
	DPRegion r;
	for (unsigned uDiagIndex = 0; uDiagIndex < uDiagCount; ++uDiagIndex)
		{
		const Diag &d = DL.Get(uDiagIndex);
		assert(d.m_uLength >= g_uMinDiagLength);

	// "Vertex" positions: the start vertex is the last position before the
	// trimmed diagonal, the end vertex is the first position after it.
	// The "- 1" may wrap when start and margin are both zero; every use
	// below adds 1 back, so unsigned arithmetic yields the right value.
		const unsigned uStartVertexA = d.m_uStartPosA + g_uDiagMargin - 1;
		const unsigned uStartVertexB = d.m_uStartPosB + g_uDiagMargin - 1;
		const unsigned uEndVertexA = d.m_uStartPosA + d.m_uLength - g_uDiagMargin;
		const unsigned uEndVertexB = d.m_uStartPosB + d.m_uLength - g_uDiagMargin;

	// Rectangle between the previous diagonal and this one.
		r.m_Type = DPREGIONTYPE_Rect;
		r.m_Rect.m_uStartPosA = uStartPosA;
		r.m_Rect.m_uStartPosB = uStartPosB;

		assert(uStartVertexA + 1 >= uStartPosA);
		assert(uStartVertexB + 1 >= uStartPosB);
		r.m_Rect.m_uLengthA = uStartVertexA + 1 - uStartPosA;
		r.m_Rect.m_uLengthB = uStartVertexB + 1 - uStartPosB;
		RL.Add(r);

	// Diagonal region, only if something remains after trimming both caps.
		if (uEndVertexA > uStartVertexA + 1)
			{
			const unsigned uDiagLengthMinusCaps = uEndVertexA - uStartVertexA - 1;

			r.m_Type = DPREGIONTYPE_Diag;
			r.m_Diag.m_uStartPosA = uStartVertexA + 1;
			r.m_Diag.m_uStartPosB = uStartVertexB + 1;
			assert(uEndVertexA - uStartVertexA == uEndVertexB - uStartVertexB);
			r.m_Diag.m_uLength = uDiagLengthMinusCaps;
			RL.Add(r);
			}

	// The next rectangle starts where this trimmed diagonal ends.
		uStartPosA = uEndVertexA;
		uStartPosB = uEndVertexB;
		}

	assert((int) uLengthA - (int) uStartPosA >= (int) g_uDiagMargin);
	assert((int) uLengthB - (int) uStartPosB >= (int) g_uDiagMargin);

// Trailing rectangle from the end of the last diagonal to the end of both
// sequences (the whole matrix when DL is empty).
	r.m_Type = DPREGIONTYPE_Rect;
	r.m_Rect.m_uStartPosA = uStartPosA;
	r.m_Rect.m_uStartPosB = uStartPosB;

	assert(uLengthA >= uStartPosA);
	assert(uLengthB >= uStartPosB);
	r.m_Rect.m_uLengthA = uLengthA - uStartPosA;
	r.m_Rect.m_uLengthB = uLengthB - uStartPosB;
	RL.Add(r);
	}
Пример #3
0
/// CompareDiagLists - Compare two diagnostic lists and report the difference
/// between them.
///
/// Every entry of [d1_begin, d1_end) is matched against the entries of
/// [d2_begin, d2_end) that fall on the same instantiation line; two entries
/// match when either message is a substring of the other. A right-hand entry
/// is consumed by its first match, so it cannot satisfy two left-hand entries.
///
/// Unmatched left-hand entries are passed to PrintProblem with MsgLeftOnly,
/// then unmatched right-hand entries with MsgRightOnly. Returns the OR of the
/// two PrintProblem results.
///
static bool CompareDiagLists(SourceManager &SourceMgr,
                             const_diag_iterator d1_begin,
                             const_diag_iterator d1_end,
                             const_diag_iterator d2_begin,
                             const_diag_iterator d2_end,
                             const char *MsgLeftOnly,
                             const char *MsgRightOnly) {
  DiagList LeftOnly;
  // Only the right-hand side is copied: matched entries are erased from it.
  // The left-hand range is read-only and is walked in place.
  DiagList Right(d2_begin, d2_end);

  for (const_diag_iterator I = d1_begin; I != d1_end; ++I) {
    unsigned LineNo1 = SourceMgr.getInstantiationLineNumber(I->first);
    const std::string &Diag1 = I->second;

    DiagList::iterator II, IE;
    for (II = Right.begin(), IE = Right.end(); II != IE; ++II) {
      unsigned LineNo2 = SourceMgr.getInstantiationLineNumber(II->first);
      if (LineNo1 != LineNo2) continue;

      const std::string &Diag2 = II->second;
      if (Diag2.find(Diag1) != std::string::npos ||
          Diag1.find(Diag2) != std::string::npos) {
        break;
      }
    }
    if (II == IE) {
      // Not found.
      LeftOnly.push_back(*I);
    } else {
      // Found. The same cannot be found twice.
      Right.erase(II);
    }
  }
  // Now all that's left in Right are those that were not matched.

  // Both reports must always be produced, so no short-circuit operator is
  // used. They are issued as separate statements because the operands of '|'
  // are not sequenced, which would leave the report order unspecified.
  const bool LeftProblem =
    PrintProblem(SourceMgr, LeftOnly.begin(), LeftOnly.end(), MsgLeftOnly);
  const bool RightProblem =
    PrintProblem(SourceMgr, Right.begin(), Right.end(), MsgRightOnly);
  return LeftProblem | RightProblem;
}
Пример #4
0
// Merge diagonals that are continuations of each other with
// short breaks of up to length g_uMaxDiagBreak.
// In a sorted list of diagonals, we only have to check
// consecutive entries.
//
// NOTE: this function is currently disabled. The unconditional "return"
// below exits before any merging is done, so DL is left unchanged and all
// code after it is unreachable (see the TODO further down).
void MergeDiags(DiagList &DL)
	{
// Looked up from the per-context parameters; only read by the unreachable
// merge loop below.
    unsigned &g_uMaxDiagBreak = getMuscleContext()->params.g_uMaxDiagBreak;

// Merging is switched off: everything below this line is dead code.
	return;
#if	DEBUG
	if (!DL.IsSorted())
		Quit("MergeDiags: !IsSorted");
#endif

// TODO: Fix this!
// Breaks must be with no offset (no gaps)
	const unsigned uCount = DL.GetCount();
// Nothing to merge with fewer than two diagonals.
	if (uCount <= 1)
		return;

	DiagList NewList;

// MergedDiag accumulates a run of merged diagonals; ptrPrev points either at
// it or at an unmerged entry of DL.
	Diag MergedDiag;
	const Diag *ptrPrev = &DL.Get(0);
	for (unsigned i = 1; i < uCount; ++i)
		{
		const Diag *ptrDiag = &DL.Get(i);
		unsigned uBreakLength = DiagBreak(*ptrPrev, *ptrDiag);
		if (uBreakLength <= g_uMaxDiagBreak)
			{
		// Extend the previous diagonal across the break and the current
		// diagonal, keeping the previous start positions.
			MergedDiag.m_uStartPosA = ptrPrev->m_uStartPosA;
			MergedDiag.m_uStartPosB = ptrPrev->m_uStartPosB;
			MergedDiag.m_uLength = ptrPrev->m_uLength + ptrDiag->m_uLength
			  + uBreakLength;
			ptrPrev = &MergedDiag;
			}
		else
			{
		// Break too long: emit what has been accumulated and start over
		// from the current diagonal.
			NewList.Add(*ptrPrev);
			ptrPrev = ptrDiag;
			}
		}
// Emit the last pending diagonal and replace DL with the merged list.
	NewList.Add(*ptrPrev);
	DL.Copy(NewList);
	}
Пример #5
0
/// CheckResults - Compare the expected errors, warnings and notes against the
/// diagnostics buffered by the preprocessor's diagnostic client, and report
/// every discrepancy through CompareDiagLists. Returns "true" if any of the
/// three categories differed, "false" otherwise.
///
static bool CheckResults(Preprocessor &PP,
                         const DiagList &ExpectedErrors,
                         const DiagList &ExpectedWarnings,
                         const DiagList &ExpectedNotes) {
  const DiagnosticClient *Client = PP.getDiagnostics().getClient();
  assert(Client &&
      "DiagChecker requires a valid TextDiagnosticBuffer");
  // The client is treated as a TextDiagnosticBuffer without a runtime check.
  const TextDiagnosticBuffer &Seen =
    static_cast<const TextDiagnosticBuffer&>(*Client);
  SourceManager &SM = PP.getSourceManager();

  // For each category we report both directions of the set difference:
  //
  //   Expected \ Seen - expected but not seen
  //   Seen \ Expected - seen but not expected
  //
  // All three comparisons always run, in this order, so every category is
  // reported even when an earlier one already failed.
  const bool ErrorsDiffer =
    CompareDiagLists(SM,
                     ExpectedErrors.begin(), ExpectedErrors.end(),
                     Seen.err_begin(), Seen.err_end(),
                     "Errors expected but not seen:",
                     "Errors seen but not expected:");

  const bool WarningsDiffer =
    CompareDiagLists(SM,
                     ExpectedWarnings.begin(), ExpectedWarnings.end(),
                     Seen.warn_begin(), Seen.warn_end(),
                     "Warnings expected but not seen:",
                     "Warnings seen but not expected:");

  const bool NotesDiffer =
    CompareDiagLists(SM,
                     ExpectedNotes.begin(), ExpectedNotes.end(),
                     Seen.note_begin(), Seen.note_end(),
                     "Notes expected but not seen:",
                     "Notes seen but not expected:");

  return ErrorsDiffer || WarningsDiffer || NotesDiffer;
}
Пример #6
0
// Finds diagonals (runs of positions with identical residue groups) shared by
// two nucleotide profiles and stores them in DL.
//
// PX/uLengthX and PY/uLengthY are the two profiles. DL is cleared first and
// then receives one entry per match of at least g_uMinDiagLength positions,
// always as (start in X, start in Y, length) regardless of which profile is
// shorter.
//
// Method: the start position of every K-tuple of the longer profile is
// recorded in a table (a later occurrence overwrites an earlier one); each
// K-tuple of the shorter profile is then looked up and a hit is extended
// forwards while the residue groups agree.
void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
  unsigned uLengthY, DiagList &DL)
	{
    MuscleContext *ctx = getMuscleContext();
    ALPHA &g_Alpha = ctx->alpha.g_Alpha;
    unsigned* TuplePos = ctx->finddiagsn.TuplePos;
    unsigned &g_uMinDiagLength = ctx->params.g_uMinDiagLength;

	if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
		Quit("FindDiagsNuc: requires nucleo alphabet");

	DL.Clear();

// 16 is arbitrary slop, no principled reason for this.
	if (uLengthX < K + 16 || uLengthY < K + 16)
		return;

// Set A to shorter profile, B to longer
	const ProfPos *PA;
	const ProfPos *PB;
	unsigned uLengthA;
	unsigned uLengthB;
	bool bSwap;
	if (uLengthX < uLengthY)
		{
		bSwap = false;
		PA = PX;
		PB = PY;
		uLengthA = uLengthX;
		uLengthB = uLengthY;
		}
	else
		{
		bSwap = true;
		PA = PY;
		PB = PX;
		uLengthA = uLengthY;
		uLengthB = uLengthX;
		}

#if	TRACE
	Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);
#endif

// Build tuple map for the longer profile, B
	if (uLengthB < K)
		Quit("FindDiags: profile too short");

// Reset the whole tuple table. The size must come from the context member:
// TuplePos above is a plain unsigned*, so sizeof(TuplePos) would only be the
// size of a pointer and leave stale positions from a previous call in place.
// NOTE(review): assumes ctx->finddiagsn.TuplePos is declared as an array; if
// it is itself a pointer, pass the element count explicitly -- confirm.
// memset works byte-wise, so this also relies on EMPTY having all bytes equal.
	memset(TuplePos, EMPTY, sizeof(ctx->finddiagsn.TuplePos));

	for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)
		{
		const unsigned uTuple = GetTuple(PB, uPos);
		if (EMPTY == uTuple)
			continue;
		TuplePos[uTuple] = uPos;
		}

// Find matches
	for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)
		{
		const unsigned uTuple = GetTuple(PA, uPosA);
		if (EMPTY == uTuple)
			continue;
		const unsigned uPosB = TuplePos[uTuple];
		if (EMPTY == uPosB)
			continue;

	// This tuple is found in both profiles
		unsigned uStartPosA = uPosA;
		unsigned uStartPosB = uPosB;

	// Try to extend the match forwards
		unsigned uEndPosA = uPosA + K - 1;
		unsigned uEndPosB = uPosB + K - 1;
		for (;;)
			{
		// Stop at the last position of either profile.
			if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
				break;
			const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
			if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
				break;
			const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
			if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
				break;
			if (uAAGroupA != uAAGroupB)
				break;
			++uEndPosA;
			++uEndPosB;
			}
	// Resume the scan after the matched run (the loop's ++uPosA moves past
	// its last position), so matches in A never overlap.
		uPosA = uEndPosA;

#if	TRACE
		{
		Log("Match: A %4u-%4u   ", uStartPosA, uEndPosA);
		for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
			Log("%c", LetterToChar(PA[n].m_uResidueGroup));
		Log("\n");
		Log("       B %4u-%4u   ", uStartPosB, uEndPosB);
		for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
			Log("%c", LetterToChar(PB[n].m_uResidueGroup));
		Log("\n");
		}
#endif

		const unsigned uLength = uEndPosA - uStartPosA + 1;
		assert(uEndPosB - uStartPosB + 1 == uLength);

		if (uLength >= g_uMinDiagLength)
			{
		// A/B may have been exchanged above; report in X/Y order.
			if (bSwap)
				DL.Add(uStartPosB, uStartPosA, uLength);
			else
				DL.Add(uStartPosA, uStartPosB, uLength);
			}
		}
	}
Пример #7
0
void FindDiags(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
  unsigned uLengthY, DiagList &DL)
	{
	if (ALPHA_Amino != g_Alpha)
		Quit("FindDiags: requires amino acid alphabet");

	DL.Clear();

	if (uLengthX < 12 || uLengthY < 12)
		return;

// Set A to shorter profile, B to longer
	const ProfPos *PA;
	const ProfPos *PB;
	unsigned uLengthA;
	unsigned uLengthB;
	bool bSwap;
	if (uLengthX < uLengthY)
		{
		bSwap = false;
		PA = PX;
		PB = PY;
		uLengthA = uLengthX;
		uLengthB = uLengthY;
		}
	else
		{
		bSwap = true;
		PA = PY;
		PB = PX;
		uLengthA = uLengthY;
		uLengthB = uLengthX;
		}

// Build tuple map for the longer profile, B
	if (uLengthB < KTUP)
		Quit("FindDiags: profile too short");

	memset(TuplePos, EMPTY, sizeof(TuplePos));

	for (unsigned uPos = 0; uPos < uLengthB - KTUP; ++uPos)
		{
		const unsigned uTuple = GetTuple(PB, uPos);
		if (EMPTY == uTuple)
			continue;
		TuplePos[uTuple] = uPos;
		}

// Find matches
	for (unsigned uPosA = 0; uPosA < uLengthA - KTUP; ++uPosA)
		{
		const unsigned uTuple = GetTuple(PA, uPosA);
		if (EMPTY == uTuple)
			continue;
		const unsigned uPosB = TuplePos[uTuple];
		if (EMPTY == uPosB)
			continue;

	// This tuple is found in both profiles
		unsigned uStartPosA = uPosA;
		unsigned uStartPosB = uPosB;

	// Try to extend the match forwards
		unsigned uEndPosA = uPosA + KTUP - 1;
		unsigned uEndPosB = uPosB + KTUP - 1;
		for (;;)
			{
			if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
				break;
			const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
			if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
				break;
			const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
			if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
				break;
			if (uAAGroupA != uAAGroupB)
				break;
			++uEndPosA;
			++uEndPosB;
			}
		uPosA = uEndPosA;

#if	TRACE
		{
		Log("Match: A %4u-%4u   ", uStartPosA, uEndPosA);
		for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
			Log("%c", 'A' + PA[n].m_uResidueGroup);
		Log("\n");
		Log("       B %4u-%4u   ", uStartPosB, uEndPosB);
		for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
			Log("%c", 'A' + PB[n].m_uResidueGroup);
		Log("\n");
		}
#endif

		const unsigned uLength = uEndPosA - uStartPosA + 1;
		assert(uEndPosB - uStartPosB + 1 == uLength);

		if (uLength >= g_uMinDiagLength)
			{
			if (bSwap)
				DL.Add(uStartPosB, uStartPosA, uLength);
			else
				DL.Add(uStartPosA, uStartPosB, uLength);
			}
		}
	}
Пример #8
0
/// FindDiagnostics - Go through the comment and see if it indicates expected
/// diagnostics. If so, then put them in a diagnostic list.
///
/// The comment text is [CommentStart, CommentStart+CommentLen). Every
/// occurrence of ExpectedStr is parsed as
///
///   ExpectedStr [whitespace] [count] [whitespace] {{message}}
///
/// and (Pos, message) is appended to ExpectedDiags 'count' times (once when
/// the count is absent or zero). A literal backslash-n in the message is
/// turned into a newline character. Malformed directives are reported through
/// EmitError and stop the scan of this comment.
///
/// Candidates are located by searching for the character 'e', so ExpectedStr
/// is assumed to start with 'e'.
///
static void FindDiagnostics(const char *CommentStart, unsigned CommentLen,
                            DiagList &ExpectedDiags,
                            Preprocessor &PP, SourceLocation Pos,
                            const char *ExpectedStr) {
  const char *CommentEnd = CommentStart+CommentLen;
  unsigned ExpectedStrLen = strlen(ExpectedStr);
  
  // Find all expected-foo diagnostics in the string and add them to
  // ExpectedDiags.
  while (CommentStart != CommentEnd) {
    CommentStart = std::find(CommentStart, CommentEnd, 'e');
    if (unsigned(CommentEnd-CommentStart) < ExpectedStrLen) return;
    
    // If this isn't expected-foo, ignore it.
    if (memcmp(CommentStart, ExpectedStr, ExpectedStrLen)) {
      ++CommentStart;
      continue;
    }
    
    CommentStart += ExpectedStrLen;
    
    // Skip whitespace. isspace() is only defined for unsigned char values
    // (and EOF), so the possibly negative plain char is converted first.
    while (CommentStart != CommentEnd &&
           isspace(static_cast<unsigned char>(CommentStart[0])))
      ++CommentStart;
    
    // Default, if we find the '{' now, is 1 time.
    int Times = 1;
    int Temp = 0;
    // In extended syntax, there could be a digit now.
    while (CommentStart != CommentEnd &&
           CommentStart[0] >= '0' && CommentStart[0] <= '9') {
      Temp *= 10;
      Temp += CommentStart[0] - '0';
      ++CommentStart;
    }
    if (Temp > 0)
      Times = Temp;
    
    // Skip whitespace again.
    while (CommentStart != CommentEnd &&
           isspace(static_cast<unsigned char>(CommentStart[0])))
      ++CommentStart;
    
    // We should have a {{ now.
    if (CommentEnd-CommentStart < 2 ||
        CommentStart[0] != '{' || CommentStart[1] != '{') {
      if (std::find(CommentStart, CommentEnd, '{') != CommentEnd)
        EmitError(PP, Pos, "bogus characters before '{{' in expected string");
      else
        EmitError(PP, Pos, "cannot find start ('{{') of expected string");
      return;
    }
    CommentStart += 2;

    // Find the }}.
    const char *ExpectedEnd = CommentStart;
    while (1) {
      ExpectedEnd = std::find(ExpectedEnd, CommentEnd, '}');
      if (CommentEnd-ExpectedEnd < 2) {
        EmitError(PP, Pos, "cannot find end ('}}') of expected string");
        return;
      }
      
      if (ExpectedEnd[1] == '}')
        break;

      ++ExpectedEnd;  // Skip over singular }'s
    }

    // Replace each two-character backslash-n sequence with a real newline.
    // The search resumes just past the inserted newline instead of rescanning
    // from the start; a replacement cannot create a new match to its left.
    std::string Msg(CommentStart, ExpectedEnd);
    std::string::size_type FindPos = 0;
    while ((FindPos = Msg.find("\\n", FindPos)) != std::string::npos) {
      Msg.replace(FindPos, 2, "\n");
      ++FindPos;
    }
    // Add is possibly multiple times.
    for (int i = 0; i < Times; ++i)
      ExpectedDiags.push_back(std::make_pair(Pos, Msg));

    // Continue scanning at the closing }} for further directives.
    CommentStart = ExpectedEnd;
  }
}
Пример #9
0
// Global alignment of two profiles restricted by diagonals.
//
// Diagonals are found with FindDiags or FindDiagsNuc depending on g_Alpha,
// then sorted, filtered with DeleteIncompatible, passed to MergeDiags and
// converted to a list of DP regions. Diagonal regions are turned into a path
// directly; rectangular regions are aligned with GlobalAlignNoDiags and
// shifted to their position in the full matrix. The per-region paths are
// appended to Path in region order.
//
// The DP areas with and without diagonals are accumulated in
// g_dDPAreaWithDiags and g_dDPAreaWithoutDiags.
//
// Always returns 0; no alignment score is computed here.
SCORE GlobalAlignDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
#if	LIST_DIAGS
	TICKS t1 = GetClockTicks();
#endif

	DiagList DL;

	if (ALPHA_Amino == g_Alpha)
		FindDiags(PA, uLengthA, PB, uLengthB, DL);
	else if (ALPHA_DNA == g_Alpha || ALPHA_RNA == g_Alpha)
		FindDiagsNuc(PA, uLengthA, PB, uLengthB, DL);
	else
		Quit("GlobalAlignDiags: bad alpha");

#if	TRACE
	Log("GlobalAlignDiags, diag list:\n");
	DL.LogMe();
#endif

	DL.Sort();
	DL.DeleteIncompatible();

#if	TRACE
	Log("After DeleteIncompatible:\n");
	DL.LogMe();
#endif

	MergeDiags(DL);

#if	TRACE
	Log("After MergeDiags:\n");
	DL.LogMe();
#endif

	DPRegionList RL;
	DiagListToDPRegionList(DL, RL, uLengthA, uLengthB);

#if	TRACE
	Log("RegionList:\n");
	RL.LogMe();
#endif

#if	LIST_DIAGS
	{
	TICKS t2 = GetClockTicks();
	unsigned uArea = RL.GetDPArea();
	Log("ticks=%ld\n", (long) (t2 - t1));
	Log("area=%u\n", uArea);
	}
#endif

// Areas are computed in double: the product of two unsigned lengths wraps
// around once it no longer fits in an unsigned.
	g_dDPAreaWithoutDiags += static_cast<double>(uLengthA)*uLengthB;

	double dDPAreaWithDiags = 0.0;
	const unsigned uRegionCount = RL.GetCount();
	for (unsigned uRegionIndex = 0; uRegionIndex < uRegionCount; ++uRegionIndex)
		{
		const DPRegion &r = RL.Get(uRegionIndex);

		PWPath RegPath;
		if (DPREGIONTYPE_Diag == r.m_Type)
			{
		// A diagonal needs no DP; its path is built directly.
			DiagToPath(r.m_Diag, RegPath);
#if	TRACE_PATH
			Log("DiagToPath, path=\n");
			RegPath.LogMe();
#endif
			}
		else if (DPREGIONTYPE_Rect == r.m_Type)
			{
			const unsigned uRegStartPosA = r.m_Rect.m_uStartPosA;
			const unsigned uRegStartPosB = r.m_Rect.m_uStartPosB;
			const unsigned uRegLengthA = r.m_Rect.m_uLengthA;
			const unsigned uRegLengthB = r.m_Rect.m_uLengthB;
			const ProfPos *RegPA = PA + uRegStartPosA;
			const ProfPos *RegPB = PB + uRegStartPosB;

			dDPAreaWithDiags += static_cast<double>(uRegLengthA)*uRegLengthB;
			GlobalAlignNoDiags(RegPA, uRegLengthA, RegPB, uRegLengthB, RegPath);
#if	TRACE_PATH
			Log("GlobalAlignNoDiags RegPath=\n");
			RegPath.LogMe();
#endif
		// The sub-alignment was computed on the sub-profiles; move it to the
		// rectangle's position in the full profiles.
			OffsetPath(RegPath, uRegStartPosA, uRegStartPosB);
#if	TRACE_PATH
			Log("After offset path, RegPath=\n");
			RegPath.LogMe();
#endif
			}
		else
			Quit("GlobalAlignDiags, Invalid region type %u", r.m_Type);

		AppendRegPath(Path, RegPath);
#if	TRACE_PATH
		Log("After AppendPath, path=");
		Path.LogMe();
#endif
		}

#if	TRACE
	{
	double dDPAreaWithoutDiags = static_cast<double>(uLengthA)*uLengthB;
	Log("DP area with diags %.3g without %.3g pct saved %.3g %%\n",
	  dDPAreaWithDiags, dDPAreaWithoutDiags, (1.0 - dDPAreaWithDiags/dDPAreaWithoutDiags)*100.0);
	}
#endif
	g_dDPAreaWithDiags += dDPAreaWithDiags;
	return 0;
	}