// maf_resolve_seq_fields--
//	Helper for print_maf_match.  For one sequence and a position within it,
//	determine the name to print, the start value, the strand character, and
//	the "true length" value that print_maf_match writes on the "s" line.
//
//	sq           the sequence
//	pos          position of the match in sq->v
//	fallbackName name to use when an unpartitioned sequence has a NULL or
//	             .. empty header
//	_name        (output) name of the sequence (or of the partition that
//	             .. lookup_partition reports for pos)
//	_start       (output) start value;  the caller prints this minus 1
//	_strand      (output) '+' or '-', according to the rcf_rev bit of
//	             .. sq->revCompFlags
//	_trueLen     (output) the sequence's (or partition's) trueLen field

static void maf_resolve_seq_fields
   (seq*			sq,
	unspos			pos,
	char*			fallbackName,
	char**			_name,
	unspos*			_start,
	char*			_strand,
	unspos*			_trueLen)
	{
	seqpartition*	sp = &sq->partition;
	partition*		part;
	char*			name;
	unspos			offset, startLoc, seqLen, trueLen;

	// collect name, offset and lengths, either from the partition containing
	// pos or from the sequence as a whole

	if (sp->p != NULL)		// sequence is partitioned
		{
		part     = lookup_partition (sq, pos);
		name     = &sp->pool[part->header];
		offset   = part->sepBefore + 1;
		startLoc = part->startLoc;
		seqLen   = part->sepAfter - offset;
		trueLen  = part->trueLen;
		}
	else					// sequence is not partitioned
		{
		if (sq->useFullNames) name = sq->header;
		                 else name = sq->shortHeader;
		if ((name == NULL) || (name[0] == 0)) name = fallbackName;
		offset   = 0;
		startLoc = sq->startLoc;
		seqLen   = sq->len;
		trueLen  = sq->trueLen;
		}

	// convert the position to a start value on the appropriate strand
	// (NOTE(review): the reverse-strand formula presumably measures from the
	// opposite end of the full-length sequence -- confirm against callers)

	if ((sq->revCompFlags & rcf_rev) != 0)
		{
		*_start  = pos - offset + trueLen+2 - (startLoc + seqLen);
		*_strand = '-';
		}
	else
		{
		*_start  = pos - offset + startLoc;
		*_strand = '+';
		}

	*_name    = name;
	*_trueLen = trueLen;
	}

// print_maf_match--
//	Print one gap-free match between two sequences to a file, as an
//	"a score=" line followed by one "s" line for each sequence and a blank
//	line.
//
//	f             the file to print to
//	seq1,pos1     the first sequence and the position of the match in it;
//	              .. seq1 must have revCompFlags equal to rcf_forward,
//	              .. otherwise suicide() is called
//	seq2,pos2     the second sequence and the position of the match in it
//	length        number of positions in the match
//	s             the match's score
//	withComments  non-zero => precede the match with "# identity=",
//	              .. "# coverage=" and "# cigar=" comment lines

void print_maf_match
   (FILE*			f,
	seq*			seq1,
	unspos			pos1,
	seq*			seq2,
	unspos			pos2,
	unspos			length,
	score			s,
	int				withComments)
	{
	char*			name1, *name2, *prefix2, *suffix1, *suffix2;
	unspos			start1, start2;
	unspos			trueLen1, trueLen2;
	char			strand1, strand2;
	int				nameLen1, nameLen2, nameWidth, startWidth, lenWidth;
	u8*				text1, *text2;
	unspos			col;
	segment			seg;
	unspos			ratioTop, ratioBottom;

	if (seq1->revCompFlags != rcf_forward)
		suicide ("attempt to print - strand or complement for sequence 1 in print_maf_match");

#ifdef debugSeq1Beg
	if ((pos1 < debugSeq1Beg) || (pos1+length > debugSeq1End)) return;
#endif // debugSeq1Beg

	// optional debugging line, reporting the diagonal

	if (maf_dbgReportDiag)
		fprintf (f, "# diagonal=" sgnposFmt "\n", diagNumber(pos1,pos2));

	// optional comment lines

	if (withComments)
		{
		// identity;  the percentage is omitted when the denominator is zero
		segment_identity (seq1, pos1, seq2, pos2, length, &ratioTop, &ratioBottom);
		fprintf (f, "# identity=" unsposSlashFmt, ratioTop, ratioBottom);
		if (ratioBottom != 0)
			fprintf (f, " (%.1f%%)", (100.0*ratioTop) / ratioBottom);
		fprintf (f, "\n");

		// coverage;  same treatment of a zero denominator
		seg.pos1   = pos1;
		seg.pos2   = pos2;
		seg.length = length;
		segment_coverage (seq1, seq2, &seg, &ratioTop, &ratioBottom);
		fprintf (f, "# coverage=" unsposSlashFmt, ratioTop, ratioBottom);
		if (ratioBottom != 0)
			fprintf (f, " (%.1f%%)", (100.0*ratioTop) / ratioBottom);
		fprintf (f, "\n");

		// alignment path;  a gap-free match is a single run of length "m"
		fprintf (f, "# cigar=" unsposFmt "m\n", length);
		}

	// determine names, starts, strands and true lengths (sequence 1 first,
	// then sequence 2)

	maf_resolve_seq_fields (seq1, pos1, "seq1",
	                        &name1, &start1, &strand1, &trueLen1);
	maf_resolve_seq_fields (seq2, pos2, "seq2",
	                        &name2, &start2, &strand2, &trueLen2);

	// summary line

	fprintf (f, "a score=" scoreFmt "\n", s);

	// decorations for the names;  sequence 2 gets a "~" prefix when names are
	// to be distinguished and the two names are the same

	prefix2 = "";
	if ((maf_distinguishNames) && (strcmp (name1, name2) == 0))
		prefix2 = "~";
	suffix1 = rcfSuffix[seq1->revCompFlags];
	suffix2 = rcfSuffix[seq2->revCompFlags];

	// column widths, so that the fields of the two "s" lines line up

	nameLen1 =                    strlen (name1) + strlen (suffix1);
	nameLen2 = strlen (prefix2) + strlen (name2) + strlen (suffix2);
	nameWidth = nameLen2;
	if (nameLen1 >= nameLen2) nameWidth = nameLen1;

	startWidth = max_digits (start1, start2);
	lenWidth   = max_digits (trueLen1, trueLen2);

	// "s" line for sequence 1;  each character goes through dna_toprint (the
	// original author notes that non-printables come out as '*', and should
	// never be seen unless there is a problem elsewhere)

	fprintf (f, "s %s%s%*s" unsposStarFmt " " unsposFmt " %c " unsposStarFmt " ",
	            name1, suffix1, nameWidth+1-nameLen1, " ",
	            startWidth, start1-1, length, strand1, lenWidth, trueLen1);

	text1 = seq1->v + pos1;
	for (col=0 ; col<length ; col++)
		fputc (dna_toprint(text1[col]), f);
	fprintf (f, "\n");

	// "s" line for sequence 2, followed by the blank line that ends the block

	fprintf (f, "s %s%s%s%*s" unsposStarFmt " " unsposFmt " %c " unsposStarFmt " ",
	            prefix2, name2, suffix2, nameWidth+1-nameLen2, " ",
	            startWidth, start2-1, length, strand2, lenWidth, trueLen2);

	text2 = seq2->v + pos2;
	for (col=0 ; col<length ; col++)
		fputc (dna_toprint(text2[col]), f);
	fprintf (f, "\n\n");
	}
// Example #2
// 0
// print_match--
//	Report one gap-free match between currParams->seq1 and currParams->seq2,
//	in whatever output format currParams->outputFormat selects.
//
//	pos1,pos2  positions of the first character of the match in each
//	           .. sequence (origin-0)
//	length     number of positions in the match
//	s          the match's score
//
//	Behavior, in order:
//	  - if currParams->searchLimit is positive and printedForQuery has
//	    reached it, nothing is done;  otherwise printedForQuery is
//	    incremented
//	  - print_header() is called if strandHeaderPrinted is not yet set
//	  - if infer_scores_dbgShowIdentity is set, an identity line is written
//	    via print_generic
//	  - the match is passed to the routine for the selected format;  an
//	    unrecognized format is reported through suicidef()
//	  - if currParams->dotplotFile is not NULL, the match is also written
//	    there via print_genpaf_match
//	  - currParams->outputFile is flushed once every matchFlushFrequency
//	    calls that get this far

void print_match (unspos pos1, unspos pos2, unspos length, score s)
	// pos1 and pos2 are the positions of first character in the match,
	// .. (origin-0).
	{
	// countdown to the next flush;  static, so it persists across calls
	static u32 printsUntilFlush = matchFlushFrequency;
	int outputFormat = currParams->outputFormat;

	// enforce the limit on matches reported (a limit of zero means no limit)

	if ((currParams->searchLimit > 0)
	 && (printedForQuery >= currParams->searchLimit))
		return;
	printedForQuery++;

	// make sure the header has been printed before the first match

	if (!strandHeaderPrinted)
		{ print_header ();  strandHeaderPrinted = true; }

	// debugging aid:  report the match's identity and identity bin

	if (infer_scores_dbgShowIdentity)
		{
		unspos numer, denom;
		u32    bin;

		segment_identity (currParams->seq1, pos1, currParams->seq2, pos2, length,
		                  &numer, &denom);
		bin = identity_bin (numer, denom);
		// nota bene: positions written as 1-based
		print_generic (currParams->outputFile,
		               unsposSlashFmt
		               " pct_identity=" unsposSlashFmt
		               " (bin as " identityBinFormat ")",
		               pos1+1, pos2+1,
		               numer, denom,
		               bin_to_identity (bin));
		}

	// hand the match to the routine for the selected output format

	switch (outputFormat)
		{
		case fmtGfa:
		case fmtGfaNoScore:
			// the "no score" variant reports a score of zero
			print_gfa_match (currParams->outputFile,
			                 currParams->seq1, pos1,
			                 currParams->seq2, pos2, length,
			                 (outputFormat == fmtGfa)? s : 0);
			break;
		case fmtLav:
		case fmtLavComment:
		case fmtLavText:
		case fmtLavInfScores:
			print_lav_match (currParams->outputFile,
			                 currParams->seq1, pos1,
			                 currParams->seq2, pos2, length,
			                 s);
			// two of the lav variants additionally get the treatment of
			// another format, by jumping to that format's case below
			if (outputFormat == fmtLavText)
				goto text_format;
			if (outputFormat == fmtLavInfScores)
				goto inf_scores_format;
			break;
		case fmtLavScore:
			print_lavscore_match (currParams->outputFile,
			                      currParams->seq1, pos1,
			                      currParams->seq2, pos2, length,
			                      s);
			break;
		case fmtAxt:
		case fmtAxtComment:
			print_axt_match (currParams->outputFile,
			                 currParams->seq1, pos1,
			                 currParams->seq2, pos2, length,
			                 s,
			                 /* comments */ outputFormat==fmtAxtComment,
			                 /* extras   */ NULL);
			break;
		case fmtAxtGeneral:
			print_axt_match (currParams->outputFile,
			                 currParams->seq1, pos1,
			                 currParams->seq2, pos2, length,
			                 s,
			                 /* comments */ false,
			                 /* extras   */ currParams->outputInfo);
			break;
		case fmtMaf:
		case fmtMafNoComment:
			print_maf_match (currParams->outputFile,
			                 currParams->seq1, pos1,
			                 currParams->seq2, pos2, length,
			                 s, /* comments */ false);
			break;
		case fmtMafComment:
			print_maf_match (currParams->outputFile,
			                 currParams->seq1, pos1,
			                 currParams->seq2, pos2, length,
			                 s, /* comments */ true);
			break;
		case fmtSoftSam:
		case fmtSoftSamNoHeader:
			print_sam_match (currParams->outputFile,
			                 currParams->seq1, pos1,
			                 currParams->seq2, pos2, length,
			                 s,
			                 /* softMasking */ true,
			                 currParams->samRGTags);
			break;
		case fmtHardSam:
		case fmtHardSamNoHeader:
			print_sam_match (currParams->outputFile,
			                 currParams->seq1, pos1,
			                 currParams->seq2, pos2, length,
			                 s,
			                 /* softMasking */ false,
			                 currParams->samRGTags);
			break;
		case fmtCigar:
			print_cigar_match (currParams->outputFile,
			                   currParams->seq1, pos1,
			                   currParams->seq2, pos2, length,
			                   s,
			                   /* withInfo       */ true,
			                   /* markMismatches */ false,
			                   /* letterAfter    */ false,
			                   /* hideSingles    */ false,
			                   /* lowerCase      */ false,
			                   /* withNewLine    */ true);
			break;
		case fmtGenpaf:
		case fmtGenpafNoHeader:
		case fmtGenpafNameHeader:
		case fmtGenpafBlast:
		case fmtGenpafBlastNoHeader:
			print_genpaf_match (currParams->outputFile,
			                    currParams->seq1, pos1,
			                    currParams->seq2, pos2, length,
			                    s, currParams->outputInfo);
			break;
		case fmtText:
		case fmtZeroText:
		text_format:
			print_text_align_match (currParams->outputFile,
			                        currParams->seq1, pos1,
			                        currParams->seq2, pos2, length,
			                        s,
			                        (outputFormat!=fmtZeroText),
			                        currParams->textContext);
			break;
		case fmtDiffs:
		case fmtDiffsNoBlocks:
			print_align_diffs_match (currParams->outputFile,
			                         currParams->seq1, pos1,
			                         currParams->seq2, pos2, length,
			                         (outputFormat == fmtDiffs),
			                         currParams->nIsAmbiguous);
			break;
		case fmtHspComp:
			print_match_composition (currParams->outputFile,
			                         currParams->seq1, pos1,
			                         currParams->seq2, pos2, length,
			                         s, currParams->hitSeed, currParams->step);
			break;
		case fmtInfStats:
			infer_stats_from_match (currParams->seq1, pos1,
			                        currParams->seq2, pos2, length);
			break;
		case fmtInfScores:
		inf_scores_format:
			gather_stats_from_match (currParams->seq1, pos1,
			                         currParams->seq2, pos2, length);
			break;
		case fmtIdDist:
			identity_dist_from_match (currParams->seq1, pos1,
			                          currParams->seq2, pos2, length);
			break;
		case fmtDeseed:
			dump_match (currParams->outputFile,
			            currParams->seq1, pos1,
			            currParams->seq2, pos2, length);
			// terminate the line on the same stream dump_match was given,
			// so the newline stays with the match when output is not stdout
			fprintf (currParams->outputFile, "\n");
			break;
		case fmtNone:
			; // (do nothing)
			break;
		default:
			suicidef ("internal error, in print_match, outputFormat=%d", outputFormat);
		}

	// echo the match to the dotplot file, if there is one

	if (currParams->dotplotFile != NULL)
		print_genpaf_match (currParams->dotplotFile,
		                    currParams->seq1, pos1,
		                    currParams->seq2, pos2, length,
		                    s, currParams->dotplotKeys);

	// flush the output periodically

	if (--printsUntilFlush == 0)
		{
		fflush (currParams->outputFile);
		printsUntilFlush = matchFlushFrequency;
		}
	}