// filter_aligns_by_num_gaps--
//	Filter a list of alignments, keeping only those that contain no more than
//	a given number of separate gaps.
//
// Arguments:
//	alignel*	alignList:				The list of alignments to filter.
//	s32			maxSeparateGapsCount:	The largest number of gaps an alignment
//										.. may contain and still be kept.  A
//										.. negative value rejects everything,
//										.. since even a gap-free alignment has
//										.. zero gaps.
//
// Returns:
//	The head of the filtered list (NULL if no alignment is kept).  Kept
//	elements stay in their original relative order and are relinked in place.
//	Rejected elements and their edit scripts are handed to free_if_valid, so
//	the caller must use the returned pointer in place of alignList.

alignel* filter_aligns_by_num_gaps
   (alignel*	alignList,
	s32			maxSeparateGapsCount)
	{
	alignel*	a, *next;
	alignel*	head, *prev;
	unspos		height, width, i, j, run;
	u32			opIx;
	s32			numGaps;

	// process each alignment, collecting a list of those that have few enough
	// gaps

	head = prev = NULL;
	for (a=alignList ; a!=NULL ; a=next)
		{
		// (fetch the successor now;  a may be freed or relinked below)

		next = a->next;

		// count the alignment's gaps by walking its edit script;  each indel
		// consumed from the script counts as one gap, whatever its length

		numGaps = 0;

		height = a->end1 - a->beg1 + 1;
		width  = a->end2 - a->beg2 + 1;
		opIx = 0;
		for (i=j=0 ; (i<height)||(j<width) ; )
			{
			// handle the next run

			run = edit_script_run_of_subs (a->script, &opIx);
			i += run; j += run;

			// handle the next indel;  we stop as soon as the limit is
			// exceeded, there's no need to count the rest

			if ((i < height) || (j < width))
				{
				edit_script_indel_len (a->script, &opIx, &i, &j);
				if (++numGaps > maxSeparateGapsCount)
					break;
				}
			}

		// if the alignment is too gappy, discard it

		if (numGaps > maxSeparateGapsCount)
			{ // (unwanted alignment, discard it)
			free_if_valid ("filter_aligns_by_num_gaps a->script", a->script);
			free_if_valid ("filter_aligns_by_num_gaps a",         a);
			continue;
			}

		// this alignment is ok, add it to the end of the new list we're
		// building

		if (head == NULL) head = prev = a;
		             else { prev->next = a;  prev = a; }

		a->next = NULL;
		}

	return head;
	}
// ---- Example #2 ----
// print_lav_align--
//	Write one gapped alignment to a file as an "a { ... }" stanza:  an "s"
//	line with the score, "b" and "e" lines with the begin and end positions,
//	and one "l" line for every gap-free run in the edit script.
//
// Arguments:
//	FILE*		f:			The file to write to.
//	const u8*	seq1, seq2:	The sequence text;  the runs are compared within
//							.. these by edit_script_run_of_subs_match.
//	unspos		beg1, end1:	The aligned interval in sequence 1.  The start is
//							.. incremented on entry, the end is used as given.
//	unspos		beg2, end2:	The aligned interval in sequence 2 (likewise).
//	editscript*	script:		The alignment's edit script.
//	score		s:			The alignment's score.

void print_lav_align
   (FILE*		f,
	const u8*	seq1,
	unspos		beg1,
	unspos		end1,
	const u8*	seq2,
	unspos		beg2,
	unspos		end2,
	editscript*	script,
	score		s)
	{
	unspos		rows, cols, row, col, runRow, runCol;
	unspos		runLen, numMatches;
	u32			scriptIx;

	// (internally, we want origin 1, inclusive)

	beg1++;
	beg2++;

	rows = end1 - beg1 + 1;
	cols = end2 - beg2 + 1;

	// stanza header:  score, then begin and end positions

	fprintf (f, "a {\n  s " scoreFmtSimple "\n"
	            "  b " unsposFmt " " unsposFmt "\n"
	            "  e " unsposFmt " " unsposFmt "\n",
	            s, beg1, beg2, end1, end2);

	// one "l" line per gap-free run

	scriptIx = 0;
	row = col = 0;
	while ((row < rows) || (col < cols))
		{
		// remember where this run starts, then consume it

		runRow = row;
		runCol = col;
		runLen = edit_script_run_of_subs_match (script, &scriptIx,
		                                        seq1+beg1+row-1, seq2+beg2+col-1,
		                                        &numMatches);
		row += runLen;
		col += runLen;

		fprintf (f, "  l " unsposFmt " " unsposFmt
		            " "    unsposFmt " " unsposFmt " %d\n",
		            beg1+runRow, beg2+runCol, beg1+row-1, beg2+col-1,
		            align_match_percent (runLen, numMatches));

		// if that run reached the end of the alignment we're done;
		// otherwise step over the indel that follows it

		if ((row >= rows) && (col >= cols))
			break;

		edit_script_indel_len (script, &scriptIx, &row, &col);
		}

	fprintf (f, "}\n");
	}
// ---- Example #3 ----
// print_align_list_segments--
//	Walk every alignment in a list and report each of its gap-free segments
//	(the runs of substitutions between indels) with one print_match call.
//
// Each run is scored by score_match, using the scoring set and the two
// sequences held in currParams.  The positions handed to score_match and
// print_match are (beg-1) plus the run's offset within the alignment.
//
// Arguments:
//	alignel*	alignList:	The list of alignments to report;  not modified.
void print_align_list_segments (alignel* alignList)
	{
	alignel*	a;
	unspos		beg1, end1, beg2, end2;
	unspos		height, width, i, j, prevI, prevJ, run;
	u32			opIx;
	score		s;

	for (a=alignList ; a!=NULL ; a=a->next)
		{
		beg1   = a->beg1;
		end1   = a->end1;
		beg2   = a->beg2;
		end2   = a->end2;
		height = end1 - beg1 + 1;
		width  = end2 - beg2 + 1;

		// NOTE(review): snoopGenpaf_1 is a macro defined elsewhere; it may
		// refer to the locals above, so keep their names as they are
		snoopGenpaf_1;

		// print the alignment's segments

		opIx = 0;
		for (i=j=0 ; (i< height)||(j<width) ; )
			{
			// remember where the run starts;  i and j are advanced past both
			// the run and the following indel before the run is reported
			prevI = i;  prevJ = j;
			run = edit_script_run_of_subs (a->script, &opIx);
			i += run; j += run;
			if ((i < height) || (j < width))
				edit_script_indel_len (a->script, &opIx, &i, &j);

			// score and report the run (the indel itself is not reported)
			s = score_match (currParams->scoring,
			                 currParams->seq1, beg1-1+prevI,
			                 currParams->seq2, beg2-1+prevJ,
			                 run);
			print_match (beg1-1+prevI, beg2-1+prevJ, run, s);
			}
		}

	}
// alignment_gap_rate--
//	Compute the gap rate of a gapped alignment, as a fraction.
//
// Arguments:
//	alignel*	a:		The alignment of interest;  not modified.
//	unspos*		_numer:	Place to return the numerator, the number of gapped
//						.. bases.  This is the count of positions of the
//						.. alignment's interval in sequence 1 that are not in
//						.. a substitution run, plus the same count for
//						.. sequence 2.
//	unspos*		_denom:	Place to return the denominator, the total length of
//						.. the alignment's substitution runs.
//
// Returns:
//	(nothing;  results are written to *_numer and *_denom).  If the alignment
//	has no substitution runs, both are set to zero.

void alignment_gap_rate
   (alignel*	a,
	unspos*		_numer,
	unspos*		_denom)
	{
	unspos		beg1 = a->beg1;
	unspos		beg2 = a->beg2;
	unspos		height, width, i, j;
	u32			opIx;
	unspos		run;
	unspos		denom, gappedBases;

	height = a->end1 - beg1 + 1;
	width  = a->end2 - beg2 + 1;

	// walk the edit script, summing the lengths of the substitution runs

	denom = 0;
	opIx  = 0;
	for (i=j=0 ; (i< height)||(j<width) ; )
		{
		run = edit_script_run_of_subs (a->script, &opIx);
		i += run; j += run;

		denom += run;

		// step over the indel that follows the run (if there is one)

		if ((i < height) || (j < width))
			edit_script_indel_len (a->script, &opIx, &i, &j);
		}

	if (denom == 0)
		{ *_numer = *_denom = 0;  return; }

	// whatever part of each sequence's interval isn't covered by a
	// substitution run must be opposite a gap

	gappedBases = (height - denom) + (width - denom);

	*_numer = gappedBases;
	*_denom = denom;
	}
// print_sam_align--
//	Write one gapped alignment to a file as a single tab-separated line with
//	the fields qname, flag, rname, pos, mapq, cigar, mrnm, mpos, isize, seq,
//	qual and (optionally) caller-supplied tags.  Sequence 2 supplies the query
//	fields and sequence 1 the reference fields.
//
// Arguments:
//	FILE*		f:			The file to write to.
//	seq*		seq1:		The reference sequence.  It must be rcf_forward;
//							.. otherwise the program is terminated by suicide.
//	unspos		beg1, end1:	The aligned interval in sequence 1.  The start is
//							.. incremented on entry, the end is used as given.
//	seq*		seq2:		The query sequence.
//	unspos		beg2, end2:	The aligned interval in sequence 2 (likewise).
//							.. Note that end2 is overwritten further down with
//							.. the interval's end in reporting coordinates.
//	editscript*	script:		The alignment's edit script.
//	score		s:			Not referenced in the body (the declaration is
//							.. wrapped in arg_dont_complain).
//	int			softMasked:	Non-zero selects 'S' for the cigar's clipping
//							.. operations, zero selects 'H';  it is also passed
//							.. through to print_query_bases/print_query_quals.
//	char*		rgTags:		Text appended as a final tab-separated field;
//							.. NULL means no such field is written.
void print_sam_align
   (FILE*			f,
	seq*			seq1,
	unspos			beg1,
	unspos			end1,
	seq*			seq2,
	unspos			beg2,
	unspos			end2,
	editscript*		script,
	arg_dont_complain(score s),
	int				softMasked,
	char*			rgTags)
	{
	seqpartition*	sp1 = &seq1->partition;
	seqpartition*	sp2 = &seq2->partition;
	partition*		part;
	unspos			height, width, i, j, prevI, prevJ, run;
	u32				opIx;
	unspos			len2;
	char*			name1, *name2;
	unspos			offset1, offset2, start1, start2;
	unspos			startLoc1, startLoc2;
	unspos			seq2Len, seq2True;
	int				flag;
	char			maskCh;
	unspos			preMask, postMask, tmp;

	if (seq1->revCompFlags != rcf_forward)
		suicide ("attempt to print - strand or complement for sequence 1 in print_sam_align");

	beg1++; // (internally, we want origin 1, inclusive)
	beg2++;

	       height = end1 - beg1 + 1;
	len2 = width  = end2 - beg2 + 1;

	//////////
	// figure out position offsets and names
	//////////

	// for a partitioned sequence, the name, offset and start location come
	// from the partition that contains the alignment's first position

	if (sp1->p == NULL)		// sequence 1 is not partitioned
		{
		name1 = (seq1->useFullNames)? seq1->header : seq1->shortHeader;
		if ((name1 == NULL) || (name1[0] == 0)) name1 = "seq1";
		offset1   = 0;
		startLoc1 = seq1->startLoc;
		}
	else					// sequence 1 is partitioned
	 	{
		part = lookup_partition (seq1, beg1-1);
		name1     = &sp1->pool[part->header];
		offset1   = part->sepBefore + 1;
		startLoc1 = part->startLoc;
		}

	if (sp2->p == NULL)		// sequence 2 is not partitioned
		{
		name2 = (seq2->useFullNames)? seq2->header : seq2->shortHeader;
		if ((name2 == NULL) || (name2[0] == 0)) name2 = "seq2";
		offset2   = 0;
		seq2Len   = seq2->len;
		seq2True  = seq2->trueLen;
		startLoc2 = seq2->startLoc;
		}
	else					// sequence 2 is partitioned
	 	{
		part = lookup_partition (seq2, beg2-1);
		name2     = &sp2->pool[part->header];
		offset2   = part->sepBefore + 1;
		seq2Len   = part->sepAfter - offset2;
		seq2True  = part->trueLen;
		startLoc2 = part->startLoc;
		}

	//////////
	// print sam line (field names indicate below are per sam spec)
	//////////

	start1 = beg1-1 - offset1 + startLoc1;

	// compute the query interval [start2,end2] and the flag;  when sequence 2
	// has rcf_rev set the interval is computed from the far end of the
	// sequence and the flag is BAM_FREVERSE
	// NOTE(review): presumably this maps the interval back onto the forward
	// strand of the query -- confirm against the definition of startLoc

	if ((seq2->revCompFlags & rcf_rev) == 0)
		{
		start2 = beg2-1 - offset2 + startLoc2;
		end2   = start2-1 + len2;
		flag   = 0;
		}
	else
		{
		start2 = startLoc2 + offset2 + (seq2Len - beg2) - (len2-1);
		end2   = startLoc2 + offset2 + (seq2Len - beg2);
		flag   = BAM_FREVERSE;
		}

	// print qname, flag, rname, pos and mapq
	// (mapq is always written as 255)

	fprintf (f, "%s\t%d\t%s\t" unsposFmt "\t%d\t",
	            name2, flag, name1, start1, 255);

	// print cigar

	maskCh = (softMasked)? 'S' : 'H';

	// preMask and postMask are the counts of query positions before start2
	// and after end2 (measured against seq2True);  they become the clipping
	// operations at the two ends of the cigar, and trade places when
	// sequence 2 has rcf_rev set

	preMask = postMask = 0;
	if (start2 > 1) preMask = start2 - 1;
	if (end2 < seq2True) postMask = seq2True - end2;
	if ((seq2->revCompFlags & rcf_rev) != 0)
		{ tmp = preMask;  preMask = postMask;  postMask = tmp; }

	if (preMask != 0) fprintf (f, unsposFmt "%c", preMask, maskCh);

	// walk the edit script;  each run of substitutions is written as "M", an
	// indel that advances in sequence 1 as "D", and an indel that advances in
	// sequence 2 as "I"

	opIx = 0;
	for (i=j=0 ; (i< height)||(j<width) ; )
		{
		run = edit_script_run_of_subs (script, &opIx);
		fprintf (f, unsposFmt "M", run);
		i += run; j += run;

		if ((i < height) || (j < width))
			{
			prevI = i;  prevJ = j;
			edit_script_indel_len (script, &opIx, &i, &j);
			if (i > prevI)
				fprintf (f, unsposFmt "D", i - prevI);
			if (j > prevJ)
				fprintf (f, unsposFmt "I", j - prevJ);
			}
		}

	if (postMask != 0) fprintf (f, unsposFmt "%c", postMask, maskCh);

	// print mrnm, mpos, and isize
	// (always written as "*", 0 and 0)

	fprintf (f, "\t%s\t%d\t%d\t", "*", 0, 0);

	// print seq (data from sequence 2)

	print_query_bases (f, seq2, beg2-1, len2, softMasked);

	// print qual (if we have no qual data, we print "*")

	if (seq2->vq == NULL)
		fprintf (f, "\t%s", "*");
	else
		{
		fprintf (f, "\t");
		print_query_quals (f, seq2, beg2-1, len2, softMasked);
		}

	// print tags

	if (rgTags != NULL)
		fprintf (f, "\t%s", rgTags);

	fprintf (f, "\n");
	}
void print_maf_align
   (FILE*			f,
	seq*			seq1,
	unspos			beg1,
	unspos			end1,
	seq*			seq2,
	unspos			beg2,
	unspos			end2,
	editscript*		script,
	score			s)
	{
	seqpartition*	sp1 = &seq1->partition;
	seqpartition*	sp2 = &seq2->partition;
	partition*		part;
	unspos			height, width, i, j, run;
	u32				opIx;
	u8*				p, *q;
	unspos			ix;
	char*			name1, *name2, *pref2, *suff1, *suff2;
	unspos			offset1, offset2, start1, start2;
	unspos			startLoc1, startLoc2;
	unspos			seq1Len, seq2Len, seq1True, seq2True;
	char			strand1, strand2;
	unspos			startI, startJ;
	int				len1, len2, nameW, startW, endW, lenW;

#ifdef debugSeq1Beg
	if ((beg1 < debugSeq1Beg) || (end1 > debugSeq1End)) return;
#endif // debugSeq1Beg

	beg1++; // (internally, we want origin 1, inclusive)
	beg2++;

	height = end1 - beg1 + 1;
	width  = end2 - beg2 + 1;

	// report diagonal

	if (maf_dbgReportDiag)
		fprintf (f, "# diagonal=" sgnposFmt "\n", diagNumber(beg1,beg2));

	//////////
	// figure out position offsets and names
	//////////

	if (sp1->p == NULL)		// sequence 1 is not partitioned
		{
		name1 = (seq1->useFullNames)? seq1->header : seq1->shortHeader;
		if ((name1 == NULL) || (name1[0] == 0)) name1 = "seq1";
		offset1   = 0;
		startLoc1 = seq1->startLoc;
		seq1Len   = seq1->len;
		seq1True  = seq1->trueLen;
		}
	else					// sequence 1 is partitioned
	 	{
		part = lookup_partition (seq1, beg1-1);
		name1     = &sp1->pool[part->header];
		offset1   = part->sepBefore + 1;
		startLoc1 = part->startLoc;
		seq1Len   = part->sepAfter - offset1;
		seq1True  = part->trueLen;
		}

	if (sp2->p == NULL)		// sequence 2 is not partitioned
		{
		name2 = (seq2->useFullNames)? seq2->header : seq2->shortHeader;
		if ((name2 == NULL) || (name2[0] == 0)) name2 = "seq2";
		offset2   = 0;
		startLoc2 = seq2->startLoc;
		seq2Len   = seq2->len;
		seq2True  = seq2->trueLen;
		}
	else					// sequence 2 is partitioned
	 	{
		part = lookup_partition (seq2, beg2-1);
		name2     = &sp2->pool[part->header];
		startLoc2 = part->startLoc;
		offset2   = part->sepBefore + 1;
		seq2Len   = part->sepAfter - offset2;
		seq2True  = part->trueLen;
		}

	//////////
	// print summary line
	//////////

	fprintf (f, "a score=" scoreFmt "\n", s);

	//////////
	// print aligning path in sequence 1
	//////////

	// figure out fields and widths

	pref2 = ((maf_distinguishNames) && (strcmp (name1, name2) == 0))? "~" : "";
	suff1 = rcfSuffix[seq1->revCompFlags];
	suff2 = rcfSuffix[seq2->revCompFlags];

	if ((seq1->revCompFlags & rcf_rev) == 0)
		{
		start1  = beg1-1 - offset1 + startLoc1;
		strand1 = '+';
		}
	else
		{
		start1  = beg1-1 - offset1 + seq1True+2 - (startLoc1 + seq1Len);
		strand1 = '-';
		}
	if ((seq2->revCompFlags & rcf_rev) == 0)
		{
		start2  = beg2-1 - offset2 + startLoc2;
		strand2 = '+';
		}
	else
		{
		start2  = beg2-1 - offset2 + seq2True+2 - (startLoc2 + seq2Len);
		strand2 = '-';
		}

	len1  =                  strlen (name1) + strlen (suff1);
	len2  = strlen (pref2) + strlen (name2) + strlen (suff2);
	nameW = (len1 >= len2)? len1 : len2;

	startW = max_digits (start1, start2);
	endW   = max_digits (end1+1-beg1, end2+1-beg2);
	lenW   = max_digits (seq1True, seq2True);

	// print aligning path in sequence 1 (non-printables are printed as '*'
	// but such should never be seen unless there is a problem elsewhere)

	fprintf (f, "s %s%s%*s" unsposStarFmt " " unsposStarFmt " %c " unsposStarFmt " ",
	            name1, suff1, nameW+1-len1, " ",
	            startW, start1-1, endW, end1+1-beg1, strand1, lenW, seq1True);

	opIx = 0;
	for (i=j=0 ; (i<height)||(j<width) ; )
		{
		// handle the next run

		run = edit_script_run_of_subs (script, &opIx);

		p = seq1->v+beg1+i-1;
		q = seq2->v+beg2+j-1;
		for (ix=0 ; ix<run ; ix++)
			{ fprintf (f, "%c", dna_toprint(*p));  p++;  q++; }

		i += run; j += run;

		// handle the next indel

		if ((i < height) || (j < width))
			{
			startI = i;  p = seq1->v+beg1+i-1;
			startJ = j;  q = seq2->v+beg2+j-1;

			edit_script_indel_len (script, &opIx, &i, &j);

			if (i != startI)
				{
				for ( ; startI<i ; startI++)
					{ fprintf (f, "%c", dna_toprint(*p));  p++; }
				}

			if (j != startJ)
				{
				for ( ; startJ<j ; startJ++)
					{ fprintf (f, "-");  q++; }
				}
			}
		}

	fprintf (f, "\n");

	//////////
	// print aligning path in sequence 2
	//////////

	fprintf (f, "s %s%s%s%*s" unsposStarFmt " " unsposStarFmt " %c " unsposStarFmt " ",
	            pref2, name2, suff2, nameW+1-len2, " ",
	            startW, start2-1, endW, end2+1-beg2, strand2, lenW, seq2True);

	opIx = 0;
	for (i=j=0 ; (i<height)||(j<width) ; )
		{
		// handle the next run

		run = edit_script_run_of_subs (script, &opIx);

		p = seq1->v+beg1+i-1;
		q = seq2->v+beg2+j-1;
		for (ix=0 ; ix<run ; ix++)
			{ fprintf (f, "%c", dna_toprint(*q));  p++;  q++; }

		i += run; j += run;

		// handle the next indel

		if ((i < height) || (j < width))
			{
			startI = i;  p = seq1->v+beg1+i-1;
			startJ = j;  q = seq2->v+beg2+j-1;

			edit_script_indel_len (script, &opIx, &i, &j);

			if (i != startI)
				{
				for ( ; startI<i ; startI++)
					{ fprintf (f, "-");  p++; }
				}

			if (j != startJ)
				{
				for ( ; startJ<j ; startJ++)
					{ fprintf (f, "%c", dna_toprint(*q));  q++; }
				}
			}
		}

	fprintf (f, "\n\n");
	}
// print_maf_align_list--
//	Print every alignment in a list by way of print_maf_align.  When
//	withComments is non-zero, each alignment is preceded by "#" comment lines
//	giving its identity, coverage and continuity (each as a fraction, with a
//	percentage when the denominator is non-zero) and its path through the
//	edit script in cigar-like form.
//
// Arguments:
//	FILE*		f:				The file to write to.
//	alignel*	alignList:		The list of alignments to print.
//	seq*		seq1, seq2:		The two sequences.
//	int			withComments:	Non-zero => also write the comment lines.

void print_maf_align_list
   (FILE*		f,
	alignel*	alignList,
	seq*		seq1,
	seq*		seq2,
	int			withComments)
	{
	alignel*	aln;
	unspos		top, bottom;

	aln = alignList;
	while (aln != NULL)
		{
		if (withComments)
			{
			unspos rows, cols, row, col, gapRow, gapCol, subs;
			u32    scriptIx;

			// identity

			alignment_identity (seq1, seq2, aln, &top, &bottom);
			fprintf (f, "# identity=" unsposSlashFmt, top, bottom);
			if (bottom != 0)
				fprintf (f, " (%.1f%%)", (100.0*top) / bottom);
			fprintf (f, "\n");

			// coverage

			alignment_coverage (seq1, seq2, aln, &top, &bottom);
			fprintf (f, "# coverage=" unsposSlashFmt, top, bottom);
			if (bottom != 0)
				fprintf (f, " (%.1f%%)", (100.0*top) / bottom);
			fprintf (f, "\n");

			// continuity

			alignment_continuity (aln, &top, &bottom);
			fprintf (f, "# continuity=" unsposSlashFmt, top, bottom);
			if (bottom != 0)
				fprintf (f, " (%.1f%%)", (100.0*top) / bottom);
			fprintf (f, "\n");

			// alignment path:  "m" for a run of substitutions, "d" for an
			// indel step in sequence 1, "i" for an indel step in sequence 2

			fprintf (f, "# cigar=");

			rows = aln->end1 - aln->beg1 + 1;
			cols = aln->end2 - aln->beg2 + 1;

			scriptIx = 0;
			row = col = 0;
			while ((row < rows) || (col < cols))
				{
				subs = edit_script_run_of_subs (aln->script, &scriptIx);
				fprintf (f, unsposFmt "m", subs);
				row += subs;
				col += subs;

				// nothing follows the run that finishes the alignment

				if ((row >= rows) && (col >= cols))
					break;

				gapRow = row;
				gapCol = col;
				edit_script_indel_len (aln->script, &scriptIx, &row, &col);
				if (row > gapRow)
					fprintf (f, unsposFmt "d", row - gapRow);
				if (col > gapCol)
					fprintf (f, unsposFmt "i", col - gapCol);
				}

			fprintf (f, "\n");
			}

		// the alignment itself

		print_maf_align (f,
		                 seq1, aln->beg1-1, aln->end1,
		                 seq2, aln->beg2-1, aln->end2,
		                 aln->script, aln->s);

		aln = aln->next;
		}
	}