void print_maf_match (FILE* f, seq* seq1, unspos pos1, seq* seq2, unspos pos2, unspos length, score s, int withComments) { seqpartition* sp1 = &seq1->partition; seqpartition* sp2 = &seq2->partition; partition* part; u8* s1 = seq1->v + pos1; u8* s2 = seq2->v + pos2; char* name1, *name2, *pref2, *suff1, *suff2; unspos offset1, offset2, start1, start2; unspos startLoc1, startLoc2; unspos seq1Len, seq2Len, seq1True, seq2True; char strand1, strand2; int len1, len2, nameW, startW, lenW; unspos ix; segment seg; unspos numer, denom; if (seq1->revCompFlags != rcf_forward) suicide ("attempt to print - strand or complement for sequence 1 in print_maf_match"); #ifdef debugSeq1Beg if ((pos1 < debugSeq1Beg) || (pos1+length > debugSeq1End)) return; #endif // debugSeq1Beg // report diagonal if (maf_dbgReportDiag) fprintf (f, "# diagonal=" sgnposFmt "\n", diagNumber(pos1,pos2)); if (withComments) { // report identity segment_identity (seq1, pos1, seq2, pos2, length, &numer, &denom); fprintf (f, "# identity=" unsposSlashFmt, numer, denom); if (denom != 0) fprintf (f, " (%.1f%%)", (100.0*numer) / denom); fprintf (f, "\n"); // report coverage seg.pos1 = pos1; seg.pos2 = pos2; seg.length = length; segment_coverage (seq1, seq2, &seg, &numer, &denom); fprintf (f, "# coverage=" unsposSlashFmt, numer, denom); if (denom != 0) fprintf (f, " (%.1f%%)", (100.0*numer) / denom); fprintf (f, "\n"); // report alignment path fprintf (f, "# cigar=" unsposFmt "m\n", length); } // figure out position offsets and names if (sp1->p == NULL) // sequence 1 is not partitioned { name1 = (seq1->useFullNames)? 
seq1->header : seq1->shortHeader; if ((name1 == NULL) || (name1[0] == 0)) name1 = "seq1"; offset1 = 0; startLoc1 = seq1->startLoc; seq1Len = seq1->len; seq1True = seq1->trueLen; } else // sequence 1 is partitioned { part = lookup_partition (seq1, pos1); name1 = &sp1->pool[part->header]; offset1 = part->sepBefore + 1; startLoc1 = part->startLoc; seq1Len = part->sepAfter - offset1; seq1True = part->trueLen; } if (sp2->p == NULL) // sequence 2 is not partitioned { name2 = (seq2->useFullNames)? seq2->header : seq2->shortHeader; if ((name2 == NULL) || (name2[0] == 0)) name2 = "seq2"; offset2 = 0; startLoc2 = seq2->startLoc; seq2Len = seq2->len; seq2True = seq2->trueLen; } else // sequence 2 is partitioned { part = lookup_partition (seq2, pos2); name2 = &sp2->pool[part->header]; offset2 = part->sepBefore + 1; startLoc2 = part->startLoc; seq2Len = part->sepAfter - offset2; seq2True = part->trueLen; } // print summary line fprintf (f, "a score=" scoreFmt "\n", s); // figure out fields and widths pref2 = ((maf_distinguishNames) && (strcmp (name1, name2) == 0))? "~" : ""; suff1 = rcfSuffix[seq1->revCompFlags]; suff2 = rcfSuffix[seq2->revCompFlags]; if ((seq1->revCompFlags & rcf_rev) == 0) { start1 = pos1 - offset1 + startLoc1; strand1 = '+'; } else { start1 = pos1 - offset1 + seq1True+2 - (startLoc1 + seq1Len); strand1 = '-'; } if ((seq2->revCompFlags & rcf_rev) == 0) { start2 = pos2 - offset2 + startLoc2; strand2 = '+'; } else { start2 = pos2 - offset2 + seq2True+2 - (startLoc2 + seq2Len); strand2 = '-'; } len1 = strlen (name1) + strlen (suff1); len2 = strlen (pref2) + strlen (name2) + strlen (suff2); nameW = (len1 >= len2)? 
len1 : len2; startW = max_digits (start1, start2); lenW = max_digits (seq1True, seq2True); // print aligning segment of sequence 1 (non-printables are printed as '*' // but such should never be seen unless there is a problem elsewhere) fprintf (f, "s %s%s%*s" unsposStarFmt " " unsposFmt " %c " unsposStarFmt " ", name1, suff1, nameW+1-len1, " ", startW, start1-1, length, strand1, lenW, seq1True); for (ix=0 ; ix<length ; ix++) fprintf (f, "%c", dna_toprint(s1[ix])); fprintf (f, "\n"); // print aligning segment of sequence 2 fprintf (f, "s %s%s%s%*s" unsposStarFmt " " unsposFmt " %c " unsposStarFmt " ", pref2, name2, suff2, nameW+1-len2, " ", startW, start2-1, length, strand2, lenW, seq2True); for (ix=0 ; ix<length ; ix++) fprintf (f, "%c", dna_toprint(s2[ix])); fprintf (f, "\n\n"); }
//----------
//
// print_match--
//	Report one gap-free match between currParams->seq1 and currParams->seq2,
//	dispatching to the routine for the output format selected in
//	currParams->outputFormat.
//
//----------
//
// Arguments:
//	unspos	pos1, pos2:	The positions of first character in the match, in
//						.. sequence 1 and sequence 2 (origin-0).
//	unspos	length:		The length of the match.
//	score	s:			The match's score, handed on to the format-specific
//						.. routine (forced to zero for fmtGfaNoScore).
//
// Returns:
//	(nothing)
//
//----------
//
// Notes:
//	(1)	Nothing is reported once printedForQuery has reached
//		currParams->searchLimit;  a limit that is not positive disables that
//		check.
//	(2)	print_header() is called before the first match reported while
//		strandHeaderPrinted is false.
//	(3)	If currParams->dotplotFile is not NULL the match is also written
//		there via print_genpaf_match().
//	(4)	currParams->outputFile is flushed once every matchFlushFrequency
//		reported matches.
//
//----------

void print_match
   (unspos	pos1,
	unspos	pos2,
	unspos	length,
	score	s)
	{
	static u32	printsUntilFlush = matchFlushFrequency;
	int			outputFormat     = currParams->outputFormat;

	// honor the limit on the number of matches reported

	if ((currParams->searchLimit > 0)
	 && (printedForQuery >= currParams->searchLimit))
		return;
	printedForQuery++;

	// make sure the header has been written before the first match

	if (!strandHeaderPrinted)
		{
		print_header ();
		strandHeaderPrinted = true;
		}

	// debugging aid:  report the match's identity and identity bin

	if (infer_scores_dbgShowIdentity)
		{
		unspos	numer, denom;
		u32		bin;

		segment_identity (currParams->seq1, pos1, currParams->seq2, pos2, length,
		                  &numer, &denom);
		bin = identity_bin (numer, denom);
		// nota bene: positions written as 1-based
		print_generic (currParams->outputFile,
		               unsposSlashFmt " pct_identity=" unsposSlashFmt " (bin as " identityBinFormat ")",
		               pos1+1, pos2+1, numer, denom, bin_to_identity (bin));
		}

	// write the match in the selected format

	switch (outputFormat)
		{
		case fmtGfa:
		case fmtGfaNoScore:
			print_gfa_match (currParams->outputFile,
			                 currParams->seq1, pos1, currParams->seq2, pos2, length,
			                 (outputFormat == fmtGfa)? s : 0);
			break;

		case fmtLav:
		case fmtLavComment:
		case fmtLavText:
		case fmtLavInfScores:
			print_lav_match (currParams->outputFile,
			                 currParams->seq1, pos1, currParams->seq2, pos2, length,
			                 s);
			// two of the lav variants additionally perform the work of
			// another format
			if (outputFormat == fmtLavText)      goto text_format;
			if (outputFormat == fmtLavInfScores) goto inf_scores_format;
			break;

		case fmtLavScore:
			print_lavscore_match (currParams->outputFile,
			                      currParams->seq1, pos1, currParams->seq2, pos2, length,
			                      s);
			break;

		case fmtAxt:
		case fmtAxtComment:
			print_axt_match (currParams->outputFile,
			                 currParams->seq1, pos1, currParams->seq2, pos2, length,
			                 s,
			                 /* comments */ outputFormat==fmtAxtComment,
			                 /* extras   */ NULL);
			break;

		case fmtAxtGeneral:
			print_axt_match (currParams->outputFile,
			                 currParams->seq1, pos1, currParams->seq2, pos2, length,
			                 s,
			                 /* comments */ false,
			                 /* extras   */ currParams->outputInfo);
			break;

		case fmtMaf:
		case fmtMafNoComment:
			print_maf_match (currParams->outputFile,
			                 currParams->seq1, pos1, currParams->seq2, pos2, length,
			                 s, /* comments */ false);
			break;

		case fmtMafComment:
			print_maf_match (currParams->outputFile,
			                 currParams->seq1, pos1, currParams->seq2, pos2, length,
			                 s, /* comments */ true);
			break;

		case fmtSoftSam:
		case fmtSoftSamNoHeader:
			print_sam_match (currParams->outputFile,
			                 currParams->seq1, pos1, currParams->seq2, pos2, length,
			                 s,
			                 /* softMasking */ true,
			                 currParams->samRGTags);
			break;

		case fmtHardSam:
		case fmtHardSamNoHeader:
			print_sam_match (currParams->outputFile,
			                 currParams->seq1, pos1, currParams->seq2, pos2, length,
			                 s,
			                 /* softMasking */ false,
			                 currParams->samRGTags);
			break;

		case fmtCigar:
			print_cigar_match (currParams->outputFile,
			                   currParams->seq1, pos1, currParams->seq2, pos2, length,
			                   s,
			                   /* withInfo       */ true,
			                   /* markMismatches */ false,
			                   /* letterAfter    */ false,
			                   /* hideSingles    */ false,
			                   /* lowerCase      */ false,
			                   /* withNewLine    */ true);
			break;

		case fmtGenpaf:
		case fmtGenpafNoHeader:
		case fmtGenpafNameHeader:
		case fmtGenpafBlast:
		case fmtGenpafBlastNoHeader:
			print_genpaf_match (currParams->outputFile,
			                    currParams->seq1, pos1, currParams->seq2, pos2, length,
			                    s, currParams->outputInfo);
			break;

		case fmtText:
		case fmtZeroText:
		text_format:
			print_text_align_match (currParams->outputFile,
			                        currParams->seq1, pos1, currParams->seq2, pos2, length,
			                        s,
			                        (outputFormat!=fmtZeroText),
			                        currParams->textContext);
			break;

		case fmtDiffs:
		case fmtDiffsNoBlocks:
			print_align_diffs_match (currParams->outputFile,
			                         currParams->seq1, pos1, currParams->seq2, pos2, length,
			                         (outputFormat == fmtDiffs),
			                         currParams->nIsAmbiguous);
			break;

		case fmtHspComp:
			print_match_composition (currParams->outputFile,
			                         currParams->seq1, pos1, currParams->seq2, pos2, length,
			                         s, currParams->hitSeed, currParams->step);
			break;

		case fmtInfStats:
			infer_stats_from_match (currParams->seq1, pos1, currParams->seq2, pos2, length);
			break;

		case fmtInfScores:
		inf_scores_format:
			gather_stats_from_match (currParams->seq1, pos1, currParams->seq2, pos2, length);
			break;

		case fmtIdDist:
			identity_dist_from_match (currParams->seq1, pos1, currParams->seq2, pos2, length);
			break;

		case fmtDeseed:
			dump_match (currParams->outputFile,
			            currParams->seq1, pos1, currParams->seq2, pos2, length);
			// the terminating newline must go to the same stream as the
			// match itself, not unconditionally to stdout
			fprintf (currParams->outputFile, "\n");
			break;

		case fmtNone:
			; // (do nothing)
			break;

		default:
			suicidef ("internal error, in print_match, outputFormat=%d", outputFormat);
		}

	// also feed the dotplot file, if there is one

	if (currParams->dotplotFile != NULL)
		print_genpaf_match (currParams->dotplotFile,
		                    currParams->seq1, pos1, currParams->seq2, pos2, length,
		                    s, currParams->dotplotKeys);

	// flush the output file periodically

	if (--printsUntilFlush == 0)
		{
		fflush (currParams->outputFile);
		printsUntilFlush = matchFlushFrequency;
		}
	}