// print_x_stanza--
//	Report the number of masked positions in the manner appropriate for the
//	current output format (currParams->outputFormat).
//
// Arguments:
//	unspos numMasked:	The count to report.

void print_x_stanza (unspos numMasked)
{
    FILE* out = currParams->outputFile;
    int   fmt = currParams->outputFormat;

    switch (fmt)
    {
        // formats that report nothing
        case fmtInfScores:
        case fmtNone:
            break;

        // lav formats have a dedicated stanza writer
        case fmtLav:
        case fmtLavComment:
        case fmtLavScore:
        case fmtLavText:
        case fmtLavInfScores:
            print_lav_x_stanza (out, numMasked);
            break;

        // gfa formats use a tagged generic record
        case fmtGfa:
        case fmtGfaNoScore:
            print_gfa_generic (out, 'x', "num_masked=" unsposFmt, numMasked);
            break;

        // everything else goes through print_generic(), which decides
        // per-format whether anything is actually written
        case fmtAxt:
        case fmtAxtComment:
        case fmtAxtGeneral:
        case fmtMaf:
        case fmtMafComment:
        case fmtMafNoComment:
        case fmtSoftSam:
        case fmtSoftSamNoHeader:
        case fmtHardSam:
        case fmtHardSamNoHeader:
        case fmtCigar:
        case fmtGenpaf:
        case fmtGenpafNoHeader:
        case fmtGenpafNameHeader:
        case fmtGenpafBlast:
        case fmtGenpafBlastNoHeader:
        case fmtText:
        case fmtZeroText:
        case fmtHspComp:
        case fmtDiffs:
        case fmtDiffsNoBlocks:
        case fmtInfStats:
        case fmtIdDist:
        case fmtDeseed:
            print_generic (out, "num_masked=" unsposFmt, numMasked);
            break;

        default:
            suicidef ("internal error, in print_x_stanza, outputFormat=%d", fmt);
    }

    // (nothing is written to the dotplot file here)
}
// hex_string_to_int--
//	Parse a string as a hexadecimal integer.
//
// Arguments:
//	const char* s:	The string to parse.
//
// Returns:
//	The parsed value, converted to int.  If the string does not consist of a
//	hexadecimal number with nothing following it, the failure is reported
//	through suicidef() (which is not expected to return).

int hex_string_to_int (const char* s)
{
    unsigned int bits;      // %X requires a pointer to *unsigned* int
    char         extra;

    // exactly one converted item means "a number, and nothing after it";
    // a result of 2 means trailing characters, 0 or EOF means no number
    if (sscanf (s, "%X%c", &bits, &extra) != 1)
        suicidef ("\"%s\" is not an integer", s);

    return (int) bits;
}
// string_to_double--
//	Parse a string as a floating point number.
//
// Arguments:
//	const char* s:	The string to parse.
//
// Returns:
//	The parsed value.  A string that is not a number followed by nothing
//	else is reported through suicidef().

double string_to_double (const char* s)
{
    double value;
    char   trailing;
    int    numConverted;

    // one converted item == a number with no trailing characters
    numConverted = sscanf (s, "%lf%c", &value, &trailing);
    if (numConverted != 1)
        suicidef ("\"%s\" is not a number", s);

    return value;
}
// fopen_or_die--
//	Open a file with fopen(), reporting failure through suicidef().
//
// Arguments:
//	const char* name:	Name of the file to open.
//	const char* mode:	Mode string, passed unchanged to fopen().
//
// Returns:
//	The opened stream.

FILE* fopen_or_die (const char* name, const char* mode)
{
    FILE* stream = fopen (name, mode);

    if (stream != NULL)
        return stream;

    suicidef ("fopen_or_die failed to open \"%s\" for \"%s\"", name, mode);
    return NULL; // (only reached if suicidef returns)
}
// pct_string_to_double--
//	Parse a string of the form "<number>%" and return it as a fraction.
//
// Arguments:
//	const char* s:	The string to parse.
//
// Returns:
//	The number divided by 100.  Anything other than a number followed by
//	exactly one '%' is reported through suicidef().

double pct_string_to_double (const char* s)
{
    double value;
    char   sign, trailing;
    int    numConverted;
    int    isPercentage;

    // two converted items == number plus one character, nothing after;
    // (sign is only examined when it was actually filled in)
    numConverted = sscanf (s, "%lf%c%c", &value, &sign, &trailing);
    isPercentage = (numConverted == 2) && (sign == '%');

    if (!isPercentage)
        suicidef ("\"%s\" is not a percentage", s);

    return value / 100.0;
}
// validate_heap--
//	Debugging aid;  check that a segment table satisfies the min-heap
//	ordering on score and that every node's scoreCov equals its own length
//	plus the scoreCov of each child that has the same score.  Any violation
//	is reported through suicidef(), prefixed with the caller's message.
//
// Arguments:
//	segtable*	st:		The table to check.
//	char*		msg:	Text identifying the caller, used in error messages.

static void validate_heap (segtable* st, char* msg)
{
    int nodeIx;

    if (st->coverage < st->coverageLimit)
        suicidef ("%s, below coverage limit", msg);

    for (nodeIx = 0; nodeIx < st->len; nodeIx++)
    {
        segment* node     = &st->seg[nodeIx];
        possum   expected = (possum) node->length;
        int      childIx  = 2*nodeIx + 1;       // left child
        int      lastIx   = childIx + 1;        // right child

        // visit the left child, then the right child, stopping at the
        // first one that lies beyond the end of the table
        for ( ; (childIx <= lastIx) && (childIx < st->len); childIx++)
        {
            segment* child = &st->seg[childIx];

            if (child->s < node->s)
                suicidef ("%s, node %d > node %d", msg, nodeIx, childIx);
            if (child->s == node->s)
                expected += child->scoreCov;
        }

        if (expected != node->scoreCov)
            suicidef ("%s, node %d has bad score coverage", msg, nodeIx);
    }
}
// string_to_int--
//	Parse a string as a signed decimal integer.
//
// Arguments:
//	const char* s:	The string to parse;  leading blanks, tabs and newlines
//					are skipped.
//
// Returns:
//	The parsed value.  An empty string, a string that is not an integer
//	followed by nothing else, or a value whose sign disagrees with the text
//	(taken as evidence of overflow) is reported through suicidef().

int string_to_int (const char* s)
{
    const char* p = s;
    int         value;
    char        trailing;

    // skip to first non-blank
    while ((*p == ' ') || (*p == '\t') || (*p == '\n'))
        p++;

    if (*p == 0)
        suicidef ("an empty string is not an integer");
    else if (sscanf (p, "%d%c", &value, &trailing) != 1)
        suicidef ("\"%s\" is not an integer", s);
    else if (((value < 0) && (*p != '-'))
          || ((value > 0) && (*p == '-')))
        // the sign of the result must match the sign in the text
        suicidef ("\"%s\" is outside the range of a signed integer", s);
    else
        return value;

    return 0; // (only reached if suicidef returns)
}
void* realloc_or_die (char* id, void* _p, size_t size) { void* p; // make sure size is legit if (size > mallocLimit) { if (id == NULL) suicidef ("realloc_or_die blocked large request, for %s bytes (max is %s)", commatize(size), commatize(mallocLimit)); else suicidef ("realloc_or_die blocked large request, for %s bytes (max is %s), for %s", commatize(size), commatize(mallocLimit), id); } if (size == 0) size = 1; // allocate the memory p = realloc (_p, size); if (p == NULL) { if (id == NULL) suicidef ("call to realloc failed to allocate %lu bytes", commatize(size)); else suicidef ("call to realloc failed to allocate %lu bytes, for %s", commatize(size), id); } reportRealloc (id, _p, p, size); return p; }
// new_segment_table--
//	Allocate an empty segment table.
//
// Arguments:
//	u32		size:			The number of segments to allocate room for;
//							must be at least 1.
//	unspos	coverageLimit:	Stored in the table's coverageLimit field.
//
// Returns:
//	A pointer to the new table, allocated with malloc_or_die().  A bad size
//	or a table that would exceed mallocLimit is reported through suicidef().

segtable* new_segment_table (u32 size, unspos coverageLimit)
{
    segtable* table;
    size_t    numBytes;

    // sanity check

    if (size < 1)
        suicidef ("in new_segment_table(), size can't be %d", size);

    // make sure the table fits within the allocation limit

    numBytes = segtable_bytes (size);
    if (numBytes > mallocLimit)
    {
        suicidef ("internal error, in new_segment_table()\n"
                  "table size (%s) exceeds allocation limit of %s;",
                  commatize(numBytes), commatize(mallocLimit));
        return NULL; // (doesn't get here)
    }

    // allocate, and start out empty

    table = (segtable*) malloc_or_die ("new_segment_table", numBytes);

    table->size          = size;
    table->len           = 0;
    table->haveScores    = false;
    table->coverageLimit = coverageLimit;
    table->coverage      = 0;
    table->lowScore      = worstPossibleScore;

    return table;
}
// print_m_stanza--
//	Write a census as an m stanza;  only the lav family of output formats
//	writes anything.
//
// Arguments:
//	census* cen:	The census to report;  note that this might be NULL (it
//					is passed on to print_lav_m_stanza unchanged).

void print_m_stanza (census* cen)
{
    int fmt = currParams->outputFormat;

    switch (fmt)
    {
        // all the non-lav formats have no m stanza
        case fmtGfa:
        case fmtGfaNoScore:
        case fmtAxt:
        case fmtAxtComment:
        case fmtAxtGeneral:
        case fmtMaf:
        case fmtMafComment:
        case fmtMafNoComment:
        case fmtSoftSam:
        case fmtSoftSamNoHeader:
        case fmtHardSam:
        case fmtHardSamNoHeader:
        case fmtCigar:
        case fmtGenpaf:
        case fmtGenpafNoHeader:
        case fmtGenpafNameHeader:
        case fmtGenpafBlast:
        case fmtGenpafBlastNoHeader:
        case fmtText:
        case fmtZeroText:
        case fmtHspComp:
        case fmtDiffs:
        case fmtDiffsNoBlocks:
        case fmtInfStats:
        case fmtInfScores:
        case fmtIdDist:
        case fmtDeseed:
        case fmtNone:
            break;

        case fmtLav:
        case fmtLavComment:
        case fmtLavScore:
        case fmtLavText:
        case fmtLavInfScores:
            print_lav_m_stanza (currParams->outputFile, cen);
            break;

        default:
            suicidef ("internal error, in print_m_stanza, outputFormat=%d", fmt);
    }

    // (nothing is written to the dotplot file here)
}
void print_comment_close (void) { int outputFormat = currParams->outputFormat; switch (outputFormat) { case fmtLav: case fmtLavComment: case fmtLavScore: case fmtLavText: case fmtLavInfScores: print_lav_comment_close (currParams->outputFile); break; case fmtGfa: case fmtGfaNoScore: case fmtAxt: case fmtAxtComment: case fmtAxtGeneral: case fmtMaf: case fmtMafComment: case fmtMafNoComment: case fmtSoftSam: case fmtSoftSamNoHeader: case fmtHardSam: case fmtHardSamNoHeader: case fmtCigar: case fmtGenpaf: case fmtGenpafNoHeader: case fmtGenpafNameHeader: case fmtGenpafBlast: case fmtGenpafBlastNoHeader: case fmtText: case fmtZeroText: case fmtHspComp: case fmtDiffs: case fmtDiffsNoBlocks: case fmtInfStats: case fmtInfScores: case fmtIdDist: case fmtDeseed: case fmtNone: ; // (do nothing) break; default: suicidef ("internal error, in print_comment_close, outputFormat=%d", outputFormat); } }
void print_generic (FILE* f, const char* format, ...) { int outputFormat; va_list args; va_start (args, format); outputFormat = currParams->outputFormat; switch (outputFormat) { case fmtGfa: case fmtGfaNoScore: vprint_gfa_generic (f, 'z', format, args); break; case fmtLavComment: vprint_lav_comment (f, format, args); break; case fmtLavText: vprint_lav_comment (f, format, args); if (format != NULL) { va_end (args); va_start (args, format); vfprintf (f, format, args); fprintf (f, "\n"); } break; case fmtAxtComment: vprint_axt_comment (f, format, args); break; case fmtMafComment: vprint_maf_comment (f, format, args); break; case fmtText: case fmtZeroText: if (format != NULL) { vfprintf (f, format, args); fprintf (f, "\n"); } break; case fmtLav: case fmtLavScore: case fmtLavInfScores: case fmtAxt: case fmtAxtGeneral: case fmtMaf: case fmtMafNoComment: case fmtSoftSam: case fmtSoftSamNoHeader: case fmtHardSam: case fmtHardSamNoHeader: case fmtCigar: case fmtGenpaf: case fmtGenpafNoHeader: case fmtGenpafNameHeader: case fmtGenpafBlast: case fmtGenpafBlastNoHeader: case fmtHspComp: case fmtDiffs: case fmtDiffsNoBlocks: case fmtInfStats: case fmtInfScores: case fmtIdDist: case fmtDeseed: case fmtNone: ; // (do nothing) break; default: suicidef ("internal error, in print_generic, outputFormat=%d", outputFormat); } // if (currParams->dotplotFile != NULL) // ; // (do nothing) va_end (args); }
void print_job_header (void) { int outputFormat = currParams->outputFormat; switch (outputFormat) { case fmtGfa: case fmtGfaNoScore: print_gfa_job_header (currParams->outputFile, program_name(), currParams->seq1->filename, currParams->seq2->filename); break; case fmtLav: case fmtLavComment: case fmtLavScore: case fmtLavText: case fmtLavInfScores: print_lav_job_header (currParams->outputFile, program_name(), currParams->seq1->filename, currParams->seq2->filename, currParams->args, currParams->scoring, &currParams->hspThreshold, &currParams->gappedThreshold, currParams->dynamicMasking, /*withExtras*/ (outputFormat==fmtLavComment), currParams->xDrop, currParams->yDrop); if (outputFormat == fmtLavText) goto text_format; if (outputFormat == fmtLavInfScores) goto inf_scores_format; break; case fmtAxt: case fmtAxtComment: case fmtAxtGeneral: print_axt_job_header (currParams->outputFile, program_name(), currParams->args, currParams->scoring, &currParams->hspThreshold, &currParams->gappedThreshold, currParams->xDrop, currParams->yDrop); break; case fmtMaf: case fmtMafComment: case fmtMafNoComment: print_maf_job_header (currParams->outputFile, program_name(), currParams->args, currParams->scoring, &currParams->hspThreshold, &currParams->gappedThreshold, currParams->xDrop, currParams->yDrop, (outputFormat != fmtMafNoComment)); break; case fmtSoftSam: case fmtHardSam: print_sam_job_header (currParams->outputFile,currParams->readGroup); break; case fmtSoftSamNoHeader: case fmtHardSamNoHeader: ; // (do nothing) break; case fmtCigar: print_cigar_job_header (currParams->outputFile); break; case fmtGenpaf: print_genpaf_job_header (currParams->outputFile, currParams->outputInfo); break; case fmtGenpafNoHeader: case fmtGenpafNameHeader: print_genpaf_job_header (NULL, NULL); break; case fmtGenpafBlast: print_blast_job_header (currParams->outputFile); break; case fmtGenpafBlastNoHeader: ; // (do nothing) break; case fmtText: case fmtZeroText: text_format: print_text_align_job_header 
(currParams->outputFile, program_name(), currParams->seq1->filename, currParams->seq2->filename, (outputFormat!=fmtZeroText)); break; case fmtDiffs: case fmtDiffsNoBlocks: print_align_diffs_job_header (currParams->outputFile, program_name(), currParams->seq1->filename, currParams->seq2->filename); break; case fmtInfStats: init_inference_stats_job (currParams->seq1, currParams->seq2); break; case fmtIdDist: init_identity_dist_job (currParams->seq1, currParams->seq2); break; case fmtInfScores: inf_scores_format: ; // (do nothing) break; case fmtHspComp: case fmtDeseed: case fmtNone: ; // (do nothing) break; default: suicidef ("internal error, in print_job_header, outputFormat=%d", outputFormat); } // if (currParams->dotplotFile != NULL) // ; // (do nothing) }
void print_job_footer (void) { int outputFormat = currParams->outputFormat; switch (outputFormat) { case fmtGfa: case fmtGfaNoScore: print_gfa_job_footer (currParams->outputFile); break; case fmtLav: case fmtLavComment: case fmtLavScore: case fmtLavText: case fmtLavInfScores: print_lav_job_footer (currParams->outputFile); if (outputFormat == fmtLavText) goto text_format; if (outputFormat == fmtLavInfScores) goto inf_scores_format; break; case fmtAxt: case fmtAxtComment: case fmtAxtGeneral: print_axt_job_footer (currParams->outputFile); break; case fmtMaf: case fmtMafComment: case fmtMafNoComment: print_maf_job_footer (currParams->outputFile); break; case fmtSoftSam: case fmtSoftSamNoHeader: case fmtHardSam: case fmtHardSamNoHeader: ; // (do nothing) break; case fmtCigar: print_cigar_job_footer (currParams->outputFile); break; case fmtGenpaf: print_genpaf_job_footer (currParams->outputFile); break; case fmtGenpafNoHeader: case fmtGenpafNameHeader: ; // (do nothing) break; case fmtGenpafBlast: print_blast_job_footer (currParams->outputFile); break; case fmtGenpafBlastNoHeader: ; // (do nothing) break; case fmtText: case fmtZeroText: text_format: print_text_align_job_footer (currParams->outputFile); break; case fmtDiffs: case fmtDiffsNoBlocks: print_align_diffs_job_footer (currParams->outputFile); break; case fmtInfStats: print_inference_stats_job (currParams->outputFile); break; case fmtIdDist: print_identity_dist_job (currParams->outputFile); break; case fmtInfScores: inf_scores_format: ; // (do nothing) break; case fmtHspComp: case fmtDeseed: case fmtNone: ; // (do nothing) break; default: suicidef ("internal error, in print_job_footer, outputFormat=%d", outputFormat); } // if (currParams->dotplotFile != NULL) // ; // (do nothing) }
// string_to_int2--
//	Parse a string as an integer, allowing a K, M or G unit suffix (either
//	case) and allowing a fractional value, e.g. "1.5K".
//
// Arguments:
//	const char*	s:				The string to parse.
//	int			byThousands:	true  => K, M, G are 10^3, 10^6, 10^9
//								false => K, M, G are 2^10, 2^20, 2^30
//
// Returns:
//	The value.  Fractional values are scaled by the unit and then rounded to
//	the nearest integer, halves away from zero.  A string that is not a
//	number, or a value that does not fit in an int, is reported through
//	suicidef().
//
// Notes:
//	(1)	The suffix is only recognized when the string is shorter than the
//		local copy buffer;  longer strings are parsed as they are.

int string_to_int2 (const char* s, int byThousands)
{
    char        ss[20];
    int         len = strlen (s);
    const char* parseMe;
    int         v;
    double      vf, scaled;
    char        extra;
    int         mult;

    // strip the unit suffix, if there is one, from a private copy

    mult = 1;
    if (len >= (int) sizeof (ss))
        parseMe = s;
    else
    {
        parseMe = ss;
        strcpy (ss, s);

        if (len > 0)
        {
            switch (ss[len-1])
            {
                case 'K': case 'k':
                    mult = (byThousands)? 1000 : 1024;
                    break;
                case 'M': case 'm':
                    mult = (byThousands)? 1000000 : 1024L * 1024L;
                    break;
                case 'G': case 'g':
                    mult = (byThousands)? 1000000000 : 1024L * 1024L * 1024L;
                    break;
                default:
                    break;
            }

            if (mult != 1)
                ss[len-1] = 0;
        }
    }

    // plain integer;  check the range by division so that neither the
    // multiplication nor the test itself can overflow (in particular, we
    // never negate v, which would overflow for INT_MIN)

    if (sscanf (parseMe, "%d%c", &v, &extra) == 1)
    {
        if (mult != 1)
        {
            if (v > INT_MAX / mult) goto overflow;
            if (v < INT_MIN / mult) goto overflow;
            v *= mult;
        }
        return v;
    }

    // otherwise it has to be a floating point value;  we work in double so
    // that every int is representable exactly, round half away from zero,
    // and only convert once the result is known to fit (the tests are
    // phrased so that a NaN is rejected too)

    if (sscanf (parseMe, "%lf%c", &vf, &extra) != 1)
        goto bad;

    scaled  = vf * mult;
    scaled += (scaled < 0)? -0.5 : 0.5;

    if (!(scaled < (double) INT_MAX + 1.0)) goto overflow;
    if (!(scaled > (double) INT_MIN - 1.0)) goto overflow;

    return (int) scaled;

    // failure exits

bad:
    suicidef ("\"%s\" is not an integer", s);
    return 0;

overflow:
    suicidef ("\"%s\" is out of range for an integer", s);
    return 0;
}
// add_segment--
//	Add a segment to a segment table, enlarging the table if it has no room.
//
//	A table with a non-zero coverage limit is kept as a simple list until
//	its total coverage (the sum of segment lengths) reaches the limit.  At
//	that point it is sorted and from then on maintained as a min-heap on
//	score, and the lowest-scoring segments are removed for as long as the
//	coverage that remains is still at or above the limit.  A table with a
//	coverage limit of zero is always just a list.
//
// Arguments:
//	segtable*	st:		The table to add to.
//	unspos		pos1:	Stored in the new segment's pos1 field.
//	unspos		pos2:	Stored in the new segment's pos2 field.
//	unspos		length:	The segment's length;  also its contribution to
//						the table's coverage.
//	score		s:		The segment's score.
//	int			id:		Stored in the new segment's id field.
//
// Returns:
//	A pointer to the table.  The table may have been re-allocated, so the
//	caller must use this pointer in place of the one passed in.  A table
//	that would exceed mallocLimit is reported through suicidef().

segtable* add_segment (segtable* st, unspos pos1, unspos pos2, unspos length, score s, int id)
	{
	u32			newSize;
	size_t		bytesNeeded;
	segment*	seg, *parent;
	segment		tempSeg;
	int			ix, pIx;
	int			tied, stopped;

//	fprintf (stderr, "add " unsposSlashSFmt " " unsposFmt " " scoreFmtSimple "; id %d\n",
//	                 pos1+1, "+",
//	                 pos2+1, ((id & rcf_rev) != 0)? "-" : "+",
//	                 length, s, id);

	//////////
	// add the segment to the table, enlarging the table if needed, but
	// discarding the segment if it is low-scoring and the table has met its
	// coverage limit
	//////////

	// if the table is already full and this segment scores less than the
	// lowest score in the table, discard it

	if ((st->len > 0) && (st->coverageLimit != 0) && (st->coverage >= st->coverageLimit) && (s < st->lowScore))
		return st;

	// if there's no room for the new segment, re-allocate (the table grows
	// by a third of its size plus 100 entries)

	if (st->len >= st->size)
		{
		newSize     = st->size + 100 + (st->size / 3);
		bytesNeeded = segtable_bytes (newSize);
		if (bytesNeeded > mallocLimit) goto overflow;
		st = (segtable*) realloc_or_die ("add_segment", st, bytesNeeded);
		st->size = newSize;
		}

	// add the segment, by appending it at the end

	seg = &st->seg[st->len++];
	seg->pos1     = pos1;
	seg->pos2     = pos2;
	seg->length   = length;
	seg->s        = s;
	seg->id       = id;
	seg->filter   = false;
	seg->scoreCov = (possum) length;

	st->coverage += length;
	if ((st->len == 1) || (s < st->lowScore)) st->lowScore = s;

	//////////
	// handle the transition between the two table states
	//	below-the-coverage-limit:	table is kept as a simple list
	//	met-the-coverage-limit:		table is kept as a proper min-heap
	//////////

	// if this segment leaves us below the limit, we're done

	if ((st->coverageLimit == 0) || (st->coverage < st->coverageLimit))
		return st;

	// if this is the first time we've reached the limit, sort the segments to
	// create a proper min-heap, and add the tied-score information
	// nota bene:  if we reach here, st->coverageLimit > 0 and
	//             st->coverage >= st->coverageLimit

	if (st->coverage - length < st->coverageLimit)
		{
		sort_segments (st, qSegmentsByIncreasingScore);
		record_tie_scores (st);
#ifdef debugBinaryHeap
		fprintf       (stderr, "\nafter sort:\n");
		dump_segments (stderr, st, NULL, NULL);
		validate_heap (st, "after sort");
#endif // debugBinaryHeap
		goto prune;
		}

	//////////
	// maintain the min-heap property
	//////////

#ifdef debugBinaryHeap
	//fprintf       (stderr, "\nbefore percolation:\n");
	//dump_segments (stderr, st, NULL, NULL);
#endif // debugBinaryHeap

	// the rest of the list is a proper min-heap, so percolate the new segment
	// up the tree, while maintaining the tied-score information
	// nota bene:  if we reach here, length >= 2

	tied = false;
	for (ix=st->len-1 ; ix>0 ; )
		{
		pIx    = (ix-1) / 2;
		seg    = &st->seg[ix];
		parent = &st->seg[pIx];

		if (seg->s >= parent->s)
			{ tied = (seg->s == parent->s);  break; }

		// swap this segment with its parent, and adjust old parent's tied-score
		// subheap

		tempSeg = *seg;  *seg = *parent;  *parent = tempSeg;
		record_tie_score (st, ix);

		ix = pIx;
		}

	// ix is now the node at which the new segment came to rest

	record_tie_score (st, ix);

	// if the new segment tied an existing score, we must continue to percolate
	// the tied-score info up the tree

	if (tied)
		{
		stopped = false;
		for (ix=(ix-1)/2 ; ix>0 ; ix=(ix-1)/2)
			{
			if (!record_tie_score (st, ix))
				{ stopped = true;  break; }
			}
		// the loop above never visits the root, so do that here
		if (!stopped) record_tie_score (st, 0);
		}

#ifdef debugBinaryHeap
	fprintf       (stderr, "\nafter percolation:\n");
	dump_segments (stderr, st, NULL, NULL);
	validate_heap (st, "after percolation");
#endif // debugBinaryHeap

	//////////
	// remove low-scoring segments
	//////////

prune:

	// if removing the minimum scoring subheap would bring us below the
	// limit, no pruning is necessary

	if (st->coverage - st->seg[0].scoreCov < st->coverageLimit)
		return st;

	// otherwise, we must remove subheaps as long as doing so leaves us at or
	// above the limit;  each pass of the outer loop removes every segment
	// that shares the root's (lowest) score

	while (st->coverage - st->seg[0].scoreCov >= st->coverageLimit)
		{
		s = st->seg[0].s;
		while (st->seg[0].s == s)
			{
			remove_root (st);
#ifdef debugBinaryHeap
			fprintf       (stderr, "\nafter a pruning:\n");
			dump_segments (stderr, st, NULL, NULL);
			validate_heap (st, "after pruning");
#endif // debugBinaryHeap
			}
		}

	st->lowScore = st->seg[0].s;

#ifdef debugBinaryHeap
	fprintf       (stderr, "\nafter pruning:\n");
	dump_segments (stderr, st, NULL, NULL);
	validate_heap (st, "after pruning");
#endif // debugBinaryHeap

	return st;

// failure exits

#define suggestions " consider using lastz_m40," \
                    " or setting max_malloc_index for a special build," \
                    " or raising scoring threshold (--hspthresh or --exact)," \
                    " or break your target sequence into smaller pieces"

overflow:
	suicidef ("in add_segment()\n"
	          "table size (%s for %s segments) exceeds allocation limit of %s;\n"
	          suggestions,
	          commatize(bytesNeeded),
	          commatize(newSize),
	          commatize(mallocLimit));
	return NULL; // (doesn't get here)
	}
// terminate_segment_field--
//	(helper for read_segment_table)  Given the start of a field in a line,
//	mark the end of that field with a zero and return a pointer to the start
//	of the next field.  When the field is the last thing in the line the
//	returned pointer is the line's own terminator;  we never step beyond it.
//	(this assumes skip_darkspace stops at the terminating zero)

static char* terminate_segment_field (char* fieldStart)
{
    char* scan = skip_darkspace (fieldStart);

    if (*scan != 0)
    {
        *(scan++) = 0;
        scan = skip_whitespace (scan);
    }

    return scan;
}

// read_segment_table--
//	Read, from a file of segments, the run of segments that belong to the
//	current query and strand, adding them to a segment table.
//
//	Each line of the file is
//		<tname> <tstart> <tend> <qname> <qstart> <qend> <qstrand> [<score>]
//	with '#' starting a comment and blank lines ignored.  A line consisting
//	of "rewind" ends the current run and causes the file to be re-read from
//	the start on the next call.  A run also ends at the first line whose
//	query name or strand does not match the current query;  that line is
//	remembered and is the first one considered by the next call.
//
// Arguments:
//	FILE*		f:		The file to read from.
//	char*		fName:	The file's name, for error messages;  may be NULL.
//	segtable*	st:		The table to add segments to.  If this is NULL, the
//						caller is not accepting segments, and any segment
//						found is reported as an error.
//	seq*		target:	The target sequence.
//	seq*		query:	The query sequence.
//
// Returns:
//	A pointer to the table (which may have been re-allocated by
//	add_segment), or NULL if st was NULL.  Any malformed or out-of-range
//	line is reported through suicidef().
//
// Notes:
//	(1)	Parsing state is kept in static variables between calls, so this
//		can only be used on one segments file at a time.

segtable* read_segment_table (FILE* f, char* fName, segtable* st, seq* target, seq* query)
{
    // (parsing variables, preserve between calls)

    static char     line[1024];
    static int      pendingRewind           = false;
    static int      pendingLine             = false;
    static int      pendingFirstAfterRewind = false;
    static int      lineNum                 = 0;
    static char*    tName, *qName;
    static unspos   tStart, tEnd, qStart, qEnd;
    static char     qStrand;
    static score    s;

    // (normal local variables)

    int             firstAfterRewind;
    unspos          tSeqStart, tSeqEnd, qSeqStart, qSeqEnd;
    int             haveSegments, len, missingEol;
    char*           scan, *field;
    int             numItems, charsUsed;
    unspos          tOffset, tLen, qOffset, qLen, qTrue;
    char*           queryName   = "";
    char            queryStrand = '+';
    seqpartition*   tSp = &target->partition;
    seqpartition*   qSp = &query->partition;
    partition*      part;
    u32             tIx;
    int             err;

    if (fName == NULL)
        fName = "(filename not known)";

    if (st != NULL)
    {
        if (qSp->p != NULL) // query is partitioned
            queryName = "(partitioned query)";
        else                // query is not partitioned
            queryName = (query->useFullNames)? query->header : query->shortHeader;
        queryStrand = ((query->revCompFlags & rcf_rev) != 0)? '-' : '+';
    }

    // read the segments for this query/strand

    haveSegments     = false;
    missingEol       = false;
    firstAfterRewind = false;

    if (pendingRewind)
    {
        if (st != NULL)
        {
            err = fseek (f, 0, SEEK_SET);
            if (err != 0) goto rewind_failed;
            lineNum = 0;
        }
        pendingRewind    = false;
        firstAfterRewind = true;
    }

    if ((st == NULL) && (pendingLine) && (pendingFirstAfterRewind))
        pendingLine = pendingFirstAfterRewind = false;

    while (true)
    {
        // get the next line, if we need one;  we also check for lines getting
        // split by fgets (the final line in the file might not have a newline,
        // but no internal lines can be that way)

        if (pendingLine)
        {
            // the previous call already parsed this line into the static
            // parsing variables
            pendingLine      = false;
            firstAfterRewind = pendingFirstAfterRewind;
            goto parsing_finished;
        }
        else
        {
            if (fgets (line, sizeof(line), f) == NULL) break;
            lineNum++;

            if (missingEol) goto split_line;

            len = strlen(line);
            if (len == 0) continue;
            missingEol = (line[len-1] != '\n');
        }

        // trim blanks, end of line, and comments, and ignore blank lines

        if (line[len-1] == '\n') line[--len] = 0;

        field = strchr (line, '#');
        if (field != NULL) *field = 0;

        trim_string (line);
        if (line[0] == 0) continue;

        // see if this is a "rewind" command
        // $$$ we should make sure there's nothing left in the file

        if (strcmp (line, "rewind") == 0)
        {
            pendingRewind = true;
            break;
        }

        // parse the line

        scan = line;

        if (*scan == 0) goto not_enough_fields;
        tName = scan;
        scan  = terminate_segment_field (scan);

        if (*scan == 0) goto not_enough_fields;
        field = scan;
        scan  = terminate_segment_field (scan);
        charsUsed = -1;
        numItems  = sscanf (field, unsposFmtScanf "%n", &tStart, &charsUsed);
        if ((numItems != 1) || (((u32)charsUsed) != strlen(field))) goto bad_field;

        if (*scan == 0) goto not_enough_fields;
        field = scan;
        scan  = terminate_segment_field (scan);
        charsUsed = -1;
        numItems  = sscanf (field, unsposFmtScanf "%n", &tEnd, &charsUsed);
        if ((numItems != 1) || (((u32)charsUsed) != strlen(field))) goto bad_field;
        if (tEnd < tStart) goto bad_target_interval;

        if (*scan == 0) goto not_enough_fields;
        qName = scan;
        scan  = terminate_segment_field (scan);

        if (*scan == 0) goto not_enough_fields;
        field = scan;
        scan  = terminate_segment_field (scan);
        charsUsed = -1;
        numItems  = sscanf (field, unsposFmtScanf "%n", &qStart, &charsUsed);
        if ((numItems != 1) || (((u32)charsUsed) != strlen(field))) goto bad_field;

        if (*scan == 0) goto not_enough_fields;
        field = scan;
        scan  = terminate_segment_field (scan);
        charsUsed = -1;
        numItems  = sscanf (field, unsposFmtScanf "%n", &qEnd, &charsUsed);
        if ((numItems != 1) || (((u32)charsUsed) != strlen(field))) goto bad_field;
        if (qEnd < qStart) goto bad_query_interval;
        if (qEnd-qStart != tEnd - tStart) goto interval_length_mismatch;

        if (*scan == 0) goto not_enough_fields;
        field = scan;
        scan  = terminate_segment_field (scan);
        if (strlen(field) != 1) goto bad_field;
        qStrand = *field;
        if ((qStrand != '+') && (qStrand != '-')) goto bad_strand;

        // the score is optional
        s = 0;
        if (*scan == 0) goto parsing_finished;
        field = scan;
        scan  = terminate_segment_field (scan);
        charsUsed = -1;
        numItems  = sscanf (field, scoreFmtScanf "%n", &s, &charsUsed);
        if ((numItems != 1) || (((u32)charsUsed) != strlen(field))) goto bad_field;

        if (*scan != 0) goto too_many_fields;

    parsing_finished:

        // it's a *syntactically* valid segment, but if we aren't accepting any
        // more segments, this is a failure

        if (st == NULL) goto extra_segments;

        // resolve query interval

        if (qStrand != queryStrand) goto query_name_mismatch;

        qOffset = 0;
        qLen    = query->len;

        if (qSp->p == NULL)                     // query is not partitioned
        {
            if (strcmp (qName, "*") != 0)
            {
                if ((queryName != NULL)
                 && (queryName[0] != 0)
                 && (strcmp (qName, queryName) != 0))
                    goto query_name_mismatch;
            }
            qTrue = query->trueLen;
        }
        else if (strcmp (qName, "*") == 0)      // query is partitioned and
            goto query_wild_card;               // .. name is wildcard
        else                                    // query is partitioned and
        {                                       // .. specific name is given
            part = lookup_named_partition (query, qName);
            if (part == NULL) goto bad_query_name;
            qOffset = part->sepPos + 1;
            qLen    = (part+1)->sepPos - qOffset;
            qTrue   = part->trueLen;
        }

        qSeqStart = query->start;
        qSeqEnd   = qSeqStart + qLen - 1;
        if (qStrand == '-')
        {
            // express the sequence's interval in reverse-strand coordinates
            unspos qNegStart;
            qNegStart = qTrue+1 - qSeqEnd;
            qSeqEnd   = qTrue+1 - qSeqStart;
            qSeqStart = qNegStart;
        }

        if (qStart < qSeqStart) goto query_interval_before_start;
        if (qEnd   > qSeqEnd)   goto query_interval_after_end;

        // resolve target interval

        tOffset = 0;
        tLen    = target->len;

        if (tSp->p == NULL)                     // target is not partitioned
        {
            ; // (nothing to do)
        }
        else if (strcmp (tName, "*") == 0)      // target is partitioned and
            goto target_wild_card;              // .. name is wildcard
        else                                    // target is partitioned and
        {                                       // .. specific name is given
            part = lookup_named_partition (target, tName);
            if (part == NULL) goto bad_target_name;
            tOffset = part->sepPos + 1;
            tLen    = (part+1)->sepPos - tOffset;
        }

        tSeqStart = target->start;
        tSeqEnd   = tSeqStart + tLen - 1;

        if (tStart < tSeqStart) goto target_interval_before_start;
        if (tEnd   > tSeqEnd)   goto target_interval_after_end;

        // (phew!) it's a valid segment, add it to the table;  note that we
        // change to origin zero, and we use the strand as an id

        st = add_segment (st,
                          tOffset + tStart-tSeqStart,
                          qOffset + qStart-qSeqStart,
                          tEnd+1-tStart, s, /*id*/ qStrand);
        haveSegments = true;
        continue;

        // given target name is a wild card, and target is partitioned, so we
        // have to add a segment for every sequence in the partition (skipping
        // those that are too short to contain the interval)

    target_wild_card:
        tSeqStart = target->start;
        if (tStart < tSeqStart) goto target_interval_before_start;

        for (tIx=0 ; tIx<tSp->len ; tIx++)
        {
            part    = &tSp->p[tIx];
            tOffset = part->sepPos + 1;
            tLen    = (part+1)->sepPos - tOffset;
            tSeqEnd = tSeqStart + tLen - 1;
            if (tEnd > tSeqEnd) continue;

            // add the segment to the table;  note that we change to origin
            // zero, and we use the strand as an id

            st = add_segment (st,
                              tOffset + tStart-tSeqStart,
                              qOffset + qStart-qSeqStart,
                              tEnd+1-tStart, s, /*id*/ qStrand);
            haveSegments = true;
        }
        continue;

        // interval name or strand did not match query;  this marks the end of
        // the list;  otherwise, we need to keep looking

    query_name_mismatch:
        pendingLine             = true;
        pendingFirstAfterRewind = firstAfterRewind;
        break;
    }

    // success

    //fprintf (stderr, "=====\n");
    //if (st != NULL)
    //  {
    //  fprintf (stderr, "segments for %s %c\n", queryName, queryStrand);
    //  write_segments (stderr, st, target, query);
    //  }
    //else
    //  {
    //  fprintf (stderr, "no extra segments\n");
    //  fprintf (stderr, "=====\n");
    //  }

    if (st != NULL) st->haveScores = true;

    return st;

    //////////
    // failure exits
    //////////

rewind_failed:
    suicidef ("failed to rewind segments file\n"
              "in read_segment_table for %s, index fseek(0) returned %d",
              fName, err);
    return NULL;

split_line:
    suicidef ("line is too long (%s: line %d)", fName, lineNum-1);
    return NULL;

not_enough_fields:
    suicidef ("line has too few fields (%s: line %d)", fName, lineNum);
    return NULL;

too_many_fields:
    suicidef ("line has too many fields (%s: line %d)", fName, lineNum);
    return NULL;

bad_field:
    suicidef ("bad field (%s: line %d, %s)", fName, lineNum, field);
    return NULL;

bad_target_interval:
    suicidef ("bad target interval (%s: line %d, " unsposFmt ">" unsposFmt ")",
              fName, lineNum, tStart, tEnd);
    return NULL;

target_interval_before_start:
    suicidef ("target interval out of range (%s: line %d, " unsposFmt "<" unsposFmt ")",
              fName, lineNum, tStart, tSeqStart);
    return NULL;

target_interval_after_end:
    suicidef ("target interval out of range (%s: line %d, " unsposFmt ">" unsposFmt ")",
              fName, lineNum, tEnd, tSeqEnd);
    return NULL;

bad_query_interval:
    suicidef ("bad query interval (%s: line %d, " unsposFmt ">" unsposFmt ")",
              fName, lineNum, qStart, qEnd);
    return NULL;

query_interval_before_start:
    suicidef ("query interval out of range (%s: line %d, " unsposFmt "<" unsposFmt ")",
              fName, lineNum, qStart, qSeqStart);
    return NULL;

query_interval_after_end:
    suicidef ("query interval out of range (%s: line %d, " unsposFmt ">" unsposFmt ")",
              fName, lineNum, qEnd, qSeqEnd);
    return NULL;

interval_length_mismatch:
    suicidef ("intervals have different lengths (%s: line %d, " unsposFmt "!=" unsposFmt ")",
              fName, lineNum, tEnd+1-tStart, qEnd+1-qStart);
    return NULL;

bad_strand:
    suicidef ("bad strand (%s: line %d, %c)", fName, lineNum, qStrand);
    return NULL;

bad_target_name:
    suicidef ("bad target sequence name (%s: line %d, %s)", fName, lineNum, tName);
    return NULL;

bad_query_name:
    suicidef ("bad query sequence name (%s: line %d, %s)", fName, lineNum, qName);
    return NULL;

query_wild_card:
    suicidef ("bad query sequence name (%s: line %d, %s)\n"
              "wildcard segment name (*) is not supported for queries with [multi]",
              fName, lineNum, qName);
    return NULL;

extra_segments:
    suicidef ("extra segments in file (%s: line %d, %s/%s)",
              fName, lineNum, tName, qName);
    return NULL;
}
void print_header (void) { static char* prevName1 = NULL; static char* prevName2 = NULL; static char prevNameBuff1[maxSequenceName+1]; static char prevNameBuff2[maxSequenceName+1]; char* name1, *name2; int outputFormat = currParams->outputFormat; if (prevName1 == NULL) { prevName1 = prevNameBuff1; prevNameBuff1[0] = 0; } if (prevName2 == NULL) { prevName2 = prevNameBuff2; prevNameBuff2[0] = 0; } switch (outputFormat) { case fmtGfa: case fmtGfaNoScore: print_gfa_header (currParams->outputFile, currParams->seq1, currParams->seq2); break; case fmtLav: case fmtLavComment: case fmtLavScore: case fmtLavText: case fmtLavInfScores: print_lav_header (currParams->outputFile, currParams->seq1, currParams->seq2); if (outputFormat == fmtLavText) goto text_format; if (outputFormat == fmtLavInfScores) goto inf_scores_format; break; case fmtAxt: case fmtAxtComment: case fmtAxtGeneral: print_axt_header (currParams->outputFile, currParams->seq1, currParams->seq2); break; case fmtMaf: case fmtMafComment: case fmtMafNoComment: print_maf_header (currParams->outputFile, currParams->seq1, currParams->seq2); break; case fmtSoftSam: case fmtHardSam: print_sam_header (currParams->outputFile, currParams->seq1, currParams->seq2); break; case fmtSoftSamNoHeader: case fmtHardSamNoHeader: ; // (do nothing) break; case fmtCigar: print_cigar_header (currParams->outputFile, currParams->seq1, currParams->seq2); break; case fmtGenpaf: print_genpaf_header (currParams->outputFile, currParams->seq1, currParams->seq2); break; case fmtGenpafNoHeader: ; // (do nothing) break; case fmtGenpafNameHeader: { name1 = name2 = NULL; if (currParams->seq1->partition.p == NULL) // sequence 1 is not partitioned name1 = (currParams->seq1->useFullNames)? currParams->seq1->header : currParams->seq1->shortHeader; if (currParams->seq2->partition.p == NULL) // sequence 2 is not partitioned name2 = (currParams->seq1->useFullNames)? 
currParams->seq2->header : currParams->seq2->shortHeader; if ((name1 == NULL) || (name1[0] == 0)) name1 = "seq1"; if ((name2 == NULL) || (name2[0] == 0)) name2 = "seq2"; if ((strcmp (name1, prevName1) != 0) || (strcmp (name2, prevName2) != 0)) { if (strcmp(currParams->outputInfo,genpafRDotplotScoreKeys) == 0) fprintf (currParams->outputFile, "%s\t%s\tscore\n", name1, name2); else fprintf (currParams->outputFile, "%s\t%s\n", name1, name2); strncpy (/*to*/ prevName1, /*from*/ name1, sizeof(prevNameBuff1)); strncpy (/*to*/ prevName2, /*from*/ name2, sizeof(prevNameBuff2)); } } break; case fmtGenpafBlast: print_blast_header (currParams->outputFile, program_name(), currParams->args, currParams->seq1, currParams->seq2); break; case fmtGenpafBlastNoHeader: ; // (do nothing) break; case fmtText: case fmtZeroText: text_format: print_text_align_header (currParams->outputFile, currParams->seq1, currParams->seq2, (outputFormat!=fmtZeroText)); break; case fmtDiffs: case fmtDiffsNoBlocks: print_align_diffs_header (currParams->outputFile, currParams->seq1, currParams->seq2); break; case fmtHspComp: case fmtInfStats: case fmtInfScores: inf_scores_format: ; // (do nothing) break; case fmtIdDist: case fmtDeseed: case fmtNone: ; // (do nothing) break; default: suicidef ("internal error, in print_header, outputFormat=%d", outputFormat); } if (currParams->dotplotFile != NULL) { name1 = name2 = NULL; if (currParams->seq1->partition.p == NULL) // sequence 1 is not partitioned name1 = (currParams->seq1->useFullNames)? currParams->seq1->header : currParams->seq1->shortHeader; if (currParams->seq2->partition.p == NULL) // sequence 2 is not partitioned name2 = (currParams->seq1->useFullNames)? 
currParams->seq2->header : currParams->seq2->shortHeader; if ((name1 == NULL) || (name1[0] == 0)) name1 = "seq1"; if ((name2 == NULL) || (name2[0] == 0)) name2 = "seq2"; if ((strcmp (name1, prevName1) != 0) || (strcmp (name2, prevName2) != 0)) { if (strcmp(currParams->dotplotKeys,genpafRDotplotScoreKeys) == 0) fprintf (currParams->dotplotFile, "%s\t%s\tscore\n", name1, name2); else fprintf (currParams->dotplotFile, "%s\t%s\n", name1, name2); strncpy (/*to*/ prevName1, /*from*/ name1, sizeof(prevNameBuff1)); strncpy (/*to*/ prevName2, /*from*/ name2, sizeof(prevNameBuff2)); } } }
void print_census (FILE* f, seq* _seq, census* cen, char delimiter) { seqpartition* sp; partition* nextPart; u32 nextIx; char* name; unspos offset; unspos pos; u32 count; if (cen == NULL) return; // simple print with no sequence names if (_seq == NULL) { for (pos=0 ; pos<cen->len ; pos++) { count = (cen->kind == 'B')? cen->count8 [pos] : (cen->kind == 'W')? cen->count16[pos] : cen->count32[pos]; if (count >= cen->maskThresh) fprintf(f, unsposFmt "%c%u\n", pos+1, delimiter, count); } return; } // print with same sequence name, for non-partitioned sequence sp = &_seq->partition; if (sp->p == NULL) { name = (_seq->useFullNames)? _seq->header : _seq->shortHeader; if ((name == NULL) || (name[0] == 0)) name = "seq1"; for (pos=0 ; pos<cen->len ; pos++) { count = (cen->kind == 'B')? cen->count8 [pos] : (cen->kind == 'W')? cen->count16[pos] : cen->count32[pos]; if (count >= cen->maskThresh) fprintf(f, "%s%c" unsposFmt "%c%u\n", name, delimiter, pos+1, delimiter, count); } return; } // print with sequence names, for partitioned sequence nextPart = sp->p; nextIx = 0; name = NULL; offset = 0; for (pos=0 ; pos<cen->len ; pos++) { if (pos == nextPart->sepBefore) { if (nextIx < sp->len) { name = &sp->pool[nextPart->header]; offset = nextPart->sepBefore + 1; nextPart++; nextIx++; } else name = NULL; } else if (name != NULL) { count = (cen->kind == 'B')? cen->count8 [pos] : (cen->kind == 'W')? cen->count16[pos] : cen->count32[pos]; if (count >= cen->maskThresh) fprintf(f, "%s%c" unsposFmt "%c%u\n", name, delimiter, pos+1-offset, delimiter, count); } else suicidef ("internal error in print_census\n"); } }
char* print_comment_open (void) { int outputFormat = currParams->outputFormat; char* commentPrefix = NULL; switch (outputFormat) { case fmtLav: case fmtLavComment: case fmtLavScore: case fmtLavText: case fmtLavInfScores: print_lav_comment_open (currParams->outputFile); break; case fmtGfa: case fmtGfaNoScore: commentPrefix = "#"; break; case fmtAxt: case fmtAxtComment: case fmtAxtGeneral: commentPrefix = "#"; break; case fmtMaf: case fmtMafComment: case fmtMafNoComment: fprintf (stderr, "WARNING. Output is not properly MAF format\n"); commentPrefix = "#"; break; case fmtSoftSam: case fmtSoftSamNoHeader: case fmtHardSam: case fmtHardSamNoHeader: fprintf (stderr, "WARNING. Output is not properly SAM format\n"); commentPrefix = "#"; break; case fmtCigar: fprintf (stderr, "WARNING. Output is not properly CIGAR format\n"); commentPrefix = "#"; break; case fmtGenpaf: case fmtGenpafNoHeader: case fmtGenpafNameHeader: case fmtGenpafBlast: case fmtGenpafBlastNoHeader: commentPrefix = "#"; break; case fmtText: case fmtZeroText: ; // (do nothing) break; case fmtHspComp: commentPrefix = "#"; break; case fmtDiffs: case fmtDiffsNoBlocks: ; // (do nothing) break; case fmtInfStats: commentPrefix = "#"; break; case fmtInfScores: commentPrefix = "#"; break; case fmtIdDist: commentPrefix = "#"; break; case fmtDeseed: commentPrefix = "#"; break; case fmtNone: ; // (do nothing) break; default: suicidef ("internal error, in print_comment_open, outputFormat=%d", outputFormat); } return commentPrefix; }
//----------
//
// print_match--
//	Report one gap-free match in the current output format
//	(currParams->outputFormat), and also to the dotplot file if there is one.
//
// Arguments:
//	unspos	pos1, pos2:	The positions of the first character of the match in
//						.. sequence 1 and sequence 2 (origin-0).
//	unspos	length:		The length of the match.
//	score	s:			The match's score.
//
// Returns:
//	(nothing)
//
// Notes:
//	- If currParams->searchLimit is positive and printedForQuery has already
//	  reached it, nothing is reported.
//	- The header is printed (via print_header) before the first match
//	  reported while strandHeaderPrinted is false.
//	- The output file is flushed once every matchFlushFrequency reported
//	  matches.
//
//----------

void print_match (unspos pos1, unspos pos2, unspos length, score s)
{
    static u32 printsUntilFlush = matchFlushFrequency;
    int        outputFormat     = currParams->outputFormat;

    // enforce the per-query limit on reported matches

    if ((currParams->searchLimit > 0)
     && (printedForQuery >= currParams->searchLimit))
        return;
    printedForQuery++;

    // make sure the header precedes the first match

    if (!strandHeaderPrinted) {
        print_header ();
        strandHeaderPrinted = true;
    }

    // debugging aid:  report the match's identity and its identity bin

    if (infer_scores_dbgShowIdentity) {
        unspos numer, denom;
        u32    bin;

        segment_identity (currParams->seq1, pos1, currParams->seq2, pos2, length,
                          &numer, &denom);
        bin = identity_bin (numer, denom);
        // nota bene: positions written as 1-based
        print_generic (currParams->outputFile,
                       unsposSlashFmt " pct_identity=" unsposSlashFmt " (bin as " identityBinFormat ")",
                       pos1+1, pos2+1, numer, denom, bin_to_identity (bin));
    }

    // dispatch on the output format;  note that two of the lav variants jump
    // to labels inside other cases (text_format, inf_scores_format) so that
    // they perform that case's action in addition to their own

    switch (outputFormat) {
    case fmtGfa:
    case fmtGfaNoScore:
        print_gfa_match (currParams->outputFile,
                         currParams->seq1, pos1, currParams->seq2, pos2, length,
                         (outputFormat == fmtGfa)? s : 0);
        break;

    case fmtLav:
    case fmtLavComment:
    case fmtLavText:
    case fmtLavInfScores:
        print_lav_match (currParams->outputFile,
                         currParams->seq1, pos1, currParams->seq2, pos2, length,
                         s);
        if (outputFormat == fmtLavText)
            goto text_format;
        if (outputFormat == fmtLavInfScores)
            goto inf_scores_format;
        break;

    case fmtLavScore:
        print_lavscore_match (currParams->outputFile,
                              currParams->seq1, pos1, currParams->seq2, pos2, length,
                              s);
        break;

    case fmtAxt:
    case fmtAxtComment:
        print_axt_match (currParams->outputFile,
                         currParams->seq1, pos1, currParams->seq2, pos2, length,
                         s,
                         /* comments */ outputFormat==fmtAxtComment,
                         /* extras   */ NULL);
        break;

    case fmtAxtGeneral:
        print_axt_match (currParams->outputFile,
                         currParams->seq1, pos1, currParams->seq2, pos2, length,
                         s,
                         /* comments */ false,
                         /* extras   */ currParams->outputInfo);
        break;

    case fmtMaf:
    case fmtMafNoComment:
        print_maf_match (currParams->outputFile,
                         currParams->seq1, pos1, currParams->seq2, pos2, length,
                         s, /* comments */ false);
        break;

    case fmtMafComment:
        print_maf_match (currParams->outputFile,
                         currParams->seq1, pos1, currParams->seq2, pos2, length,
                         s, /* comments */ true);
        break;

    case fmtSoftSam:
    case fmtSoftSamNoHeader:
        print_sam_match (currParams->outputFile,
                         currParams->seq1, pos1, currParams->seq2, pos2, length,
                         s,
                         /* softMasking */ true,
                         currParams->samRGTags);
        break;

    case fmtHardSam:
    case fmtHardSamNoHeader:
        print_sam_match (currParams->outputFile,
                         currParams->seq1, pos1, currParams->seq2, pos2, length,
                         s,
                         /* softMasking */ false,
                         currParams->samRGTags);
        break;

    case fmtCigar:
        print_cigar_match (currParams->outputFile,
                           currParams->seq1, pos1, currParams->seq2, pos2, length,
                           s,
                           /* withInfo       */ true,
                           /* markMismatches */ false,
                           /* letterAfter    */ false,
                           /* hideSingles    */ false,
                           /* lowerCase      */ false,
                           /* withNewLine    */ true);
        break;

    case fmtGenpaf:
    case fmtGenpafNoHeader:
    case fmtGenpafNameHeader:
    case fmtGenpafBlast:
    case fmtGenpafBlastNoHeader:
        print_genpaf_match (currParams->outputFile,
                            currParams->seq1, pos1, currParams->seq2, pos2, length,
                            s, currParams->outputInfo);
        break;

    case fmtText:
    case fmtZeroText:
    text_format:
        print_text_align_match (currParams->outputFile,
                                currParams->seq1, pos1, currParams->seq2, pos2, length,
                                s,
                                (outputFormat!=fmtZeroText),
                                currParams->textContext);
        break;

    case fmtDiffs:
    case fmtDiffsNoBlocks:
        print_align_diffs_match (currParams->outputFile,
                                 currParams->seq1, pos1, currParams->seq2, pos2, length,
                                 (outputFormat == fmtDiffs),
                                 currParams->nIsAmbiguous);
        break;

    case fmtHspComp:
        print_match_composition (currParams->outputFile,
                                 currParams->seq1, pos1, currParams->seq2, pos2, length,
                                 s, currParams->hitSeed, currParams->step);
        break;

    case fmtInfStats:
        infer_stats_from_match (currParams->seq1, pos1, currParams->seq2, pos2, length);
        break;

    case fmtInfScores:
    inf_scores_format:
        gather_stats_from_match (currParams->seq1, pos1, currParams->seq2, pos2, length);
        break;

    case fmtIdDist:
        identity_dist_from_match (currParams->seq1, pos1, currParams->seq2, pos2, length);
        break;

    case fmtDeseed:
        dump_match (currParams->outputFile,
                    currParams->seq1, pos1, currParams->seq2, pos2, length);
        // the line terminator must go to the same stream as the dump itself,
        // not to stdout, or it is lost whenever output is sent to a file
        fprintf (currParams->outputFile, "\n");
        break;

    case fmtNone:
        ; // (do nothing)
        break;

    default:
        suicidef ("internal error, in print_match, outputFormat=%d", outputFormat);
    }

    // the dotplot file, if any, gets the match in genpaf form regardless of
    // the main output format

    if (currParams->dotplotFile != NULL)
        print_genpaf_match (currParams->dotplotFile,
                            currParams->seq1, pos1, currParams->seq2, pos2, length,
                            s, currParams->dotplotKeys);

    // flush the output file every matchFlushFrequency matches

    if (--printsUntilFlush == 0) {
        fflush (currParams->outputFile);
        printsUntilFlush = matchFlushFrequency;
    }
}
//----------
//
// print_align_list--
//	Report a list of alignments in the current output format
//	(currParams->outputFormat), and also to the dotplot file if there is one.
//
// Arguments:
//	alignel*	alignList:	The first alignment of the list;  the list is
//							.. traversed through each element's next field.
//
// Returns:
//	(nothing)
//
// Notes:
//	- If currParams->searchLimit is positive and printedForQuery has already
//	  reached it, nothing is reported.  The whole list counts once toward
//	  printedForQuery.
//	- The header is printed (via print_header) before the first list reported
//	  while strandHeaderPrinted is false.
//
//----------

void print_align_list (alignel* alignList)
{
    int      outputFormat = currParams->outputFormat;
    FILE*    out;
    alignel* aln;

    // enforce the per-query limit on reports

    if (currParams->searchLimit > 0) {
        if (printedForQuery >= currParams->searchLimit)
            return;
    }
    printedForQuery++;

    // make sure the header precedes the first report

    if (!strandHeaderPrinted) {
        print_header ();
        strandHeaderPrinted = true;
    }

    out = currParams->outputFile;

    // debugging aid:  report each alignment's identity and its identity bin

    if (infer_scores_dbgShowIdentity) {
        unspos numer, denom;
        u32    bin;

        aln = alignList;
        while (aln != NULL) {
            alignment_identity (currParams->seq1, currParams->seq2, aln,
                                &numer, &denom);
            bin = identity_bin (numer, denom);
            // nota bene: positions written as 1-based
            print_generic (out,
                           unsposSlashFmt " pct_identity=" unsposSlashFmt " (bin as " identityBinFormat ")",
                           aln->beg1, aln->beg2, numer, denom, bin_to_identity (bin));
            aln = aln->next;
        }
    }

    // dispatch on the output format

    switch (outputFormat) {
    case fmtGfa:
    case fmtGfaNoScore:
        print_gfa_align_list (out,
                              (outputFormat == fmtGfa)? currParams->scoring : NULL,
                              alignList,
                              currParams->seq1, currParams->seq2);
        break;

    case fmtLav:
    case fmtLavComment:
    case fmtLavScore:
    case fmtLavInfScores:
        print_lav_align_list (out, alignList,
                              currParams->seq1, currParams->seq2);
        // the lav-plus-inferred-scores format also gathers statistics, just
        // as the plain inferred-scores format does
        if (outputFormat == fmtLavInfScores)
            gather_stats_from_align_list (alignList,
                                          currParams->seq1, currParams->seq2);
        break;

    case fmtLavText:
        // each alignment is written in lav form, immediately followed by the
        // same alignment in text form
        for (aln = alignList; aln != NULL; aln = aln->next) {
            print_lav_align (out,
                             aln->seq1, aln->beg1-1, aln->end1,
                             aln->seq2, aln->beg2-1, aln->end2,
                             aln->script, aln->s);
            print_text_align_align (out,
                                    currParams->seq1, aln->beg1-1, aln->end1,
                                    currParams->seq2, aln->beg2-1, aln->end2,
                                    aln->script, aln->s,
                                    false, currParams->textContext);
        }
        break;

    case fmtAxt:
    case fmtAxtComment:
        print_axt_align_list (out, alignList,
                              currParams->seq1, currParams->seq2,
                              /* comments */ outputFormat==fmtAxtComment,
                              /* extras   */ NULL);
        break;

    case fmtAxtGeneral:
        print_axt_align_list (out, alignList,
                              currParams->seq1, currParams->seq2,
                              /* comments */ false,
                              /* extras   */ currParams->outputInfo);
        break;

    case fmtMaf:
    case fmtMafNoComment:
    case fmtMafComment:
        print_maf_align_list (out, alignList,
                              currParams->seq1, currParams->seq2,
                              /* comments */ (outputFormat == fmtMafComment));
        break;

    case fmtSoftSam:
    case fmtSoftSamNoHeader:
    case fmtHardSam:
    case fmtHardSamNoHeader:
        print_sam_align_list (out, alignList,
                              currParams->seq1, currParams->seq2,
                              /* softMasking */ ((outputFormat == fmtSoftSam)
                                              || (outputFormat == fmtSoftSamNoHeader)),
                              currParams->samRGTags);
        break;

    case fmtCigar:
        print_cigar_align_list (out, alignList,
                                currParams->seq1, currParams->seq2,
                                /* withInfo       */ true,
                                /* markMismatches */ false,
                                /* letterAfter    */ false,
                                /* hideSingles    */ false,
                                /* lowerCase      */ false,
                                /* withNewLine    */ true);
        break;

    case fmtGenpaf:
    case fmtGenpafNoHeader:
    case fmtGenpafNameHeader:
    case fmtGenpafBlast:
    case fmtGenpafBlastNoHeader:
        print_genpaf_align_list (out, alignList,
                                 currParams->seq1, currParams->seq2,
                                 currParams->outputInfo);
        break;

    case fmtText:
    case fmtZeroText:
        print_text_align_align_list (out, alignList,
                                     currParams->seq1, currParams->seq2,
                                     (outputFormat!=fmtZeroText),
                                     currParams->textContext);
        break;

    case fmtDiffs:
    case fmtDiffsNoBlocks:
        print_align_diffs_align_list (out, alignList,
                                      currParams->seq1, currParams->seq2,
                                      (outputFormat == fmtDiffs),
                                      currParams->nIsAmbiguous);
        break;

    case fmtInfStats:
        infer_stats_from_align_list (alignList,
                                     currParams->seq1, currParams->seq2);
        break;

    case fmtInfScores:
        gather_stats_from_align_list (alignList,
                                      currParams->seq1, currParams->seq2);
        break;

    case fmtIdDist:
        identity_dist_from_align_list (alignList,
                                       currParams->seq1, currParams->seq2);
        break;

    case fmtHspComp:
    case fmtDeseed:
    case fmtNone:
        break;                          // (do nothing)

    default:
        suicidef ("internal error, in print_align_list, outputFormat=%d",
                  outputFormat);
    }

    // the dotplot file, if any, gets the alignments' segments in genpaf form
    // regardless of the main output format

    if (currParams->dotplotFile != NULL)
        print_genpaf_align_list_segments (currParams->dotplotFile, alignList,
                                          currParams->seq1, currParams->seq2,
                                          currParams->dotplotKeys,
                                          currParams->scoring);
}