/*
 * Blast_SeqIdGetDefLine
 *
 * Build a printable identifier string for a sequence id and return it
 * through *buffer_ptr.  *buffer_ptr is set to NULL on entry and stays NULL
 * when no string could be produced (NULL sip, allocation failure, or no
 * usable title).
 *
 *   sip            - sequence id to describe.
 *   buffer_ptr     - receives a heap-allocated string; the pointer stored
 *                    here is the start of its allocation (presumably the
 *                    caller releases it - confirm against callers).
 *   ncbi_gi        - when set (and accession_only is not), the id found by
 *                    SeqIdFindBest(sip, SEQID_GI) is passed to
 *                    GetAccessionFromSeqId; if that reports a numeric id
 *                    with gi > 0, the gi is printed in decimal.
 *   accession_only - when set, the id returned by SeqIdFindBestAccession is
 *                    written with PRINTID_TEXTID_ACC_VER.
 *   search_for_id  - when FALSE the id itself is ignored and only the title
 *                    fallback below is used.
 *
 * A SEQID_LOCAL id, or both flags being FALSE, produces the
 * PRINTID_FASTA_LONG form.  A SEQID_GENERAL id whose db is "BL_ORD_ID" is
 * never printed directly.
 *
 * Fallback: if no string was produced, the Bioseq is looked up by id and
 * the first whitespace-delimited token of its title (or of the literal
 * "No definition line found") is returned.
 */
void Blast_SeqIdGetDefLine(SeqId* sip, char** buffer_ptr, Boolean ncbi_gi,
                           Boolean accession_only, Boolean search_for_id)
{
   char* seqid_buffer = NULL;
   Int4 gi = 0;
   Boolean numeric_id_type = FALSE;

   *buffer_ptr = NULL;

   if (sip == NULL)
      return;

   /* Check for ad hoc ID's generated by formatdb if the user does not
      provide any. */
   if (search_for_id && (sip->choice != SEQID_GENERAL ||
       StringCmp(((Dbtag*)sip->data.ptrvalue)->db, "BL_ORD_ID"))) {
      if ((!accession_only && !ncbi_gi) || sip->choice == SEQID_LOCAL) {
         seqid_buffer = (char*) malloc(BUFFER_LENGTH + 1);
         if (seqid_buffer == NULL)
            return; /* out of memory: report "no id" instead of crashing */
         SeqIdWrite(sip, seqid_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else if (accession_only) {
         seqid_buffer = (char*) malloc(BUFFER_LENGTH + 1);
         if (seqid_buffer == NULL)
            return; /* out of memory: report "no id" instead of crashing */
         SeqIdWrite(SeqIdFindBestAccession(sip), seqid_buffer,
                    PRINTID_TEXTID_ACC_VER, BUFFER_LENGTH);
      } else {
         /* Reaching this point implies ncbi_gi is set: the first test
            already handled the case where both flags are FALSE.  (The
            former trailing "else" after "else if (ncbi_gi)" could never
            run and has been dropped.) */
         numeric_id_type =
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                  &gi, &seqid_buffer);
      }
   }

   if (numeric_id_type && gi > 0) {
      /* 16 bytes hold any 32-bit value printed in decimal plus the NUL. */
      seqid_buffer = (char*) malloc(16);
      if (seqid_buffer != NULL)
         sprintf(seqid_buffer, "%ld", (long) gi);
   }

   if (!seqid_buffer) {
      /* If it's still NULL make a last ditch effort to get info. */
      char* title = NULL;
      Bioseq* bsp = BioseqLockById(sip);

      if (bsp) {
         const char* bsp_title = BioseqGetTitle(bsp);

         if (bsp_title != NULL)
            title = strdup(bsp_title);
         else
            title = strdup("No definition line found");
      }
      BioseqUnlock(bsp);

      if (title) {
         /* Use first token as id. */
         char* rest = NULL;
         char* token = StringTokMT(title, " \t\n\r", &rest);

         if (token == NULL) {
            /* Title held nothing but delimiters: release the copy rather
               than leaking it. */
            free(title);
         } else {
            /* Leading delimiters make the token start inside the
               allocation; shift it to the front so the pointer handed out
               is the one obtained from strdup.  Assumes StringTokMT
               NUL-terminates the token like strtok_r - confirm. */
            if (token != title)
               memmove(title, token, strlen(token) + 1);
            seqid_buffer = title;
         }
      }
   }
   *buffer_ptr = seqid_buffer;
}
/*
 * MegaBlastPrintEndpoints
 *
 * LIBCALLBACK routine taking an opaque pointer to the search block.  For
 * every HSP in search->current_hitlist that passes the e-value cutoff it
 * writes one line to search->output of the form
 *
 *    '<subject id>'=='<+|-><query id>' (s_start q_start s_end q_end) score
 *
 * Coordinates printed are 1-based.  The HSP structures are modified in
 * place: context is reduced to its strand bit and the query/subject
 * offset/end fields are rewritten.
 *
 * If there is no hit list, or it is empty, search->subject_info is
 * destructed and nothing is printed.  Always returns 0.
 *
 * Fixes relative to the previous version:
 *  - subject_descr was never assigned when search->rdfp is NULL, yet it was
 *    compared and passed to MemFree at the end; it now starts as NULL.
 *  - Strings cut out with StringTokMT are released through the pointer that
 *    was allocated, not through the token pointer, which may point past
 *    leading delimiters or be NULL.
 *
 * NOTE(review): query_buffer / subject_buffer can still be NULL when no
 * token or accession is available, in which case NULL reaches a "%s"
 * conversion exactly as before - decide on a fallback label.
 */
static int LIBCALLBACK
MegaBlastPrintEndpoints(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   CharPtr subject_descr = NULL;   /* descriptor allocation, if any */
   CharPtr descr_rest = NULL;      /* tokenizer state for subject_descr */
   SeqIdPtr sip = NULL, query_id;
   CharPtr query_buffer, title;
   CharPtr query_alloc;            /* allocation that backs query_buffer */
   CharPtr subject_buffer = NULL;
   Int4 query_length, q_start, q_end, q_shift=0, s_shift=0;
   Int4 subject_end;
   Int4 hsp_index;
   Boolean numeric_sip_type = FALSE;
   Boolean subject_buffer_owned = FALSE; /* TRUE: free subject_buffer */
   BLAST_HSPPtr hsp;
   Int2 context;
   Char context_sign;
   Int4 subject_gi = 0, score;
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL ||
       search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   if (search->rdfp)
      readdb_get_descriptor(search->rdfp, search->subject_id, &sip,
                            &subject_descr);
   else
      sip = SeqIdSetDup(search->subject_info->sip);

   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) {
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type =
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                  &subject_gi, &subject_buffer);
      /* As before, the buffer is released only for non-numeric ids. */
      if (!numeric_sip_type)
         subject_buffer_owned = TRUE;
   } else {
      DbtagPtr db_tag = (DbtagPtr) sip->data.ptrvalue;
      /* NOTE(review): this branch is entered only when db compares equal
         to "BL_ORD_ID", so the THC/TI test below looks unreachable -
         confirm the intended nesting. */
      if (db_tag->db &&
          (!StringCmp(db_tag->db, "THC") ||
           !StringICmp(db_tag->db, "TI")) &&
          db_tag->tag->id != 0) {
         subject_buffer = (CharPtr) Malloc(16);
         sprintf(subject_buffer, "%ld", (long) db_tag->tag->id);
         subject_buffer_owned = TRUE;
      } else if (subject_descr != NULL) {
         /* First token of the descriptor is the label.  subject_descr
            keeps pointing at the allocation so it can be freed safely. */
         subject_buffer = StringTokMT(subject_descr, " \t", &descr_rest);
      }
   }

   search->current_hitlist->hspcnt_max = search->current_hitlist->hspcnt;

   /* Only for the two sequences case, get offset shift if subject
      is a subsequence */
   if (!search->rdfp && search->query_slp->next) {
      s_shift = SeqLocStart(search->query_slp->next);
      subject_end = SeqLocStop(search->query_slp->next);
   } else {
      s_shift = 0;
      subject_end =
         readdb_get_sequence_length(search->rdfp, search->subject_id);
   }
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);

   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt;
        hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 &&
                        hsp->evalue > search->pbp->cutoff_e))
         continue;

      /* Correct query context is already found in
         BlastGetNonSumStatsEvalue */
      context = hsp->context;
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue;
      hsp->context = context & 1;
      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;
      hsp->subject.end = hsp->subject.offset + hsp->subject.length;

      if (hsp->context) {
         /* Odd context: coordinates are flipped against query_length. */
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset =
            hsp->query.end - hsp->query.length + 1;
         context_sign = '-';
      } else {
         hsp->query.end = (++hsp->query.offset) + hsp->query.length - 1;
         if (hsp->query.end > query_length) {
            /* Clip at the query end and shorten the subject side by the
               same amount. */
            hsp->subject.end -= (hsp->query.end - query_length);
            hsp->query.end = query_length;
         }
         context_sign = '+';
      }

      if (hsp->subject.end > subject_end) {
         /* Clip at the subject end and shorten the query side to match. */
         hsp->query.end -= (hsp->subject.end - subject_end);
         hsp->subject.end = subject_end;
      }
      hsp->subject.offset++;

      query_buffer = NULL;
      query_alloc = NULL;
      if (query_id->choice == SEQID_LOCAL &&
          search->pbp->mb_params->full_seqids) {
         BioseqPtr query_bsp = BioseqLockById(query_id);
         title = StringSave(BioseqGetTitle(query_bsp));
         if (title) {
            /* Remember the allocation: the token may start past leading
               blanks, or be NULL if the title is all blanks. */
            query_alloc = title;
            query_buffer = StringTokMT(title, " ", &title);
         } else {
            Int4 query_gi;
            GetAccessionFromSeqId(query_bsp->id, &query_gi,
                                  &query_buffer);
            query_alloc = query_buffer;
         }
         BioseqUnlock(query_bsp);
      } else {
         query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         query_alloc = query_buffer;
         if (!search->pbp->mb_params->full_seqids)
            SeqIdWrite(query_id, query_buffer, PRINTID_TEXTID_ACCESSION,
                       BUFFER_LENGTH);
         else
            SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
      }

      /* With zero gap penalties the raw score is converted using the
         alignment lengths and the reward/penalty values; otherwise the
         HSP score is printed unchanged. */
      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                  search->sbp->reward / 2 - hsp->score) /
            (search->sbp->reward - search->sbp->penalty);
      else
         score = hsp->score;

      if (context_sign == '+') {
         q_start = hsp->query.offset;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset;
      }

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }

      hsp->subject.offset += s_shift;
      hsp->subject.end += s_shift;

      if (numeric_sip_type)
         fprintf(fp, "'%ld'=='%c%s' (%d %d %d %d) %d\n", (long) subject_gi,
                 context_sign, query_buffer, hsp->subject.offset, q_start,
                 hsp->subject.end, q_end, score);
      else
         fprintf(fp, "'%s'=='%c%s' (%d %d %d %d) %d\n", subject_buffer,
                 context_sign, query_buffer, hsp->subject.offset, q_start,
                 hsp->subject.end, q_end, score);
      MemFree(query_alloc);
   }

   if (subject_buffer_owned)
      MemFree(subject_buffer);
   MemFree(subject_descr);
   sip = SeqIdSetFree(sip);
   return 0;
}
/*
 * MegaBlastPrintSegments
 *
 * LIBCALLBACK routine taking an opaque pointer to the search block.  For
 * every HSP in search->current_hitlist that passes the e-value cutoff it
 * builds a text record consisting of
 *
 *    #'><subject id>'=='<+|-><query id>' (s_start q_start s_end q_end) score
 *    a {
 *     s <score>
 *     b <s_start> <q_start>
 *     e <s_end> <q_end>
 *     l <s_from> <q_from> <s_to> <q_to> (<percent identity>)   per segment
 *    }
 *
 * Only segments where both start values differ from -1 get an "l" line.
 * The record is written to search->output only if the identity summed over
 * those segments reaches search->pbp->mb_params->perc_identity percent of
 * the total alignment length.
 *
 * HSP fields (context, query.offset, query.end, subject.offset) are
 * modified in place.  Returns 0 after destructing search->subject_info when
 * there is no non-empty hit list, otherwise 1 after flushing the output.
 *
 * Fixes relative to the previous version:
 *  - subject_descr was never assigned when rdfp is NULL, yet it was compared
 *    and passed to MemFree at the end; it now starts as NULL.
 *  - Strings cut out with StringTokMT are released through the pointer that
 *    was allocated, not through the token pointer, which may point past
 *    leading delimiters or be NULL.
 *
 * NOTE(review): query_buffer / subject_buffer can still be NULL when no
 * token or accession is available, in which case NULL reaches a "%s"
 * conversion exactly as before - decide on a fallback label.
 */
static int LIBCALLBACK
MegaBlastPrintSegments(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   ReadDBFILEPtr rdfp = search->rdfp;
   BLAST_HSPPtr hsp;
   Int4 i, subject_gi = 0;
   Int2 context;
   CharPtr query_buffer, title;
   CharPtr query_alloc;            /* allocation that backs query_buffer */
   SeqIdPtr sip = NULL, query_id;
   Int4 hsp_index, score;
   Uint1Ptr query_seq, subject_seq = NULL;
   FloatHi perc_ident;
   Char strand;
   GapXEditScriptPtr esp;
   Int4 q_start, q_end, s_start, s_end, query_length, numseg;
   Int4 q_off, num_ident, align_length, total_ident, q_shift=0, s_shift=0;
   Int4Ptr length, start;
   Uint1Ptr strands;
   CharPtr subject_descr = NULL;   /* descriptor allocation, if any */
   CharPtr descr_rest = NULL;      /* tokenizer state for subject_descr */
   CharPtr subject_buffer = NULL, buffer;
   Char tmp_buffer[BUFFER_LENGTH];
   Int4 buffer_size, max_buffer_size = LARGE_BUFFER_LENGTH;
   Boolean numeric_sip_type = FALSE;
   Boolean subject_buffer_owned = FALSE; /* TRUE: free subject_buffer */
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL ||
       search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   subject_seq = search->subject->sequence_start + 1;

   if (rdfp)
      readdb_get_descriptor(rdfp, search->subject_id, &sip, &subject_descr);
   else
      sip = SeqIdSetDup(search->subject_info->sip);

   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) {
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type =
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                  &subject_gi, &subject_buffer);
      /* As before, the buffer is released only for non-numeric ids. */
      if (!numeric_sip_type)
         subject_buffer_owned = TRUE;
   } else if (subject_descr != NULL) {
      /* First token of the descriptor is the label.  subject_descr keeps
         pointing at the allocation so it can be freed safely. */
      subject_buffer = StringTokMT(subject_descr, " \t", &descr_rest);
   }

   buffer = (CharPtr) Malloc(LARGE_BUFFER_LENGTH);

   /* Only for the two sequences case, get offset shift if subject
      is a subsequence */
   if (!rdfp && search->query_slp->next)
      s_shift = SeqLocStart(search->query_slp->next);
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);

   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt;
        hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 &&
                        hsp->evalue > search->pbp->cutoff_e)) {
         continue;
      }
      context = hsp->context;
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue;
      hsp->context = context & 1;

      /* With zero gap penalties the raw score is converted using the
         alignment lengths and the reward/penalty values; otherwise the
         HSP score is printed unchanged. */
      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                  search->sbp->reward / 2 - hsp->score) /
            (search->sbp->reward - search->sbp->penalty);
      else
         score = hsp->score;

      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;

      /* Keep the untouched query offset for GXECollectDataForSeqalign. */
      q_off = hsp->query.offset;
      if (hsp->context) {
         strand = '-';
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset =
            hsp->query.end - hsp->query.length;
      } else {
         strand = '+';
         hsp->query.end = hsp->query.offset + hsp->query.length;
      }

      if (strand == '+') {
         q_start = hsp->query.offset + 1;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset + 1;
      }
      s_start = hsp->subject.offset + 1;
      s_end = hsp->subject.offset + hsp->subject.length;

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }
      s_start += s_shift;
      s_end += s_shift;

      query_buffer = NULL;
      query_alloc = NULL;
      if (query_id->choice == SEQID_LOCAL &&
          search->pbp->mb_params->full_seqids) {
         BioseqPtr query_bsp = BioseqLockById(query_id);
         title = StringSave(BioseqGetTitle(query_bsp));
         if (title) {
            /* Remember the allocation: the token may start past leading
               blanks, or be NULL if the title is all blanks. */
            query_alloc = title;
            query_buffer = StringTokMT(title, " ", &title);
         } else {
            Int4 query_gi;
            GetAccessionFromSeqId(query_bsp->id, &query_gi,
                                  &query_buffer);
            query_alloc = query_buffer;
         }
         BioseqUnlock(query_bsp);
      } else {
         query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         query_alloc = query_buffer;
         if (!search->pbp->mb_params->full_seqids)
            SeqIdWrite(query_id, query_buffer, PRINTID_TEXTID_ACCESSION,
                       BUFFER_LENGTH);
         else
            SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
      }

      /* Record header; overwrites whatever the previous HSP left in
         buffer. */
      if (numeric_sip_type)
         sprintf(buffer, "\n#'>%ld'=='%c%s' (%d %d %d %d) %d\na {\n s %d\n b %d %d\n e %d %d\n",
                 (long) subject_gi, strand, query_buffer,
                 s_start, q_start, s_end, q_end, score, score,
                 s_start, q_start, s_end, q_end);
      else
         sprintf(buffer, "\n#'>%s'=='%c%s' (%d %d %d %d) %d\na {\n s %d\n b %d %d\n e %d %d\n",
                 subject_buffer, strand, query_buffer,
                 s_start, q_start, s_end, q_end, score, score,
                 s_start, q_start, s_end, q_end);
      buffer_size = StringLen(buffer);

      query_seq = search->context[context].query->sequence;

      /* Count the edit-script nodes to get the number of segments. */
      esp = hsp->gap_info->esp;
      for (numseg=0; esp; esp = esp->next, numseg++);

      GXECollectDataForSeqalign(hsp->gap_info, hsp->gap_info->esp, numseg,
                                &start, &length, &strands,
                                &q_off, &hsp->subject.offset);

      /* Trim a first segment that begins before the query start. */
      if (start[0] < 0) {
         length[0] += start[0];
         start[1] -= start[0];
         start[0] = 0;
      }
      /* Trim a last segment that runs past the query end. */
      if (start[2*(numseg-1)] + length[numseg-1] > query_length)
         length[numseg-1] = query_length - start[2*(numseg-1)];

      total_ident = 0;
      align_length = 0;
      for (i=0; i<numseg; i++) {
         align_length += length[i];
         if (strand == '+') {
            q_start = start[2*i] + 1;
            q_end = q_start + length[i] - 1;
         } else {
            q_start = query_length - start[2*i];
            q_end = q_start - length[i] + 1;
         }
         /* Segments with a -1 start on either side get no "l" line. */
         if (start[2*i] != -1 && start[2*i+1] != -1) {
            num_ident = MegaBlastGetNumIdentical(query_seq, subject_seq,
                                                 start[2*i], start[2*i+1],
                                                 length[i], FALSE);
            perc_ident = (FloatHi) num_ident / length[i] * 100;
            total_ident += num_ident;
            sprintf(tmp_buffer, " l %d %d %d %d (%.0f)\n",
                    start[2*i+1]+1, q_start,
                    start[2*i+1]+length[i], q_end, perc_ident);
            /* Grow the record buffer before appending; the margin of 2
               leaves room for the closing brace and the NUL. */
            if ((buffer_size += StringLen(tmp_buffer)) >
                max_buffer_size - 2) {
               max_buffer_size *= 2;
               buffer = (CharPtr) Realloc(buffer, max_buffer_size);
            }
            StringCat(buffer, tmp_buffer);
         }
      }
      if (100*total_ident >=
          align_length*search->pbp->mb_params->perc_identity) {
         StringCat(buffer, "}");
         fprintf(fp, "%s\n", buffer);
      }
      MemFree(start);
      MemFree(length);
      MemFree(strands);
      MemFree(query_alloc);
   } /* End loop on hsp's */

   if (subject_buffer_owned)
      MemFree(subject_buffer);
   MemFree(subject_descr);
   MemFree(buffer);
   sip = SeqIdSetFree(sip);
   fflush(fp);
   return 1;
}