/*****************************************************************
*
*	MapAnchorToLoc(annot, anchor_pos, anchor_bsp, other_bsp)
*	Translate anchor_pos, a position on anchor_bsp, into a
*	position on other_bsp.  This is the inverse of MapLocToAnchor.
*
*	annot:      chain of Seq-annots that is searched for a usable
*	            alignment
*	anchor_pos: position on anchor_bsp; must lie inside
*	            [0, anchor_bsp->length-1]
*	anchor_bsp: the Bioseq anchor_pos refers to
*	other_bsp:  the Bioseq the result refers to
*
*	Returns the mapped position, anchor_pos itself when both
*	Bioseqs are the same object, or -1 on failure.
*
******************************************************************/
Int4 MapAnchorToLoc(SeqAnnotPtr annot, Int4 anchor_pos, BioseqPtr anchor_bsp, BioseqPtr other_bsp)
{
	SeqAnnotPtr cur;
	SeqAlignPtr align;
	SeqIdPtr from_id, to_id;
	Int4Ptr anchor_coords, other_coords;
	Uint2 count;
	Int2 type;
	Int4 mapped;

	if(annot == NULL || anchor_bsp == NULL || other_bsp == NULL)
		return -1;
	if(anchor_pos < 0 || anchor_pos > anchor_bsp->length-1)
		return -1;
	if(anchor_bsp == other_bsp)
		return anchor_pos;

	/* prefer the id picked by SeqIdFindBest, fall back to the head of the id chain */
	from_id = SeqIdFindBest(anchor_bsp->id, SEQID_GI);
	if(from_id == NULL)
		from_id = anchor_bsp->id;
	to_id = SeqIdFindBest(other_bsp->id, SEQID_GI);
	if(to_id == NULL)
		to_id = other_bsp->id;

	for(cur = annot; cur != NULL; cur = cur->next)
	{
		/* only annots of type 2 carry the alignment data used here */
		if(cur->type != 2)
			continue;

		type = GetEquivAlignType(cur);
		if(type != 1)	/* not the consistent alignment */
			continue;

		align = cur->data;
		if(!get_anchor_coordinates(align, from_id, to_id, &anchor_coords, &other_coords, &count))
			continue;

		/* fewer than 10 coordinate pairs are not used for mapping */
		mapped = -1;
		if(count >= 10)
			mapped = find_this_position_by_anchor (other_coords, anchor_coords, count, anchor_pos, other_bsp->length);
		MemFree(anchor_coords);
		MemFree(other_coords);

		if(mapped != -1)
			return mapped;
	}

	return -1;
}
/** Build a printable identifier for a Seq-id.
 *
 * @param sip            Seq-id to describe; when NULL, *buffer_ptr is set to
 *                       NULL and nothing else happens.
 * @param buffer_ptr     Receives a heap-allocated, NUL-terminated string, or
 *                       NULL when no identifier could be produced.  The
 *                       returned pointer is always the start of its own
 *                       allocation, so the caller can release it directly.
 * @param ncbi_gi        Use the id chosen by SeqIdFindBest(sip, SEQID_GI).
 * @param accession_only Print only the accession.version of the best
 *                       accession.
 * @param search_for_id  When FALSE, or when the id is a general "BL_ORD_ID"
 *                       id, the id itself is not printed and the first token
 *                       of the Bioseq title is used instead.
 */
void Blast_SeqIdGetDefLine(SeqId* sip, char** buffer_ptr, Boolean ncbi_gi,
                           Boolean accession_only, Boolean search_for_id)
{
   char* seqid_buffer = NULL;
   Int4 gi = 0;
   Boolean numeric_id_type = FALSE;

   *buffer_ptr = NULL;

   if (sip == NULL)
      return;

   /* Check for ad hoc ID's generated by formatdb if the user does not
      provide any. */
   if (search_for_id && (sip->choice != SEQID_GENERAL ||
        StringCmp(((Dbtag*)sip->data.ptrvalue)->db, "BL_ORD_ID"))) {
      if ((!accession_only && !ncbi_gi) || sip->choice == SEQID_LOCAL) {
         seqid_buffer = (char*) malloc(BUFFER_LENGTH + 1);
         if (seqid_buffer != NULL) {
            SeqIdWrite(sip, seqid_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
         }
      } else if (accession_only) {
         seqid_buffer = (char*) malloc(BUFFER_LENGTH + 1);
         if (seqid_buffer != NULL) {
            SeqIdWrite(SeqIdFindBestAccession(sip), seqid_buffer,
                       PRINTID_TEXTID_ACC_VER, BUFFER_LENGTH);
         }
      } else {
         /* Neither of the branches above matched, so ncbi_gi is set here;
            no further alternative is reachable. */
         numeric_id_type =
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                  &gi, &seqid_buffer);
      }
   }

   if (numeric_id_type && gi > 0) {
      /* 16 bytes hold any value of a 32-bit gi plus the terminator */
      seqid_buffer = (char*) malloc(16);
      if (seqid_buffer != NULL) {
         sprintf(seqid_buffer, "%ld", (long) gi);
      }
   }

   if (!seqid_buffer) {
      /* If it's still NULL make a last ditch effort to get info. */
      char* title = NULL;
      Bioseq* bsp = BioseqLockById(sip);

      if (bsp) {
         const char* bsp_title = BioseqGetTitle(bsp);
         title = strdup(bsp_title != NULL ? bsp_title
                                          : "No definition line found");
         BioseqUnlock(bsp);
      }

      if (title) {
         /* Use first token as id.  The token may start past leading
            delimiters, so it is copied into its own allocation and the
            scratch copy of the title is released. */
         char* rest = NULL;
         char* token = StringTokMT(title, " \t\n\r", &rest);

         if (token != NULL) {
            seqid_buffer = strdup(token);
         }
         free(title);
      }
   }

   *buffer_ptr = seqid_buffer;
}
/**********************************************************************
*
*	MapLocToAnchor(annot, slp, anchor_bsp)
*	Map the Seq-loc slp to a location on the anchor Bioseq.
*
*	annot:      chain of Seq-annots that may contain the alignment
*	            of the consistent markers
*	slp:        location to map; only SEQLOC_PNT and SEQLOC_INT
*	            are accepted
*	anchor_bsp: the anchor Bioseq, i.e. the sequence map
*
*	Returns a new plus-strand Seq-loc on the anchor (a point for a
*	point, an interval for an interval), or NULL when the arguments
*	are unusable, no consistent alignment is found, or
*	get_anchor_coordinates fails for the first consistent alignment.
*
************************************************************************/
SeqLocPtr MapLocToAnchor(SeqAnnotPtr annot, SeqLocPtr slp, BioseqPtr anchor_bsp)
{
	SeqAnnotPtr cur;
	SeqAlignPtr align;
	SeqIdPtr anchor_id;
	SeqLocPtr mapped;
	Int4Ptr anchor_coords, loc_coords;
	Uint2 count;
	Int2 type;
	Int4 from, to;

	if(annot == NULL || slp == NULL || anchor_bsp == NULL)
		return NULL;
	if(slp->choice != SEQLOC_PNT && slp->choice != SEQLOC_INT)
		return NULL;

	anchor_id = SeqIdFindBest(anchor_bsp->id, SEQID_GI);
	if(anchor_id == NULL)
		anchor_id = anchor_bsp->id;

	for(cur = annot; cur != NULL; cur = cur->next)
	{
		if(cur->type != 2)
			continue;

		type = GetEquivAlignType(cur);
		if(type != 1)	/* not the consistent alignment */
			continue;

		/* the first consistent alignment decides the outcome */
		align = cur->data;
		if(!get_anchor_coordinates(align, anchor_id, SeqLocId(slp), &anchor_coords, &loc_coords, &count))
			return NULL;

		from = find_this_position_by_anchor (anchor_coords, loc_coords, count, SeqLocStart(slp), anchor_bsp->length);
		if(slp->choice == SEQLOC_INT)
		{
			/* a zero-width interval reuses the mapped start as its stop */
			if(SeqLocStart(slp) != SeqLocStop(slp))
				to = find_this_position_by_anchor (anchor_coords, loc_coords, count, SeqLocStop(slp), anchor_bsp->length);
			else
				to = from;
			mapped = SeqLocIntNew(from, to, Seq_strand_plus, anchor_id);
		}
		else
		{
			mapped = SeqLocPntNew(from, Seq_strand_plus, anchor_id, FALSE);
		}

		MemFree(anchor_coords);
		MemFree(loc_coords);
		return mapped;
	}

	return NULL;
}
/*
 * DoSuggestIntervals
 *   Per-Bioseq callback.  For a protein Bioseq, asks SuggestCodingRegion for
 *   a coding region on cfp->nucbsp and, when the answer is a CDS feature with
 *   a location, writes the protein id followed by one "start<TAB>stop" line
 *   per interval to cfp->ofp.  Each coordinate is the offset on the
 *   nucleotide plus one; "<" / ">" mark 5' / 3' partial intervals.
 *
 *   bsp:      Bioseq being visited (ignored unless it is a protein)
 *   userdata: CSpeedFlagPtr supplying ofp, nucbsp and genCode
 */
static void DoSuggestIntervals (
  BioseqPtr bsp,
  Pointer userdata
)

{
  CSpeedFlagPtr  flags;
  Char           idbuf [64];
  SeqLocPtr      cdsloc = NULL;
  SeqLocPtr      piece;
  Boolean        no5, no3;
  SeqAnnotPtr    suggestion;
  SeqFeatPtr     cds;
  SeqIdPtr       bestid;
  Int4           from, to;

  if (bsp == NULL || ! ISA_aa (bsp->mol)) return;

  flags = (CSpeedFlagPtr) userdata;
  if (flags == NULL || flags->ofp == NULL || flags->nucbsp == NULL) return;

  bestid = SeqIdFindBest (bsp->id, 0);
  if (bestid == NULL) return;
  SeqIdWrite (bestid, idbuf, PRINTID_FASTA_SHORT, sizeof (idbuf) - 1);

  suggestion = SuggestCodingRegion (flags->nucbsp, bsp, flags->genCode);
  if (suggestion == NULL) return;

  /* only an annot of type 1 whose data is a CDS feature is reported */
  if (suggestion->type == 1) {
    cds = (SeqFeatPtr) suggestion->data;
    if (cds != NULL && cds->data.choice == SEQFEAT_CDREGION) {
      cdsloc = cds->location;
    }
  }

  if (cdsloc != NULL) {
    fprintf (flags->ofp, "%s\n", idbuf);
    for (piece = SeqLocFindNext (cdsloc, NULL);
         piece != NULL;
         piece = SeqLocFindNext (cdsloc, piece)) {
      from = GetOffsetInBioseq (piece, flags->nucbsp, SEQLOC_START) + 1;
      to = GetOffsetInBioseq (piece, flags->nucbsp, SEQLOC_STOP) + 1;
      CheckSeqLocForPartial (piece, &no5, &no3);
      fprintf (flags->ofp, "%s%ld\t%s%ld\n",
               no5 ? "<" : "", (long) from,
               no3 ? ">" : "", (long) to);
    }
  }

  SeqAnnotFree (suggestion);
}
/*
 * CollectBioseqLineValues
 *   Builds one value list per source descriptor found on bsp.  Each list
 *   starts with the accession-style id text, optionally followed by the gi
 *   text (want_gi), followed by whatever CollectBioSourceValues returns for
 *   that descriptor and field_list.
 *
 *   Id selection: a GenBank id always wins; an EMBL, Swiss-Prot, DDBJ or PIR
 *   id is taken only while no accession-style id has been chosen yet.  When
 *   the Bioseq has neither such an id nor a gi, the id picked by SeqIdFindBest
 *   is printed instead.
 *
 *   Returns a ValNode list whose nodes each point to one line's value list,
 *   or NULL when bsp is NULL or has no source descriptor.
 */
static ValNodePtr CollectBioseqLineValues (BioseqPtr bsp, ValNodePtr field_list, Boolean want_gi)
{
  SeqMgrDescContext context;
  SeqDescrPtr       src;
  SeqIdPtr          id;
  SeqIdPtr          acc_id = NULL;
  SeqIdPtr          gi_id = NULL;
  Char              acc_txt[255];
  Char              gi_txt[255];
  ValNodePtr        lines = NULL;
  ValNodePtr        values;

  if (bsp == NULL)
  {
    return NULL;
  }

  for (id = bsp->id; id != NULL; id = id->next)
  {
    switch (id->choice)
    {
      case SEQID_GENBANK:
        acc_id = id;
        break;
      case SEQID_EMBL:
      case SEQID_SWISSPROT:
      case SEQID_DDBJ:
      case SEQID_PIR:
        if (acc_id == NULL)
        {
          acc_id = id;
        }
        break;
      case SEQID_GI:
        gi_id = id;
        break;
      default:
        break;
    }
  }

  /* first column: accession text, empty when only a gi is available */
  if (acc_id != NULL)
  {
    SeqIdWrite (acc_id, acc_txt, PRINTID_REPORT, sizeof (acc_txt) - 1);
  }
  else if (gi_id != NULL)
  {
    acc_txt[0] = 0;
  }
  else
  {
    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), acc_txt, PRINTID_REPORT, sizeof (acc_txt) - 1);
  }

  /* second column: gi text, empty when there is no gi */
  if (gi_id != NULL)
  {
    SeqIdWrite (gi_id, gi_txt, PRINTID_REPORT, sizeof (gi_txt) - 1);
  }
  else
  {
    gi_txt[0] = 0;
  }

  src = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
  while (src != NULL)
  {
    values = NULL;
    ValNodeAddPointer (&values, 0, StringSave (acc_txt));
    if (want_gi)
    {
      ValNodeAddPointer (&values, 0, StringSave (gi_txt));
    }
    ValNodeLink (&values, CollectBioSourceValues (src->data.ptrvalue, field_list));
    ValNodeAddPointer (&lines, 0, values);

    src = SeqMgrGetNextDescriptor (bsp, src, Seq_descr_source, &context);
  }

  return lines;
}
/*
 * ErrorDescString
 *   Returns a newly allocated string holding the FASTA-long form of the id
 *   chosen by SeqIdFindBest (sip, SEQID_GI), limited to 31 characters.
 *   The caller owns the result.  Returns NULL when the buffer cannot be
 *   allocated.
 */
extern CharPtr ErrorDescString (SeqIdPtr sip)
{
  const size_t  buflen = 32;
  SeqIdPtr      bestid;
  CharPtr       errbuf;

  bestid = SeqIdFindBest (sip, SEQID_GI);
  errbuf = (CharPtr) MemNew ((size_t) (sizeof (Char) * buflen));
  if (errbuf == NULL) {
    /* do not hand a NULL buffer to SeqIdWrite */
    return NULL;
  }
  SeqIdWrite (bestid, errbuf, PRINTID_FASTA_LONG, buflen - 1);
  return errbuf;
}
/*
 * PrintBioseqLines
 *   Writes one line to fp for every source descriptor found on bsp.  A line
 *   consists of the accession-style id text, optionally a TAB and the gi text
 *   (want_gi), whatever PrintBioSourceLine emits for the descriptor and
 *   field_list, and a newline.
 *
 *   Id selection: a GenBank id always wins; an EMBL, Swiss-Prot, DDBJ or PIR
 *   id is taken only while no accession-style id has been chosen yet.  When
 *   the Bioseq has neither such an id nor a gi, the id picked by SeqIdFindBest
 *   is printed instead.
 */
static void PrintBioseqLines (FILE *fp, BioseqPtr bsp, ValNodePtr field_list, Boolean want_gi)
{
  SeqMgrDescContext context;
  SeqDescrPtr       src;
  SeqIdPtr          id;
  SeqIdPtr          acc_id = NULL;
  SeqIdPtr          gi_id = NULL;
  Char              acc_txt[255];
  Char              gi_txt[255];

  if (fp == NULL || bsp == NULL)
  {
    return;
  }

  for (id = bsp->id; id != NULL; id = id->next)
  {
    switch (id->choice)
    {
      case SEQID_GENBANK:
        acc_id = id;
        break;
      case SEQID_EMBL:
      case SEQID_SWISSPROT:
      case SEQID_DDBJ:
      case SEQID_PIR:
        if (acc_id == NULL)
        {
          acc_id = id;
        }
        break;
      case SEQID_GI:
        gi_id = id;
        break;
      default:
        break;
    }
  }

  /* first column: accession text, empty when only a gi is available */
  if (acc_id != NULL)
  {
    SeqIdWrite (acc_id, acc_txt, PRINTID_REPORT, sizeof (acc_txt) - 1);
  }
  else if (gi_id != NULL)
  {
    acc_txt[0] = 0;
  }
  else
  {
    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), acc_txt, PRINTID_REPORT, sizeof (acc_txt) - 1);
  }

  /* second column: gi text, empty when there is no gi */
  if (gi_id != NULL)
  {
    SeqIdWrite (gi_id, gi_txt, PRINTID_REPORT, sizeof (gi_txt) - 1);
  }
  else
  {
    gi_txt[0] = 0;
  }

  src = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
  while (src != NULL)
  {
    if (want_gi)
    {
      fprintf (fp, "%s\t%s", acc_txt, gi_txt);
    }
    else
    {
      fprintf (fp, "%s", acc_txt);
    }
    PrintBioSourceLine (fp, src->data.ptrvalue, field_list);
    fprintf (fp, "\n");

    src = SeqMgrGetNextDescriptor (bsp, src, Seq_descr_source, &context);
  }
}
/******************************************************************************* Function : DDV_DrawSequenceName() Purpose : draw the name of the sequence (left column of the DDV panel) Parameters : GrData; graphical data (font size, etc) ScaleStyle;style of the ParaG scale top, left; coord to start the draw Return value : none *******************************************************************************/ static void DDV_DrawSequenceName(UnDViewerGraphDataPtr GrData,ParaGPtr pgp, Int2 top,Int2 left,Int4 cur_row,Int4 CurEditRow,Int4 CurMasterRow) { SeqIdPtr sip = NULL; RecT rc; Int2 x,y,decal=1,size;/*text position/size*/ Char szAccess[21]; BioseqPtr bsp; /*get a name*/ bsp = BioseqLockById(pgp->sip); if(bsp) { sip = SeqIdFindBestAccession(bsp->id); BioseqUnlock(bsp); } if (!sip) sip = SeqIdFindBest(pgp->sip, 0); SeqIdWrite(sip, szAccess,PRINTID_TEXTID_ACCESSION, 20); /*compute position*/ if (pgp->ScaleStyle==SCALE_POS_TOP) decal++; /*draw name*/ size=StringWidth(szAccess); x=left/*-GrData->udv_scale.cxLeftScale*/-size; y=top+decal*GrData->udv_font.LineHeight; MoveTo(x,y); if (cur_row==CurEditRow){ Magenta(); } PaintString (szAccess); if (cur_row==CurMasterRow){ Blue(); MoveTo(x,y); LineTo(x+size,y); } /*draw a little box (for selection a full sequence)*/ left+=GrData->udv_font.cxChar; top+=GrData->udv_font.cxChar/2; LoadRect(&rc,left,top,left+GrData->udv_font.cxChar, top+GrData->udv_font.cxChar); Blue(); PaintOval(&rc); Black(); }
/*
 * AnnotateBestOrf
 *   Runs the six-frame ORF search over bsp and creates a CDS feature, with a
 *   protein xref named "unknown", on the frame whose best length is largest.
 *
 *   bsp:      Bioseq to annotate; nothing happens when NULL
 *   genCode:  genetic code used for the translation table and the CdRegion
 *   altstart: copied into the search state handed to LookForOrfs
 *
 *   Frame indexes 0..2 are annotated on the plus strand, 3..5 on the minus
 *   strand.
 *
 *   NOTE(review): bestlen[] starts at 0 and the running maximum at -1, so a
 *   frame is always selected even if the search records nothing; confirm
 *   whether a zero-length best ORF should really be annotated.
 */
static void AnnotateBestOrf (
  BioseqPtr bsp,
  Int2 genCode,
  Boolean altstart
)

{
  CdRegionPtr     crp;
  Int2            i, idx;
  Int4            best;  /* wide enough for any recorded ORF length */
  OrfData         od;
  ProtRefPtr      prp;
  SeqFeatPtr      sfp;
  SeqInt          sint;
  TransTablePtr   tbl;
  ValNode         vn;
  SeqFeatXrefPtr  xref;

  if (bsp == NULL) return;

  for (i = 0; i < 6; i++) {
    od.curlen [i] = INT4_MIN;
    od.bestlen [i] = 0;
    od.currstart [i] = 0;
    od.beststart [i] = 0;
    od.sublen [i] = INT4_MIN;
    od.inorf [i] = FALSE;
  }
  od.altstart = altstart;

  /* use simultaneous 6-frame translation finite state machine */

  tbl = TransTableNew (genCode);
  if (tbl != NULL) {
    TransTableProcessBioseq (tbl, LookForOrfs, (Pointer) &od, bsp);
  }
  TransTableFree (tbl);

  /* pick the frame with the largest best length; first frame wins ties */

  best = -1;
  idx = -1;
  for (i = 0; i < 6; i++) {
    if (od.bestlen [i] > best) {
      best = od.bestlen [i];
      idx = i;
    }
  }
  if (idx == -1) return;

  /* make feature location on largest ORF; only the start offset and the
     strand differ between plus (0..2) and minus (3..5) frames */

  MemSet ((Pointer) &sint, 0, sizeof (SeqInt));
  if (idx < 3) {
    sint.from = od.beststart [idx] + idx;
    sint.strand = Seq_strand_plus;
  } else {
    sint.from = od.beststart [idx] + idx - 3;
    sint.strand = Seq_strand_minus;
  }
  sint.to = sint.from + (od.bestlen [idx]) * 3 + 2;
  sint.id = SeqIdFindBest (bsp->id, 0);

  vn.choice = SEQLOC_INT;
  vn.extended = 0;
  vn.data.ptrvalue = (Pointer) &sint;
  vn.next = NULL;

  /* make CDS feature with unknown product */

  sfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_CDREGION, &vn);
  if (sfp == NULL) return;

  crp = CreateNewCdRgn (1, FALSE, genCode);
  if (crp == NULL) return;

  crp->frame = 1;
  sfp->data.value.ptrvalue = (Pointer) crp;

  prp = ProtRefNew ();
  if (prp == NULL) return;

  xref = SeqFeatXrefNew ();
  if (xref == NULL) {
    /* nothing took ownership of the ProtRef yet */
    ProtRefFree (prp);
    return;
  }

  xref->data.choice = SEQFEAT_PROT;
  xref->data.value.ptrvalue = (Pointer) prp;
  xref->next = sfp->xref;
  sfp->xref = xref;

  prp->name = ValNodeCopyStr (NULL, 0, "unknown");
}
/*
 * BioseqRawToRaw
 *   Appends the id line and, unless idonly is set, the residues of a raw or
 *   constructed Bioseq to caller-owned, heap-allocated strings.
 *
 *   bsp:      Bioseq to dump; ignored unless its representation is raw or
 *             const
 *   idonly:   when TRUE only "*seqid" is extended (with " N) id title\n")
 *   whichSeq: ordinal of the sequence wanted, or 0 for every sequence
 *   seqnum:   running ordinal, incremented for every raw/const Bioseq seen
 *   seq:      in/out, NULL or a malloc'd NUL-terminated string; residues are
 *             appended to it
 *   seqid:    in/out, NULL or a malloc'd NUL-terminated string; the id text
 *             is appended to it (separated by "; " when not idonly)
 *   seqlen:   out, length of *seq after appending
 *
 *   This may be called several times for a single sequence because
 *   SeqEntryExplore looks for parts and joins them, so seq, seqid and seqlen
 *   may already contain data (or NULL).
 *
 *   On allocation failure *seq and *seqid are never left dangling or reset
 *   to NULL: they keep pointing at valid storage.  If memory runs out while
 *   residues are being appended, the residues collected so far are kept and
 *   reported through *seqlen.
 */
void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly,
                    short whichSeq, short *seqnum,
                    char **seq, char **seqid, long *seqlen)
{
  SeqPortPtr spp;
  SeqIdPtr   bestid;
  Uint1      repr, code, residue;
  CharPtr    tmp, title;
  long       outlen, outmax;
  char       localid[256], *sp, *grown;
  size_t     room, ncopy;

  if (bsp == NULL)
    return;
  repr = Bioseq_repr(bsp);
  if (!(repr == Seq_repr_raw || repr == Seq_repr_const))
    return;

  (*seqnum)++;
  if (!(whichSeq == *seqnum || whichSeq == 0))
    return;

  bestid = SeqIdFindBest(bsp->id, (Uint1) 0);
  title = BioseqGetTitle(bsp);
  if (idonly) {
    sprintf(localid, " %d) ", *seqnum);
    tmp = localid + strlen(localid) - 1;
  } else {
    strcpy(localid, " ");
    tmp = localid;
  }
  tmp = SeqIdPrint(bestid, tmp, PRINTID_FASTA_SHORT);
  tmp = StringMove(tmp, " ");

  /* Append at most 200 characters of the title, never writing past the end
     of localid and always leaving the string terminated. */
  if (tmp != NULL && title != NULL &&
      tmp >= localid && tmp < localid + sizeof(localid)) {
    room = sizeof(localid) - 1 - (size_t) (tmp - localid);
    ncopy = strlen(title);
    if (ncopy > 200) ncopy = 200;
    if (ncopy > room) ncopy = room;
    memcpy(tmp, title, ncopy);
    tmp[ncopy] = '\0';
  }

  /* room for localid, an optional "; " separator, a trailing newline and
     the terminator */
  outmax = strlen(localid) + 3;
  if (*seqid == NULL) {
    *seqid = (char*) malloc(outmax);
    if (*seqid == NULL) return;
    strcpy(*seqid, localid);
  } else {
    outmax += strlen(*seqid) + 2;
    grown = (char*) realloc(*seqid, outmax);
    if (grown == NULL) return;      /* *seqid is untouched and still valid */
    *seqid = grown;
    if (!idonly) strcat(*seqid, "; ");
    strcat(*seqid, localid);
  }

  if (idonly) {
    strcat(*seqid, "\n");
    return;
  }

  if (ISA_na(bsp->mol)) code = Seq_code_iupacna;
  else code = Seq_code_iupacaa;
  spp = SeqPortNew(bsp, 0, -1, 0, code);
  SeqPortSeek(spp, 0, SEEK_SET);

  if (*seq == NULL) {
    outlen = 0;
    outmax = 500;
    sp = (char*) malloc(outmax);
  } else {
    outlen = strlen(*seq);
    outmax = outlen + 500;
    sp = (char*) realloc(*seq, outmax);
  }
  if (sp == NULL) {
    /* *seq is untouched and still valid; do not leak the port */
    SeqPortFree(spp);
    return;
  }
  *seq = sp;   /* keep the caller's pointer valid from here on */

  while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
    /* always keep one spare byte for the terminator */
    if (outlen + 1 >= outmax) {
      grown = (char*) realloc(sp, outmax + 500);
      if (grown == NULL) break;     /* keep what has been collected */
      sp = grown;
      outmax += 500;
      *seq = sp;
    }
    sp[outlen++] = (char) residue;
  }
  sp[outlen] = '\0';

  /* give back the unused tail; failure to shrink is harmless */
  grown = (char*) realloc(sp, outlen + 1);
  if (grown != NULL) sp = grown;

  *seq = sp;
  *seqlen = outlen;
  SeqPortFree(spp);
  return;
}
/*
 * MegaBlastPrintEndpoints
 *   Hit-list callback: writes one line per reported HSP of the current hit
 *   list to search->output, in the form
 *       'subject'=='[+-]query' (s_start q_start s_end q_end) score
 *
 *   ptr: the BlastSearchBlkPtr of the running search
 *
 *   HSPs that are NULL, exceed a positive cutoff_e, or whose query id is
 *   missing are skipped.  The query/subject offsets stored in each printed
 *   HSP are rewritten in place.  When gap_open and gap_extend are both 0
 *   the printed score is derived from the HSP lengths, reward and penalty;
 *   otherwise it is hsp->score.
 *
 *   Always returns 0.
 *
 *   Ownership: the subject descriptor and any separately allocated id
 *   strings are released through the pointer that owns the allocation,
 *   never through a token pointer returned by StringTokMT.
 */
static int LIBCALLBACK
MegaBlastPrintEndpoints(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   CharPtr subject_descr = NULL;   /* stays NULL when no database is used */
   CharPtr descr_owner = NULL;     /* start of the descriptor allocation */
   CharPtr subject_owner = NULL;   /* separately allocated subject text */
   CharPtr rest = NULL;            /* StringTokMT continuation, unused */
   SeqIdPtr sip, query_id;
   CharPtr query_buffer, query_owner, title;
   CharPtr subject_buffer = NULL;
   Int4 query_length, q_start, q_end, q_shift=0, s_shift=0;
   Int4 subject_end;
   Int4 hsp_index;
   Boolean numeric_sip_type = FALSE;
   BLAST_HSPPtr hsp;
   Int2 context;
   Char context_sign;
   Int4 subject_gi, score;
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL ||
       search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   if (search->rdfp)
      readdb_get_descriptor(search->rdfp, search->subject_id, &sip,
                            &subject_descr);
   else
      sip = SeqIdSetDup(search->subject_info->sip);
   descr_owner = subject_descr;

   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) {
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type =
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                  &subject_gi, &subject_buffer);
      /* as before, a buffer is only released for non-numeric ids */
      if (!numeric_sip_type)
         subject_owner = subject_buffer;
   } else {
      DbtagPtr db_tag = (DbtagPtr) sip->data.ptrvalue;
      /* NOTE(review): this branch is only entered when db equals
         "BL_ORD_ID", so the THC/TI test below looks unreachable; confirm. */
      if (db_tag->db &&
          (!StringCmp(db_tag->db, "THC") ||
           !StringICmp(db_tag->db, "TI")) &&
          db_tag->tag->id != 0) {
         subject_buffer = (CharPtr) Malloc(16);
         sprintf(subject_buffer, "%ld", (long) db_tag->tag->id);
         subject_owner = subject_buffer;
      } else if (subject_descr != NULL) {
         /* first token of the descriptor; it lives inside descr_owner */
         subject_buffer = StringTokMT(subject_descr, " \t", &rest);
      }
   }

   search->current_hitlist->hspcnt_max = search->current_hitlist->hspcnt;

   /* Only for the two sequences case, get offset shift if subject
      is a subsequence */
   if (!search->rdfp && search->query_slp->next) {
      s_shift = SeqLocStart(search->query_slp->next);
      subject_end = SeqLocStop(search->query_slp->next);
   } else {
      s_shift = 0;
      subject_end =
         readdb_get_sequence_length(search->rdfp, search->subject_id);
   }
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);

   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 &&
                        hsp->evalue > search->pbp->cutoff_e))
         continue;

      /* Correct query context is already found in
         BlastGetNonSumStatsEvalue */
      context = hsp->context;
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue;
      hsp->context = context & 1;
      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;
      hsp->subject.end = hsp->subject.offset + hsp->subject.length;

      if (hsp->context) {
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset =
            hsp->query.end - hsp->query.length + 1;
         context_sign = '-';
      } else {
         hsp->query.end = (++hsp->query.offset) + hsp->query.length - 1;
         if (hsp->query.end > query_length) {
            hsp->subject.end -= (hsp->query.end - query_length);
            hsp->query.end = query_length;
         }
         context_sign = '+';
      }

      if (hsp->subject.end > subject_end) {
         hsp->query.end -= (hsp->subject.end - subject_end);
         hsp->subject.end = subject_end;
      }
      hsp->subject.offset++;

      query_buffer = NULL;
      query_owner = NULL;
      if (query_id->choice == SEQID_LOCAL &&
          search->pbp->mb_params->full_seqids) {
         BioseqPtr query_bsp = BioseqLockById(query_id);
         query_owner = StringSave(BioseqGetTitle(query_bsp));
         if (query_owner)
            /* first token of the title; it lives inside query_owner */
            query_buffer = StringTokMT(query_owner, " ", &title);
         else if (query_bsp != NULL) {
            Int4 query_gi;
            GetAccessionFromSeqId(query_bsp->id, &query_gi,
                                  &query_buffer);
            query_owner = query_buffer;
         }
         BioseqUnlock(query_bsp);
      } else {
         query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         query_owner = query_buffer;
         if (!search->pbp->mb_params->full_seqids)
            SeqIdWrite(query_id, query_buffer, PRINTID_TEXTID_ACCESSION,
                       BUFFER_LENGTH);
         else
            SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
      }

      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                   search->sbp->reward / 2 - hsp->score) /
            (search->sbp->reward - search->sbp->penalty);
      else
         score = hsp->score;

      if (context_sign == '+') {
         q_start = hsp->query.offset;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset;
      }

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }

      hsp->subject.offset += s_shift;
      hsp->subject.end += s_shift;

      /* a missing id text is printed as an empty string, never as NULL */
      if (numeric_sip_type)
         fprintf(fp, "'%ld'=='%c%s' (%d %d %d %d) %d\n", (long) subject_gi,
                 context_sign, query_buffer ? query_buffer : "",
                 hsp->subject.offset, q_start,
                 hsp->subject.end, q_end, score);
      else
         fprintf(fp, "'%s'=='%c%s' (%d %d %d %d) %d\n",
                 subject_buffer ? subject_buffer : "",
                 context_sign, query_buffer ? query_buffer : "",
                 hsp->subject.offset, q_start,
                 hsp->subject.end, q_end, score);
      MemFree(query_owner);
   }

   MemFree(subject_owner);
   MemFree(descr_owner);
   sip = SeqIdSetFree(sip);
   return 0;
}
/*
 * MegaBlastPrintSegments
 *   Hit-list callback: for every reported HSP of the current hit list,
 *   builds a record with a header line, an "a {" block holding the score
 *   and begin/end coordinates, and one " l ..." line per ungapped segment
 *   with its rounded percent identity.  The record is written to
 *   search->output only when the overall identity reaches
 *   mb_params->perc_identity.
 *
 *   ptr: the BlastSearchBlkPtr of the running search
 *
 *   HSPs that are NULL, exceed a positive cutoff_e, or whose query id is
 *   missing are skipped.  The query offsets stored in each processed HSP
 *   are rewritten in place.
 *
 *   Returns 0 when there is no hit list to print, 1 otherwise.
 *
 *   Ownership: the subject descriptor and any separately allocated id
 *   strings are released through the pointer that owns the allocation,
 *   never through a token pointer returned by StringTokMT.
 */
static int LIBCALLBACK
MegaBlastPrintSegments(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   ReadDBFILEPtr rdfp = search->rdfp;
   BLAST_HSPPtr hsp;
   Int4 i, subject_gi;
   Int2 context;
   CharPtr query_buffer, query_owner, title;
   SeqIdPtr sip, query_id;
   Int4 hsp_index, score;
   Uint1Ptr query_seq, subject_seq = NULL;
   FloatHi perc_ident;
   Char strand;
   GapXEditScriptPtr esp;
   Int4 q_start, q_end, s_start, s_end, query_length, numseg;
   Int4 q_off, num_ident, align_length, total_ident, q_shift=0, s_shift=0;
   Int4Ptr length, start;
   Uint1Ptr strands;
   CharPtr subject_descr = NULL;   /* stays NULL when no database is used */
   CharPtr descr_owner = NULL;     /* start of the descriptor allocation */
   CharPtr subject_owner = NULL;   /* separately allocated subject text */
   CharPtr rest = NULL;            /* StringTokMT continuation, unused */
   CharPtr subject_buffer = NULL, buffer;
   Char tmp_buffer[BUFFER_LENGTH];
   Int4 buffer_size, max_buffer_size = LARGE_BUFFER_LENGTH;
   Boolean numeric_sip_type = FALSE;
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL ||
       search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   subject_seq = search->subject->sequence_start + 1;

   if (rdfp)
      readdb_get_descriptor(rdfp, search->subject_id, &sip, &subject_descr);
   else
      sip = SeqIdSetDup(search->subject_info->sip);
   descr_owner = subject_descr;

   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) {
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type =
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                  &subject_gi, &subject_buffer);
      /* as before, a buffer is only released for non-numeric ids */
      if (!numeric_sip_type)
         subject_owner = subject_buffer;
   } else if (subject_descr != NULL) {
      /* first token of the descriptor; it lives inside descr_owner */
      subject_buffer = StringTokMT(subject_descr, " \t", &rest);
   }

   buffer = (CharPtr) Malloc(LARGE_BUFFER_LENGTH);

   /* Only for the two sequences case, get offset shift if subject
      is a subsequence */
   if (!rdfp && search->query_slp->next)
      s_shift = SeqLocStart(search->query_slp->next);
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);

   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 &&
                        hsp->evalue > search->pbp->cutoff_e)) {
         continue;
      }
      context = hsp->context;
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue;
      hsp->context = context & 1;

      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                  search->sbp->reward / 2 - hsp->score) /
            (search->sbp->reward - search->sbp->penalty);
      else
         score = hsp->score;

      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;

      q_off = hsp->query.offset;
      if (hsp->context) {
         strand = '-';
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset =
            hsp->query.end - hsp->query.length;
      } else {
         strand = '+';
         hsp->query.end = hsp->query.offset + hsp->query.length;
      }

      if (strand == '+') {
         q_start = hsp->query.offset + 1;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset + 1;
      }
      s_start = hsp->subject.offset + 1;
      s_end = hsp->subject.offset + hsp->subject.length;

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }
      s_start += s_shift;
      s_end += s_shift;

      query_buffer = NULL;
      query_owner = NULL;
      if (query_id->choice == SEQID_LOCAL &&
          search->pbp->mb_params->full_seqids) {
         BioseqPtr query_bsp = BioseqLockById(query_id);
         query_owner = StringSave(BioseqGetTitle(query_bsp));
         if (query_owner)
            /* first token of the title; it lives inside query_owner */
            query_buffer = StringTokMT(query_owner, " ", &title);
         else if (query_bsp != NULL) {
            Int4 query_gi;
            GetAccessionFromSeqId(query_bsp->id, &query_gi,
                                  &query_buffer);
            query_owner = query_buffer;
         }
         BioseqUnlock(query_bsp);
      } else {
         query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         query_owner = query_buffer;
         if (!search->pbp->mb_params->full_seqids)
            SeqIdWrite(query_id, query_buffer, PRINTID_TEXTID_ACCESSION,
                       BUFFER_LENGTH);
         else
            SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
      }

      /* a missing id text is printed as an empty string, never as NULL */
      if (numeric_sip_type)
         sprintf(buffer, "\n#'>%ld'=='%c%s' (%d %d %d %d) %d\na {\n s %d\n b %d %d\n e %d %d\n",
                 (long) subject_gi, strand,
                 query_buffer ? query_buffer : "",
                 s_start, q_start, s_end, q_end, score, score,
                 s_start, q_start, s_end, q_end);
      else
         sprintf(buffer, "\n#'>%s'=='%c%s' (%d %d %d %d) %d\na {\n s %d\n b %d %d\n e %d %d\n",
                 subject_buffer ? subject_buffer : "", strand,
                 query_buffer ? query_buffer : "",
                 s_start, q_start, s_end, q_end, score, score,
                 s_start, q_start, s_end, q_end);
      buffer_size = StringLen(buffer);

      query_seq = search->context[context].query->sequence;

      /* count the edit-script segments of this HSP */
      esp = hsp->gap_info->esp;
      for (numseg=0; esp; esp = esp->next, numseg++);

      GXECollectDataForSeqalign(hsp->gap_info, hsp->gap_info->esp, numseg,
                                &start, &length, &strands,
                                &q_off, &hsp->subject.offset);

      /* clip the first segment at the query start ... */
      if (start[0] < 0) {
         length[0] += start[0];
         start[1] -= start[0];
         start[0] = 0;
      }
      /* ... and the last segment at the query end */
      if (start[2*(numseg-1)] + length[numseg-1] > query_length)
         length[numseg-1] = query_length - start[2*(numseg-1)];

      total_ident = 0;
      align_length = 0;
      for (i=0; i<numseg; i++) {
         align_length += length[i];
         if (strand == '+') {
            q_start = start[2*i] + 1;
            q_end = q_start + length[i] - 1;
         } else {
            q_start = query_length - start[2*i];
            q_end = q_start - length[i] + 1;
         }
         /* segments with a -1 start on either sequence get no line */
         if (start[2*i] != -1 && start[2*i+1] != -1) {
            num_ident = MegaBlastGetNumIdentical(query_seq, subject_seq,
                                                 start[2*i], start[2*i+1],
                                                 length[i], FALSE);
            perc_ident = (FloatHi) num_ident / length[i] * 100;
            total_ident += num_ident;
            sprintf(tmp_buffer, " l %d %d %d %d (%.0f)\n",
                    start[2*i+1]+1, q_start,
                    start[2*i+1]+length[i],
                    q_end, perc_ident);
            if ((buffer_size += StringLen(tmp_buffer)) >
                max_buffer_size - 2) {
               /* NOTE(review): the Realloc result is not checked */
               max_buffer_size *= 2;
               buffer = (CharPtr) Realloc(buffer, max_buffer_size);
            }
            StringCat(buffer, tmp_buffer);
         }
      }
      if (100*total_ident >=
          align_length*search->pbp->mb_params->perc_identity) {
         StringCat(buffer, "}");
         fprintf(fp, "%s\n", buffer);
      }
      MemFree(start);
      MemFree(length);
      MemFree(strands);
      MemFree(query_owner);
   } /* End loop on hsp's */

   MemFree(subject_owner);
   MemFree(descr_owner);
   MemFree(buffer);

   sip = SeqIdSetFree(sip);
   fflush(fp);
   return 1;
}