/*
 * Lock the Bioseq named by a GI number or an accession string.
 *
 * str : identifier text.  It is first passed through
 *       TrimSpacesAroundString().  If the first remaining character is a
 *       letter the text is handed to SeqIdFromAccessionDotVersion();
 *       otherwise it is converted with atoi() and used as a GI.
 *
 * Returns the value of BioseqLockById(), or NULL when str is NULL or empty
 * or when no Seq-id could be built from the accession.  A non-NULL result
 * is locked; release it with BioseqUnlock().
 */
static BioseqPtr DOT_GetBspFromGIOrAcc(CharPtr str)
{
  BioseqPtr bsp = NULL;
  SeqIdPtr  sip;
  ValNode   vn;
  Char      first;

  if (str == NULL)
    return NULL;

  str = TrimSpacesAroundString(str);
  if (str == NULL || *str == '\0')
    return NULL;

  first = *str;
  if (IS_ALPHA(first)) /* accession */
    {
      sip = SeqIdFromAccessionDotVersion(str);
      if (sip == NULL)
        return NULL;
      bsp = BioseqLockById(sip);
      /* The id was only needed for the lookup; release it so that it
         does not leak on every call. */
      SeqIdFree(sip);
    }
  else /* it's a GI */
    {
      /* A stack ValNode is enough to act as a temporary GI Seq-id. */
      vn.choice = SEQID_GI;
      vn.data.intvalue = atoi(str);
      vn.next = NULL;
      bsp = BioseqLockById(&vn);
    }

  return bsp;
}
/*
 * Run the dust filter over a single Seq-loc interval.
 *
 * slp : location to filter; only SEQLOC_INT is accepted.
 * ddp : dust parameters, forwarded unchanged to DustBioseq().
 *
 * Returns the region list produced by DustBioseq(), or NULL when an
 * argument is NULL, the location is not an interval, or the Bioseq cannot
 * be locked.  The last two cases post an error and call ErrShow().
 */
extern DustRegionPtr DustSeqLoc (SeqLocPtr slp, DustDataPtr ddp)
{
	BioseqPtr     locked_bsp;
	DustRegionPtr regions;
	Int4          from;
	Int4          to;

	if (slp == NULL || ddp == NULL)
	{
		return NULL;
	}

	if (slp->choice != SEQLOC_INT)
	{
		ErrPostEx (SEV_ERROR, 2, 1, "Will only dust a single SeqLoc interval");
		ErrShow ();
		return NULL;
	}

	locked_bsp = BioseqLockById (SeqLocId (slp));
	if (locked_bsp == NULL)
	{
		ErrPostEx (SEV_ERROR, 2, 5, "Bioseq lock failure");
		ErrShow ();
		return NULL;
	}

	/* The Bioseq stays locked only while DustBioseq() reads it. */
	from = SeqLocStart (slp);
	to = SeqLocStop (slp);
	regions = DustBioseq (locked_bsp, from, to, ddp);
	BioseqUnlock (locked_bsp);

	return regions;
}
/** Builds a printable identifier string for a sequence.
 *
 * @param sip            Seq-id (chain) to describe; NULL yields a NULL result.
 * @param buffer_ptr     Receives the resulting string, or NULL when nothing
 *                       could be produced.  The string is heap allocated and
 *                       this function keeps no reference to it.
 * @param ncbi_gi        Prefer the GI when looking up the id.
 * @param accession_only Print only accession.version.
 * @param search_for_id  When FALSE the Seq-id itself is not printed and the
 *                       result comes from the Bioseq title fallback.
 *
 * Selection order:
 *  1. If search_for_id is set and the id is not a "BL_ORD_ID" general id:
 *     the full FASTA id (no flags, or a local id), else the best accession
 *     (accession_only), else the best GI id via GetAccessionFromSeqId().
 *  2. A numeric id with a positive GI is printed as a decimal number.
 *  3. Otherwise the first whitespace-delimited token of the Bioseq title,
 *     or of "No definition line found" when the Bioseq has no title.
 */
void Blast_SeqIdGetDefLine(SeqId* sip, char** buffer_ptr, Boolean ncbi_gi,
                           Boolean accession_only, Boolean search_for_id)
{
   char* seqid_buffer = NULL;
   Int4 gi = 0;
   Boolean numeric_id_type = FALSE;

   if (buffer_ptr == NULL)
      return;
   *buffer_ptr = NULL;

   if (sip == NULL)
      return;

   /* Check for ad hoc ID's generated by formatdb if the user does not
      provide any. */
   if (search_for_id && (sip->choice != SEQID_GENERAL ||
        StringCmp(((Dbtag*)sip->data.ptrvalue)->db, "BL_ORD_ID"))) {
      if ((!accession_only && !ncbi_gi) || sip->choice == SEQID_LOCAL) {
         seqid_buffer = (char*) malloc(BUFFER_LENGTH + 1);
         if (seqid_buffer)
            SeqIdWrite(sip, seqid_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else if (accession_only) {
         seqid_buffer = (char*) malloc(BUFFER_LENGTH + 1);
         if (seqid_buffer)
            SeqIdWrite(SeqIdFindBestAccession(sip), seqid_buffer,
                       PRINTID_TEXTID_ACC_VER, BUFFER_LENGTH);
      } else {
         /* Only ncbi_gi can be set here: the first test failed, so one of
            the two flags is on, and accession_only was just ruled out. */
         numeric_id_type =
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                  &gi, &seqid_buffer);
      }
   }

   if (numeric_id_type && gi > 0) {
      /* 16 bytes hold any 32-bit value in decimal plus sign and NUL. */
      char* gi_buffer = (char*) malloc(16);
      if (gi_buffer) {
         sprintf(gi_buffer, "%ld", (long) gi);
         seqid_buffer = gi_buffer;
      }
   }

   if (!seqid_buffer) {
      /* If it's still NULL make a last ditch effort to get info. */
      Bioseq* bsp = BioseqLockById(sip);

      if (bsp) {
         const char* bsp_title = BioseqGetTitle(bsp);
         char* title =
            strdup(bsp_title ? bsp_title : "No definition line found");

         BioseqUnlock(bsp);

         if (title) {
            /* Use first token as id.  The token may start after the
               beginning of the allocation (e.g. leading delimiters), in
               which case the caller could not free it; hand back a pointer
               that is itself the start of an allocation. */
            char* rest = NULL;
            char* token = StringTokMT(title, " \t\n\r", &rest);

            if (token == title) {
               seqid_buffer = title;
            } else {
               if (token)
                  seqid_buffer = strdup(token);
               free(title);
            }
         }
      }
   }

   *buffer_ptr = seqid_buffer;
}
/******************************************************************************* Function : DDV_DrawSequenceName() Purpose : draw the name of the sequence (left column of the DDV panel) Parameters : GrData; graphical data (font size, etc) ScaleStyle;style of the ParaG scale top, left; coord to start the draw Return value : none *******************************************************************************/ static void DDV_DrawSequenceName(UnDViewerGraphDataPtr GrData,ParaGPtr pgp, Int2 top,Int2 left,Int4 cur_row,Int4 CurEditRow,Int4 CurMasterRow) { SeqIdPtr sip = NULL; RecT rc; Int2 x,y,decal=1,size;/*text position/size*/ Char szAccess[21]; BioseqPtr bsp; /*get a name*/ bsp = BioseqLockById(pgp->sip); if(bsp) { sip = SeqIdFindBestAccession(bsp->id); BioseqUnlock(bsp); } if (!sip) sip = SeqIdFindBest(pgp->sip, 0); SeqIdWrite(sip, szAccess,PRINTID_TEXTID_ACCESSION, 20); /*compute position*/ if (pgp->ScaleStyle==SCALE_POS_TOP) decal++; /*draw name*/ size=StringWidth(szAccess); x=left/*-GrData->udv_scale.cxLeftScale*/-size; y=top+decal*GrData->udv_font.LineHeight; MoveTo(x,y); if (cur_row==CurEditRow){ Magenta(); } PaintString (szAccess); if (cur_row==CurMasterRow){ Blue(); MoveTo(x,y); LineTo(x+size,y); } /*draw a little box (for selection a full sequence)*/ left+=GrData->udv_font.cxChar; top+=GrData->udv_font.cxChar/2; LoadRect(&rc,left,top,left+GrData->udv_font.cxChar, top+GrData->udv_font.cxChar); Blue(); PaintOval(&rc); Black(); }
/** Writes the acknowledgement of a query sequence followed by a notice
 * that the query produced no hits.
 * @param slp Seq-loc of the query; its Bioseq is locked for the duration
 *            of the acknowledgement
 * @param format_options Supplies the believe_query and html flags
 * @param format_info Formatting state; when head_on_every_query is TRUE the
 *                    output header and the database information are printed
 *                    around the acknowledgement
 * @param outfp File to which the acknowledgement and the notice are written
 */
static void
s_AcknowledgeEmptyResults(SeqLoc *slp, BlastFormattingOptions* format_options,
                          const BlastFormattingInfo* format_info, FILE *outfp)
{
   enum { kAckLineLength = 70 };
   Bioseq *query_bsp;

   query_bsp = BioseqLockById(SeqLocId(slp));

   if (format_info->head_on_every_query == TRUE) {
      BLAST_PrintOutputHeader(format_info);
   }

   init_buff_ex(kAckLineLength);
   AcknowledgeBlastQuery(query_bsp, kAckLineLength, outfp,
                         format_options->believe_query,
                         format_options->html);
   free_buff();
   BioseqUnlock(query_bsp);

   if (format_info->head_on_every_query == TRUE) {
      s_BLAST_PrintDatabaseInfo(format_info);
      /* Note: this line goes to format_info->outfp, not to outfp. */
      fputs("Searching..................................................done\n\n",
            format_info->outfp);
   }

   fputs(" ***** No hits found ******\n\n\n", outfp);
}
/*
 * Retrieve a private copy of the Bioseq named by "accession[.version]" or
 * by a GI number.
 *
 * accver : identifier text.  For an accession the text is tokenised in
 *          place with StringTokMT() at the first '.'.
 * is_na  : TRUE when a nucleotide sequence is required, FALSE for protein.
 *
 * An accession is resolved to a GI through ID1FindSeqId(), trying the id
 * types GenBank, SwissProt, PIR, PRF, "other" and finally PDB.  The GI is
 * then looked up in the "nucl_dbs"/"prot_dbs" BLAST databases and, failing
 * that, through ID1 again.
 *
 * Returns a packed copy made with AsnIoMemCopy(), or NULL when the
 * identifier cannot be resolved, the molecule type does not match is_na,
 * or an allocation fails.  ID1 fetching is disabled again on every path
 * out of the function.
 */
static BioseqPtr BioseqFromAccession(CharPtr accver, Boolean is_na)
{
   CharPtr accession, version_str = NULL;
   Int4 version=0, gi;
   SeqIdPtr sip = NULL;
   TextSeqIdPtr tsip;
   PDBSeqIdPtr  psip;
   BioseqPtr bsp = NULL, bsp_tmp = NULL;

   if (accver == NULL)
      return NULL;

   if (!ID1BioseqFetchEnable ("bl2seq", TRUE))
      ErrPostEx(SEV_FATAL, 1, 0,
                "Entrez access interface currently unavailable\n");

   if (!IS_DIGIT(*accver)) {
      accession = StringTokMT(accver, ".", &version_str);
      if (version_str)
         version = atoi(version_str);

      if((sip = ValNodeNew (NULL)) == NULL) {
         ID1BioseqFetchDisable();
         return NULL;
      }
      if((tsip = TextSeqIdNew ()) == NULL) {
         /* The node carries no payload yet, so a plain free is enough. */
         MemFree(sip);
         ID1BioseqFetchDisable();
         return NULL;
      }

      tsip->accession = StringSave(accession);
      tsip->version = version;

      /* GenBank, EMBL, and DDBJ. */
      sip->choice = SEQID_GENBANK;
      sip->data.ptrvalue = (Pointer) tsip;
      gi = ID1FindSeqId (sip);

      if (gi == 0) {
         /* SwissProt. */
         sip->choice = SEQID_SWISSPROT;
         gi = ID1FindSeqId (sip);
      }
      if (gi == 0) {
         /* PIR */
         sip->choice = SEQID_PIR;
         gi = ID1FindSeqId (sip);
      }
      if (gi == 0) {
         /* PRF */
         sip->choice = SEQID_PRF;
         gi = ID1FindSeqId (sip);
      }
      if (gi == 0) {
         /* OTHER, probably 'ref' */
         sip->choice = SEQID_OTHER;
         gi = ID1FindSeqId (sip);
      }

      if(gi == 0) {
         /* OK. We failed to find gi using string as TextSeqId. Now trying
            last time - with PDBSeqIdPtr */
         if((psip = PDBSeqIdNew()) == NULL) {
            /* sip still owns tsip here; free both together. */
            SeqIdFree(sip);
            ID1BioseqFetchDisable();
            return NULL;
         }

         sip->choice = SEQID_PDB;
         tsip = TextSeqIdFree(tsip);
         sip->data.ptrvalue = psip;

         /* Store a private copy: 'accession' points into the caller's
            buffer, and the id (with its mol string) is released through
            SeqIdFree() below. */
         psip->mol = StringSave(accession);
         psip->chain = version;

         gi = ID1FindSeqId (sip);
      }

      if (gi == 0) {
         ErrPostEx(SEV_WARNING, 0, 0, "Sequence %s not found\n", accver);
         SeqIdFree(sip);
         ID1BioseqFetchDisable();
         return NULL;
      }
      sip = SeqIdFree(sip);
   } else
      gi = atoi(accver);

   ID1BioseqFetchDisable();

   if (gi > 0) {
      /* First attempt to retrieve Bioseq from BLAST databases. */
      char* db_name = (is_na ? "nucl_dbs" : "prot_dbs");

      ValNodeAddInt(&sip, SEQID_GI, gi);
      ReadDBBioseqFetchEnable ("bl2seq", db_name, is_na, TRUE);
      bsp_tmp = BioseqLockById(sip);
      ReadDBBioseqFetchDisable();

      if (!bsp_tmp) {
         /* Try ID1 again as a last resort. */
         ID1BioseqFetchEnable("bl2seq", TRUE);
         bsp_tmp = BioseqLockById(sip);
         ID1BioseqFetchDisable();
      }
      sip = SeqIdFree(sip);
   }

   if (!bsp_tmp) {
      /* gi is an Int4; widen it explicitly to match %ld. */
      ErrPostEx(SEV_WARNING, 0, 0, "Gi %ld not found", (long) gi);
      return NULL;
   }

   if (ISA_na(bsp_tmp->mol) != is_na) {
      BioseqUnlock(bsp_tmp);
      if (is_na)
         ErrPostEx(SEV_FATAL, 1, 0,
                   "%s is a protein sequence, program requires nucleotide",
                   accver);
      else
         ErrPostEx(SEV_FATAL, 1, 0,
                   "%s is a nucleotide sequence, program requires protein",
                   accver);
      return NULL;
   }

   /* Hand back an independent copy; the fetched original is removed from
      the Bioseq index and unlocked. */
   bsp = AsnIoMemCopy(bsp_tmp, (AsnReadFunc) BioseqAsnRead,
                      (AsnWriteFunc) BioseqAsnWrite);
   SeqMgrDeleteFromBioseqIndex(bsp_tmp);
   BioseqUnlock(bsp_tmp);

   if (bsp != NULL)
      BioseqPack(bsp);

   return bsp;
}
/*********************************************************************
*
*	make_cds_paragraph(sfp, aa_start, aa_stop)
*	return a buffer for the display of 3-codon under one amino
*	acid format. It also includes the new line characters
*	This is what Jonathan K. desires to have for the sequin
*	doc object
*	sfp: the feature; its data choice must be 3 and it must have a
*	product, otherwise NULL is returned
*	aa_start, aa_stop: start and stop in the amino acid sequence
*
*	Layout of the returned text: one labelled line with the amino
*	acids, then three labelled lines per entry returned by
*	aa_to_codon() (one per codon position), then an empty line.
*
*	The buffer is obtained from MemNew(); NULL is returned when the
*	product Bioseq cannot be locked or the buffer cannot be allocated.
*
*********************************************************************/
NLM_EXTERN CharPtr make_cds_paragraph(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
	BioseqPtr pbsp;
	SeqPortPtr spp;
	ValNodePtr cvp_node, curr;
	CodonVectorPtr cvp;

	CharPtr docbuf = NULL;
	Int4 num, buf_size;
	Uint1 residue;
	Char p_name[30];
	Int4 i;
	CharPtr buf;
	Int4 pos;
	Int4 max_len = 150;
	Int4 aa_span, line_cap;
	Boolean extra_space;

	if(sfp == NULL || sfp->data.choice !=3)
		return NULL;
	if(sfp->product == NULL)
		return NULL;

	pbsp = BioseqLockById(SeqLocId(sfp->product));
	if(pbsp == NULL)
		return NULL;

	cvp_node = aa_to_codon(sfp, aa_start, aa_stop);

	/* one amino acid line plus three lines per codon vector */
	num = 1;
	for(curr = cvp_node; curr !=NULL; curr = curr->next)
		num +=3;

	/* Every line holds a label plus sequence text.  The sequence part can
	   be as long as the requested range (aa_to_codon() sizes its rows at
	   the range length + 2), so reserve that on top of the historical
	   150 bytes per line instead of assuming the range is short. */
	aa_span = aa_stop - aa_start + 1;
	if(aa_span < 0)
		aa_span = 0;
	line_cap = max_len + aa_span + 3;
	buf_size = num * line_cap;

	docbuf = MemNew((size_t)(buf_size) * sizeof(Char));
	if(docbuf == NULL)
	{
		free_cvp_list(cvp_node);
		BioseqUnlock(pbsp);
		return NULL;
	}

	MuskSeqIdWrite(pbsp->id, p_name, B_SPACE, PRINTID_TEXTID_ACCESSION, TRUE, FALSE);
	pos = 0;
	pos+= print_label_to_buffer(docbuf+pos, p_name, (aa_start+1), 0, FALSE, FALSE, B_SPACE, POS_SPACE);

	/*print the amino acid sequence into buffer*/
	spp = SeqPortNew(pbsp, aa_start, aa_stop, Seq_strand_plus, Seq_code_ncbieaa);
	if(spp != NULL)
	{
		while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF )
			docbuf[pos++] = residue;
		SeqPortFree(spp);
	}
	docbuf[pos++] = '\n';

	for(curr = cvp_node; curr !=NULL; curr = curr->next)
	{
		cvp = curr->data.ptrvalue;
		SeqIdWrite (cvp->sip, p_name, PRINTID_FASTA_SHORT, 10);
		extra_space = (cvp->aa_index == 0);
		for(i=0; i<3; ++i)
		{
			/* skip the leading column when the vector starts at index 1 */
			buf = cvp->buf[i] + cvp->aa_index;
			if(i == cvp->frame)
			{
				/* only the row matching the frame carries the id/position label */
				pos+= print_label_to_buffer(docbuf+pos, p_name, cvp->dna_pos, cvp->strand, extra_space, FALSE, B_SPACE, POS_SPACE);
			}
			else
				pos+= print_label_to_buffer(docbuf+pos, NULL, -1, 0, extra_space, FALSE, B_SPACE, POS_SPACE);
			sprintf(docbuf+pos, "%s\n", buf);
			pos += (StringLen(buf) +1);
		}
	}

	docbuf[pos++] = '\n';
	docbuf[pos] = '\0';
	free_cvp_list(cvp_node);
	BioseqUnlock(pbsp);
	return docbuf;
}
/******************************************************************
*
*	aa_to_codon(sfp, aa_start, aa_stop)
*	generate a list of CodonVector to show the codons of an
*	amino acid sequence
*	sfp: the Seq-feat for cds
*	aa_start: the start position of protein sequence
*	aa_stop the stop position of protein sequence
*
*	Returns a ValNode list; each node's data pointer is a
*	CodonVector built for one sub-location of sfp->location that
*	overlaps [aa_start, aa_stop] and whose Bioseq could be locked.
*	Each CodonVector gets a duplicated Seq-id, the strand, the exon
*	count, a frame, an aa_index, a dna_pos and three saved strings
*	(one per codon position).
*	Returns NULL when the feature choice is not 3, when there is no
*	CdRegion data or when the feature has no location.
*
*	NOTE(review): sfp itself is dereferenced without a NULL check,
*	and the MemNew()/SeqPortNewByLoc() results are used unchecked.
*
******************************************************************/
NLM_EXTERN ValNodePtr aa_to_codon(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
	BioseqPtr bsp;
	Int4 frame_offset, start_offset;
	SeqLocPtr slp = NULL;
	SeqLocPtr cdloc;
	CdRegionPtr crp;
	Uint1 frame;
	Boolean is_end;	/* set once the interval containing aa_stop has been reached */
	Int4 p_start=0, p_stop=0;	/* first and last protein position covered by the current sub-location */
	Int4 line_len;
	Int4 cur_pos;	/* protein position currently being processed */
	Int4 cd_len;	/* accumulated length of the sub-locations visited so far */
	Int2 i, j;
	Int2 k, n;
	CharPtr PNTR buf;
	Boolean is_new;	/* TRUE when cur_pos is the first position of the current sub-location */
	CharPtr temp;
	SeqPortPtr spp;
	Uint1 residue;
	Boolean end_partial;
	Int4 d_start, seq_pos;
	Int2 pos;
	ValNodePtr head= NULL;
	CodonVectorPtr cvp;
	Boolean prt_stop_codon;
	Uint2 exon;

	if(sfp->data.choice !=3)
		return NULL;
	crp = sfp->data.value.ptrvalue;
	if(!crp)
		return NULL;
	frame = crp->frame;
	cdloc = sfp->location;
	if(cdloc == NULL )
		return NULL;

	/* frame 0 and frame 1 both give offset 0; frame 2 -> 1, frame 3 -> 2 */
	if(frame>0)
		frame_offset = frame-1;
	else
		frame_offset = 0;
	start_offset = frame_offset;

	/* TRUE when the request ends at the stop position of the product location */
	prt_stop_codon = (aa_stop == SeqLocStop(sfp->product));
	line_len = (aa_stop - aa_start + 1) + 1; /* +1 for the possible partial start codon*/
	if(prt_stop_codon)/*can be either as a stop codon or partial stop*/
		++line_len;

	/* three work rows, one per codon position, each line_len characters plus NUL */
	buf = MemNew((size_t)3 * sizeof(CharPtr));
	for(i =0; i<3; ++i)
		buf[i] = MemNew((size_t)(line_len + 1) * sizeof (Char));

	cur_pos= aa_start;
	cd_len = 0;
	is_end = FALSE;
	p_start = 0;
	slp = NULL;
	exon = 0;
	/* walk the sub-locations of the coding region in order */
	while(!is_end && ((slp = SeqLocFindNext(cdloc, slp))!=NULL))
	{
		++exon;
		cd_len += SeqLocLen(slp);
		/* TRUE when the bases seen so far do not end on a codon boundary */
		end_partial = ((cd_len - start_offset)%3 != 0);
		p_stop = (cd_len - start_offset)/3 -1;
		if(end_partial)
			++p_stop;
		if(p_stop > aa_stop || (p_stop == aa_stop && !end_partial))
		{
			p_stop = aa_stop; /* clamp to the requested end; this is the last interval */
			is_end = TRUE;
		}
		if(p_stop >= cur_pos) /*get the exon*/
		{
			bsp = BioseqLockById(SeqLocId(slp));
			if(bsp)
			{
				is_new = (p_start == cur_pos); /*start a new exon?*/
				cvp = MemNew(sizeof(CodonVector));
				cvp->sip = SeqIdDup(find_sip(bsp->id));
				cvp->strand = SeqLocStrand(slp);
				cvp->exonCount = exon;
				if(is_new)
				{
					if(frame_offset == 0)
						cvp->frame = 0;
					else
						cvp->frame = 3- (Uint1)frame_offset;
				}
				else
					cvp->frame = 0;

				if(cur_pos==0 && frame_offset > 0) /*partial start codon*/
					cvp->aa_index = 0;
				else
					cvp->aa_index = 1;

				if(is_new) /* start reading at the first base of the sub-location */
					d_start = SeqLocStart(slp);
				else
				{
					if(frame_offset && p_start >0)
						++p_start;
					d_start = SeqLocStart(slp) + 3*(cur_pos - p_start) + frame_offset;
				}
				/**p_start is the start position of aa in the current Seq-loc
				   cur_pos is the current aa that is in process. The offset will
				   help to locate the position on the DNA Seq-loc for translation
				   d_start is the position of the starting DNA in the coordinates
				   of the DNA segment, used to mark the sequence
				**/
				seq_pos = d_start - SeqLocStart(slp); /* offset passed to SeqPortSeek() */
				if(SeqLocStrand(slp)== Seq_strand_minus)
					d_start = SeqLocStop(slp) - seq_pos;
				cvp->dna_pos = d_start;

				n = (Int2)cur_pos - (Int2)aa_start + cvp->aa_index; /*position in buffer*/

				/* reset the three work rows before filling them for this interval */
				for(i =0; i<3; ++i)
					make_empty(buf[i], (Int2)line_len);
				spp = SeqPortNewByLoc(slp, Seq_code_iupacna);
				SeqPortSeek(spp, seq_pos, SEEK_SET);
				/**store the partial codons**/
				if(is_new && frame_offset > 0)
				{
					k = (Int2)frame_offset;
					while(k > 0)
					{
						residue = SeqPortGetResidue(spp);
						temp = buf[3-k]; /* the leftover bases fill the last rows of the column */
						pos = n;
						temp[pos] = TO_LOWER(residue);
						--k;
					}
					++n;
					if(cur_pos!=0)
						++cur_pos;
				}

				/**load the codons**/
				k =0;
				while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF && cur_pos <= p_stop)
				{
					j= (Uint1)k%3;
					temp = buf[j];
					temp[n] = TO_LOWER(residue);
					if(j ==2)
					{ /**the last base**/
						++n;
						/* When the stop codon is to be shown on the last interval,
						   cur_pos is not advanced, so reading continues until
						   SeqPortGetResidue() reports SEQPORT_EOF. */
						if(!prt_stop_codon|| !is_end) /*for the last codon*/
							++cur_pos;
					}
					++k;
				} /**end of while**/
				SeqPortFree(spp);
				/* the vector keeps its own copies; the work rows are reused */
				for(i =0; i<3; ++i)
					cvp->buf[i] = StringSave(buf[i]);
				ValNodeAddPointer(&head, 0, (Pointer)cvp);
				BioseqUnlock(bsp);
			}/*end of if(bsp)*/
		}/**end of if for matched intervals**/

		/* prepare the protein start and the frame offset for the next sub-location */
		if(end_partial)
			p_start = p_stop;
		else
			p_start = p_stop +1;

		frame_offset = (cd_len - start_offset)%3;
		if(frame_offset >0)
			frame_offset = 3-frame_offset;
	}/**end of while(slp && !is_end) **/

	for(i=0; i<3; ++i)
		MemFree(buf[i]);
	MemFree(buf);

	return head;
}
/*
 * Dust every sub-location of a Seq-loc and collect the results.
 *
 * this_slp : location to filter; its pieces are enumerated with
 *            SeqLocFindNext().
 * level, window, minwin, linker : passed to dust_segs() widened to Int4.
 *
 * Pieces that have no id, whose Bioseq cannot be locked, that are not
 * nucleic acid, or for which no sequence port can be opened are reported
 * through ErrPostEx()/ErrShow() and skipped.
 *
 * Returns the location built up by slpDust(), or NULL when this_slp is
 * NULL, the region list cannot be allocated, or nothing was produced.
 */
SeqLocPtr SeqLocDust (SeqLocPtr this_slp, Int2 level, Int2 window, Int2 minwin, Int2 linker)
{
	SeqLocPtr     piece;
	SeqLocPtr     result = NULL;
	ValNodePtr    vnp = NULL;
	SeqIdPtr      id;
	BioseqPtr     bsp;
	SeqPortPtr    spp;
	DREGION PNTR  reg_head;
	DREGION PNTR  reg_tail;
	DREGION PNTR  reg_next;
	Int4          nreg;
	Int4          start, end, l;
	Int2          loopDustMax = 0;

	/* error msg stuff */
	ErrSetOptFlags (EO_MSG_CODES);

	if (this_slp == NULL)
	{
		ErrPostEx (SEV_ERROR, 2, 1, "no sequence location given for dusting");
		ErrShow ();
		return result;
	}

	/* head node of the list of dusted regions */
	reg_head = MemNew (sizeof (DREGION));
	reg_tail = reg_head;
	if (reg_head == NULL)
	{
		ErrPostEx (SEV_FATAL, 2, 2, "memory allocation error");
		ErrShow ();
		return result;
	}
	reg_head->from = 0;
	reg_head->to = 0;
	reg_head->next = NULL;

	/* first pass: count the pieces */
	for (piece = SeqLocFindNext (this_slp, NULL);
	     piece != NULL;
	     piece = SeqLocFindNext (this_slp, piece))
	{
		loopDustMax++;
	}
	if (loopDustMax == 0)
	{
		ErrPostEx (SEV_ERROR, 2, 3, "can not find next seq loc");
		ErrShow ();
	}

	/* second pass: dust each piece */
	for (piece = SeqLocFindNext (this_slp, NULL);
	     piece != NULL;
	     piece = SeqLocFindNext (this_slp, piece))
	{
		/* offsets into actual sequence */
		start = SeqLocStart (piece);
		end = SeqLocStop (piece);

		id = SeqLocId (piece);
		if (id == NULL)
		{
			ErrPostEx (SEV_ERROR, 2, 4, "no bioseq id");
			ErrShow ();
			continue;
		}

		bsp = BioseqLockById (id);
		if (bsp == NULL)
		{
			ErrPostEx (SEV_ERROR, 2, 5, "no bioseq");
			ErrShow ();
			continue;
		}

		if (!ISA_na (bsp->mol))
		{
			ErrPostEx (SEV_WARNING, 2, 6, "not nucleic acid");
			ErrShow ();
			BioseqUnlock (bsp);
			continue;
		}

		/* the lock is dropped as soon as the port has been opened */
		spp = SeqPortNew (bsp, start, end, 0, Seq_code_ncbi2na);
		BioseqUnlock (bsp);
		if (spp == NULL)
		{
			ErrPostEx (SEV_ERROR, 2, 7, "sequence port open failed");
			ErrShow ();
			continue;
		}

		l = spp->totlen;
		nreg = dust_segs (l, spp, start, reg_tail,
		                  (Int4)level, (Int4)window, (Int4)minwin, (Int4)linker);
		result = slpDust (spp, result, id, &vnp, reg_tail, nreg, loopDustMax);

		/* advance to the tail so the next piece starts from the last node */
		while (reg_tail->next != NULL)
		{
			reg_tail = reg_tail->next;
		}

		SeqPortFree (spp);
	}

	/* clean up memory: release the whole region list from its head */
	for (reg_tail = reg_head; reg_tail != NULL; reg_tail = reg_next)
	{
		reg_next = reg_tail->next;
		MemFree (reg_tail);
	}

	return result;
}
/*
 * Hit-list callback: print one line per HSP of the current hit list giving
 * the subject id, the query strand and id, the alignment end points and a
 * score, in the form
 *     'subject'=='<sign>query' (s_start q_start s_end q_end) score
 *
 * ptr : the BlastSearchBlkPtr of the running search.
 *
 * HSPs that are NULL, exceed a positive e-value cutoff, or have no query
 * id are skipped.  The context, query and subject fields of each printed
 * HSP are rewritten in place (1-based offsets, shifted ends).
 *
 * Returns 0.  With an empty hit list only subject_info is destructed.
 */
static int LIBCALLBACK
MegaBlastPrintEndpoints(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   /* Initialised because the no-database path never fills it in but the
      cleanup code frees it. */
   CharPtr subject_descr = NULL;
   CharPtr descr_rest = NULL;
   SeqIdPtr sip = NULL, query_id;
   CharPtr query_buffer, query_alloc, title, title_rest;
   CharPtr subject_buffer = NULL;
   Boolean subject_in_descr = FALSE; /* subject_buffer points into subject_descr */
   Int4 query_length, q_start, q_end, q_shift=0, s_shift=0;
   Int4 subject_end;
   Int4 hsp_index;
   Boolean numeric_sip_type = FALSE;
   BLAST_HSPPtr hsp;
   Int2 context;
   Char context_sign;
   Int4 subject_gi = 0, score;
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL ||
       search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   if (search->rdfp)
      readdb_get_descriptor(search->rdfp, search->subject_id, &sip,
                            &subject_descr);
   else
      sip = SeqIdSetDup(search->subject_info->sip);

   if (sip == NULL) {
      /* Without a subject id there is nothing that can be printed. */
      MemFree(subject_descr);
      return 0;
   }

   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) {
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type =
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                  &subject_gi, &subject_buffer);
   } else {
      DbtagPtr db_tag = (DbtagPtr) sip->data.ptrvalue;
      /* NOTE(review): this branch is only entered when db equals
         "BL_ORD_ID", so the THC/TI test below cannot succeed as written. */
      if (db_tag->db &&
          (!StringCmp(db_tag->db, "THC") ||
           !StringICmp(db_tag->db, "TI")) &&
          db_tag->tag->id != 0) {
         subject_buffer = (CharPtr) Malloc(16);
         sprintf(subject_buffer, "%ld", (long) db_tag->tag->id);
      } else {
         /* Use the first token of the description.  subject_descr keeps
            pointing at the start of the allocation so that the cleanup
            frees the right pointer even if the token starts later. */
         subject_buffer = StringTokMT(subject_descr, " \t", &descr_rest);
         subject_in_descr = TRUE;
      }
   }

   search->current_hitlist->hspcnt_max = search->current_hitlist->hspcnt;

   /* Only for the two sequences case, get offset shift if subject
      is a subsequence */
   if (!search->rdfp && search->query_slp->next) {
      s_shift = SeqLocStart(search->query_slp->next);
      subject_end = SeqLocStop(search->query_slp->next);
   } else {
      /* NOTE(review): also reached with rdfp == NULL when the query
         Seq-loc has no successor - confirm readdb handles that. */
      s_shift = 0;
      subject_end =
         readdb_get_sequence_length(search->rdfp, search->subject_id);
   }
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);

   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 &&
                        hsp->evalue > search->pbp->cutoff_e))
         continue;

      /* Correct query context is already found in BlastGetNonSumStatsEvalue */
      context = hsp->context;
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue;
      hsp->context = context & 1;
      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;
      hsp->subject.end = hsp->subject.offset + hsp->subject.length;

      if (hsp->context) {
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset =
            hsp->query.end - hsp->query.length + 1;
         context_sign = '-';
      } else {
         hsp->query.end = (++hsp->query.offset) + hsp->query.length - 1;
         if (hsp->query.end > query_length) {
            hsp->subject.end -= (hsp->query.end - query_length);
            hsp->query.end = query_length;
         }
         context_sign = '+';
      }

      if (hsp->subject.end > subject_end) {
         hsp->query.end -= (hsp->subject.end - subject_end);
         hsp->subject.end = subject_end;
      }
      hsp->subject.offset++;

      /* query_buffer is what gets printed; query_alloc is what gets freed.
         They differ only when the printed text is a token inside the
         saved title. */
      query_buffer = NULL;
      query_alloc = NULL;
      if (query_id->choice == SEQID_LOCAL &&
          search->pbp->mb_params->full_seqids) {
         BioseqPtr query_bsp = BioseqLockById(query_id);
         if (query_bsp != NULL) {
            title = StringSave(BioseqGetTitle(query_bsp));
            if (title) {
               title_rest = NULL;
               query_buffer = StringTokMT(title, " ", &title_rest);
               query_alloc = title;
            } else {
               Int4 query_gi;
               GetAccessionFromSeqId(query_bsp->id, &query_gi,
                                     &query_buffer);
               query_alloc = query_buffer;
            }
            BioseqUnlock(query_bsp);
         } else {
            /* Query Bioseq not available: print the Seq-id itself. */
            query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
            query_alloc = query_buffer;
            SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
         }
      } else {
         query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         query_alloc = query_buffer;
         if (!search->pbp->mb_params->full_seqids)
            SeqIdWrite(query_id, query_buffer, PRINTID_TEXTID_ACCESSION,
                       BUFFER_LENGTH);
         else
            SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
      }

      /* With zero gap costs the raw score is converted using the reward
         and penalty values; otherwise it is printed as is. */
      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                  search->sbp->reward / 2 - hsp->score) /
            (search->sbp->reward - search->sbp->penalty);
      else
         score = hsp->score;

      if (context_sign == '+') {
         q_start = hsp->query.offset;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset;
      }

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }

      hsp->subject.offset += s_shift;
      hsp->subject.end += s_shift;

      if (numeric_sip_type)
         fprintf(fp, "'%ld'=='%c%s' (%d %d %d %d) %d\n", (long) subject_gi,
                 context_sign, query_buffer, hsp->subject.offset, q_start,
                 hsp->subject.end, q_end, score);
      else
         fprintf(fp, "'%s'=='%c%s' (%d %d %d %d) %d\n",
                 subject_buffer, context_sign, query_buffer,
                 hsp->subject.offset, q_start,
                 hsp->subject.end, q_end, score);
      MemFree(query_alloc);
   }

   if (!numeric_sip_type && !subject_in_descr)
      MemFree(subject_buffer);
   MemFree(subject_descr);
   sip = SeqIdSetFree(sip);
   return 0;
}
/*
 * Hit-list callback: for every HSP of the current hit list build a text
 * record (header line, "a {" block with score, begin and end, and one
 * "l" line per ungapped segment with its percent identity) and print it
 * when the overall identity reaches mb_params->perc_identity.
 *
 * ptr : the BlastSearchBlkPtr of the running search.
 *
 * HSPs that are NULL, exceed a positive e-value cutoff, or have no query
 * id are skipped.  The context and query fields of processed HSPs are
 * rewritten in place.
 *
 * Returns 1 after processing, 0 when the hit list is empty (only
 * subject_info is destructed then) or no subject id is available.
 */
static int LIBCALLBACK
MegaBlastPrintSegments(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   ReadDBFILEPtr rdfp = search->rdfp;
   BLAST_HSPPtr hsp;
   Int4 i, subject_gi = 0;
   Int2 context;
   CharPtr query_buffer, query_alloc, title, title_rest;
   SeqIdPtr sip = NULL, query_id;
   Int4 hsp_index, score;
   Uint1Ptr query_seq, subject_seq = NULL;
   FloatHi perc_ident;
   Char strand;
   GapXEditScriptPtr esp;
   Int4 q_start, q_end, s_start, s_end, query_length, numseg;
   Int4 q_off, num_ident, align_length, total_ident, q_shift=0, s_shift=0;
   Int4Ptr length, start;
   Uint1Ptr strands;
   /* Initialised because the no-database path never fills the description
      in but the cleanup code frees it. */
   CharPtr subject_descr = NULL, subject_buffer = NULL, buffer;
   CharPtr descr_rest = NULL;
   Boolean subject_in_descr = FALSE; /* subject_buffer points into subject_descr */
   Char tmp_buffer[BUFFER_LENGTH];
   Int4 buffer_size, max_buffer_size = LARGE_BUFFER_LENGTH;
   Boolean numeric_sip_type = FALSE;
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL ||
       search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   subject_seq = search->subject->sequence_start + 1;

   if (rdfp)
      readdb_get_descriptor(rdfp, search->subject_id, &sip, &subject_descr);
   else
      sip = SeqIdSetDup(search->subject_info->sip);

   if (sip == NULL) {
      /* Without a subject id there is nothing that can be printed. */
      MemFree(subject_descr);
      return 0;
   }

   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) {
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type =
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                  &subject_gi, &subject_buffer);
   } else {
      /* Use the first token of the description.  subject_descr keeps
         pointing at the start of the allocation so that the cleanup frees
         the right pointer even if the token starts later. */
      subject_buffer = StringTokMT(subject_descr, " \t", &descr_rest);
      subject_in_descr = TRUE;
   }

   /* NOTE(review): the Malloc()/Realloc() results for 'buffer' are used
      unchecked, as in the rest of this routine. */
   buffer = (CharPtr) Malloc(LARGE_BUFFER_LENGTH);

   /* Only for the two sequences case, get offset shift if subject
      is a subsequence */
   if (!rdfp && search->query_slp->next)
      s_shift = SeqLocStart(search->query_slp->next);
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);

   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 &&
                        hsp->evalue > search->pbp->cutoff_e)) {
         continue;
      }
      context = hsp->context;
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue;
      hsp->context = context & 1;

      /* With zero gap costs the raw score is converted using the reward
         and penalty values; otherwise it is used as is. */
      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                  search->sbp->reward / 2 - hsp->score) /
            (search->sbp->reward - search->sbp->penalty);
      else
         score = hsp->score;

      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;

      q_off = hsp->query.offset;
      if (hsp->context) {
         strand = '-';
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset =
            hsp->query.end - hsp->query.length;
      } else {
         strand = '+';
         hsp->query.end = hsp->query.offset + hsp->query.length;
      }

      if (strand == '+') {
         q_start = hsp->query.offset + 1;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset + 1;
      }
      s_start = hsp->subject.offset + 1;
      s_end = hsp->subject.offset + hsp->subject.length;

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }
      s_start += s_shift;
      s_end += s_shift;

      /* query_buffer is what gets printed; query_alloc is what gets freed.
         They differ only when the printed text is a token inside the
         saved title. */
      query_buffer = NULL;
      query_alloc = NULL;
      if (query_id->choice == SEQID_LOCAL &&
          search->pbp->mb_params->full_seqids) {
         BioseqPtr query_bsp = BioseqLockById(query_id);
         if (query_bsp != NULL) {
            title = StringSave(BioseqGetTitle(query_bsp));
            if (title) {
               title_rest = NULL;
               query_buffer = StringTokMT(title, " ", &title_rest);
               query_alloc = title;
            } else {
               Int4 query_gi;
               GetAccessionFromSeqId(query_bsp->id, &query_gi,
                                     &query_buffer);
               query_alloc = query_buffer;
            }
            BioseqUnlock(query_bsp);
         } else {
            /* Query Bioseq not available: print the Seq-id itself. */
            query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
            query_alloc = query_buffer;
            SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
         }
      } else {
         query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         query_alloc = query_buffer;
         if (!search->pbp->mb_params->full_seqids)
            SeqIdWrite(query_id, query_buffer, PRINTID_TEXTID_ACCESSION,
                       BUFFER_LENGTH);
         else
            SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
      }

      if (numeric_sip_type)
         sprintf(buffer, "\n#'>%ld'=='%c%s' (%d %d %d %d) %d\na {\n s %d\n b %d %d\n e %d %d\n",
                 (long) subject_gi, strand, query_buffer,
                 s_start, q_start, s_end, q_end, score, score,
                 s_start, q_start, s_end, q_end);
      else
         sprintf(buffer, "\n#'>%s'=='%c%s' (%d %d %d %d) %d\na {\n s %d\n b %d %d\n e %d %d\n",
                 subject_buffer, strand, query_buffer,
                 s_start, q_start, s_end, q_end, score, score,
                 s_start, q_start, s_end, q_end);
      buffer_size = StringLen(buffer);

      query_seq = search->context[context].query->sequence;

      /* count the edit script nodes */
      esp = hsp->gap_info->esp;
      for (numseg=0; esp; esp = esp->next, numseg++);

      GXECollectDataForSeqalign(hsp->gap_info, hsp->gap_info->esp, numseg,
                                &start, &length, &strands,
                                &q_off, &hsp->subject.offset);

      /* clip the first and last segment to the query range */
      if (start[0] < 0) {
         length[0] += start[0];
         start[1] -= start[0];
         start[0] = 0;
      }
      if (start[2*(numseg-1)] + length[numseg-1] > query_length)
         length[numseg-1] = query_length - start[2*(numseg-1)];

      total_ident = 0;
      align_length = 0;
      for (i=0; i<numseg; i++) {
         align_length += length[i];
         if (strand == '+') {
            q_start = start[2*i] + 1;
            q_end = q_start + length[i] - 1;
         } else {
            q_start = query_length - start[2*i];
            q_end = q_start - length[i] + 1;
         }
         /* a start of -1 on either side marks a gap; only aligned
            segments get an "l" line */
         if (start[2*i] != -1 && start[2*i+1] != -1) {
            num_ident = MegaBlastGetNumIdentical(query_seq, subject_seq,
                                                 start[2*i], start[2*i+1],
                                                 length[i], FALSE);
            perc_ident = (FloatHi) num_ident / length[i] * 100;
            total_ident += num_ident;
            sprintf(tmp_buffer, " l %d %d %d %d (%.0f)\n",
                    start[2*i+1]+1, q_start,
                    start[2*i+1]+length[i],
                    q_end, perc_ident);
            /* grow the record buffer before appending when it is nearly full */
            if ((buffer_size += StringLen(tmp_buffer)) > max_buffer_size - 2) {
               max_buffer_size *= 2;
               buffer = (CharPtr) Realloc(buffer, max_buffer_size);
            }
            StringCat(buffer, tmp_buffer);
         }
      }
      if (100*total_ident >=
          align_length*search->pbp->mb_params->perc_identity) {
         StringCat(buffer, "}");
         fprintf(fp, "%s\n", buffer);
      }
      MemFree(start);
      MemFree(length);
      MemFree(strands);
      MemFree(query_alloc);
   } /* End loop on hsp's */

   if (!numeric_sip_type && !subject_in_descr)
      MemFree(subject_buffer);
   MemFree(subject_descr);
   MemFree(buffer);
   sip = SeqIdSetFree(sip);
   fflush(fp);
   return 1;
}
/** Formats the search results of a batch of queries in the view selected
 * by format_info->format_options->align_view.
 *
 * @param seqalign_arr  Per-query Seq-aligns; array[i] belongs to the i-th
 *                      location of query_slp and may be NULL (no hits).
 * @param num_queries   Added to format_info->num_formatted at the end.
 *                      NOTE(review): the loop itself is bounded by
 *                      seqalign_arr->num_queries, not by this value.
 * @param query_slp     Chain of query locations, walked in step with
 *                      seqalign_arr.
 * @param mask_loc_head Chain of masking locations; the part matching the
 *                      current query is temporarily unlinked while that
 *                      query is formatted and relinked afterwards.
 * @param format_info   Formatting state: options, output streams, database
 *                      name, cached XML writer, counters.
 * @param sum_returns   Search summary used for the XML output and for
 *                      loading the scoring matrix.
 * @return Always 0.
 */
Int2 BLAST_FormatResults(SBlastSeqalignArray* seqalign_arr, Int4 num_queries,
        SeqLoc* query_slp, SeqLoc* mask_loc_head,
        BlastFormattingInfo* format_info,
        Blast_SummaryReturn* sum_returns)
{
   SeqLoc* mask_loc;
   SeqLoc* next_mask_loc = NULL;
   SeqLoc* tmp_loc = NULL;
   Uint1 align_type;
   Boolean db_is_na;
   Int4 query_index;
   SeqLoc* slp;
   SeqLoc* mask_slp;
   AsnIo* aip = NULL;
   MBXml* xmlp = NULL;
   FILE *outfp = NULL;
   BlastFormattingOptions* format_options;
   EAlignView align_view;
   Boolean ungapped;

   ASSERT(format_info && format_info->format_options &&
          format_info->search_options && query_slp);

   format_options = format_info->format_options;
   align_view = format_options->align_view;
   ungapped = !format_info->search_options->score_options->gapped_calculation;

   /* Exactly one output channel is selected: the XML writer, the ASN.1
      stream, or the plain FILE. The other two stay NULL. */
   if (align_view == eAlignViewXml) {
      const Int4 kXmlFlag = 0; /* Change to BXML_INCLUDE_QUERY if inclusion of
                                  query sequence is desired in the XML output
                                  header. */
      /* The XML writer is created on first use and cached in format_info. */
      xmlp = format_info->xmlp;
      if (!xmlp) {
         xmlp = format_info->xmlp =
            s_MBXmlInit(format_info->aip, format_info->program_name,
                        format_info->db_name, query_slp, kXmlFlag,
                        sum_returns->search_params);
      }
   } else if (align_view == eAlignViewAsnText ||
              align_view == eAlignViewAsnBinary)
      aip = format_info->aip;
   else
      outfp = format_info->outfp;

   align_type =
      GetOldAlignType(format_info->search_options->program, &db_is_na);

   if (format_info->db_name) {
      /* Enable fetching from the BLAST database. */
      ReadDBBioseqFetchEnable ("blast", format_info->db_name, db_is_na, TRUE);
      /* If database is translated, set the genetic code for translation. */
      if (Blast_SubjectIsTranslated(format_info->search_options->program)) {
         ReadDBBioseqSetDbGeneticCode(format_info->search_options->
                                      db_options->genetic_code);
      }
   }

   if(format_info->search_options->score_options->is_ooframe) {
      ErrPostEx(SEV_WARNING, 0, 0, "Out-of-frame option selected, Expect values are only approximate and calculated not assuming out-of-frame alignments");
   }

   slp = query_slp;
   mask_loc = mask_loc_head;

   /* One iteration per query; stops early if the location chain is shorter
      than the Seq-align array. */
   for (query_index=0; query_index<seqalign_arr->num_queries && slp; query_index++, slp=slp->next) {
      Bioseq* bsp = NULL;
      SeqAlignPtr seqalign = seqalign_arr->array[query_index];
      /* Find which query the current SeqAlign is for */
      SeqId* query_id = TxGetQueryIdFromSeqAlign(seqalign);

      /* No hits for this query: emit the view-specific "empty" output and
         move on.  ASN.1 views write nothing in this case. */
      if (seqalign == NULL) {
         if (align_view < eAlignViewXml)
            s_AcknowledgeEmptyResults(slp, format_options, format_info, outfp); /* this query has no results. */
         else if (align_view == eAlignViewXml) {
            /* Retrieve this query's Bioseq */
            Iteration* iterp;
            /* Call to TxGetQueryIdFromSeqAlign returned NULL. */
            query_id = SeqLocId(slp);
            bsp = BioseqLockById(query_id);
            iterp =
               s_XMLBuildOneQueryIteration(NULL, sum_returns, FALSE, ungapped,
                  query_index+1+format_info->num_formatted, "No hits found",
                  bsp, NULL);
            IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
            AsnIoFlush(xmlp->aip);
            IterationFree(iterp);
            BioseqUnlock(bsp);
         }
         else if (align_view == eAlignViewTabularWithComments) {
            query_id = SeqLocId(slp);
            bsp = BioseqLockById(query_id);
            PrintTabularOutputHeader(format_info->db_name, bsp, NULL,
                                     format_info->program_name, 0,
                                     format_options->believe_query, outfp);
            BioseqUnlock(bsp);
         }
         continue;
      }

      format_info->is_seqalign_null = FALSE; /* reset flag, at least one query has seqalign */

      /* Find the masking location for this query. Initialize next_mask_loc
         to the current start of the chain, in case nothing for this query
         will be found. */
      next_mask_loc = mask_loc;
      for ( ; mask_loc; mask_loc = mask_loc->next) {
         mask_slp = (SeqLoc*) mask_loc->data.ptrvalue;
         if (SeqIdComp(query_id, SeqLocId(mask_slp)) == SIC_YES) {
            break;
         }
      }
      /* Unlink the masking location for this query and save the next one */
      if (mask_loc) {
         /* Advance to the last consecutive node that belongs to this query. */
         for (next_mask_loc = mask_loc; next_mask_loc->next;
              next_mask_loc = next_mask_loc->next) {
            mask_slp = (SeqLoc*) next_mask_loc->next->data.ptrvalue;
            if (SeqIdComp(query_id, SeqLocId(mask_slp)) != SIC_YES) {
               break;
            }
         }
         tmp_loc = next_mask_loc;
         next_mask_loc = next_mask_loc->next;
         tmp_loc->next = NULL;
      }

      /* On the next iteration we can start from the next query */

      /* Retrieve this query's Bioseq */
      bsp = BioseqLockById(query_id);

      /* Views ordered before eAlignViewXml get the query acknowledgement,
         optionally framed by the header and the database information. */
      if (align_view < eAlignViewXml) {
         if (format_info->head_on_every_query == TRUE)
            BLAST_PrintOutputHeader(format_info);
         init_buff_ex(70);
         AcknowledgeBlastQuery(bsp, 70, outfp,
                               format_options->believe_query, format_options->html);
         free_buff();
         if (format_info->head_on_every_query == TRUE)
         {
            s_BLAST_PrintDatabaseInfo(format_info);
            fprintf(format_info->outfp, "%s", "Searching..................................................done\n\n");
         }
      }
      if (align_view == eAlignViewTabular ||
          align_view == eAlignViewTabularWithComments) {
         if (align_view == eAlignViewTabularWithComments)
            PrintTabularOutputHeader(format_info->db_name, bsp, NULL,
                                     format_info->program_name, 0,
                                     format_options->believe_query, outfp);

         BlastPrintTabulatedResults(seqalign, bsp, NULL,
                                    format_options->number_of_alignments,
                                    format_info->program_name, ungapped,
                                    format_options->believe_query, 0, 0, outfp,
                                    (Boolean)(align_view == eAlignViewTabularWithComments));
      } else if(align_view == eAlignViewXml) {
         Iteration* iterp;

         ASSERT(xmlp && xmlp->aip);
         /* The index of this "query iteration" is the query_index in the
            current formatting round, plus the number of previously formatted
            queries. */
         iterp =
            s_XMLBuildOneQueryIteration(seqalign, sum_returns, FALSE, ungapped,
                                        query_index+1+format_info->num_formatted, NULL, bsp, mask_loc);
         IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
         AsnIoFlush(xmlp->aip);
         IterationFree(iterp);
      } else {
         /* Remaining views: wrap the Seq-align in a temporary Seq-annot. */
         SeqAnnot* seqannot = SeqAnnotNew();
         seqannot->type = 2;
         AddAlignInfoToSeqAnnot(seqannot, align_type);
         seqannot->data = seqalign;
         if (aip) {
            SeqAnnotAsnWrite((SeqAnnot*) seqannot, aip, NULL);
            AsnIoReset(aip);
         }
         if (outfp) {
            BlastPruneSapStruct* prune;
            Int4** matrix = s_LoadMatrix(sum_returns->search_params->matrix);
            ObjMgrSetHold();
            init_buff_ex(85);
            PrintDefLinesFromSeqAlignEx2(seqalign, 80, outfp,
                                         format_options->print_options, FIRST_PASS, NULL,
                                         format_options->number_of_descriptions, NULL, NULL);
            free_buff();

            /** @todo FIXME: note that by calling BlastPruneHitsFromSeqAlign
             * we're making a COPY of the seqalign to print it out! Clearly
             * this could use a better design */
            prune = BlastPruneHitsFromSeqAlign(seqalign,
                                               format_options->number_of_alignments, NULL);
            seqannot->data = prune->sap;

            if(format_info->search_options->score_options->is_ooframe) {
               OOFShowBlastAlignment(prune->sap, mask_loc, outfp,
                                     format_options->align_options, NULL);
            } else if (align_view != eAlignViewPairwise) {
               ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL,
                                      format_options->align_options, matrix, mask_loc, NULL);
            } else {
               ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL,
                                      format_options->align_options, matrix, mask_loc,
                                      FormatScoreFunc);
            }
            s_DeleteMatrix(matrix);
            /* Point the annot back at the caller's Seq-align before the
               pruned copy is destroyed. */
            seqannot->data = seqalign;
            prune = BlastPruneSapStructDestruct(prune);
            ObjMgrClearHold();
         }
         /* Set data to NULL, because we do not free Seq-align here. */
         seqannot->data = NULL;
         seqannot = SeqAnnotFree(seqannot);
      }
      BioseqUnlock(bsp);
      /* Relink the mask locations so chain can be freed in the end.
         The 'tmp_loc' variable points to the location that was unlinked. */
      /* NOTE(review): tmp_loc is not reset per query, so when no mask was
         found for this query it still refers to the node unlinked for an
         earlier one. */
      if (tmp_loc)
         tmp_loc->next = next_mask_loc;

      mask_loc = next_mask_loc;
      ObjMgrFreeCache(0);

   } /* End loop on seqaligns for different queries */

   /* close BlastOutput_iterations opened in s_MBXmlInit; Rt ticket # 15135151 */
   if((format_info->is_seqalign_null==TRUE) && (align_view == eAlignViewXml))
   { /* extra output only if no hits at all, otherwise "for loop" logic should take care*/
      Iteration* iterp;
      iterp = IterationNew();
      iterp->iter_num = 1;
      iterp->stat = s_XMLBuildStatistics(sum_returns, ungapped);
      ASSERT(xmlp && xmlp->aip);
      IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
      AsnIoFlush(xmlp->aip);
      IterationFree(iterp);
   }

   if (format_info->db_name) {
      /* Free the database translation tables, if applicable. */
      TransTableFreeAll();
      ReadDBBioseqFetchDisable();
   }

   /* Update the count of the formatted queries. */
   format_info->num_formatted += num_queries;

   return 0;
}
/** Creates the header part of an XML report for a BLAST search.
 * @param program Program name; must not be NULL. "rpsblast" and "rpstblastn"
 *                are reported as "blastp" and "blastx" respectively. [in]
 * @param database Database name; may be NULL, in which case no database
 *                 name is recorded. [in]
 * @param query_loc Query Seq-loc; may be NULL, in which case no query
 *                  information is filled in. [in]
 * @param flags Flag to indicate whether query sequence should be included in
 *              the output. [in]
 * @param search_param Search parameters [in]
 * @return Newly allocated BlastOutput structure, or NULL if it could not be
 *         allocated.
 */
static BlastOutput*
s_CreateBlastOutputHead(const char* program, const char* database,
                        SeqLoc* query_loc, Int4 flags,
                        const Blast_SearchParams* search_param)
{
   BlastOutput* boutp;
   Char buffer[1024];
   char* program_to_use = NULL;

   /* This function returns a pointer, so report failure with NULL. */
   if ((boutp = BlastOutputNew()) == NULL)
      return NULL;

   /* RPS flavors are reported under the name of the equivalent plain
      program. */
   if (strcmp(program, "rpsblast") == 0)
      program_to_use = strdup("blastp");
   else if (strcmp(program, "rpstblastn") == 0)
      program_to_use = strdup("blastx");
   else
      program_to_use = strdup(program);

   if (query_loc) {
      SeqId* sip = SeqLocId(query_loc);
      Bioseq* bsp;

      /* Query identifier in long FASTA form. */
      SeqIdWrite(sip, buffer, PRINTID_FASTA_LONG, sizeof(buffer));
      boutp->query_ID = strdup(buffer);

      /* Query definition line, if the Bioseq can be locked. */
      bsp = BioseqLockById(sip);
      if (bsp != NULL) {
         if (BioseqGetTitle(bsp) != NULL)
            boutp->query_def = strdup(BioseqGetTitle(bsp));
         else
            boutp->query_def = strdup("No definition line found");
         BioseqUnlock(bsp);
      }

      boutp->query_len = SeqLocLen(query_loc);

      if (flags & BXML_INCLUDE_QUERY) {
         boutp->query_seq = (char *) calloc(boutp->query_len + 1, 1);
         /* Only stream the sequence when the destination buffer was
            actually allocated. */
         if (boutp->query_seq != NULL) {
            SeqPortStreamLoc(query_loc,
                             STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
                             boutp->query_seq, NULL);
         }
      } else {
         boutp->query_seq = NULL;    /* Do we need sequence here??? */
      }
   }

   /* Program name. Use the local version of the program. No need to copy it
      since it was locally allocated. */
   boutp->program = program_to_use;

   /* Database name */
   if (database)
      boutp->db = strdup(database);

   /* Version text. The write is bounded by the buffer size, and skipped
      altogether if the program name could not be duplicated. */
   if (program_to_use != NULL) {
      snprintf(buffer, sizeof(buffer), "%s %s [%s]", program_to_use,
               BlastGetVersionNumber(), BlastGetReleaseDate());
      boutp->version = strdup(buffer);
   }

   /* Reference */
   boutp->reference = BlastGetReference(FALSE);

   /* Filling parameters */
   boutp->param = ParametersNew();
   if (boutp->param != NULL && search_param != NULL) {
      boutp->param->expect = search_param->expect;
      boutp->param->gap_open = search_param->gap_open;
      boutp->param->gap_extend = search_param->gap_extension;
      if (search_param->matrix)
         boutp->param->matrix = strdup(search_param->matrix);
      boutp->param->sc_match = search_param->match;
      boutp->param->sc_mismatch = search_param->mismatch;
      boutp->param->include = search_param->ethresh;
      if (search_param->filter_string)
         boutp->param->filter = strdup(search_param->filter_string);
   }

   return boutp;
}
Int2 PHIBlastFormatResults(ValNode* phivnps, SeqLoc* query_slp, const BlastFormattingInfo* format_info, Blast_SummaryReturn* sum_returns) { Boolean db_is_na; Bioseq* query_bsp = NULL; FILE *outfp = NULL; ValNode* pruneSeed = NULL; Uint1 featureOrder[FEATDEF_ANY]; Uint1 groupOrder[FEATDEF_ANY]; SeqLoc* seed_seqloc = NULL; /* SeqLoc containing pattern locations. */ EBlastProgramType program; BlastFormattingOptions* format_options; if (!format_info || !format_info->outfp || !query_slp) return -1; format_options = format_info->format_options; program = format_info->search_options->program; ASSERT(Blast_ProgramIsPhiBlast(program)); outfp = format_info->outfp; s_PHIBlastFormatPatternInfo(sum_returns, outfp); /* Old toolkit might have different values for program numbers, so use old toolkit function to determine alignment type. */ if (program == eBlastTypePhiBlastn) db_is_na = TRUE; else db_is_na = FALSE; if (format_info->db_name) ReadDBBioseqFetchEnable ("blast", format_info->db_name, db_is_na, TRUE); pruneSeed = SeedPruneHitsFromSeedReturn(phivnps, format_options->number_of_descriptions); s_PHIBlastCreateSeedSeqLoc(sum_returns->pattern_info, query_slp, &seed_seqloc); PrintDefLinesExtra(pruneSeed, 80, outfp, format_options->print_options, FIRST_PASS, NULL, seed_seqloc); if (format_options->number_of_alignments < format_options->number_of_descriptions) { pruneSeed = SeedPruneHitsFromSeedReturn(phivnps, format_options->number_of_alignments); } query_bsp = BioseqLockById(SeqLocId(query_slp)); memset(featureOrder, 0, sizeof(featureOrder)); memset(groupOrder, 0, sizeof(groupOrder)); featureOrder[FEATDEF_REGION] = 1; groupOrder[FEATDEF_REGION] = 1; if (format_options->align_view != eAlignViewPairwise) { ShowTextAlignFromAnnotExtra(query_bsp, pruneSeed, seed_seqloc, 60, outfp, featureOrder, groupOrder, format_options->align_options, NULL, NULL, NULL); } else { ShowTextAlignFromAnnotExtra(query_bsp, pruneSeed, seed_seqloc, 60, outfp, featureOrder, groupOrder, 
format_options->align_options, NULL, NULL, FormatScoreFunc); } SeqLocSetFree(seed_seqloc); if (format_info->db_name) ReadDBBioseqFetchDisable(); return 0; }