/** Fills the Iteration ASN.1 structure, for part of the BLAST XML report * corresponding to one query. * @param seqalign Seq-align list with results [in] * @param sum_returns Search summary data [in] * @param is_ooframe Was out-of-frame gapping used in this search? [in] * @param ungapped Was this an ungapped search? [in] * @param iter_num Index of this "iteration" (query). [in] * @param message Error or warning message [in] * @param query Query Bioseq [in] * @param mask_loc List of masking locations [in] * @return Populated structure. */ static Iteration* s_XMLBuildOneQueryIteration(SeqAlign* seqalign, Blast_SummaryReturn* sum_returns, Boolean is_ooframe, Boolean ungapped, Int4 iter_num, char* message, Bioseq* query, ValNode* mask_loc) { Iteration* iterp = IterationNew(); iterp->iter_num = iter_num; if (query) { char buffer[1024]; SeqIdWrite(query->id, buffer, PRINTID_FASTA_LONG, sizeof(buffer)); iterp->query_ID = strdup(buffer); if(BioseqGetTitle(query) != NULL) iterp->query_def = strdup(BioseqGetTitle(query)); else iterp->query_def = strdup("No definition line found"); iterp->query_len = query->length; } if(seqalign != NULL) { iterp->hits = BXMLSeqAlignToHits(seqalign, ungapped, is_ooframe, mask_loc); } iterp->stat = s_XMLBuildStatistics(sum_returns, ungapped); if (message) iterp->message = strdup(message); return iterp; }
/** Produces a printable identifier string for a Seq-id.
 *
 * The result is written to *buffer_ptr; it is NULL when sip is NULL or
 * when no identifier could be derived.
 *
 * @param sip Seq-id (chain) to describe [in]
 * @param buffer_ptr Receives the identifier string, or NULL [out]
 * @param ncbi_gi Prefer the gi when selecting which id to print [in]
 * @param accession_only Print only the accession.version [in]
 * @param search_for_id When FALSE, or when the id is a "BL_ORD_ID" general
 *                      id, the Seq-id is not formatted and the first token
 *                      of the Bioseq title is used instead [in]
 */
void Blast_SeqIdGetDefLine(SeqId* sip, char** buffer_ptr, Boolean ncbi_gi,
                           Boolean accession_only, Boolean search_for_id)
{
    char* seqid_buffer = NULL;
    Int4 gi = 0;
    Boolean numeric_id_type = FALSE;

    *buffer_ptr = NULL;

    if (sip == NULL)
        return;

    /* Check for ad hoc ID's generated by formatdb if the user does not
       provide any. */
    if (search_for_id &&
        (sip->choice != SEQID_GENERAL ||
         StringCmp(((Dbtag*)sip->data.ptrvalue)->db, "BL_ORD_ID"))) {
        const Boolean full_id =
            (Boolean) ((!accession_only && !ncbi_gi) ||
                       sip->choice == SEQID_LOCAL);

        if (full_id || accession_only) {
            seqid_buffer = (char*) malloc(BUFFER_LENGTH + 1);
            /* On allocation failure the buffer stays NULL and the
               title-based fallback below is attempted instead. */
            if (seqid_buffer != NULL) {
                if (full_id) {
                    SeqIdWrite(sip, seqid_buffer, PRINTID_FASTA_LONG,
                               BUFFER_LENGTH);
                } else {
                    SeqIdWrite(SeqIdFindBestAccession(sip), seqid_buffer,
                               PRINTID_TEXTID_ACC_VER, BUFFER_LENGTH);
                }
            }
        } else {
            /* Only reachable with ncbi_gi set: "neither flag set" is
               already covered by full_id.  The former trailing branch that
               used SeqIdFindBestAccession() here could never execute. */
            numeric_id_type =
                GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI), &gi,
                                      &seqid_buffer);
        }
    }

    if (numeric_id_type && gi > 0) {
        /* 16 bytes hold any value printed with "%ld" from an Int4. */
        char* gi_text = (char*) malloc(16);

        if (gi_text != NULL)
            sprintf(gi_text, "%ld", (long) gi);
        seqid_buffer = gi_text;
    }

    if (!seqid_buffer) {
        /* If it's still NULL make a last ditch effort to get info. */
        char* title = NULL;
        Bioseq* bsp = BioseqLockById(sip);

        if (bsp) {
            if (BioseqGetTitle(bsp) != NULL)
                title = strdup(BioseqGetTitle(bsp));
            else
                title = strdup("No definition line found");
            BioseqUnlock(bsp);
        }

        if (title) /* Use first token as id. */
            seqid_buffer = StringTokMT(title, " \t\n\r", &title);
    }

    *buffer_ptr = seqid_buffer;
}
/*****************************************************************************
*
*   PrintIdDefLine
*       SeqEntryExplore callback routine that prints the seqids and definition
*       lines.
*
*       sep    - entry being visited; only Bioseq entries produce output
*       data   - the FILE * to write to
*       index  - unused, required by the callback signature
*       indent - unused, required by the callback signature
*
*       Output is one line per Bioseq: ">" followed by the FASTA-long id
*       and, when present, a space and the title.
*
*****************************************************************************/
static void PrintIdDefLine (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BioseqPtr bsp;
    FILE * fp;
    Char buf[256];
    CharPtr title = NULL;

    if (IS_Bioseq (sep))
    {
        *buf = '\0';
        bsp = (BioseqPtr) sep->data.ptrvalue;
        fp = (FILE *) data;

        title = BioseqGetTitle(bsp);   /* this does not deal with all cases */

        /* Bounded write: a FASTA-long id chain can be much longer than the
           40 bytes the unbounded SeqIdPrint call used to be handed.  One
           byte is held back for the terminator. */
        SeqIdWrite(bsp->id, buf, PRINTID_FASTA_LONG, sizeof(buf) - 1);   /* print SeqId */

        if (pmon != NULL)
            MonitorStrValue(pmon, buf);

        fprintf(fp, ">%s", buf);
        if (title != NULL)
            fprintf(fp, " %s", title);
        fprintf(fp, "\n");
    }
    return;
}
/*****************************************************************************
*
*   PrintSequence
*       Writes one Bioseq to fp: a ">id title" header line followed by the
*       residues in IUPAC letters, at most CHARSPERLINE per line.
*
*       bsp   - sequence to print; nothing is written when NULL
*       sfp   - when not NULL, only the feature's location is printed
*       fp    - output stream; nothing is written when NULL
*       is_na - only Bioseqs whose molecule class matches are printed
*
*       Only raw and constructed representations are printed.  Values that
*       are not residues are reported on their own line as "[Gap]", "[EOS]"
*       or "[Invalid Residue]".
*
*****************************************************************************/
static void PrintSequence (BioseqPtr bsp, SeqFeatPtr sfp, FILE *fp, Boolean is_na)
{
  Char        buffer [255];   /* one line of residues */
  Char        idbuf [256];    /* formatted Seq-id */
  Char        header [512];   /* ">" + id + " " + up to 200 title chars */
  Uint1       code;
  Int2        count;
  Uint1       repr;
  Uint1       residue;
  SeqPortPtr  spp;
  CharPtr     title;

  if (bsp == NULL || fp == NULL) {
    return;
  }
  if ((Boolean) ISA_na (bsp->mol) != is_na) {
    return;
  }
  repr = Bioseq_repr (bsp);
  if (repr != Seq_repr_raw && repr != Seq_repr_const) {
    return;
  }

  /* Header line.  The id is written with an explicit bound and the title
     is limited to 200 characters by the format, so the header always fits
     and is always terminated.  The previous SeqIdPrint + StringNCpy
     sequence into a 255-byte buffer guaranteed neither. */
  title = BioseqGetTitle (bsp);
  idbuf [0] = '\0';
  SeqIdWrite (bsp->id, idbuf, PRINTID_FASTA_LONG, sizeof (idbuf) - 1);
  sprintf (header, ">%s %.200s", idbuf, (title != NULL) ? title : "");
  fprintf (fp, "%s\n", header);
  if (pmon != NULL) {
    MonitorStrValue (pmon, header);
  }

  code = is_na ? Seq_code_iupacna : Seq_code_iupacaa;
  if (sfp != NULL) {
    spp = SeqPortNewByLoc (sfp->location, code);
  } else {
    spp = SeqPortNew (bsp, 0, -1, 0, code);
  }
  if (spp == NULL) {
    return;
  }

  count = 0;
  while ((residue = SeqPortGetResidue (spp)) != SEQPORT_EOF) {
    if (! IS_residue (residue)) {
      /* Flush what has been collected, then report the marker. */
      buffer [count] = '\0';
      fprintf (fp, "%s\n", buffer);
      count = 0;
      switch (residue) {
        case SEQPORT_VIRT :
          fprintf (fp, "[Gap]\n");
          break;
        case SEQPORT_EOS :
          fprintf (fp, "[EOS]\n");
          break;
        default :
          fprintf (fp, "[Invalid Residue]\n");
          break;
      }
    } else {
      buffer [count] = residue;
      count++;
      /* The second test keeps the terminator inside the array even if
         CHARSPERLINE is ever raised beyond the buffer size. */
      if (count >= CHARSPERLINE || count >= (Int2) (sizeof (buffer) - 1)) {
        buffer [count] = '\0';
        fprintf (fp, "%s\n", buffer);
        count = 0;
      }
    }
  }
  if (count != 0) {
    buffer [count] = '\0';
    fprintf (fp, "%s\n", buffer);
  }
  SeqPortFree (spp);
}
Int2 Main_old (void) { AsnIoPtr aip; BioseqPtr fake_bsp = NULL, fake_subject_bsp = NULL, query_bsp = NULL, subject_bsp = NULL; BioseqPtr bsp1, bsp2; BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL; BLAST_OptionsBlkPtr options=NULL; Boolean seq1_is_na, seq2_is_na; CharPtr params_buffer=NULL; DbtagPtr dbtagptr; Uint1 align_type; Uint4 align_options; SeqAlignPtr seqalign; SeqAnnotPtr seqannot; SeqEntryPtr sep = NULL, sep1 = NULL; CharPtr program_name, blast_outputfile; FILE *outfp; ValNodePtr mask_loc, mask_loc_start, vnp, other_returns=NULL, error_returns=NULL; BLAST_MatrixPtr matrix; Int4Ptr PNTR txmatrix; int (LIBCALLBACK *handle_results)PROTO((VoidPtr search)) = NULL; Boolean entrez_lookup = FALSE; Boolean html, seqannot_output, believe_query; Uint1 tabular_output; Boolean gapped_calculation; entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue; html = (Boolean) myargs[ARG_HTML].intvalue; seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL); blast_outputfile = myargs [ARG_OUT].strvalue; program_name = StringSave(myargs[ARG_PROGRAM].strvalue); if (StringCmp(program_name, "blastn") && StringCmp(program_name, "blastp") && StringCmp(program_name, "blastx") && StringCmp(program_name, "tblastn") && StringCmp(program_name, "tblastx")) { ErrPostEx(SEV_FATAL, 1, 0, "Program name must be blastn, blastp, blastx, tblastn or tblastx\n"); return (1); } align_type = BlastGetTypes(program_name, &seq1_is_na, &seq2_is_na); if ((outfp = FileOpen(blast_outputfile, "w")) == NULL) { ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile); return (1); } gapped_calculation = (Boolean) myargs[ARG_GAPPED].intvalue; believe_query = (seqannot_output || entrez_lookup); options = BLASTOptionNewEx(program_name, gapped_calculation, (Boolean) myargs[ARG_USEMEGABLAST].intvalue); if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp, &sep, &sep1, &(options->query_lcase_mask), believe_query) == FALSE) { ErrPostEx(SEV_FATAL, 1, 0, "blast: 
Unable to get sequences"); return (1); } if (!entrez_lookup) { if (!believe_query) fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL); fake_subject_bsp = BioseqNew(); fake_subject_bsp->descr = subject_bsp->descr; fake_subject_bsp->repr = subject_bsp->repr; fake_subject_bsp->mol = subject_bsp->mol; fake_subject_bsp->length = subject_bsp->length; fake_subject_bsp->seq_data = subject_bsp->seq_data; fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type; dbtagptr = DbtagNew(); dbtagptr->db = StringSave("BL_ORD_ID"); dbtagptr->tag = ObjectIdNew(); if (BioseqGetTitle(subject_bsp) != NULL) dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp)); else dbtagptr->tag->str = StringSave("No definition line found"); ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr); bsp1 = (believe_query ? query_bsp : fake_bsp); bsp2 = fake_subject_bsp; } else { bsp1 = query_bsp; bsp2 = subject_bsp; } tabular_output = (Uint1) myargs[ARG_FORMAT].intvalue; if (myargs[ARG_SEARCHSP].floatvalue) options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue; options->filter_string = StringSave(myargs[ARG_FILTER].strvalue); options->expect_value = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue; if (StringICmp("blastn", program_name) == 0) { options->penalty = myargs[ARG_MISMATCH].intvalue; options->reward = myargs[ARG_MATCH].intvalue; } options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue; options->discontinuous = FALSE; if (myargs[ARG_XDROP].intvalue != 0) { options->gap_x_dropoff = myargs[ARG_XDROP].intvalue; } if (myargs[ARG_WORDSIZE].intvalue != 0) options->wordsize = (Int2) myargs[ARG_WORDSIZE].intvalue; if (options->is_megablast_search) { options->cutoff_s2 = options->wordsize*options->reward; } options->matrix = MemFree(options->matrix); BLASTOptionSetGapParams(options, myargs[ARG_MATRIX].strvalue, 0, 0); if (myargs[ARG_GAPOPEN].intvalue != -1) options->gap_open = myargs[ARG_GAPOPEN].intvalue; if (myargs[ARG_GAPEXT].intvalue != -1) options->gap_extend = 
myargs[ARG_GAPEXT].intvalue; options->strand_option = myargs[ARG_STRAND].intvalue; /* Input longest intron length is in nucleotide scale; in the lower level code it will be used in protein scale */ if (myargs[ARG_INTRON].intvalue > 0) options->longest_intron = myargs[ARG_INTRON].intvalue; if (!myargs[ARG_LOC1].strvalue && !myargs[ARG_LOC2].strvalue) { seqalign = BlastTwoSequencesWithCallback(bsp1, bsp2, program_name, options, &other_returns, &error_returns, handle_results); } else { SeqLocPtr slp1=NULL, slp2=NULL; if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, options->strand_option) == FALSE) return 1; seqalign = BlastTwoSequencesByLocWithCallback(slp1, slp2, program_name, options, &other_returns, &error_returns, handle_results, NULL); SeqLocFree(slp1); SeqLocFree(slp2); } if (error_returns) { BlastErrorPrint(error_returns); for (vnp = error_returns; vnp; vnp = vnp->next) { BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue); } ValNodeFree(error_returns); } ka_params = NULL; ka_params_gap = NULL; params_buffer = NULL; mask_loc = NULL; matrix = NULL; txmatrix = NULL; for (vnp=other_returns; vnp; vnp = vnp->next) { switch (vnp->choice) { case TXKABLK_NOGAP: ka_params = vnp->data.ptrvalue; break; case TXKABLK_GAP: ka_params_gap = vnp->data.ptrvalue; break; case TXPARAMETERS: params_buffer = vnp->data.ptrvalue; break; case TXMATRIX: matrix = vnp->data.ptrvalue; if (matrix && !tabular_output) txmatrix = BlastMatrixToTxMatrix(matrix); break; case SEQLOC_MASKING_NOTSET: case SEQLOC_MASKING_PLUS1: case SEQLOC_MASKING_PLUS2: case SEQLOC_MASKING_PLUS3: case SEQLOC_MASKING_MINUS1: case SEQLOC_MASKING_MINUS2: case SEQLOC_MASKING_MINUS3: ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue); break; default: break; } } if (!tabular_output || seqannot_output) { align_options = 0; align_options += TXALIGN_MATRIX_VAL; align_options += TXALIGN_SHOW_QS; align_options += TXALIGN_COMPRESS; align_options += TXALIGN_END_NUM; if (StringICmp("blastx", program_name) == 
0) { align_options += TXALIGN_BLASTX_SPECIAL; } if (html) align_options += TXALIGN_HTML; seqannot = SeqAnnotNew(); seqannot->type = 2; AddAlignInfoToSeqAnnot(seqannot, align_type); seqannot->data = seqalign; aip = NULL; if (seqannot_output) aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w"); if (aip && seqannot) { SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL); AsnIoReset(aip); aip = AsnIoClose(aip); } } if (!tabular_output) { AcknowledgeBlastQuery(query_bsp, 70, outfp, believe_query, html); ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, txmatrix, mask_loc, FormatScoreFunc); seqannot = SeqAnnotFree(seqannot); if (txmatrix) txmatrix = TxMatrixDestruct(txmatrix); init_buff_ex(85); if (ka_params) { PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE); } if (ka_params_gap) { PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE); } PrintTildeSepLines(params_buffer, 70, outfp); free_buff(); } else { PrintTabularOutputHeader(NULL, query_bsp, NULL, program_name, 0, believe_query, outfp); BlastPrintTabulatedResults(seqalign, query_bsp, NULL, 1, program_name, !gapped_calculation, believe_query, 0, 0, outfp, FALSE); SeqAlignSetFree(seqalign); } matrix = BLAST_MatrixDestruct(matrix); MemFree(ka_params); MemFree(ka_params_gap); MemFree(params_buffer); mask_loc_start = mask_loc; while (mask_loc) { SeqLocSetFree(mask_loc->data.ptrvalue); mask_loc = mask_loc->next; } ValNodeFree(mask_loc_start); fake_bsp = BlastDeleteFakeBioseq(fake_bsp); other_returns = ValNodeFree(other_returns); options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask); options = BLASTOptionDelete(options); MemFree(program_name); FileClose(outfp); if (entrez_lookup) { BioseqFree(query_bsp); BioseqFree(subject_bsp); } else { SeqEntryFree(sep); SeqEntryFree(sep1); } return 0; }
Int2 Main_new(void) { BioseqPtr query_bsp=NULL, subject_bsp=NULL; BioseqPtr bsp1=NULL, bsp2=NULL; BioseqPtr fake_bsp=NULL, fake_subject_bsp=NULL; BlastFormattingInfo* format_info = NULL; BLAST_SummaryOptions* options=NULL; Blast_SummaryReturn* extra_returns = Blast_SummaryReturnNew(); Boolean believe_query= FALSE; Boolean seq1_is_na, seq2_is_na; /* seq1/2 is DNA if TRUE. */ Boolean seqannot_output; /* SeqAlign will be output. */ Boolean entrez_lookup; /* QUery/subject fetched from Entrez. */ Boolean mask_at_hash=FALSE; /* masking only on lookup table if TRUE. */ DbtagPtr dbtagptr; EBlastProgramType program_number; Int2 status; /* return value */ EAlignView align_view = eAlignViewPairwise; /* Used for formatting */ SeqAlignPtr seqalign=NULL; SeqEntryPtr sep=NULL, sep1=NULL; SeqLocPtr slp1, slp2; /* Used for actual search. */ SeqLocPtr filter_loc=NULL; /* Location of regions filtered (returned by engine) */ SeqLocPtr lcase_mask=NULL; /* For lower-case masking info from query FASTA. */ SeqLoc* repeat_mask = NULL; /* Repeat mask locations */ Uint1 strand_option = 0; /* FIXME */ SBlastOptions* search_options = NULL; /* Needed for formatting. */ SBlastSeqalignArray* seqalign_arr = NULL; GeneticCodeSingletonInit(); strand_option = (Uint1) myargs[ARG_STRAND].intvalue; entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue; seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL); believe_query = (seqannot_output || entrez_lookup); /* Non-zero value for -m option means tabular output. 
*/ if (myargs[ARG_FORMAT].intvalue != 0) align_view = eAlignViewTabularWithComments; BlastProgram2Number(myargs[ARG_PROGRAM].strvalue, &program_number); seq1_is_na = (program_number == eBlastTypeBlastn || program_number == eBlastTypeBlastx || program_number == eBlastTypeRpsTblastn || program_number == eBlastTypeTblastx); seq2_is_na = (program_number == eBlastTypeBlastn || program_number == eBlastTypeTblastn || program_number == eBlastTypeTblastx); if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp, &sep, &sep1, &lcase_mask, believe_query) == FALSE) { ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences"); return (1); } if (!entrez_lookup) { if (!believe_query) fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL); fake_subject_bsp = BioseqNew(); fake_subject_bsp->descr = subject_bsp->descr; fake_subject_bsp->repr = subject_bsp->repr; fake_subject_bsp->mol = subject_bsp->mol; fake_subject_bsp->length = subject_bsp->length; fake_subject_bsp->seq_data = subject_bsp->seq_data; fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type; dbtagptr = DbtagNew(); dbtagptr->db = StringSave("BL_ORD_ID"); dbtagptr->tag = ObjectIdNew(); if (BioseqGetTitle(subject_bsp) != NULL) dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp)); else dbtagptr->tag->str = StringSave("No definition line found"); ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr); bsp1 = (believe_query ? query_bsp : fake_bsp); bsp2 = fake_subject_bsp; } else { /* Query and subject Bioseqs are already "fake". 
*/ bsp1 = query_bsp; bsp2 = subject_bsp; } if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, strand_option) == FALSE) return 1; if (Bl2SEQ_SummaryOptionsSet(&options, program_number) == FALSE) return 1; /* Find repeat mask, if necessary */ if ((status = Blast_FindRepeatFilterSeqLoc(slp1, myargs[ARG_FILTER].strvalue, &repeat_mask, &extra_returns->error)) != 0) { if (extra_returns && extra_returns->error) { ErrSev max_sev = SBlastMessageErrPost(extra_returns->error); if (max_sev >= SEV_ERROR) return status; } } /* Combine repeat mask with lower case mask */ if (repeat_mask) lcase_mask = ValNodeLink(&lcase_mask, repeat_mask); status = BLAST_TwoSeqLocSets(options, slp1, slp2, lcase_mask, &seqalign_arr, &filter_loc, &mask_at_hash, &extra_returns); /* Free the lower case mask in SeqLoc form. */ lcase_mask = Blast_ValNodeMaskListFree(lcase_mask); /* Post warning or error messages, no matter what the search status was. */ SBlastMessageErrPost(extra_returns->error); if (status != 0) { ErrPostEx(SEV_FATAL, 1, 0, "BLAST_TwoSeqLocSets failed"); return status; } if (myargs[ARG_ASNOUT].strvalue && seqalign_arr) { AsnIoPtr asnout = AsnIoOpen(myargs[ARG_ASNOUT].strvalue, (char*)"w"); GenericSeqAlignSetAsnWrite(seqalign_arr->array[0], asnout); asnout = AsnIoClose(asnout); } /* Pass NULL for the database name, since there is no database. */ BlastFormattingInfoNewBasic(align_view, options, slp1, myargs[ARG_OUT].strvalue, &search_options, &format_info); /* Always show gis in the output, hence pass TRUE for respective argument. */ BlastFormattingInfoSetUpOptions(format_info, 0, 1, (Boolean) myargs[ARG_HTML].intvalue, (Boolean) myargs[ARG_USEMEGABLAST].intvalue, TRUE, believe_query); /* If masking was at hash only, free the masking locations, * to prevent them from being used for formatting. 
*/ if (SBlastOptionsGetMaskAtHash(search_options)) filter_loc = Blast_ValNodeMaskListFree(filter_loc); /* Format the results */ status = BLAST_FormatResults(seqalign_arr, 1, slp1, filter_loc, format_info, extra_returns); status = Blast_PrintOutputFooter(format_info, extra_returns); /* Free masking locations if they haven't been freed already. */ filter_loc = Blast_ValNodeMaskListFree(filter_loc); format_info = BlastFormattingInfoFree(format_info); extra_returns = Blast_SummaryReturnFree(extra_returns); search_options = SBlastOptionsFree(search_options); if (entrez_lookup) { BioseqFree(query_bsp); BioseqFree(subject_bsp); } else { SeqEntryFree(sep); SeqEntryFree(sep1); } options = BLAST_SummaryOptionsFree(options); seqalign_arr = SBlastSeqalignArrayFree(seqalign_arr); slp1 = SeqLocSetFree(slp1); slp2 = SeqLocSetFree(slp2); fake_bsp = BlastDeleteFakeBioseq(fake_bsp); GeneticCodeSingletonFini(); return 0; }
/*
 * BioseqRawToRaw
 *   Appends one Bioseq's id/title and, unless idonly is set, its residues
 *   to caller-owned, heap-allocated strings.
 *
 *   bsp      - sequence to read; ignored when NULL or when it is neither
 *              a raw nor a constructed representation
 *   idonly   - when TRUE only the id line " N)id title\n" is appended to
 *              *seqid and no residues are read
 *   whichSeq - 0 selects every sequence, otherwise only the sequence
 *              whose running number equals this value
 *   seqnum   - running sequence counter, incremented for every accepted
 *              representation
 *   seq      - in/out: residue string, NULL or malloc'ed; grown as needed
 *   seqid    - in/out: id string, NULL or malloc'ed; further ids are
 *              appended, separated by "; " when idonly is FALSE
 *   seqlen   - out: length of *seq after the residues were appended
 *
 *   On allocation failure the function returns early and leaves *seq and
 *   *seqid pointing at valid, terminated strings (or NULL).
 */
void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly, short whichSeq,
                    short *seqnum, char **seq, char **seqid, long *seqlen)
{
    SeqPortPtr spp;
    SeqIdPtr bestid;
    Uint1 repr, code, residue;
    CharPtr title;
    long outlen, outmax;
    char idtext[256];    /* formatted Seq-id */
    char localid[512];   /* prefix + id + " " + up to 200 title chars */
    char *sp, *grown;

    /* !!! this may be called several times for a single sequence
       because SeqEntryExplore looks for parts and joins them...
       assume seq, seqid, seqlen may contain data (or NULL) */
    if (bsp == NULL)
        return;
    repr = Bioseq_repr(bsp);
    if (!(repr == Seq_repr_raw || repr == Seq_repr_const))
        return;

    (*seqnum)++;
    if (!(whichSeq == *seqnum || whichSeq == 0))
        return;

    bestid = SeqIdFindBest(bsp->id, (Uint1) 0);
    title = BioseqGetTitle(bsp);

    /* Bounded id write plus a length-limited title keep localid inside
       its array and always terminated; the earlier unbounded print into a
       256-byte array followed by a 200-character copy did not. */
    idtext[0] = '\0';
    SeqIdWrite(bestid, idtext, PRINTID_FASTA_SHORT, sizeof(idtext) - 1);
    if (idonly)
        sprintf(localid, " %d)%s %.200s", (int) *seqnum, idtext,
                (title != NULL) ? title : "");
    else
        sprintf(localid, "%s %.200s", idtext,
                (title != NULL) ? title : "");

    /* seqid is variable sized: +3 leaves room for "\n" and the NUL */
    outmax = strlen(localid) + 3;
    if (*seqid == NULL) {
        *seqid = (char*) malloc(outmax);
        if (*seqid == NULL)
            return;
        strcpy(*seqid, localid);
    } else {
        outmax += strlen(*seqid) + 2;
        grown = (char*) realloc(*seqid, outmax);
        if (grown == NULL)
            return;              /* *seqid is still the caller's string */
        *seqid = grown;
        if (!idonly)
            strcat(*seqid, "; ");
        strcat(*seqid, localid);
    }

    if (idonly) {
        strcat(*seqid, "\n");
        return;
    }

    if (ISA_na(bsp->mol))
        code = Seq_code_iupacna;
    else
        code = Seq_code_iupacaa;
    spp = SeqPortNew(bsp, 0, -1, 0, code);
    if (spp == NULL)
        return;
    SeqPortSeek(spp, 0, SEEK_SET);

    sp = *seq;
    if (sp == NULL) {
        outlen = 0;
        outmax = 500;
        sp = (char*) malloc(outmax);
        if (sp == NULL) {
            SeqPortFree(spp);
            return;
        }
    } else {
        outlen = strlen(sp);
        outmax = outlen + 500;
        grown = (char*) realloc(sp, outmax);
        if (grown == NULL) {
            SeqPortFree(spp);
            return;              /* *seq is untouched and still valid */
        }
        sp = grown;
        *seq = sp;               /* never leave *seq pointing at freed memory */
    }

    while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
        /* Always keep one spare byte so the string can be terminated,
           even when growing the buffer fails. */
        if (outlen + 1 >= outmax) {
            grown = (char*) realloc(sp, outlen + 500);
            if (grown == NULL) {
                sp[outlen] = '\0';
                *seq = sp;
                *seqlen = outlen;
                SeqPortFree(spp);
                return;
            }
            sp = grown;
            outmax = outlen + 500;
        }
        sp[outlen++] = residue;
    }

    /* Trim to the exact size; if that fails the larger block is kept. */
    grown = (char*) realloc(sp, outlen + 1);
    if (grown != NULL)
        sp = grown;
    sp[outlen] = '\0';
    *seq = sp;
    *seqlen = outlen;

    SeqPortFree(spp);
    return;
}
/*
 * MegaBlastPrintEndpoints
 *   Results callback that writes one line per HSP of the current hit list
 *   to search->output in the form
 *       'subject'=='[+-]query' (s_start q_start s_end q_end) score
 *
 *   ptr - the BlastSearchBlk for the subject that has just been searched
 *
 *   Always returns 0.  When the hit list is missing or empty only the
 *   subject info is destroyed.  HSPs that are NULL, exceed the e-value
 *   cutoff, or have no query id are skipped.  The HSP offsets are
 *   converted in place to 1-based coordinates, and the context field is
 *   reduced to its strand bit.
 */
static int LIBCALLBACK MegaBlastPrintEndpoints(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   CharPtr subject_descr = NULL;    /* owned description string, if any */
   CharPtr subject_buffer = NULL;   /* subject id text */
   CharPtr subject_label;
   Boolean subject_is_token = FALSE; /* buffer points into subject_descr */
   SeqIdPtr sip, query_id;
   CharPtr query_buffer, query_storage, title;
   Int4 query_length, q_start, q_end, q_shift=0, s_shift=0;
   Int4 subject_end;
   Int4 hsp_index;
   Boolean numeric_sip_type = FALSE;
   BLAST_HSPPtr hsp;
   Int2 context;
   Char context_sign;
   Int4 subject_gi = 0, score;
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL ||
       search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   /* Without a database there is no descriptor; subject_descr then stays
      NULL instead of being read and freed uninitialized. */
   if (search->rdfp)
      readdb_get_descriptor(search->rdfp, search->subject_id, &sip,
                            &subject_descr);
   else
      sip = SeqIdSetDup(search->subject_info->sip);

   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) {
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type = GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                                  &subject_gi, &subject_buffer);
   } else {
      DbtagPtr db_tag = (DbtagPtr) sip->data.ptrvalue;
      /* NOTE(review): this branch is only entered when db equals
         "BL_ORD_ID", so the THC/TI test below cannot succeed as written;
         kept unchanged pending confirmation of the intent. */
      if (db_tag->db &&
          (!StringCmp(db_tag->db, "THC") ||
           !StringICmp(db_tag->db, "TI")) &&
          db_tag->tag->id != 0) {
         subject_buffer = (CharPtr) Malloc(16);
         sprintf(subject_buffer, "%ld", (long) db_tag->tag->id);
      } else {
         /* No database descriptor: fall back to the tag string of the
            BL_ORD_ID id, which the two-sequence drivers fill with the
            subject title. */
         if (subject_descr == NULL && db_tag->tag != NULL &&
             db_tag->tag->str != NULL)
            subject_descr = StringSave(db_tag->tag->str);

         if (subject_descr != NULL) {
            /* The token lives inside subject_descr, which remains the
               pointer that is freed at the end. */
            CharPtr rest = NULL;
            subject_buffer = StringTokMT(subject_descr, " \t", &rest);
            subject_is_token = TRUE;
         }
      }
   }
   subject_label = (subject_buffer != NULL) ? subject_buffer : (CharPtr) "";

   search->current_hitlist->hspcnt_max = search->current_hitlist->hspcnt;

   /* Only for the two sequences case, get offset shift if subject
      is a subsequence */
   if (!search->rdfp && search->query_slp->next) {
      s_shift = SeqLocStart(search->query_slp->next);
      subject_end = SeqLocStop(search->query_slp->next);
   } else {
      s_shift = 0;
      subject_end =
         readdb_get_sequence_length(search->rdfp, search->subject_id);
   }
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);

   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 &&
                        hsp->evalue > search->pbp->cutoff_e))
         continue;

      /* Correct query context is already found in
         BlastGetNonSumStatsEvalue */
      context = hsp->context;
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue;
      hsp->context = context & 1;
      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;
      hsp->subject.end = hsp->subject.offset + hsp->subject.length;

      if (hsp->context) {
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset =
            hsp->query.end - hsp->query.length + 1;
         context_sign = '-';
      } else {
         hsp->query.end = (++hsp->query.offset) + hsp->query.length - 1;
         if (hsp->query.end > query_length) {
            hsp->subject.end -= (hsp->query.end - query_length);
            hsp->query.end = query_length;
         }
         context_sign = '+';
      }

      if (hsp->subject.end > subject_end) {
         hsp->query.end -= (hsp->subject.end - subject_end);
         hsp->subject.end = subject_end;
      }
      hsp->subject.offset++;

      query_buffer = NULL;
      query_storage = NULL;   /* allocation to release for this HSP */
      if (query_id->choice == SEQID_LOCAL &&
          search->pbp->mb_params->full_seqids) {
         BioseqPtr query_bsp = BioseqLockById(query_id);

         /* The lock can fail; do not dereference a NULL Bioseq. */
         title = (query_bsp != NULL)
                    ? StringSave(BioseqGetTitle(query_bsp)) : NULL;
         if (title) {
            CharPtr rest = NULL;
            query_storage = title;
            query_buffer = StringTokMT(title, " ", &rest);
         } else {
            Int4 query_gi;
            GetAccessionFromSeqId((query_bsp != NULL) ? query_bsp->id
                                                      : query_id,
                                  &query_gi, &query_buffer);
            query_storage = query_buffer;
         }
         BioseqUnlock(query_bsp);
      } else {
         query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         query_storage = query_buffer;
         if (!search->pbp->mb_params->full_seqids)
            SeqIdWrite(query_id, query_buffer, PRINTID_TEXTID_ACCESSION,
                       BUFFER_LENGTH);
         else
            SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
      }

      /* With zero gap costs the raw score is converted using the reward
         and penalty values. */
      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                  search->sbp->reward / 2 - hsp->score) /
            (search->sbp->reward - search->sbp->penalty);
      else
         score = hsp->score;

      if (context_sign == '+') {
         q_start = hsp->query.offset;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset;
      }

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }

      hsp->subject.offset += s_shift;
      hsp->subject.end += s_shift;

      if (numeric_sip_type)
         fprintf(fp, "'%ld'=='%c%s' (%d %d %d %d) %d\n", (long) subject_gi,
                 context_sign,
                 (query_buffer != NULL) ? query_buffer : (CharPtr) "",
                 hsp->subject.offset, q_start,
                 hsp->subject.end, q_end, score);
      else
         fprintf(fp, "'%s'=='%c%s' (%d %d %d %d) %d\n", subject_label,
                 context_sign,
                 (query_buffer != NULL) ? query_buffer : (CharPtr) "",
                 hsp->subject.offset, q_start,
                 hsp->subject.end, q_end, score);

      /* Free the allocation itself, not a token pointer inside it. */
      MemFree(query_storage);
   }

   if (!numeric_sip_type && !subject_is_token)
      MemFree(subject_buffer);
   MemFree(subject_descr);
   sip = SeqIdSetFree(sip);
   return 0;
}
/** Callback that writes the alignments of the current hit list to the
 * search output stream, one record per HSP. Each record consists of a
 * header (subject id, strand and query id, end points, score), one "l" line
 * per aligned segment with its percent identity, and a closing brace. A
 * record is only emitted when the overall identity of the HSP reaches
 * mb_params->perc_identity.
 *
 * Side effects visible here: hsp->context, hsp->query.offset and
 * hsp->query.end of every processed HSP are rewritten, and
 * search->subject_info is destructed when there is nothing to print.
 *
 * @param ptr Search block, passed as an untyped pointer and cast to
 *            BlastSearchBlkPtr [in]
 * @return 0 if the current hit list is missing or empty, 1 otherwise.
 */
static int LIBCALLBACK MegaBlastPrintSegments(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   ReadDBFILEPtr rdfp = search->rdfp;
   BLAST_HSPPtr hsp;
   Int4 i, subject_gi = 0;
   Int2 context;
   CharPtr query_buffer, title;
   SeqIdPtr sip, query_id;
   Int4 hsp_index, score;
   Uint1Ptr query_seq, subject_seq = NULL;
   FloatHi perc_ident;
   Char strand;
   GapXEditScriptPtr esp;
   Int4 q_start, q_end, s_start, s_end, query_length, numseg;
   Int4 q_off, num_ident, align_length, total_ident, q_shift=0, s_shift=0;
   Int4Ptr length, start;
   Uint1Ptr strands;
   /* Both start out NULL: subject_descr is only filled in when a database
      is searched, yet it is unconditionally released at the end. */
   CharPtr subject_descr = NULL, subject_buffer = NULL, buffer;
   Char tmp_buffer[BUFFER_LENGTH];
   Int4 buffer_size, max_buffer_size = LARGE_BUFFER_LENGTH;
   Boolean numeric_sip_type = FALSE;
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL ||
       search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   subject_seq = search->subject->sequence_start + 1;

   if (rdfp)
      readdb_get_descriptor(rdfp, search->subject_id, &sip, &subject_descr);
   else
      sip = SeqIdSetDup(search->subject_info->sip);

   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) {
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type =
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                  &subject_gi, &subject_buffer);
   } else {
      /* "BL_ORD_ID" general id: use the first token of the description as
         the subject id. subject_descr is pointed at that token so that the
         clean-up below releases it exactly once. */
      subject_buffer = StringTokMT(subject_descr, " \t", &subject_descr);
      subject_descr = subject_buffer;
   }

   buffer = (CharPtr) Malloc(LARGE_BUFFER_LENGTH);

   /* Only for the two sequences case, get offset shift if subject
      is a subsequence */
   if (!rdfp && search->query_slp->next)
      s_shift = SeqLocStart(search->query_slp->next);
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);

   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 &&
                        hsp->evalue > search->pbp->cutoff_e)) {
         continue;
      }
      context = hsp->context;
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue;
      /* From here on the HSP only keeps the strand bit of the context. */
      hsp->context = context & 1;

      /* With zero gap costs the printed score is derived from the aligned
         lengths and the raw score; otherwise the raw score is printed. */
      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                  search->sbp->reward / 2 - hsp->score) /
            (search->sbp->reward - search->sbp->penalty);
      else
         score = hsp->score;

      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;

      q_off = hsp->query.offset;
      if (hsp->context) {
         strand = '-';
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset = hsp->query.end - hsp->query.length;
      } else {
         strand = '+';
         hsp->query.end = hsp->query.offset + hsp->query.length;
      }

      if (strand == '+') {
         q_start = hsp->query.offset + 1;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset + 1;
      }
      s_start = hsp->subject.offset + 1;
      s_end = hsp->subject.offset + hsp->subject.length;

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }
      s_start += s_shift;
      s_end += s_shift;

      /* Build the printable query id. It is reset for every HSP so that a
         lookup that produces nothing falls through to SeqIdWrite below
         instead of leaving the pointer indeterminate. */
      query_buffer = NULL;
      if (query_id->choice == SEQID_LOCAL &&
          search->pbp->mb_params->full_seqids) {
         BioseqPtr query_bsp = BioseqLockById(query_id);
         if (query_bsp != NULL) {
            CharPtr title_copy = StringSave(BioseqGetTitle(query_bsp));
            if (title_copy) {
               /* Use the first token of the title as the query id. */
               title = title_copy;
               query_buffer = StringTokMT(title, " ", &title);
               if (query_buffer == NULL)
                  MemFree(title_copy); /* no token: do not leak the copy */
               /* NOTE(review): the token is released with MemFree at the
                  end of the loop, which presumes it starts at the
                  beginning of title_copy - confirm for titles with
                  leading blanks. */
            } else {
               Int4 query_gi;
               GetAccessionFromSeqId(query_bsp->id, &query_gi,
                                     &query_buffer);
            }
            BioseqUnlock(query_bsp);
         }
      }
      if (query_buffer == NULL) {
         query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         if (!search->pbp->mb_params->full_seqids)
            SeqIdWrite(query_id, query_buffer, PRINTID_TEXTID_ACCESSION,
                       BUFFER_LENGTH);
         else
            SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
      }

      if (numeric_sip_type)
         sprintf(buffer, "\n#'>%ld'=='%c%s' (%d %d %d %d) %d\na {\n s %d\n b %d %d\n e %d %d\n",
                 (long) subject_gi, strand, query_buffer,
                 s_start, q_start, s_end, q_end, score, score,
                 s_start, q_start, s_end, q_end);
      else
         sprintf(buffer, "\n#'>%s'=='%c%s' (%d %d %d %d) %d\na {\n s %d\n b %d %d\n e %d %d\n",
                 subject_buffer, strand, query_buffer,
                 s_start, q_start, s_end, q_end, score, score,
                 s_start, q_start, s_end, q_end);
      buffer_size = StringLen(buffer);
      query_seq = search->context[context].query->sequence;

      /* Count the edit script operations; one segment per operation. */
      esp = hsp->gap_info->esp;
      for (numseg=0; esp; esp = esp->next, numseg++);

      GXECollectDataForSeqalign(hsp->gap_info, hsp->gap_info->esp, numseg,
                                &start, &length, &strands,
                                &q_off, &hsp->subject.offset);

      /* Clip the first and last segments to the query boundaries. */
      if (start[0] < 0) {
         length[0] += start[0];
         start[1] -= start[0];
         start[0] = 0;
      }
      if (start[2*(numseg-1)] + length[numseg-1] > query_length)
         length[numseg-1] = query_length - start[2*(numseg-1)];

      total_ident = 0;
      align_length = 0;
      for (i=0; i<numseg; i++) {
         align_length += length[i];
         if (strand == '+') {
            q_start = start[2*i] + 1;
            q_end = q_start + length[i] - 1;
         } else {
            q_start = query_length - start[2*i];
            q_end = q_start - length[i] + 1;
         }
         /* Segments with a -1 start on either sequence are not printed. */
         if (start[2*i] != -1 && start[2*i+1] != -1) {
            num_ident = MegaBlastGetNumIdentical(query_seq, subject_seq,
                                                 start[2*i], start[2*i+1],
                                                 length[i], FALSE);
            perc_ident = (FloatHi) num_ident / length[i] * 100;
            total_ident += num_ident;
            sprintf(tmp_buffer, " l %d %d %d %d (%.0f)\n",
                    start[2*i+1]+1, q_start,
                    start[2*i+1]+length[i], q_end, perc_ident);
            /* Keep two bytes in reserve for the closing "}" and the
               terminator appended after the loop. */
            if ((buffer_size += StringLen(tmp_buffer)) >
                max_buffer_size - 2) {
               max_buffer_size *= 2;
               /* NOTE(review): the Realloc result is not checked. */
               buffer = (CharPtr) Realloc(buffer, max_buffer_size);
            }
            StringCat(buffer, tmp_buffer);
         }
      }
      if (100*total_ident >=
          align_length*search->pbp->mb_params->perc_identity) {
         StringCat(buffer, "}");
         fprintf(fp, "%s\n", buffer);
      }
      MemFree(start);
      MemFree(length);
      MemFree(strands);
      MemFree(query_buffer);
   } /* End loop on hsp's */

   if (!numeric_sip_type && subject_buffer != subject_descr)
      MemFree(subject_buffer);
   MemFree(subject_descr);
   MemFree(buffer);

   sip = SeqIdSetFree(sip);
   fflush(fp);
   return 1;
}
/** Creates the header part of an XML report for a BLAST search.
 * @param program Program name; "rpsblast" and "rpstblastn" are reported
 *                as "blastp" and "blastx" respectively [in]
 * @param database Database name, may be NULL [in]
 * @param query_loc Query Seq-loc, may be NULL [in]
 * @param flags Flag to indicate whether query sequence should be included in
 *              the output. [in]
 * @param search_param Search parameters [in]
 * @return Newly created report header, or NULL if the BlastOutput structure
 *         could not be created.
 */
static BlastOutput*
s_CreateBlastOutputHead(const char* program, const char* database,
                        SeqLoc* query_loc, Int4 flags,
                        const Blast_SearchParams* search_param)
{
    BlastOutput* boutp;
    Char buffer[1024];
    char* program_to_use = NULL;

    if ((boutp = BlastOutputNew()) == NULL)
        return NULL;

    if (strcmp(program, "rpsblast") == 0)
        program_to_use = strdup("blastp");
    else if (strcmp(program, "rpstblastn") == 0)
        program_to_use = strdup("blastx");
    else
        program_to_use = strdup(program);

    if (query_loc) {
        SeqId* sip = SeqLocId(query_loc);
        Bioseq* bsp;

        SeqIdWrite(sip, buffer, PRINTID_FASTA_LONG, sizeof(buffer));
        boutp->query_ID = strdup(buffer);

        bsp = BioseqLockById(sip);
        if (bsp != NULL) {
            if (BioseqGetTitle(bsp) != NULL)
                boutp->query_def = strdup(BioseqGetTitle(bsp));
            else
                boutp->query_def = strdup("No definition line found");
        }
        BioseqUnlock(bsp);

        boutp->query_len = SeqLocLen(query_loc);

        if (flags & BXML_INCLUDE_QUERY) {
            boutp->query_seq = (char *) calloc(boutp->query_len+1, 1);
            /* Only stream the sequence if there is a buffer to receive it. */
            if (boutp->query_seq != NULL) {
                SeqPortStreamLoc(query_loc,
                                 STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
                                 boutp->query_seq, NULL);
            }
        } else {
            boutp->query_seq = NULL;    /* Do we need sequence here??? */
        }
    }

    /* Program name. Use the local version of the program. No need to copy
       it since it was locally allocated. */
    boutp->program = program_to_use;

    /* Database name */
    if (database)
        boutp->db = strdup(database);

    /* Version text; bounded so an unexpectedly long program name or version
       string cannot overrun the stack buffer. */
    snprintf(buffer, sizeof(buffer), "%s %s [%s]", program_to_use,
             BlastGetVersionNumber(), BlastGetReleaseDate());
    boutp->version = strdup(buffer);

    /* Reference */
    boutp->reference = BlastGetReference(FALSE);

    /* Filling parameters */
    boutp->param = ParametersNew();
    if (boutp->param != NULL && search_param != NULL) {
        boutp->param->expect = search_param->expect;
        boutp->param->gap_open = search_param->gap_open;
        boutp->param->gap_extend = search_param->gap_extension;
        if (search_param->matrix)
            boutp->param->matrix = strdup(search_param->matrix);
        boutp->param->sc_match = search_param->match;
        boutp->param->sc_mismatch = search_param->mismatch;
        boutp->param->include = search_param->ethresh;
        if (search_param->filter_string)
            boutp->param->filter = strdup(search_param->filter_string);
    }

    return boutp;
}
Int2 Main(void) { AsnIoPtr aip; SeqEntryPtr sep; BioseqPtr PNTR seqlist; Int4 seqnum, i, numseg, lens[10], j; Int2 ctr; SeqPortPtr spp; Uint1 residue; FILE* fp; CharPtr title; Char buffer[101]; MonitorPtr mon; /* check command line arguments */ if ( ! GetArgs("SeqTest",NUMARG, myargs)) return 1; mon = MonitorStrNew("SeqTest", 40); SetProgMon(StdProgMon, (Pointer)mon); /* ** Load SeqEntry object loader and sequence alphabets */ if (! SeqEntryLoad()) { Message(MSG_ERROR, "SeqEntryLoad failed"); return 1; } /* ** Use the file "example.prt" as the ASN I/O stream. This file ** can be found in the ncbi/demo. It is in ASN.1 Print Value format. */ if ((aip = AsnIoOpen(myargs[0].strvalue, "r")) == NULL) return 1; /* ** Write the output to "seqtest.out". */ fp = FileOpen(myargs[1].strvalue, "w"); fprintf(fp, "Sequence summary:\n\n"); /* ** Read in the whole entry into the Sequence Entry Pointer, sep. ** Close the ASN stream, which in turn closes the input file. */ sep = SeqEntryAsnRead(aip, NULL); aip = AsnIoClose(aip); mon = MonitorFree(mon); SetProgMon(NULL, NULL); /* ** Determine how many Bioseqs are in this SeqEntry. Allocate ** enough memory to hold a list of pointer to all of these ** Bioseqs. Invoke an Explore function to "visit"each Bioseq. ** We are allowed to pass one pointer for use by the exploring ** function, in this case, "BuildList". */ seqnum = BioseqCount(sep); seqlist = MemNew((size_t)(seqnum * sizeof(BioseqPtr))); BioseqExplore(sep, (Pointer) seqlist, BuildList); /* ** For each Bioseq in the SeqEntry write out it's title ** len, number of gaps, and number of segments. Write out ** the length of each segment, up to 10. 
*/ for(i = 0; i < seqnum; i++) { numseg = BioseqCountSegs(seqlist[i]); title = BioseqGetTitle(seqlist[i]); FilePuts((VoidPtr)title, fp); FilePuts("\n", fp); fprintf(fp, "len=%ld gaps=%ld segs=%ld\n", BioseqGetLen(seqlist[i]), BioseqGetGaps(seqlist[i]), numseg); if ((numseg > 1) && (numseg <= 10)) { BioseqGetSegLens (seqlist[i], lens); for (j = 0; j < numseg; j++) fprintf(fp, " len = %ld\n", lens[j]); } FilePuts("\n", fp); } spp = SeqPortNew(seqlist[0], 0, -1, 0, Seq_code_iupacna); if (spp == NULL) Message(MSG_ERROR, "fail on SeqPortNew"); fprintf(fp, "SeqPort: plus strand with SeqPortGetResidue\n\n"); i = 0; while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) { if (! IS_residue(residue)) { buffer[i] = '\0'; fprintf(fp, "%s\n", buffer); i = 0; switch (residue) { case SEQPORT_VIRT: fprintf(fp, "[Gap]\n"); break; case SEQPORT_EOS: fprintf(fp, "[EOS]\n"); break; default: fprintf(fp, "[Invalid Residue]\n"); break; } } else { buffer[i] = residue; i++; if (i == 60) { buffer[i] = '\0'; fprintf(fp, "%s\n", buffer); i = 0; } } } if (i) { buffer[i] = '\0'; fprintf(fp, "%s\n", buffer); } fprintf(fp, "[EOF]\n"); SeqPortFree(spp); fprintf(fp, "\nSeqPort on minus with SeqPortRead\n\n"); spp = SeqPortNew(seqlist[0], 0, -1, Seq_strand_minus, Seq_code_iupacna); if (spp == NULL) Message(MSG_ERROR, "fail on SeqPortNew"); do { ctr = SeqPortRead(spp, (Uint1Ptr)buffer, 60); if (ctr > 0) { buffer[ctr] = '\0'; fprintf(fp, "%s\n", buffer); } else { ctr *= -1; switch (ctr) { case SEQPORT_VIRT: fprintf(fp, "[Gap]\n"); break; case SEQPORT_EOS: fprintf(fp, "[EOS]\n"); break; case SEQPORT_EOF: fprintf(fp, "[EOF]\n"); break; default: fprintf(fp, "[Invalid Residue]\n"); break; } } } while (ctr != SEQPORT_EOF); SeqPortFree(spp); /* ** Write out the nucleic acid sequences in this SeqEntry */ fprintf(fp, "\nNucleic Acids in FASTA format:\n\n"); SeqEntryToFasta(sep, fp, TRUE); /* ** Write out the protein sequences in this SeqEntry. 
*/ fprintf(fp, "\nProteins in FASTA format:\n\n"); SeqEntryToFasta(sep, fp, FALSE); /* ** Close the output file and free up allocated space. */ fclose(fp); MemFree(seqlist); SeqEntryFree(sep); return 0; }