static Int2 Main_old (void) { AsnIoPtr aip, xml_aip = NULL; BioseqPtr query_bsp, PNTR query_bsp_array; BioSourcePtr source; BLAST_MatrixPtr matrix; BLAST_OptionsBlkPtr options; BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL; BlastPruneSapStructPtr prune; Boolean db_is_na, query_is_na, show_gi, believe_query=FALSE; Boolean html=FALSE; CharPtr params_buffer=NULL; Int4 number_of_descriptions, number_of_alignments; SeqAlignPtr seqalign, PNTR seqalign_array; SeqAnnotPtr seqannot; SeqEntryPtr PNTR sepp; TxDfDbInfoPtr dbinfo=NULL, dbinfo_head; Uint1 align_type, align_view, out_type; Uint4 align_options, print_options; ValNodePtr mask_loc, mask_loc_start, next_mask_loc; ValNodePtr vnp, other_returns, error_returns; CharPtr blast_program, blast_database, blast_inputfile, blast_outputfile; FILE *infp, *outfp, *mqfp=NULL; Int4 index, num_bsps, total_length, total_processed = 0; Int2 ctr = 1; Char prefix[2]; SeqLocPtr last_mask, mask_slp; Boolean done, hits_found; Boolean lcase_masking; MBXmlPtr mbxp = NULL; Boolean traditional_formatting; blast_program = "blastn"; blast_database = myargs [ARG_DB].strvalue; blast_inputfile = myargs [ARG_QUERY].strvalue; blast_outputfile = myargs [ARG_OUT].strvalue; if (myargs[ARG_HTML].intvalue) html = TRUE; if ((infp = FileOpen(blast_inputfile, "r")) == NULL) { ErrPostEx(SEV_FATAL, 1, 0, "mgblast: Unable to open input file %s\n", blast_inputfile); return (1); } align_view = (Int1) myargs[ARG_FORMAT].intvalue; /* Geo mod: -- replaced myargs[ARG_OUTTYPE].intvalue with out_type from now on */ out_type=(Int1) myargs[ARG_OUTTYPE].intvalue; if (out_type==MGBLAST_FLTHITS || out_type==MGBLAST_HITGAPS) { align_view = 12 + (out_type-MGBLAST_FLTHITS ); out_type=MBLAST_ALIGNMENTS; //Attention: 12 MUST be the -m mgblast tab option for MGBLAST_FLTHITS format // and MGBLAST_HITGAPS = MGBLAST_FLTHITS+1 if (align_view>12) { // this is MGBLAST_HITGAPS output gap_Info=TRUE; if (dbgaps_buf==NULL) dbgaps_buf=(CharPtr) Malloc(dbgaps_bufsize + 1); if (qgaps_buf==NULL) qgaps_buf=(CharPtr) Malloc(qgaps_bufsize + 1); } } outfp = NULL; traditional_formatting = (out_type == MBLAST_ALIGNMENTS || out_type == MBLAST_DELAYED_TRACEBACK); if ((!traditional_formatting || (align_view != 7 && align_view != 10 && align_view != 11)) && blast_outputfile != NULL) { if ((outfp = FileOpen(blast_outputfile, "w")) == NULL) { ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile); return (1); } } //align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na); align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na); /* if (!traditional_formatting) believe_query = TRUE; else believe_query = (Boolean) myargs[ARG_BELIEVEQUERY].intvalue; */ //Geo mod: believe_query=FALSE; //If ASN.1 output is requested and believe_query is not set to TRUE, // exit with an error. if (!believe_query && (myargs[ARG_ASNOUT].strvalue || align_view == 10 || align_view == 11)) { ErrPostEx(SEV_FATAL, 1, 0, "-J option must be TRUE to produce ASN.1 output; before " "changing -J to TRUE please also ensure that all query " "sequence identifiers are unique"); return -1; } options = BLASTOptionNewEx(blast_program, TRUE, TRUE); if (options == NULL) return 3; options->do_sum_stats = FALSE; options->is_neighboring = FALSE; options->expect_value = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue; number_of_descriptions = myargs[ARG_DESCRIPTIONS].intvalue; number_of_alignments = myargs[ARG_ALIGNMENTS].intvalue; options->hitlist_size = MAX(number_of_descriptions, number_of_alignments); if (myargs[ARG_XDROP].intvalue != 0) options->gap_x_dropoff = myargs[ARG_XDROP].intvalue; if (myargs[ARG_XDROP_UNGAPPED].intvalue != 0) options->dropoff_2nd_pass = myargs[ARG_XDROP_UNGAPPED].intvalue; if (myargs[ARG_XDROP_FINAL].intvalue != 0) options->gap_x_dropoff_final = myargs[ARG_XDROP_FINAL].intvalue; if (StringICmp(myargs[ARG_FILTER].strvalue, "T") == 0) options->filter_string = StringSave("D"); else options->filter_string = StringSave(myargs[ARG_FILTER].strvalue); show_gi = (Boolean) myargs[ARG_SHOWGIS].intvalue; options->penalty = myargs[ARG_MISMATCH].intvalue; options->reward = myargs[ARG_MATCH].intvalue; if (myargs[ARG_GAPOPEN].intvalue >= 0) options->gap_open = myargs[ARG_GAPOPEN].intvalue; if (myargs[ARG_GAPEXT].intvalue >= 0) options->gap_extend = myargs[ARG_GAPEXT].intvalue; if (options->gap_open == 0 && options->reward % 2 == 0 && options->gap_extend == options->reward / 2 - options->penalty) /* This is the default value */ options->gap_extend = 0; options->genetic_code = 1; options->db_genetic_code = 1; /* Default; it's not needed here anyway */ options->number_of_cpus = myargs[ARG_THREADS].intvalue; if (myargs[ARG_WORDSIZE].intvalue != 0) options->wordsize = myargs[ARG_WORDSIZE].intvalue; if (myargs[ARG_MINSCORE].intvalue == 0) options->cutoff_s2 = options->wordsize*options->reward; else options->cutoff_s2 = myargs[ARG_MINSCORE].intvalue; options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue; options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue; options->perform_culling = FALSE; /* Kludge */ options->block_width = myargs[ARG_MAXPOS].intvalue; options->strand_option = myargs[ARG_STRAND].intvalue; options->window_size = myargs[ARG_WINDOW].intvalue; #ifdef DO_NOT_SUPPRESS_BLAST_OP options->mb_template_length = myargs[ARG_TEMPL_LEN].intvalue; if (myargs[ARG_TEMPL_LEN].intvalue != 0) options->mb_one_base_step = (Boolean) myargs[ARG_EVERYBASE].intvalue; options->mb_disc_type = myargs[ARG_TEMPL_TYPE].intvalue; #endif lcase_masking = (Boolean) myargs[ARG_LCASE].intvalue; /* Allow dynamic programming gapped extension only with affine gap scores */ if (options->gap_open != 0 || options->gap_extend != 0) options->mb_use_dyn_prog = (Boolean) myargs[ARG_DYNAMIC].intvalue; print_options = 0; align_options = 0; align_options += TXALIGN_COMPRESS; align_options += TXALIGN_END_NUM; if (show_gi) { align_options += TXALIGN_SHOW_GI; print_options += TXALIGN_SHOW_GI; } if (align_view) { align_options += TXALIGN_MASTER; if (align_view == 1 || align_view == 3) align_options += TXALIGN_MISMATCH; if (align_view == 3 || align_view == 4 || align_view == 6) align_options += TXALIGN_FLAT_INS; if (align_view == 5 || align_view == 6) align_options += TXALIGN_BLUNT_END; } else { align_options += TXALIGN_MATRIX_VAL; align_options += TXALIGN_SHOW_QS; } if (html) { align_options += TXALIGN_HTML; print_options += TXALIGN_HTML; } if (myargs[ARG_GILIST].strvalue) options->gifile = StringSave(myargs[ARG_GILIST].strvalue); if (out_type == MBLAST_ENDPOINTS) options->no_traceback = 1; else if (out_type == MBLAST_DELAYED_TRACEBACK) options->no_traceback = 2; else options->no_traceback = 0; options->megablast_full_deflines = (Boolean) myargs[ARG_FULLID].intvalue; options->perc_identity = (FloatLo) myargs[ARG_PERC_IDENT].floatvalue; options->hsp_num_max = myargs[ARG_MAXHSP].intvalue; if (!believe_query) options->megablast_full_deflines = TRUE; /*if (options->megablast_full_deflines) believe_query = FALSE;*/ query_bsp_array = (BioseqPtr PNTR) MemNew((MAX_NUM_QUERIES+1)*sizeof(BioseqPtr)); sepp = (SeqEntryPtr PNTR) MemNew(MAX_NUM_QUERIES*sizeof(SeqEntryPtr)); StrCpy(prefix, ""); global_fp = outfp; options->output = outfp; if (traditional_formatting) { if (align_view < 7) { if (html) { fprintf(outfp, "<HTML>\n<TITLE>MEGABLAST Search Results</TITLE>\n"); fprintf(outfp, "<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" " "VLINK=\"#660099\" ALINK=\"#660099\">\n"); fprintf(outfp, "<PRE>\n"); } init_buff_ex(90); BlastPrintVersionInfo("mgblast", html, outfp); fprintf(outfp, "\n"); MegaBlastPrintReference(html, 90, outfp); fprintf(outfp, "\n"); if(!PrintDbInformation(blast_database, !db_is_na, 70, outfp, html)) return 1; free_buff(); #ifdef OS_UNIX fprintf(global_fp, "%s", "Searching"); #endif } } aip = NULL; if (myargs[ARG_ASNOUT].strvalue != NULL) { if ((aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w")) == NULL) { ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", myargs[ARG_ASNOUT].strvalue); return 1; } } else if (align_view == 10 || align_view == 11) { const char* mode = (align_view == 10) ? "w" : "wb"; if ((aip = AsnIoOpen (blast_outputfile, (char*) mode)) == NULL) { ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile); return 1; } } if (align_view == 7) { xml_aip = AsnIoOpen(blast_outputfile, "wx"); } if (myargs[ARG_QUERYLOC].strvalue) { Int4 start, end; Megablast_GetLoc(myargs[ARG_QUERYLOC].strvalue, &start, &end); options->required_start = start - 1; options->required_end = end -1; } done = FALSE; while (!done) { num_bsps = 0; total_length = 0; done = TRUE; SeqMgrHoldIndexing(TRUE); mask_slp = last_mask = NULL; while ((sepp[num_bsps]=FastaToSeqEntryForDb(infp, query_is_na, NULL, believe_query, prefix, &ctr, &mask_slp)) != NULL) { if (!lcase_masking) /* Lower case ignored */ mask_slp = SeqLocFree(mask_slp); if (mask_slp) { if (!last_mask) options->query_lcase_mask = last_mask = mask_slp; else { last_mask->next = mask_slp; last_mask = last_mask->next; } mask_slp = NULL; } query_bsp = NULL; SeqEntryExplore(sepp[num_bsps], &query_bsp, FindNuc); //debug: /* char query_buffer[255]; SeqIdWrite(query_bsp->id, query_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH); fprintf(stderr, "===> query_buf=%s\n", query_buffer); */ if (query_bsp == NULL) { ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n"); return 2; } source = BioSourceNew(); source->org = OrgRefNew(); source->org->orgname = OrgNameNew(); source->org->orgname->gcode = options->genetic_code; ValNodeAddPointer(&(query_bsp->descr), Seq_descr_source, source); query_bsp_array[num_bsps++] = query_bsp; total_length += query_bsp->length; if (total_length > myargs[ARG_MAXQUERY].intvalue || num_bsps >= MAX_NUM_QUERIES) { done = FALSE; break; } } if (num_bsps == 0) break; SeqMgrHoldIndexing(FALSE); other_returns = NULL; error_returns = NULL; if (out_type==MBLAST_ENDPOINTS) seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program, blast_database, options, &other_returns, &error_returns, dummy_callback, NULL, NULL, 0, MegaBlastPrintEndpoints); else if (out_type==MBLAST_SEGMENTS) seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program, blast_database, options, &other_returns, &error_returns, dummy_callback, NULL, NULL, 0, MegaBlastPrintSegments); else if (out_type==MBLAST_ALIGN_INFO) { /* -- Geo mod: do not print header PrintTabularOutputHeader(blast_database, (num_bsps==1) ? query_bsp_array[0] : NULL, NULL, "megablast", 0, believe_query, global_fp);*/ seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program, blast_database, options, &other_returns, &error_returns, dummy_callback, NULL, NULL, 0, MegaBlastPrintAlignInfo); } else if (out_type==MBLAST_ALIGNMENTS) { seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program, blast_database, options, &other_returns, &error_returns, align_view < 7 ? tick_callback : NULL, NULL, NULL, 0, NULL); } #ifdef OS_UNIX fflush(global_fp); #endif if (error_returns) { BlastErrorPrint(error_returns); for (vnp = error_returns; vnp; vnp = vnp->next) { BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue); } ValNodeFree(error_returns); } if (traditional_formatting) { dbinfo = NULL; ka_params = NULL; ka_params_gap = NULL; params_buffer = NULL; mask_loc = NULL; matrix = NULL; for (vnp=other_returns; vnp; vnp = vnp->next) { switch (vnp->choice) { case TXDBINFO: dbinfo = vnp->data.ptrvalue; break; case TXKABLK_NOGAP: ka_params = vnp->data.ptrvalue; break; case TXKABLK_GAP: ka_params_gap = vnp->data.ptrvalue; break; case TXPARAMETERS: params_buffer = vnp->data.ptrvalue; break; case TXMATRIX: matrix = vnp->data.ptrvalue; break; case SEQLOC_MASKING_NOTSET: case SEQLOC_MASKING_PLUS1: case SEQLOC_MASKING_PLUS2: case SEQLOC_MASKING_PLUS3: case SEQLOC_MASKING_MINUS1: case SEQLOC_MASKING_MINUS2: case SEQLOC_MASKING_MINUS3: ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue); break; default: break; } } #ifdef OS_UNIX if(align_view < 7) { fprintf(global_fp, "%s\n", " done"); } #endif if (myargs[ARG_MASKEDQUERY].strvalue) { if ((mqfp = FileOpen(myargs[ARG_MASKEDQUERY].strvalue, "w")) == NULL) ErrPostEx(SEV_WARNING, 1, 0, "Unable to open file %s for masked query\n", myargs[ARG_MASKEDQUERY].strvalue); } hits_found = FALSE; mask_loc_start = next_mask_loc = mask_loc; mask_loc = NULL; if (align_view == 7) { mbxp = PSIXmlInit(xml_aip, "megablast", blast_database, options, query_bsp_array[0], 0); } if (seqalign_array) { //results returned back for processing ReadDBBioseqFetchEnable ("megablast", blast_database, db_is_na, TRUE); for (index=0; index<num_bsps; index++) { seqalign = seqalign_array[index]; if (next_mask_loc && SeqIdComp(SeqLocId((SeqLocPtr)next_mask_loc->data.ptrvalue), query_bsp_array[index]->id) == SIC_YES) { mask_loc = (SeqLocPtr) MemDup(next_mask_loc, sizeof(SeqLoc)); next_mask_loc = next_mask_loc->next; mask_loc->next = NULL; } if (mqfp) { /* convert mask locations from all sources into a single seqloc */ mask_slp = NULL; if (mask_loc) mask_slp = blastMergeFilterLocs(mask_slp, (SeqLocPtr)mask_loc->data.ptrvalue, FALSE, 0, 0); PrintMaskedSequence(query_bsp_array[index], mask_slp, mqfp, 50, lcase_masking); SeqLocSetFree(mask_slp); } if (seqalign==NULL) { mask_loc = MemFree(mask_loc); continue; } hits_found = TRUE; if (align_view < 7) { init_buff_ex(70); AcknowledgeBlastQuery(query_bsp_array[index], 70, outfp, believe_query, html); free_buff(); } if (align_view == 8 || align_view == 9) { if (align_view == 9) PrintTabularOutputHeader(blast_database, query_bsp_array[index], NULL, blast_program, 0, believe_query, global_fp); /* debug: char qbuf[512]; strcpy(qbuf, BioseqGetTitle(query_bsp_array[index])); fprintf(stderr, "---> Here: query title=%s\n", qbuf); */ BlastPrintTabulatedResults(seqalign, query_bsp_array[index], NULL, number_of_alignments, blast_program, !options->gapped_calculation, believe_query, 0, 0, global_fp, (align_view == 9)); ObjMgrFreeCache(0); SeqAlignSetFree(seqalign); mask_loc = MemFree(mask_loc); continue; } //Geo mod: else if (align_view>=12) { MGBlastPrintTab(seqalign, query_bsp_array[index], number_of_alignments, !options->gapped_calculation, global_fp); ObjMgrFreeCache(0); SeqAlignSetFree(seqalign); mask_loc = MemFree(mask_loc); continue; } else if(align_view == 7) { IterationPtr iterp; iterp = BXMLBuildOneQueryIteration(seqalign, NULL, FALSE, !options->gapped_calculation, index, NULL, query_bsp_array[index], mask_loc); IterationAsnWrite(iterp, mbxp->aip, mbxp->atp); AsnIoFlush(mbxp->aip); IterationFree(iterp); SeqAlignSetFree(seqalign); mask_loc = MemFree(mask_loc); continue; } seqannot = SeqAnnotNew(); seqannot->type = 2; AddAlignInfoToSeqAnnot(seqannot, align_type); seqannot->data = seqalign; if (aip) { SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL); AsnIoReset(aip); } if (outfp) { /* Uncacheing causes problems with ordinal nos. vs. gi's. */ prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_descriptions, NULL); ObjMgrSetHold(); init_buff_ex(85); PrintDefLinesFromSeqAlign(prune->sap, 80, outfp, print_options, FIRST_PASS, NULL); free_buff(); prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_alignments, prune); seqannot->data = prune->sap; if (align_view != 0) ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, NULL, mask_loc, NULL); else ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, NULL, mask_loc, FormatScoreFunc); seqannot->data = seqalign; prune = BlastPruneSapStructDestruct(prune); ObjMgrClearHold(); ObjMgrFreeCache(0); } seqannot = SeqAnnotFree(seqannot); mask_loc = MemFree(mask_loc); } /* End loop on seqaligns for different queries */ ReadDBBioseqFetchDisable(); } if (mbxp != NULL) { MBXmlClose(mbxp, other_returns, !options->gapped_calculation); } if (mqfp) FileClose(mqfp); if (!hits_found && align_view < 7) fprintf(outfp, "\n\n ***** No hits found ******\n\n"); matrix = BLAST_MatrixDestruct(matrix); if(html) fprintf(outfp, "<PRE>\n"); init_buff_ex(85); dbinfo_head = dbinfo; if(align_view < 7) { while (dbinfo) { PrintDbReport(dbinfo, 70, outfp); dbinfo = dbinfo->next; } } dbinfo_head = TxDfDbInfoDestruct(dbinfo_head); if (ka_params) { if(align_view < 7) PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE); MemFree(ka_params); } if (ka_params_gap) { if(align_view < 7) PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE); MemFree(ka_params_gap); } if(align_view < 7) PrintTildeSepLines(params_buffer, 70, outfp); MemFree(params_buffer); free_buff(); mask_loc = mask_loc_start; while (mask_loc) { SeqLocSetFree(mask_loc->data.ptrvalue); mask_loc = mask_loc->next; } ValNodeFree(mask_loc_start); } else { //not traditional formatting /* Just destruct all other_returns parts */ for (vnp=other_returns; vnp; vnp = vnp->next) { switch (vnp->choice) { case TXDBINFO: TxDfDbInfoDestruct(vnp->data.ptrvalue); break; case TXKABLK_NOGAP: case TXKABLK_GAP: case TXPARAMETERS: MemFree(vnp->data.ptrvalue); break; case TXMATRIX: BLAST_MatrixDestruct(vnp->data.ptrvalue); break; case SEQLOC_MASKING_NOTSET: case SEQLOC_MASKING_PLUS1: case SEQLOC_MASKING_PLUS2: case SEQLOC_MASKING_PLUS3: case SEQLOC_MASKING_MINUS1: case SEQLOC_MASKING_MINUS2: case SEQLOC_MASKING_MINUS3: mask_loc = vnp->data.ptrvalue; SeqLocSetFree(mask_loc); default: break; } } } other_returns = ValNodeFree(other_returns); MemFree(seqalign_array); options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask); /* Freeing SeqEntries can be very expensive, do this only if this is not the last iteration of search */ if (!done) { for (index=0; index<num_bsps; index++) { sepp[index] = SeqEntryFree(sepp[index]); query_bsp_array[index] = NULL; } } total_processed += num_bsps; } /* End of loop on complete searches */ aip = AsnIoClose(aip); /*if (align_view == 7) xml_aip = AsnIoClose(xml_aip);*/ if (align_view < 7 && html) fprintf(outfp, "</PRE>\n</BODY>\n</HTML>\n"); if (align_view < 7 && myargs[ARG_LOGINFO].intvalue) fprintf(outfp, "Mega BLAST run finished, processed %d queries\n", total_processed); MemFree(query_bsp_array); MemFree(sepp); MemFree(qgaps_buf); MemFree(dbgaps_buf); options = BLASTOptionDelete(options); FileClose(infp); FileClose(outfp); return 0; }
Int2 Main_old (void) { AsnIoPtr aip; BioseqPtr fake_bsp = NULL, fake_subject_bsp = NULL, query_bsp = NULL, subject_bsp = NULL; BioseqPtr bsp1, bsp2; BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL; BLAST_OptionsBlkPtr options=NULL; Boolean seq1_is_na, seq2_is_na; CharPtr params_buffer=NULL; DbtagPtr dbtagptr; Uint1 align_type; Uint4 align_options; SeqAlignPtr seqalign; SeqAnnotPtr seqannot; SeqEntryPtr sep = NULL, sep1 = NULL; CharPtr program_name, blast_outputfile; FILE *outfp; ValNodePtr mask_loc, mask_loc_start, vnp, other_returns=NULL, error_returns=NULL; BLAST_MatrixPtr matrix; Int4Ptr PNTR txmatrix; int (LIBCALLBACK *handle_results)PROTO((VoidPtr search)) = NULL; Boolean entrez_lookup = FALSE; Boolean html, seqannot_output, believe_query; Uint1 tabular_output; Boolean gapped_calculation; entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue; html = (Boolean) myargs[ARG_HTML].intvalue; seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL); blast_outputfile = myargs [ARG_OUT].strvalue; program_name = StringSave(myargs[ARG_PROGRAM].strvalue); if (StringCmp(program_name, "blastn") && StringCmp(program_name, "blastp") && StringCmp(program_name, "blastx") && StringCmp(program_name, "tblastn") && StringCmp(program_name, "tblastx")) { ErrPostEx(SEV_FATAL, 1, 0, "Program name must be blastn, blastp, blastx, tblastn or tblastx\n"); return (1); } align_type = BlastGetTypes(program_name, &seq1_is_na, &seq2_is_na); if ((outfp = FileOpen(blast_outputfile, "w")) == NULL) { ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile); return (1); } gapped_calculation = (Boolean) myargs[ARG_GAPPED].intvalue; believe_query = (seqannot_output || entrez_lookup); options = BLASTOptionNewEx(program_name, gapped_calculation, (Boolean) myargs[ARG_USEMEGABLAST].intvalue); if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp, &sep, &sep1, &(options->query_lcase_mask), believe_query) == FALSE) { ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences"); return (1); } if (!entrez_lookup) { if (!believe_query) fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL); fake_subject_bsp = BioseqNew(); fake_subject_bsp->descr = subject_bsp->descr; fake_subject_bsp->repr = subject_bsp->repr; fake_subject_bsp->mol = subject_bsp->mol; fake_subject_bsp->length = subject_bsp->length; fake_subject_bsp->seq_data = subject_bsp->seq_data; fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type; dbtagptr = DbtagNew(); dbtagptr->db = StringSave("BL_ORD_ID"); dbtagptr->tag = ObjectIdNew(); if (BioseqGetTitle(subject_bsp) != NULL) dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp)); else dbtagptr->tag->str = StringSave("No definition line found"); ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr); bsp1 = (believe_query ? query_bsp : fake_bsp); bsp2 = fake_subject_bsp; } else { bsp1 = query_bsp; bsp2 = subject_bsp; } tabular_output = (Uint1) myargs[ARG_FORMAT].intvalue; if (myargs[ARG_SEARCHSP].floatvalue) options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue; options->filter_string = StringSave(myargs[ARG_FILTER].strvalue); options->expect_value = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue; if (StringICmp("blastn", program_name) == 0) { options->penalty = myargs[ARG_MISMATCH].intvalue; options->reward = myargs[ARG_MATCH].intvalue; } options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue; options->discontinuous = FALSE; if (myargs[ARG_XDROP].intvalue != 0) { options->gap_x_dropoff = myargs[ARG_XDROP].intvalue; } if (myargs[ARG_WORDSIZE].intvalue != 0) options->wordsize = (Int2) myargs[ARG_WORDSIZE].intvalue; if (options->is_megablast_search) { options->cutoff_s2 = options->wordsize*options->reward; } options->matrix = MemFree(options->matrix); BLASTOptionSetGapParams(options, myargs[ARG_MATRIX].strvalue, 0, 0); if (myargs[ARG_GAPOPEN].intvalue != -1) options->gap_open = myargs[ARG_GAPOPEN].intvalue; if (myargs[ARG_GAPEXT].intvalue != -1) options->gap_extend = myargs[ARG_GAPEXT].intvalue; options->strand_option = myargs[ARG_STRAND].intvalue; /* Input longest intron length is in nucleotide scale; in the lower level code it will be used in protein scale */ if (myargs[ARG_INTRON].intvalue > 0) options->longest_intron = myargs[ARG_INTRON].intvalue; if (!myargs[ARG_LOC1].strvalue && !myargs[ARG_LOC2].strvalue) { seqalign = BlastTwoSequencesWithCallback(bsp1, bsp2, program_name, options, &other_returns, &error_returns, handle_results); } else { SeqLocPtr slp1=NULL, slp2=NULL; if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, options->strand_option) == FALSE) return 1; seqalign = BlastTwoSequencesByLocWithCallback(slp1, slp2, program_name, options, &other_returns, &error_returns, handle_results, NULL); SeqLocFree(slp1); SeqLocFree(slp2); } if (error_returns) { BlastErrorPrint(error_returns); for (vnp = error_returns; vnp; vnp = vnp->next) { BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue); } ValNodeFree(error_returns); } ka_params = NULL; ka_params_gap = NULL; params_buffer = NULL; mask_loc = NULL; matrix = NULL; txmatrix = NULL; for (vnp=other_returns; vnp; vnp = vnp->next) { switch (vnp->choice) { case TXKABLK_NOGAP: ka_params = vnp->data.ptrvalue; break; case TXKABLK_GAP: ka_params_gap = vnp->data.ptrvalue; break; case TXPARAMETERS: params_buffer = vnp->data.ptrvalue; break; case TXMATRIX: matrix = vnp->data.ptrvalue; if (matrix && !tabular_output) txmatrix = BlastMatrixToTxMatrix(matrix); break; case SEQLOC_MASKING_NOTSET: case SEQLOC_MASKING_PLUS1: case SEQLOC_MASKING_PLUS2: case SEQLOC_MASKING_PLUS3: case SEQLOC_MASKING_MINUS1: case SEQLOC_MASKING_MINUS2: case SEQLOC_MASKING_MINUS3: ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue); break; default: break; } } if (!tabular_output || seqannot_output) { align_options = 0; align_options += TXALIGN_MATRIX_VAL; align_options += TXALIGN_SHOW_QS; align_options += TXALIGN_COMPRESS; align_options += TXALIGN_END_NUM; if (StringICmp("blastx", program_name) == 0) { align_options += TXALIGN_BLASTX_SPECIAL; } if (html) align_options += TXALIGN_HTML; seqannot = SeqAnnotNew(); seqannot->type = 2; AddAlignInfoToSeqAnnot(seqannot, align_type); seqannot->data = seqalign; aip = NULL; if (seqannot_output) aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w"); if (aip && seqannot) { SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL); AsnIoReset(aip); aip = AsnIoClose(aip); } } if (!tabular_output) { AcknowledgeBlastQuery(query_bsp, 70, outfp, believe_query, html); ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, txmatrix, mask_loc, FormatScoreFunc); seqannot = SeqAnnotFree(seqannot); if (txmatrix) txmatrix = TxMatrixDestruct(txmatrix); init_buff_ex(85); if (ka_params) { PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE); } if (ka_params_gap) { PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE); } PrintTildeSepLines(params_buffer, 70, outfp); free_buff(); } else { PrintTabularOutputHeader(NULL, query_bsp, NULL, program_name, 0, believe_query, outfp); BlastPrintTabulatedResults(seqalign, query_bsp, NULL, 1, program_name, !gapped_calculation, believe_query, 0, 0, outfp, FALSE); SeqAlignSetFree(seqalign); } matrix = BLAST_MatrixDestruct(matrix); MemFree(ka_params); MemFree(ka_params_gap); MemFree(params_buffer); mask_loc_start = mask_loc; while (mask_loc) { SeqLocSetFree(mask_loc->data.ptrvalue); mask_loc = mask_loc->next; } ValNodeFree(mask_loc_start); fake_bsp = BlastDeleteFakeBioseq(fake_bsp); other_returns = ValNodeFree(other_returns); options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask); options = BLASTOptionDelete(options); MemFree(program_name); FileClose(outfp); if (entrez_lookup) { BioseqFree(query_bsp); BioseqFree(subject_bsp); } else { SeqEntryFree(sep); SeqEntryFree(sep1); } return 0; }
Int2 BLAST_FormatResults(SBlastSeqalignArray* seqalign_arr, Int4 num_queries, SeqLoc* query_slp, SeqLoc* mask_loc_head, BlastFormattingInfo* format_info, Blast_SummaryReturn* sum_returns) { SeqLoc* mask_loc; SeqLoc* next_mask_loc = NULL; SeqLoc* tmp_loc = NULL; Uint1 align_type; Boolean db_is_na; Int4 query_index; SeqLoc* slp; SeqLoc* mask_slp; AsnIo* aip = NULL; MBXml* xmlp = NULL; FILE *outfp = NULL; BlastFormattingOptions* format_options; EAlignView align_view; Boolean ungapped; ASSERT(format_info && format_info->format_options && format_info->search_options && query_slp); format_options = format_info->format_options; align_view = format_options->align_view; ungapped = !format_info->search_options->score_options->gapped_calculation; if (align_view == eAlignViewXml) { const Int4 kXmlFlag = 0; /* Change to BXML_INCLUDE_QUERY if inclusion of query sequence is desired in the XML output header. */ xmlp = format_info->xmlp; if (!xmlp) { xmlp = format_info->xmlp = s_MBXmlInit(format_info->aip, format_info->program_name, format_info->db_name, query_slp, kXmlFlag, sum_returns->search_params); } } else if (align_view == eAlignViewAsnText || align_view == eAlignViewAsnBinary) aip = format_info->aip; else outfp = format_info->outfp; align_type = GetOldAlignType(format_info->search_options->program, &db_is_na); if (format_info->db_name) { /* Enable fetching from the BLAST database. */ ReadDBBioseqFetchEnable ("blast", format_info->db_name, db_is_na, TRUE); /* If database is translated, set the genetic code for tranlation. */ if (Blast_SubjectIsTranslated(format_info->search_options->program)) { ReadDBBioseqSetDbGeneticCode(format_info->search_options-> db_options->genetic_code); } } if(format_info->search_options->score_options->is_ooframe) { ErrPostEx(SEV_WARNING, 0, 0, "Out-of-frame option selected, Expect values are only approximate and calculated not assuming out-of-frame alignments"); } slp = query_slp; mask_loc = mask_loc_head; for (query_index=0; query_index<seqalign_arr->num_queries && slp; query_index++, slp=slp->next) { Bioseq* bsp = NULL; SeqAlignPtr seqalign = seqalign_arr->array[query_index]; /* Find which query the current SeqAlign is for */ SeqId* query_id = TxGetQueryIdFromSeqAlign(seqalign); if (seqalign == NULL) { if (align_view < eAlignViewXml) s_AcknowledgeEmptyResults(slp, format_options, format_info, outfp); /* this query has no results. */ else if (align_view == eAlignViewXml) { /* Retrieve this query's Bioseq */ Iteration* iterp; /* Call to TxGetQueryIdFromSeqAlign returned NULL. */ query_id = SeqLocId(slp); bsp = BioseqLockById(query_id); iterp = s_XMLBuildOneQueryIteration(NULL, sum_returns, FALSE, ungapped, query_index+1+format_info->num_formatted, "No hits found", bsp, NULL); IterationAsnWrite(iterp, xmlp->aip, xmlp->atp); AsnIoFlush(xmlp->aip); IterationFree(iterp); BioseqUnlock(bsp); } else if (align_view == eAlignViewTabularWithComments) { query_id = SeqLocId(slp); bsp = BioseqLockById(query_id); PrintTabularOutputHeader(format_info->db_name, bsp, NULL, format_info->program_name, 0, format_options->believe_query, outfp); BioseqUnlock(bsp); } continue; } format_info->is_seqalign_null = FALSE; /* reset flag, at least one query has seqalign */ /* Find the masking location for this query. Initialize next_mask_loc to the current start of the chain, in case nothing for this query will be found. */ next_mask_loc = mask_loc; for ( ; mask_loc; mask_loc = mask_loc->next) { mask_slp = (SeqLoc*) mask_loc->data.ptrvalue; if (SeqIdComp(query_id, SeqLocId(mask_slp)) == SIC_YES) { break; } } /* Unlink the masking location for this query and save the next one */ if (mask_loc) { for (next_mask_loc = mask_loc; next_mask_loc->next; next_mask_loc = next_mask_loc->next) { mask_slp = (SeqLoc*) next_mask_loc->next->data.ptrvalue; if (SeqIdComp(query_id, SeqLocId(mask_slp)) != SIC_YES) { break; } } tmp_loc = next_mask_loc; next_mask_loc = next_mask_loc->next; tmp_loc->next = NULL; } /* On the next iteration we can start from the next query */ /* Retrieve this query's Bioseq */ bsp = BioseqLockById(query_id); if (align_view < eAlignViewXml) { if (format_info->head_on_every_query == TRUE) BLAST_PrintOutputHeader(format_info); init_buff_ex(70); AcknowledgeBlastQuery(bsp, 70, outfp, format_options->believe_query, format_options->html); free_buff(); if (format_info->head_on_every_query == TRUE) { s_BLAST_PrintDatabaseInfo(format_info); fprintf(format_info->outfp, "%s", "Searching..................................................done\n\n"); } } if (align_view == eAlignViewTabular || align_view == eAlignViewTabularWithComments) { if (align_view == eAlignViewTabularWithComments) PrintTabularOutputHeader(format_info->db_name, bsp, NULL, format_info->program_name, 0, format_options->believe_query, outfp); BlastPrintTabulatedResults(seqalign, bsp, NULL, format_options->number_of_alignments, format_info->program_name, ungapped, format_options->believe_query, 0, 0, outfp, (Boolean)(align_view == eAlignViewTabularWithComments)); } else if(align_view == eAlignViewXml) { Iteration* iterp; ASSERT(xmlp && xmlp->aip); /* The index of this "query iteration" is the query_index in the current formatting round, plus the number of previously formatted queries. */ iterp = s_XMLBuildOneQueryIteration(seqalign, sum_returns, FALSE, ungapped, query_index+1+format_info->num_formatted, NULL, bsp, mask_loc); IterationAsnWrite(iterp, xmlp->aip, xmlp->atp); AsnIoFlush(xmlp->aip); IterationFree(iterp); } else { SeqAnnot* seqannot = SeqAnnotNew(); seqannot->type = 2; AddAlignInfoToSeqAnnot(seqannot, align_type); seqannot->data = seqalign; if (aip) { SeqAnnotAsnWrite((SeqAnnot*) seqannot, aip, NULL); AsnIoReset(aip); } if (outfp) { BlastPruneSapStruct* prune; Int4** matrix = s_LoadMatrix(sum_returns->search_params->matrix); ObjMgrSetHold(); init_buff_ex(85); PrintDefLinesFromSeqAlignEx2(seqalign, 80, outfp, format_options->print_options, FIRST_PASS, NULL, format_options->number_of_descriptions, NULL, NULL); free_buff(); /** @todo FIXME: note that by calling BlastPruneHitsFromSeqAlign * we're making a COPY of the seqalign to print it out! Clearly * this could use a better design */ prune = BlastPruneHitsFromSeqAlign(seqalign, format_options->number_of_alignments, NULL); seqannot->data = prune->sap; if(format_info->search_options->score_options->is_ooframe) { OOFShowBlastAlignment(prune->sap, mask_loc, outfp, format_options->align_options, NULL); } else if (align_view != eAlignViewPairwise) { ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, format_options->align_options, matrix, mask_loc, NULL); } else { ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, format_options->align_options, matrix, mask_loc, FormatScoreFunc); } s_DeleteMatrix(matrix); seqannot->data = seqalign; prune = BlastPruneSapStructDestruct(prune); ObjMgrClearHold(); } /* Set data to NULL, because we do not free Seq-align here. */ seqannot->data = NULL; seqannot = SeqAnnotFree(seqannot); } BioseqUnlock(bsp); /* Relink the mask locations so chain can be freed in the end. The 'tmp_loc' variable points to the location that was unlinked. */ if (tmp_loc) tmp_loc->next = next_mask_loc; mask_loc = next_mask_loc; ObjMgrFreeCache(0); } /* End loop on seqaligns for different queries */ /* close BlastOutput_iterations openned in s_MBXmlInit; Rt ticket # 15135151 */ if((format_info->is_seqalign_null==TRUE) && (align_view == eAlignViewXml)) { /* extra output only if no hits at all, otherwise "for loop" logic should take care*/ Iteration* iterp; iterp = IterationNew(); iterp->iter_num = 1; iterp->stat = s_XMLBuildStatistics(sum_returns, ungapped); ASSERT(xmlp && xmlp->aip); IterationAsnWrite(iterp, xmlp->aip, xmlp->atp); AsnIoFlush(xmlp->aip); IterationFree(iterp); } if (format_info->db_name) { /* Free the database translation tables, if applicable. */ TransTableFreeAll(); ReadDBBioseqFetchDisable(); } /* Update the count of the formatted queries. */ format_info->num_formatted += num_queries; return 0; }