Esempio n. 1
0
/*
 * Main_old -- batch "blastn" megablast driver for mgblast.
 *
 * All settings are read from the file-level myargs[] table. The query FASTA
 * file is read in batches: a batch ends once its accumulated sequence length
 * exceeds myargs[ARG_MAXQUERY] or it holds MAX_NUM_QUERIES sequences. Each
 * batch is handed to BioseqMegaBlastEngine() and the results are written
 * according to out_type (ARG_OUTTYPE) and align_view (ARG_FORMAT).
 *
 * Side effects: sets the file-level global_fp (and gap_Info / dbgaps_buf /
 * qgaps_buf when the MGBLAST_HITGAPS output is selected); on the normal exit
 * path both gap buffers are passed to MemFree().
 *
 * Returns:
 *    0  on completion
 *    1  if an input/output file cannot be opened or PrintDbInformation fails
 *    2  if no Bioseq is obtained from a parsed query entry
 *    3  if the options block cannot be created
 *   -1  if ASN.1 output is requested (believe_query is forced to FALSE here)
 */
static Int2 Main_old (void)
{
    AsnIoPtr aip, xml_aip = NULL;
    BioseqPtr query_bsp, PNTR query_bsp_array;
    BioSourcePtr source;
    BLAST_MatrixPtr matrix;
    BLAST_OptionsBlkPtr options;
    BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
    BlastPruneSapStructPtr prune;
    Boolean db_is_na, query_is_na, show_gi, believe_query=FALSE;
    Boolean html=FALSE;
    CharPtr params_buffer=NULL;
    Int4 number_of_descriptions, number_of_alignments;
    SeqAlignPtr  seqalign, PNTR seqalign_array;
    SeqAnnotPtr seqannot;
    SeqEntryPtr PNTR sepp;
    TxDfDbInfoPtr dbinfo=NULL, dbinfo_head;
    Uint1 align_type, align_view, out_type;
    Uint4 align_options, print_options;
    ValNodePtr mask_loc, mask_loc_start, next_mask_loc;
    ValNodePtr vnp, other_returns, error_returns;

    CharPtr blast_program, blast_database, blast_inputfile, blast_outputfile;
    FILE *infp, *outfp, *mqfp=NULL;
    Int4 index, num_bsps, total_length, total_processed = 0;
    Int2 ctr = 1;
    Char prefix[2];
    SeqLocPtr last_mask, mask_slp;
    Boolean done, hits_found;
    Boolean lcase_masking;
    MBXmlPtr mbxp = NULL;
    Boolean traditional_formatting;

    /* ---- Basic arguments and input file ---- */
    blast_program = "blastn";
    blast_database = myargs [ARG_DB].strvalue;
    blast_inputfile = myargs [ARG_QUERY].strvalue;
    blast_outputfile = myargs [ARG_OUT].strvalue;
    if (myargs[ARG_HTML].intvalue)
        html = TRUE;

    if ((infp = FileOpen(blast_inputfile, "r")) == NULL) {
        ErrPostEx(SEV_FATAL, 1, 0, "mgblast: Unable to open input file %s\n", blast_inputfile);
        return (1);
    }

    align_view = (Int1) myargs[ARG_FORMAT].intvalue;
    /* Geo mod:
       -- myargs[ARG_OUTTYPE].intvalue is replaced by out_type from now on */
    out_type = (Int1) myargs[ARG_OUTTYPE].intvalue;
    if (out_type == MGBLAST_FLTHITS || out_type == MGBLAST_HITGAPS) {
        /* Attention: 12 MUST be the -m mgblast tab option for the
           MGBLAST_FLTHITS format, and MGBLAST_HITGAPS = MGBLAST_FLTHITS+1 */
        align_view = 12 + (out_type - MGBLAST_FLTHITS);
        out_type = MBLAST_ALIGNMENTS;
        if (align_view > 12) { /* this is MGBLAST_HITGAPS output */
            gap_Info = TRUE;
            if (dbgaps_buf == NULL)
                dbgaps_buf = (CharPtr) Malloc(dbgaps_bufsize + 1);
            if (qgaps_buf == NULL)
                qgaps_buf = (CharPtr) Malloc(qgaps_bufsize + 1);
        }
    }

    outfp = NULL;

    traditional_formatting =
        (out_type == MBLAST_ALIGNMENTS ||
         out_type == MBLAST_DELAYED_TRACEBACK);

    /* Views 7, 10 and 11 write through an AsnIo stream opened further below,
       so the plain output FILE is not opened for them. */
    if ((!traditional_formatting ||
         (align_view != 7 && align_view != 10 && align_view != 11)) &&
        blast_outputfile != NULL) {
        if ((outfp = FileOpen(blast_outputfile, "w")) == NULL) {
            ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
            return (1);
        }
    }

    align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na);

    /* Geo mod: believe_query is always FALSE here; the disabled code derived
       it from traditional_formatting and myargs[ARG_BELIEVEQUERY]. */
    believe_query = FALSE;
    /* If ASN.1 output is requested and believe_query is not set to TRUE,
       exit with an error. */
    if (!believe_query && (myargs[ARG_ASNOUT].strvalue ||
                           align_view == 10 || align_view == 11)) {
        ErrPostEx(SEV_FATAL, 1, 0,
                  "-J option must be TRUE to produce ASN.1 output; before "
                  "changing -J to TRUE please also ensure that all query "
                  "sequence identifiers are unique");
        return -1;
    }

    /* ---- Search options ---- */
    options = BLASTOptionNewEx(blast_program, TRUE, TRUE);
    if (options == NULL)
        return 3;

    options->do_sum_stats = FALSE;
    options->is_neighboring = FALSE;
    options->expect_value  = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;
    number_of_descriptions = myargs[ARG_DESCRIPTIONS].intvalue;
    number_of_alignments = myargs[ARG_ALIGNMENTS].intvalue;
    options->hitlist_size = MAX(number_of_descriptions, number_of_alignments);

    if (myargs[ARG_XDROP].intvalue != 0)
        options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
    if (myargs[ARG_XDROP_UNGAPPED].intvalue != 0)
        options->dropoff_2nd_pass = myargs[ARG_XDROP_UNGAPPED].intvalue;
    if (myargs[ARG_XDROP_FINAL].intvalue != 0)
        options->gap_x_dropoff_final = myargs[ARG_XDROP_FINAL].intvalue;

    /* A bare "T" filter argument is replaced by the filter string "D" */
    if (StringICmp(myargs[ARG_FILTER].strvalue, "T") == 0)
        options->filter_string = StringSave("D");
    else
        options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);

    show_gi = (Boolean) myargs[ARG_SHOWGIS].intvalue;
    options->penalty = myargs[ARG_MISMATCH].intvalue;
    options->reward = myargs[ARG_MATCH].intvalue;
    if (myargs[ARG_GAPOPEN].intvalue >= 0)
        options->gap_open = myargs[ARG_GAPOPEN].intvalue;
    if (myargs[ARG_GAPEXT].intvalue >= 0)
        options->gap_extend = myargs[ARG_GAPEXT].intvalue;

    if (options->gap_open == 0 && options->reward % 2 == 0 &&
        options->gap_extend == options->reward / 2 - options->penalty) {
        /* This is the default value */
        options->gap_extend = 0;
    }

    options->genetic_code = 1;
    options->db_genetic_code = 1; /* Default; it's not needed here anyway */
    options->number_of_cpus = myargs[ARG_THREADS].intvalue;
    if (myargs[ARG_WORDSIZE].intvalue != 0)
        options->wordsize = myargs[ARG_WORDSIZE].intvalue;
    if (myargs[ARG_MINSCORE].intvalue == 0)
        options->cutoff_s2 = options->wordsize*options->reward;
    else
        options->cutoff_s2 = myargs[ARG_MINSCORE].intvalue;

    options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;
    options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;

    options->perform_culling = FALSE;
    /* Kludge */
    options->block_width  = myargs[ARG_MAXPOS].intvalue;

    options->strand_option = myargs[ARG_STRAND].intvalue;
    options->window_size = myargs[ARG_WINDOW].intvalue;
#ifdef DO_NOT_SUPPRESS_BLAST_OP
    options->mb_template_length = myargs[ARG_TEMPL_LEN].intvalue;
    if (myargs[ARG_TEMPL_LEN].intvalue != 0)
        options->mb_one_base_step = (Boolean) myargs[ARG_EVERYBASE].intvalue;
    options->mb_disc_type = myargs[ARG_TEMPL_TYPE].intvalue;
#endif
    lcase_masking = (Boolean) myargs[ARG_LCASE].intvalue;
    /* Allow dynamic programming gapped extension only with affine
       gap scores */
    if (options->gap_open != 0 || options->gap_extend != 0)
        options->mb_use_dyn_prog = (Boolean) myargs[ARG_DYNAMIC].intvalue;

    /* ---- Text alignment / definition line display flags ---- */
    print_options = 0;
    align_options = 0;
    align_options += TXALIGN_COMPRESS;
    align_options += TXALIGN_END_NUM;
    if (show_gi) {
        align_options += TXALIGN_SHOW_GI;
        print_options += TXALIGN_SHOW_GI;
    }

    if (align_view) {
        align_options += TXALIGN_MASTER;
        if (align_view == 1 || align_view == 3)
            align_options += TXALIGN_MISMATCH;
        if (align_view == 3 || align_view == 4 || align_view == 6)
            align_options += TXALIGN_FLAT_INS;
        if (align_view == 5 || align_view == 6)
            align_options += TXALIGN_BLUNT_END;
    } else {
        align_options += TXALIGN_MATRIX_VAL;
        align_options += TXALIGN_SHOW_QS;
    }

    if (html) {
        align_options += TXALIGN_HTML;
        print_options += TXALIGN_HTML;
    }

    if (myargs[ARG_GILIST].strvalue)
        options->gifile = StringSave(myargs[ARG_GILIST].strvalue);

    if (out_type == MBLAST_ENDPOINTS)
        options->no_traceback = 1;
    else if (out_type == MBLAST_DELAYED_TRACEBACK)
        options->no_traceback = 2;
    else
        options->no_traceback = 0;

    options->megablast_full_deflines = (Boolean) myargs[ARG_FULLID].intvalue;
    options->perc_identity = (FloatLo) myargs[ARG_PERC_IDENT].floatvalue;
    options->hsp_num_max = myargs[ARG_MAXHSP].intvalue;

    /* believe_query is FALSE above, so full deflines are always forced on */
    if (!believe_query)
        options->megablast_full_deflines = TRUE;

    /* query_bsp_array gets one extra, zero-filled slot beyond the maximum
       batch size */
    query_bsp_array = (BioseqPtr PNTR) MemNew((MAX_NUM_QUERIES+1)*sizeof(BioseqPtr));
    sepp = (SeqEntryPtr PNTR) MemNew(MAX_NUM_QUERIES*sizeof(SeqEntryPtr));

    StrCpy(prefix, "");

    global_fp = outfp;
    options->output = outfp;

    /* ---- Report header (pairwise-style views only) ---- */
    if (traditional_formatting) {
        if (align_view < 7) {
            if (html) {
                fprintf(outfp, "<HTML>\n<TITLE>MEGABLAST Search Results</TITLE>\n");
                fprintf(outfp, "<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" "
                        "VLINK=\"#660099\" ALINK=\"#660099\">\n");
                fprintf(outfp, "<PRE>\n");
            }
            init_buff_ex(90);
            BlastPrintVersionInfo("mgblast", html, outfp);
            fprintf(outfp, "\n");
            MegaBlastPrintReference(html, 90, outfp);
            fprintf(outfp, "\n");

            if (!PrintDbInformation(blast_database, !db_is_na, 70, outfp, html))
                return 1;

            free_buff();

#ifdef OS_UNIX
            fprintf(global_fp, "%s", "Searching");
#endif
        }
    }

    /* ---- ASN.1 / XML output streams ---- */
    aip = NULL;
    if (myargs[ARG_ASNOUT].strvalue != NULL) {
        if ((aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w")) == NULL) {
            ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", myargs[ARG_ASNOUT].strvalue);
            return 1;
        }
    }
    else if (align_view == 10 || align_view == 11)
    {
        const char* mode = (align_view == 10) ? "w" : "wb";
        if ((aip = AsnIoOpen (blast_outputfile, (char*) mode)) == NULL) {
            ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
            return 1;
        }
    }

    if (align_view == 7) {
        xml_aip = AsnIoOpen(blast_outputfile, "wx");
    }

    if (myargs[ARG_QUERYLOC].strvalue) {
        Int4 start, end;
        Megablast_GetLoc(myargs[ARG_QUERYLOC].strvalue, &start, &end);
        /* stored one less than the values parsed from the argument */
        options->required_start = start - 1;
        options->required_end = end -1;
    }

    /* ---- Main loop: one iteration per batch of queries ---- */
    done = FALSE;
    while (!done) {
        num_bsps = 0;
        total_length = 0;
        done = TRUE;   /* cleared again below if the batch limit is hit */
        SeqMgrHoldIndexing(TRUE);
        mask_slp = last_mask = NULL;

        /* Read queries until end of input or until the batch is full */
        while ((sepp[num_bsps]=FastaToSeqEntryForDb(infp, query_is_na, NULL,
                                                    believe_query, prefix, &ctr,
                                                    &mask_slp)) != NULL) {
            if (!lcase_masking) /* Lower case ignored */
                mask_slp = SeqLocFree(mask_slp);
            if (mask_slp) {
                /* Append this query's lower-case mask to the batch list */
                if (!last_mask)
                    options->query_lcase_mask = last_mask = mask_slp;
                else {
                    last_mask->next = mask_slp;
                    last_mask = last_mask->next;
                }
                mask_slp = NULL;
            }
            query_bsp = NULL;
            SeqEntryExplore(sepp[num_bsps], &query_bsp, FindNuc);
            if (query_bsp == NULL) {
                ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
                return 2;
            }

            /* Attach a source descriptor carrying the genetic code */
            source = BioSourceNew();
            source->org = OrgRefNew();
            source->org->orgname = OrgNameNew();
            source->org->orgname->gcode = options->genetic_code;
            ValNodeAddPointer(&(query_bsp->descr), Seq_descr_source, source);

            query_bsp_array[num_bsps++] = query_bsp;

            total_length += query_bsp->length;
            if (total_length > myargs[ARG_MAXQUERY].intvalue ||
                num_bsps >= MAX_NUM_QUERIES) {
                done = FALSE;
                break;
            }
        }

        if (num_bsps == 0)
            break;

        SeqMgrHoldIndexing(FALSE);
        other_returns = NULL;
        error_returns = NULL;

        /* Fix: none of the branches below runs for MBLAST_DELAYED_TRACEBACK
           (or any other unlisted out_type), which used to leave
           seqalign_array indeterminate before it is tested and freed.
           NOTE(review): confirm whether the last branch was meant to be a
           plain "else" so that such output types still run the search. */
        seqalign_array = NULL;

        if (out_type==MBLAST_ENDPOINTS)
            seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                                                   blast_database, options,
                                                   &other_returns, &error_returns,
                                                   dummy_callback, NULL, NULL, 0,
                                                   MegaBlastPrintEndpoints);
        else if (out_type==MBLAST_SEGMENTS)
            seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                                                   blast_database, options,
                                                   &other_returns, &error_returns,
                                                   dummy_callback, NULL, NULL, 0,
                                                   MegaBlastPrintSegments);
        else if (out_type==MBLAST_ALIGN_INFO) {
            /* Geo mod: the tabular output header is deliberately not
               printed for this output type */
            seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                                                   blast_database, options,
                                                   &other_returns, &error_returns,
                                                   dummy_callback, NULL, NULL, 0,
                                                   MegaBlastPrintAlignInfo);
        } else if (out_type==MBLAST_ALIGNMENTS) {
            seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                                                   blast_database, options, &other_returns,
                                                   &error_returns, align_view < 7 ? tick_callback : NULL,
                                                   NULL, NULL, 0, NULL);
        }

#ifdef OS_UNIX
        fflush(global_fp);
#endif

        if (error_returns) {
            BlastErrorPrint(error_returns);
            for (vnp = error_returns; vnp; vnp = vnp->next) {
                BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
            }
            ValNodeFree(error_returns);
        }

        if (traditional_formatting) {
            /* Pick apart other_returns; the pieces are freed individually
               further below */
            dbinfo = NULL;
            ka_params = NULL;
            ka_params_gap = NULL;
            params_buffer = NULL;
            mask_loc = NULL;
            matrix = NULL;
            for (vnp=other_returns; vnp; vnp = vnp->next) {
                switch (vnp->choice) {
                case TXDBINFO:
                    dbinfo = vnp->data.ptrvalue;
                    break;
                case TXKABLK_NOGAP:
                    ka_params = vnp->data.ptrvalue;
                    break;
                case TXKABLK_GAP:
                    ka_params_gap = vnp->data.ptrvalue;
                    break;
                case TXPARAMETERS:
                    params_buffer = vnp->data.ptrvalue;
                    break;
                case TXMATRIX:
                    matrix = vnp->data.ptrvalue;
                    break;
                case SEQLOC_MASKING_NOTSET:
                case SEQLOC_MASKING_PLUS1:
                case SEQLOC_MASKING_PLUS2:
                case SEQLOC_MASKING_PLUS3:
                case SEQLOC_MASKING_MINUS1:
                case SEQLOC_MASKING_MINUS2:
                case SEQLOC_MASKING_MINUS3:
                    ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
                    break;
                default:
                    break;
                }
            }

#ifdef OS_UNIX
            if (align_view < 7) {
                fprintf(global_fp, "%s\n", " done");
            }
#endif

            /* NOTE(review): the masked-query file is reopened with "w" for
               every batch — confirm that overwriting earlier batches is
               intended. */
            if (myargs[ARG_MASKEDQUERY].strvalue) {
                if ((mqfp = FileOpen(myargs[ARG_MASKEDQUERY].strvalue, "w")) == NULL)
                    ErrPostEx(SEV_WARNING, 1, 0, "Unable to open file %s for masked query\n",
                              myargs[ARG_MASKEDQUERY].strvalue);
            }

            hits_found = FALSE;

            /* mask_loc_start keeps the list head for the final cleanup;
               next_mask_loc walks it in step with the queries */
            mask_loc_start = next_mask_loc = mask_loc;
            mask_loc = NULL;

            if (align_view == 7) {
                mbxp = PSIXmlInit(xml_aip, "megablast", blast_database,
                                  options, query_bsp_array[0], 0);
            }

            if (seqalign_array) { /* results returned back for processing */
                ReadDBBioseqFetchEnable ("megablast", blast_database, db_is_na, TRUE);
                for (index=0; index<num_bsps; index++) {
                    seqalign = seqalign_array[index];
                    /* Take a private single-node copy of the mask entry that
                       belongs to this query, if the next entry matches it */
                    if (next_mask_loc &&
                        SeqIdComp(SeqLocId((SeqLocPtr)next_mask_loc->data.ptrvalue),
                                  query_bsp_array[index]->id) == SIC_YES) {
                        mask_loc = (SeqLocPtr)
                            MemDup(next_mask_loc, sizeof(SeqLoc));
                        next_mask_loc = next_mask_loc->next;
                        mask_loc->next = NULL;
                    }
                    if (mqfp) {
                        /* convert mask locations from all sources into
                           a single seqloc */
                        mask_slp = NULL;
                        if (mask_loc)
                            mask_slp = blastMergeFilterLocs(mask_slp,
                                           (SeqLocPtr)mask_loc->data.ptrvalue,
                                           FALSE, 0, 0);
                        PrintMaskedSequence(query_bsp_array[index], mask_slp,
                                            mqfp, 50, lcase_masking);
                        SeqLocSetFree(mask_slp);
                    }
                    if (seqalign==NULL) {
                        mask_loc = MemFree(mask_loc);
                        continue;
                    }
                    hits_found = TRUE;
                    if (align_view < 7) {
                        init_buff_ex(70);
                        AcknowledgeBlastQuery(query_bsp_array[index], 70, outfp,
                                              believe_query, html);
                        free_buff();
                    }
                    if (align_view == 8 || align_view == 9) {
                        /* Tabular output; view 9 also prints a header */
                        if (align_view == 9)
                            PrintTabularOutputHeader(blast_database,
                                query_bsp_array[index], NULL, blast_program, 0,
                                believe_query, global_fp);
                        BlastPrintTabulatedResults(seqalign,
                            query_bsp_array[index], NULL, number_of_alignments,
                            blast_program, !options->gapped_calculation,
                            believe_query, 0, 0,
                            global_fp, (align_view == 9));

                        ObjMgrFreeCache(0);

                        SeqAlignSetFree(seqalign);
                        mask_loc = MemFree(mask_loc);
                        continue;
                    }
                    /* Geo mod: mgblast tab formats (align_view 12 and up) */
                    else if (align_view>=12)  {
                        MGBlastPrintTab(seqalign,
                            query_bsp_array[index], number_of_alignments,
                            !options->gapped_calculation,
                            global_fp);
                        ObjMgrFreeCache(0);

                        SeqAlignSetFree(seqalign);
                        mask_loc = MemFree(mask_loc);
                        continue;
                    }
                    else if (align_view == 7) {
                        /* XML: one iteration element per query */
                        IterationPtr iterp;

                        iterp = BXMLBuildOneQueryIteration(seqalign,
                                    NULL, FALSE,
                                    !options->gapped_calculation, index,
                                    NULL, query_bsp_array[index], mask_loc);
                        IterationAsnWrite(iterp, mbxp->aip, mbxp->atp);
                        AsnIoFlush(mbxp->aip);
                        IterationFree(iterp);
                        SeqAlignSetFree(seqalign);
                        mask_loc = MemFree(mask_loc);
                        continue;
                    }

                    /* Remaining views: wrap the alignments in a SeqAnnot */
                    seqannot = SeqAnnotNew();
                    seqannot->type = 2;
                    AddAlignInfoToSeqAnnot(seqannot, align_type);
                    seqannot->data = seqalign;
                    if (aip) {
                        SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
                        AsnIoReset(aip);
                    }
                    if (outfp) { /* Uncacheing causes problems with ordinal nos. vs. gi's. */
                        prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_descriptions, NULL);
                        ObjMgrSetHold();
                        init_buff_ex(85);
                        PrintDefLinesFromSeqAlign(prune->sap, 80,
                                                  outfp, print_options, FIRST_PASS, NULL);
                        free_buff();

                        prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_alignments, prune);
                        /* Temporarily show the pruned set; the full set is
                           put back before the annot is freed */
                        seqannot->data = prune->sap;
                        if (align_view != 0)
                            ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL,
                                                   NULL, align_options, NULL,
                                                   mask_loc, NULL);
                        else
                            ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, NULL, mask_loc, FormatScoreFunc);
                        seqannot->data = seqalign;
                        prune = BlastPruneSapStructDestruct(prune);
                        ObjMgrClearHold();
                        ObjMgrFreeCache(0);
                    }
                    seqannot = SeqAnnotFree(seqannot);
                    mask_loc = MemFree(mask_loc);
                } /* End loop on seqaligns for different queries */
                ReadDBBioseqFetchDisable();
            }

            if (mbxp != NULL) {
                MBXmlClose(mbxp, other_returns, !options->gapped_calculation);
            }

            if (mqfp)
                FileClose(mqfp);

            if (!hits_found && align_view < 7)
                fprintf(outfp, "\n\n ***** No hits found ******\n\n");

            matrix = BLAST_MatrixDestruct(matrix);

            /* Fix: outfp is never opened for align_view 7, so it must be
               checked before writing the HTML tag. */
            if (html && outfp != NULL)
                fprintf(outfp, "<PRE>\n");
            init_buff_ex(85);
            dbinfo_head = dbinfo;
            if (align_view < 7) {
                while (dbinfo) {
                    PrintDbReport(dbinfo, 70, outfp);
                    dbinfo = dbinfo->next;
                }
            }
            dbinfo_head = TxDfDbInfoDestruct(dbinfo_head);

            if (ka_params) {
                if (align_view < 7)
                    PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
                MemFree(ka_params);
            }
            if (ka_params_gap) {
                if (align_view < 7)
                    PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
                MemFree(ka_params_gap);
            }
            if (align_view < 7)
                PrintTildeSepLines(params_buffer, 70, outfp);
            MemFree(params_buffer);
            free_buff();

            /* Free the masking locations, then the list nodes themselves */
            mask_loc = mask_loc_start;
            while (mask_loc) {
                SeqLocSetFree(mask_loc->data.ptrvalue);
                mask_loc = mask_loc->next;
            }
            ValNodeFree(mask_loc_start);
        } else { /* not traditional formatting */
            /* Just destruct all other_returns parts */
            for (vnp=other_returns; vnp; vnp = vnp->next) {
                switch (vnp->choice) {
                case TXDBINFO:
                    TxDfDbInfoDestruct(vnp->data.ptrvalue);
                    break;
                case TXKABLK_NOGAP:
                case TXKABLK_GAP:
                case TXPARAMETERS:
                    MemFree(vnp->data.ptrvalue);
                    break;
                case TXMATRIX:
                    BLAST_MatrixDestruct(vnp->data.ptrvalue);
                    break;
                case SEQLOC_MASKING_NOTSET:
                case SEQLOC_MASKING_PLUS1:
                case SEQLOC_MASKING_PLUS2:
                case SEQLOC_MASKING_PLUS3:
                case SEQLOC_MASKING_MINUS1:
                case SEQLOC_MASKING_MINUS2:
                case SEQLOC_MASKING_MINUS3:
                    mask_loc = vnp->data.ptrvalue;
                    SeqLocSetFree(mask_loc);
                    break;   /* explicit: previously fell through to default */
                default:
                    break;
                }
            }
        }
        other_returns = ValNodeFree(other_returns);
        MemFree(seqalign_array);
        options->query_lcase_mask =
            SeqLocSetFree(options->query_lcase_mask);

        /* Freeing SeqEntries can be very expensive, do this only if
           this is not the last iteration of search */
        if (!done) {
            for (index=0; index<num_bsps; index++) {
                sepp[index] = SeqEntryFree(sepp[index]);
                query_bsp_array[index] = NULL;
            }
        }
        total_processed += num_bsps;
    } /* End of loop on complete searches */

    /* ---- Shutdown ---- */
    aip = AsnIoClose(aip);

    /* xml_aip is intentionally not closed here (the close call was disabled
       in the original code) */

    if (align_view < 7 && html)
        fprintf(outfp, "</PRE>\n</BODY>\n</HTML>\n");
    if (align_view < 7 && myargs[ARG_LOGINFO].intvalue)
        fprintf(outfp, "Mega BLAST run finished, processed %d queries\n",
                total_processed);
    MemFree(query_bsp_array);
    MemFree(sepp);
    MemFree(qgaps_buf);
    MemFree(dbgaps_buf);
    options = BLASTOptionDelete(options);
    FileClose(infp);
    FileClose(outfp);

    return 0;
}
Esempio n. 2
0
Int2 Main_old (void)
 
{
	
	AsnIoPtr aip;
	BioseqPtr fake_bsp = NULL, fake_subject_bsp = NULL, query_bsp = NULL, 
                  subject_bsp = NULL;
        BioseqPtr bsp1, bsp2;
	BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
	BLAST_OptionsBlkPtr options=NULL;
	Boolean seq1_is_na, seq2_is_na;
	CharPtr params_buffer=NULL;
        DbtagPtr        dbtagptr;
	Uint1 align_type;
	Uint4 align_options;
	SeqAlignPtr  seqalign;
        SeqAnnotPtr seqannot;
	SeqEntryPtr sep = NULL, sep1 = NULL;
	CharPtr program_name, blast_outputfile;
	FILE *outfp;
	ValNodePtr  mask_loc, mask_loc_start, vnp, other_returns=NULL, error_returns=NULL;
	BLAST_MatrixPtr matrix;
        Int4Ptr PNTR txmatrix;
        int (LIBCALLBACK *handle_results)PROTO((VoidPtr search)) = NULL;
        Boolean entrez_lookup = FALSE;
        Boolean html, seqannot_output, believe_query;
        Uint1 tabular_output;
        Boolean gapped_calculation;

        entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
        html = (Boolean) myargs[ARG_HTML].intvalue;
        seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);

        blast_outputfile = myargs [ARG_OUT].strvalue;

	program_name = StringSave(myargs[ARG_PROGRAM].strvalue);
	if (StringCmp(program_name, "blastn") && 
	    StringCmp(program_name, "blastp") && 
	    StringCmp(program_name, "blastx") && 
	    StringCmp(program_name, "tblastn") && 
	    StringCmp(program_name, "tblastx")) {
		ErrPostEx(SEV_FATAL, 1, 0, "Program name must be blastn, blastp, blastx, tblastn or tblastx\n");
		return (1);
	}
	   
	align_type = BlastGetTypes(program_name, &seq1_is_na, &seq2_is_na);

	if ((outfp = FileOpen(blast_outputfile, "w")) == NULL)
	{
		ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
		return (1);
	}

        gapped_calculation = (Boolean) myargs[ARG_GAPPED].intvalue;
        believe_query = (seqannot_output || entrez_lookup); 

        options = BLASTOptionNewEx(program_name, gapped_calculation,
                                   (Boolean) myargs[ARG_USEMEGABLAST].intvalue);

        if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp,
                                &sep, &sep1, &(options->query_lcase_mask), 
                                believe_query) == FALSE)
        {
            ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
            return (1);
        }

        if (!entrez_lookup) {
            if (!believe_query)
                fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
            
            fake_subject_bsp = BioseqNew();
            fake_subject_bsp->descr = subject_bsp->descr;
            fake_subject_bsp->repr = subject_bsp->repr;
            fake_subject_bsp->mol = subject_bsp->mol;
            fake_subject_bsp->length = subject_bsp->length;
            fake_subject_bsp->seq_data = subject_bsp->seq_data;
            fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
            dbtagptr = DbtagNew();
            dbtagptr->db = StringSave("BL_ORD_ID");
            dbtagptr->tag = ObjectIdNew();

            if (BioseqGetTitle(subject_bsp) != NULL)
              dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
            else
              dbtagptr->tag->str = StringSave("No definition line found");

            ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
            bsp1 = (believe_query ? query_bsp : fake_bsp);
            bsp2 = fake_subject_bsp;
        } else {
            bsp1 = query_bsp;
            bsp2 = subject_bsp;
        }

        tabular_output = (Uint1) myargs[ARG_FORMAT].intvalue; 


    	if (myargs[ARG_SEARCHSP].floatvalue)
           options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;


	options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
	options->expect_value  = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;

        if (StringICmp("blastn", program_name) == 0)
        {
                options->penalty = myargs[ARG_MISMATCH].intvalue;
                options->reward = myargs[ARG_MATCH].intvalue;
        }

	options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;

	options->discontinuous = FALSE;

        if (myargs[ARG_XDROP].intvalue != 0)
	{
               options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
	}
        if (myargs[ARG_WORDSIZE].intvalue != 0)
               options->wordsize = (Int2) myargs[ARG_WORDSIZE].intvalue;

	if (options->is_megablast_search) {
	   options->cutoff_s2 = options->wordsize*options->reward;
        }
	options->matrix = MemFree(options->matrix);
        BLASTOptionSetGapParams(options, myargs[ARG_MATRIX].strvalue, 0, 0); 

        if (myargs[ARG_GAPOPEN].intvalue != -1)
              options->gap_open = myargs[ARG_GAPOPEN].intvalue;
        if (myargs[ARG_GAPEXT].intvalue != -1)
               options->gap_extend = myargs[ARG_GAPEXT].intvalue;

	options->strand_option = myargs[ARG_STRAND].intvalue;

        /* Input longest intron length is in nucleotide scale; in the lower 
           level code it will be used in protein scale */
        if (myargs[ARG_INTRON].intvalue > 0) 
           options->longest_intron = myargs[ARG_INTRON].intvalue;


        if (!myargs[ARG_LOC1].strvalue && !myargs[ARG_LOC2].strvalue) {
           seqalign = BlastTwoSequencesWithCallback(bsp1, bsp2, program_name, 
              options, &other_returns, &error_returns, handle_results);
        } else {
            SeqLocPtr slp1=NULL, slp2=NULL;
            if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, options->strand_option) == FALSE)
                return 1;
           seqalign = BlastTwoSequencesByLocWithCallback(slp1, slp2, program_name, options, &other_returns, &error_returns, handle_results, NULL);
           SeqLocFree(slp1);
           SeqLocFree(slp2);
        }

        if (error_returns) {
           BlastErrorPrint(error_returns);
           for (vnp = error_returns; vnp; vnp = vnp->next) {
              BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
           }
           ValNodeFree(error_returns);
        }
       
        ka_params = NULL;
        ka_params_gap = NULL;
        params_buffer = NULL;
        mask_loc = NULL;
        matrix = NULL;
        txmatrix = NULL;
        for (vnp=other_returns; vnp; vnp = vnp->next) {
           switch (vnp->choice) {
           case TXKABLK_NOGAP:
              ka_params = vnp->data.ptrvalue;
              break;
           case TXKABLK_GAP:
              ka_params_gap = vnp->data.ptrvalue;
              break;
           case TXPARAMETERS:
              params_buffer = vnp->data.ptrvalue;
              break;
           case TXMATRIX:
              matrix = vnp->data.ptrvalue;
              if (matrix && !tabular_output)
                 txmatrix = BlastMatrixToTxMatrix(matrix);
              break;
           case SEQLOC_MASKING_NOTSET:
           case SEQLOC_MASKING_PLUS1:
           case SEQLOC_MASKING_PLUS2:
           case SEQLOC_MASKING_PLUS3:
           case SEQLOC_MASKING_MINUS1:
           case SEQLOC_MASKING_MINUS2:
           case SEQLOC_MASKING_MINUS3:
              ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
              break;
           default:
              break;
           }
        }	
        if (!tabular_output || seqannot_output) {
           align_options = 0;
           align_options += TXALIGN_MATRIX_VAL;
           align_options += TXALIGN_SHOW_QS;
           align_options += TXALIGN_COMPRESS;
           align_options += TXALIGN_END_NUM;
           if (StringICmp("blastx", program_name) == 0) {
              align_options += TXALIGN_BLASTX_SPECIAL;
           }
           
           if (html)
              align_options += TXALIGN_HTML;

           seqannot = SeqAnnotNew();
           seqannot->type = 2;
           AddAlignInfoToSeqAnnot(seqannot, align_type);
           seqannot->data = seqalign;
           aip = NULL;
           if (seqannot_output)
              aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w");
           
           if (aip && seqannot) {
              SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
              AsnIoReset(aip);
              aip = AsnIoClose(aip);
           }
        }
        if (!tabular_output) {    
           AcknowledgeBlastQuery(query_bsp, 70, outfp, believe_query, html);
           ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, txmatrix, mask_loc, FormatScoreFunc);
           
           seqannot = SeqAnnotFree(seqannot);
           if (txmatrix)
              txmatrix = TxMatrixDestruct(txmatrix);
           init_buff_ex(85);
        
           if (ka_params) {
              PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
           }
        
           if (ka_params_gap) {
              PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
           }
        
           PrintTildeSepLines(params_buffer, 70, outfp);
           free_buff();
        } else {
           PrintTabularOutputHeader(NULL, query_bsp, NULL, 
              program_name, 0, believe_query, outfp);

           BlastPrintTabulatedResults(seqalign, query_bsp, NULL, 
              1, program_name, !gapped_calculation,
              believe_query, 0, 0, outfp, FALSE);
           SeqAlignSetFree(seqalign);
        }

        matrix = BLAST_MatrixDestruct(matrix);
        MemFree(ka_params);
        MemFree(ka_params_gap);
        MemFree(params_buffer);
    
        mask_loc_start = mask_loc;
        while (mask_loc) {
           SeqLocSetFree(mask_loc->data.ptrvalue);
           mask_loc = mask_loc->next;
        }
        ValNodeFree(mask_loc_start);
        
        fake_bsp = BlastDeleteFakeBioseq(fake_bsp);

        other_returns = ValNodeFree(other_returns);
    options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask);
	options = BLASTOptionDelete(options);
	MemFree(program_name);
	FileClose(outfp);

        if (entrez_lookup) {
           BioseqFree(query_bsp);
           BioseqFree(subject_bsp);
        } else {
           SeqEntryFree(sep);
           SeqEntryFree(sep1);
        }
	return 0;
}
Esempio n. 3
0
/** Formats the results of a search, one query at a time, in the output style
 * selected by format_info->format_options->align_view (text alignments,
 * tabular, XML, or ASN.1 Seq-annot).
 *
 * Queries are walked in parallel: entry i of seqalign_arr->array is paired
 * with the i-th node of query_slp. A NULL entry means "no results" for that
 * query and gets a view-specific acknowledgement instead of alignments.
 *
 * @param seqalign_arr  Per-query Seq-align lists; its num_queries field
 *                      bounds the formatting loop. [in]
 * @param num_queries   Added to format_info->num_formatted on exit.
 *                      NOTE(review): the loop itself is bounded by
 *                      seqalign_arr->num_queries, not by this argument --
 *                      confirm callers keep the two in sync. [in]
 * @param query_slp     Linked list of query locations. [in]
 * @param mask_loc_head Head of the chain of masking locations. Nodes that
 *                      belong to the current query are temporarily unlinked
 *                      from the chain and relinked after formatting. [in]
 * @param format_info   Formatting state (output sinks, options, names). The
 *                      xmlp, is_seqalign_null and num_formatted fields are
 *                      updated here. [in|out]
 * @param sum_returns   Search summary; supplies search_params for the XML
 *                      header and the matrix name for text output, and is
 *                      passed to the XML iteration/statistics builders. [in]
 * @return Always 0.
 */
Int2 BLAST_FormatResults(SBlastSeqalignArray* seqalign_arr, Int4 num_queries, 
        SeqLoc* query_slp, SeqLoc* mask_loc_head, 
        BlastFormattingInfo* format_info,
        Blast_SummaryReturn* sum_returns)
{  
   SeqLoc* mask_loc;
   SeqLoc* next_mask_loc = NULL;
   SeqLoc* tmp_loc = NULL;
   Uint1 align_type;
   Boolean db_is_na;
   Int4 query_index;
   SeqLoc* slp;
   SeqLoc* mask_slp;
   AsnIo* aip = NULL;
   MBXml* xmlp = NULL;
   FILE *outfp = NULL;
   BlastFormattingOptions* format_options;
   EAlignView align_view;
   Boolean ungapped;

   /* NOTE(review): seqalign_arr and sum_returns are dereferenced below but
      are not covered by this assertion. */
   ASSERT(format_info && format_info->format_options && 
          format_info->search_options && query_slp);

   format_options = format_info->format_options;
   align_view = format_options->align_view;
   ungapped = 
       !format_info->search_options->score_options->gapped_calculation;

   /* Pick exactly one output sink for the requested view: the XML writer,
      the ASN.1 stream, or the plain output file. The other two stay NULL. */
   if (align_view == eAlignViewXml) {
       const Int4 kXmlFlag = 0; /* Change to BXML_INCLUDE_QUERY if inclusion
                                   of query sequence is desired in the XML
                                   output header. */
       /* Reuse the XML writer cached in format_info if there is one;
          otherwise create it and cache it there for later calls. */
       xmlp = format_info->xmlp;
       if (!xmlp) {
           xmlp = format_info->xmlp = 
               s_MBXmlInit(format_info->aip, format_info->program_name, 
                           format_info->db_name, query_slp, kXmlFlag, 
                           sum_returns->search_params);
       }
   } else if (align_view == eAlignViewAsnText || 
              align_view == eAlignViewAsnBinary)
       aip = format_info->aip; 
   else 
       outfp = format_info->outfp;

   /* db_is_na is passed by address here and read just below when enabling
      database fetching. */
   align_type = 
       GetOldAlignType(format_info->search_options->program, &db_is_na);

   if (format_info->db_name) {
       /* Enable fetching from the BLAST database. */
      ReadDBBioseqFetchEnable ("blast", format_info->db_name, db_is_na, TRUE);
      /* If database is translated, set the genetic code for translation. */
      if (Blast_SubjectIsTranslated(format_info->search_options->program)) {
          ReadDBBioseqSetDbGeneticCode(format_info->search_options->
                                       db_options->genetic_code);
      }
   }

   if(format_info->search_options->score_options->is_ooframe) {
        ErrPostEx(SEV_WARNING, 0, 0, 
         "Out-of-frame option selected, Expect values are only approximate and calculated not assuming out-of-frame alignments");
   }


   slp = query_slp;
   mask_loc = mask_loc_head;
  
   /* Walk the Seq-align array and the query location list in lock step;
      stop at whichever runs out first. */
   for (query_index=0; query_index<seqalign_arr->num_queries && slp; query_index++, slp=slp->next)
   {
      Bioseq* bsp = NULL;
      SeqAlignPtr seqalign = seqalign_arr->array[query_index];
      /* Find which query the current SeqAlign is for */
      SeqId* query_id = TxGetQueryIdFromSeqAlign(seqalign);
      if (seqalign == NULL)
      {
            /* No alignments for this query: emit the view-specific "empty"
               output and move on. The first branch covers every view whose
               enum value is below eAlignViewXml (presumably the text
               alignment views -- confirm against EAlignView). Views not
               matched by any branch produce nothing here. */
            if (align_view < eAlignViewXml)
                s_AcknowledgeEmptyResults(slp, format_options, format_info, outfp);  /* this query has no results. */
            else if (align_view == eAlignViewXml)
            {
                /* Retrieve this query's Bioseq */
                Iteration* iterp;
                /* Call to TxGetQueryIdFromSeqAlign returned NULL. */
                query_id = SeqLocId(slp);
      		bsp = BioseqLockById(query_id);
                iterp = s_XMLBuildOneQueryIteration(NULL, sum_returns, FALSE, ungapped, 
                                         query_index+1+format_info->num_formatted,
                                         "No hits found", bsp, NULL);
                /* NOTE(review): unlike the hit path below, xmlp is not
                   asserted non-NULL before being dereferenced here. */
                IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
                AsnIoFlush(xmlp->aip);
                IterationFree(iterp);
      		BioseqUnlock(bsp);
            }
            else if (align_view == eAlignViewTabularWithComments)
            {
                 /* Only the comment header is printed for an empty query. */
                 query_id = SeqLocId(slp);
      		 bsp = BioseqLockById(query_id);
                 PrintTabularOutputHeader(format_info->db_name, bsp, NULL, 
                                     format_info->program_name,
                                     0, format_options->believe_query, outfp);
      		 BioseqUnlock(bsp);
            }
            continue;
      }
      format_info->is_seqalign_null = FALSE; /* reset flag, at least one query has seqalign */

      /* Find the masking location for this query. Initialize next_mask_loc
	 to the current start of the chain, in case nothing for this query 
	 will be found. */
      next_mask_loc = mask_loc;
      for ( ; mask_loc; mask_loc = mask_loc->next) {
         mask_slp = (SeqLoc*) mask_loc->data.ptrvalue;
         if (SeqIdComp(query_id, SeqLocId(mask_slp)) == SIC_YES) {
            break;
         }
      }
      /* Unlink the masking location for this query and save the next one */
      if (mask_loc) {
         /* Advance to the last consecutive node that still belongs to this
            query, then cut the chain after it so that mask_loc heads a
            NULL-terminated sub-chain holding only this query's masks. */
         for (next_mask_loc = mask_loc; next_mask_loc->next; 
              next_mask_loc = next_mask_loc->next) {
            mask_slp = (SeqLoc*) next_mask_loc->next->data.ptrvalue;
            if (SeqIdComp(query_id, SeqLocId(mask_slp))
                != SIC_YES) {
               break;
            }
         }
         tmp_loc = next_mask_loc;
         next_mask_loc = next_mask_loc->next;
         tmp_loc->next = NULL;
      }
      /* If no mask was found, mask_loc is NULL from here on and tmp_loc
         keeps whatever value an earlier iteration left in it. */

      /* On the next iteration we can start from the next query */

      /* Retrieve this query's Bioseq */
      bsp = BioseqLockById(query_id);

      if (align_view < eAlignViewXml) {
         /* Per-query preamble for these views: optional output header, the
            query acknowledgement, and optional database info. */
         if (format_info->head_on_every_query == TRUE)
             BLAST_PrintOutputHeader(format_info);

         init_buff_ex(70);
         AcknowledgeBlastQuery(bsp, 70, outfp, 
            format_options->believe_query, format_options->html);
         free_buff();

         if (format_info->head_on_every_query == TRUE)
         {
             s_BLAST_PrintDatabaseInfo(format_info);
             fprintf(format_info->outfp, "%s", "Searching..................................................done\n\n");
         }
      }
      if (align_view == eAlignViewTabular || 
          align_view == eAlignViewTabularWithComments) {
         if (align_view == eAlignViewTabularWithComments)
            PrintTabularOutputHeader(format_info->db_name, bsp, NULL, 
                                     format_info->program_name,
                                     0, format_options->believe_query, outfp);
         
         BlastPrintTabulatedResults(seqalign, bsp, NULL, 
            format_options->number_of_alignments, format_info->program_name, 
            ungapped, format_options->believe_query, 0, 0, 
            outfp, (Boolean)(align_view == eAlignViewTabularWithComments));
      } else if(align_view == eAlignViewXml) {
         Iteration* iterp;
         
         ASSERT(xmlp && xmlp->aip);
         /* The index of this "query iteration" is the query_index in the 
            current formatting round, plus the number of previously formatted
            queries. */
         iterp = 
             s_XMLBuildOneQueryIteration(seqalign, sum_returns, FALSE, 
                                         ungapped, 
                                         query_index+1+format_info->num_formatted,
                                         NULL, bsp, mask_loc);
         IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
         AsnIoFlush(xmlp->aip);
         IterationFree(iterp);
      } else {
         /* Remaining views: wrap the Seq-align in a temporary Seq-annot and
            write it either as ASN.1 (aip) or as text alignments (outfp). */
         SeqAnnot* seqannot = SeqAnnotNew();
         /* NOTE(review): 2 presumably marks the annotation as alignment
            data -- confirm against the SeqAnnot type codes. */
         seqannot->type = 2;
         AddAlignInfoToSeqAnnot(seqannot, align_type);
         seqannot->data = seqalign;
         if (aip) {
            SeqAnnotAsnWrite((SeqAnnot*) seqannot, aip, NULL);
            AsnIoReset(aip);
         } 
         if (outfp) {
            BlastPruneSapStruct* prune;
            Int4** matrix = s_LoadMatrix(sum_returns->search_params->matrix);
            ObjMgrSetHold();
            init_buff_ex(85);
            PrintDefLinesFromSeqAlignEx2(seqalign, 80, outfp, 
               format_options->print_options, FIRST_PASS, NULL,
               format_options->number_of_descriptions, NULL, NULL);
            free_buff();
            
            /** @todo FIXME: note that by calling BlastPruneHitsFromSeqAlign
             * we're making a COPY of the seqalign to print it out! Clearly
             * this could use a better design */
            prune = BlastPruneHitsFromSeqAlign(seqalign, 
                       format_options->number_of_alignments, NULL);
            /* Point the annotation at the pruned list while the alignments
               are printed; it is pointed back at the original list before
               the pruned structure is destroyed. */
            seqannot->data = prune->sap;

            if(format_info->search_options->score_options->is_ooframe) {
               OOFShowBlastAlignment(prune->sap, mask_loc, outfp, 
                                     format_options->align_options, NULL);
            } else if (align_view != eAlignViewPairwise) {
               ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, 
                  format_options->align_options, matrix, mask_loc, NULL);
            } else {
               /* Only the pairwise view passes FormatScoreFunc. */
               ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, 
                  format_options->align_options, matrix, mask_loc, 
                  FormatScoreFunc);
            }
            s_DeleteMatrix(matrix);
            seqannot->data = seqalign;
            prune = BlastPruneSapStructDestruct(prune);
            ObjMgrClearHold();
         }
         /* Set data to NULL, because we do not free Seq-align here. */
         seqannot->data = NULL;
         seqannot = SeqAnnotFree(seqannot);
      }
      BioseqUnlock(bsp);
      /* Relink the mask locations so chain can be freed in the end.
       The 'tmp_loc' variable points to the location that was unlinked.
       NOTE(review): tmp_loc is never reset between iterations, so when this
       query had no mask it still refers to a node unlinked for an earlier
       query; the assignment then appears to rewrite the link that node
       already had -- confirm. */
      if (tmp_loc)
          tmp_loc->next = next_mask_loc;
      
      mask_loc = next_mask_loc;
      ObjMgrFreeCache(0);

   } /* End loop on seqaligns for different queries */

   /* close BlastOutput_iterations opened in s_MBXmlInit; Rt ticket # 15135151 */
   if((format_info->is_seqalign_null==TRUE) && (align_view == eAlignViewXml)) {
     /* extra output only if no hits at all, otherwise "for loop" logic should take care*/
     /* Writes one Iteration numbered 1 that carries only the statistics. */
     Iteration* iterp;    
     iterp = IterationNew();
     iterp->iter_num = 1;
     iterp->stat = s_XMLBuildStatistics(sum_returns, ungapped);

     ASSERT(xmlp && xmlp->aip);
     IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
     AsnIoFlush(xmlp->aip);
     IterationFree(iterp);

   }

   if (format_info->db_name) {
       /* Free the database translation tables, if applicable. */
       TransTableFreeAll();
       ReadDBBioseqFetchDisable();
   }

   /* Update the count of the formatted queries. */
   format_info->num_formatted += num_queries;

   return 0;
}