Ejemplo n.º 1
0
/** Allocate a zero-initialized BlastExtensionOptions structure and fill in
 * the default values for the given program.
 *
 * @param program  Program type; selects nucleotide defaults for
 *                 eBlastTypeBlastn / eBlastTypePhiBlastn and protein
 *                 defaults for everything else [in]
 * @param options  Receives the newly allocated structure (NULL if the
 *                 allocation failed) [out]
 * @param gapped   Whether the search is gapped; composition based statistics
 *                 are only enabled for gapped searches [in]
 * @return BLASTERR_MEMORY if the allocation failed, 0 otherwise.
 */
Int2
BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions* *options, Boolean gapped)

{
    BlastExtensionOptions* ext_opts =
        (BlastExtensionOptions*) calloc(1, sizeof(BlastExtensionOptions));

    *options = ext_opts;
    if (ext_opts == NULL) {
        return BLASTERR_MEMORY;
    }

    /* X-dropoff defaults depend on whether this is a nucleotide search. */
    if (program == eBlastTypeBlastn || program == eBlastTypePhiBlastn) {
        ext_opts->gap_x_dropoff = BLAST_GAP_X_DROPOFF_NUCL;
        ext_opts->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
    } else {
        /* protein-protein options. */
        ext_opts->gap_x_dropoff = BLAST_GAP_X_DROPOFF_PROT;
        ext_opts->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_PROT;
    }

    ext_opts->ePrelimGapExt = eDynProgScoreOnly;
    ext_opts->eTbackExt = eDynProgTbck;

    /** @todo how to determine this for PSI-BLAST bootstrap run (i.e. when
     * program is blastp? */
    if (gapped && (Blast_QueryIsPssm(program) && ! Blast_SubjectIsTranslated(program))) {
        ext_opts->compositionBasedStats = eCompositionBasedStats;
    } else {
        ext_opts->compositionBasedStats = eNoCompositionBasedStats;
    }

    ext_opts->program_number = program;

    return 0;
}
Ejemplo n.º 2
0
/** Allocate a zero-initialized BlastHitSavingOptions structure and fill in
 * the default hit list size, expect value and sum statistics setting.
 *
 * @param program_number      Program type, stored in the new structure and
 *                            used to pick the sum statistics default [in]
 * @param options             Receives the newly allocated structure (NULL if
 *                            the allocation failed); must not be NULL [out]
 * @param gapped_calculation  Whether the search is gapped [in]
 * @return BLASTERR_INVALIDPARAM if options is NULL or the allocation failed,
 *         0 otherwise.
 */
Int2 BlastHitSavingOptionsNew(EBlastProgramType program_number, 
        BlastHitSavingOptions** options,
        Boolean gapped_calculation)
{
   /* Reject a NULL out-parameter instead of dereferencing it. */
   if (options == NULL)
      return BLASTERR_INVALIDPARAM;

   *options = (BlastHitSavingOptions*) calloc(1, sizeof(BlastHitSavingOptions));

   /* NOTE(review): allocation failure is reported with the "invalid
    * parameter" code, which is kept here so existing callers see the same
    * value; a dedicated out-of-memory code would be more accurate - confirm
    * with callers before changing it. */
   if (*options == NULL)
      return BLASTERR_INVALIDPARAM;

   (*options)->hitlist_size = BLAST_HITLIST_SIZE;
   (*options)->expect_value = BLAST_EXPECT_VALUE;
   (*options)->program_number = program_number;

   /* By default, sum statistics is used for all translated searches 
    * (except RPS BLAST), and for all ungapped searches.
    */
   if (program_number != eBlastTypeRpsTblastn &&
       (!gapped_calculation ||
        Blast_QueryIsTranslated(program_number) ||
        Blast_SubjectIsTranslated(program_number))) {
      (*options)->do_sum_stats = TRUE;
   } else {
      (*options)->do_sum_stats = FALSE;
   }

   return 0;
}
Ejemplo n.º 3
0
/** Suggest a word score threshold for the given program and scoring matrix.
 *
 * For eBlastTypeBlastn the function returns immediately with success and
 * leaves *threshold untouched. Otherwise the base value is looked up by
 * matrix name (case-insensitive); unrecognized matrices get the BLOSUM62
 * value. The base value is then raised by 2 when the subject is translated,
 * or by 1 when only the query is translated.
 *
 * @param program_number  Program type [in]
 * @param matrixName      Name of the scoring matrix; must not be NULL for
 *                        non-blastn programs [in]
 * @param threshold       Receives the suggested threshold; must not be NULL
 *                        for non-blastn programs [out]
 * @return BLASTERR_INVALIDPARAM if matrixName or threshold is NULL (for
 *         non-blastn programs), 0 otherwise.
 */
Int2 BLAST_GetSuggestedThreshold(EBlastProgramType program_number, const char* matrixName, double* threshold)
{
    /* Base thresholds for the matrices that have a dedicated value. */
    static const struct {
        const char* name;
        double      value;
    } kMatrixThresholds[] = {
        { "BLOSUM62",    11  },
        { "BLOSUM45",    14  },
        { "BLOSUM62_20", 100 },
        { "BLOSUM80",    12  },
        { "PAM30",       16  },
        { "PAM70",       14  }
    };
    const double kB62_threshold = 11;
    double suggested = kB62_threshold;   /* fallback for unknown matrices */
    size_t idx;

    if (program_number == eBlastTypeBlastn)
      return 0;

    /* The result is written through threshold below, so it must be checked
       as well as the matrix name. */
    if (matrixName == NULL || threshold == NULL)
      return BLASTERR_INVALIDPARAM;

    for (idx = 0;
         idx < sizeof(kMatrixThresholds) / sizeof(kMatrixThresholds[0]);
         idx++) {
        if (strcasecmp(matrixName, kMatrixThresholds[idx].name) == 0) {
            suggested = kMatrixThresholds[idx].value;
            break;
        }
    }

    if (Blast_SubjectIsTranslated(program_number))
        suggested += 2;  /* Covers tblastn, tblastx, psi-tblastn rpstblastn. */
    else if (Blast_QueryIsTranslated(program_number))
        suggested += 1;

    *threshold = suggested;

    return 0;
}
Ejemplo n.º 4
0
/// Returns the database genetic code stored in the options when the
/// configured program type translates the subject; returns 0 otherwise.
int CCmdLineBlastXML2ReportData::GetDbGeneticCode() const
{
	// Guard clause: nothing to report unless the subject is translated.
	if (!Blast_SubjectIsTranslated(m_Options->GetProgramType())) {
		return 0;
	}

	return m_Options->GetDbGeneticCode();
}
Ejemplo n.º 5
0
/** Fill an existing BlastExtensionOptions structure for the given program.
 *
 * For eBlastTypeBlastn / eBlastTypePhiBlastn the nucleotide X-dropoff
 * defaults and the extension algorithms (greedy or dynamic programming) are
 * reset; for other programs those fields are left as they are. Non-zero
 * x_dropoff / x_dropoff_final arguments then override the stored values.
 *
 * @param options          Structure to fill; must not be NULL [in|out]
 * @param program          Program type [in]
 * @param greedy           Non-zero selects the greedy extension algorithms
 *                         (nucleotide programs only) [in]
 * @param x_dropoff        Preliminary gapped X-dropoff; 0 keeps the current
 *                         value [in]
 * @param x_dropoff_final  Final gapped X-dropoff; 0 means "derive it", see
 *                         below [in]
 * @return BLASTERR_INVALIDPARAM if options is NULL, 0 otherwise.
 */
Int2
BLAST_FillExtensionOptions(BlastExtensionOptions* options, 
   EBlastProgramType program, Int4 greedy, double x_dropoff, 
   double x_dropoff_final)
{
   if (options == NULL) {
      return BLASTERR_INVALIDPARAM;
   }

   if (program == eBlastTypeBlastn || program == eBlastTypePhiBlastn) {
      /* The final X-dropoff default is the same for both algorithms. */
      options->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
      options->gap_x_dropoff =
         greedy ? BLAST_GAP_X_DROPOFF_GREEDY : BLAST_GAP_X_DROPOFF_NUCL;
      options->ePrelimGapExt = greedy ? eGreedyScoreOnly : eDynProgScoreOnly;
      options->eTbackExt = greedy ? eGreedyTbck : eDynProgTbck;
   }

   if (Blast_QueryIsPssm(program) && ! Blast_SubjectIsTranslated(program)) {
      options->compositionBasedStats = eCompositionBasedStats;
   }

   /* Explicit, non-zero values from the caller override the defaults. */
   if (x_dropoff) {
      options->gap_x_dropoff = x_dropoff;
   }

   if (!x_dropoff_final) {
      /* Final X-dropoff can't be smaller than preliminary X-dropoff */
      options->gap_x_dropoff_final = 
         MAX(options->gap_x_dropoff_final, x_dropoff);
   } else {
      options->gap_x_dropoff_final = x_dropoff_final;
   }

   return 0;
}
Ejemplo n.º 6
0
/** Format the results for a batch of queries in the view selected by
 * format_info->format_options->align_view.
 *
 * The output destination is chosen from the view: XML goes through
 * format_info->xmlp (created with s_MBXmlInit on first use), ASN.1 text or
 * binary goes to format_info->aip, and every other view is written to
 * format_info->outfp. The function walks seqalign_arr->array and the
 * query_slp chain in parallel, one entry per query. Queries whose Seq-align
 * is NULL get a "no results" acknowledgement appropriate for the view.
 * While a query is being formatted, its masking locations are temporarily
 * unlinked from the mask_loc_head chain and relinked afterwards.
 *
 * @param seqalign_arr  Per-query Seq-aligns; its num_queries field bounds
 *                      the formatting loop. Dereferenced without a check [in]
 * @param num_queries   Added to format_info->num_formatted at the end; the
 *                      loop itself does not use it [in]
 * @param query_slp     Chain of query locations (asserted non-NULL) [in]
 * @param mask_loc_head Chain of masking locations; temporarily modified but
 *                      relinked before returning [in]
 * @param format_info   Formatting state: options, output streams, database
 *                      name, counters (asserted non-NULL) [in|out]
 * @param sum_returns   Search summary data used for XML output and for
 *                      loading the matrix. Dereferenced without a check [in]
 * @return Always 0.
 */
Int2 BLAST_FormatResults(SBlastSeqalignArray* seqalign_arr, Int4 num_queries, 
        SeqLoc* query_slp, SeqLoc* mask_loc_head, 
        BlastFormattingInfo* format_info,
        Blast_SummaryReturn* sum_returns)
{  
   SeqLoc* mask_loc;
   SeqLoc* next_mask_loc = NULL;
   SeqLoc* tmp_loc = NULL;
   Uint1 align_type;
   Boolean db_is_na;
   Int4 query_index;
   SeqLoc* slp;
   SeqLoc* mask_slp;
   AsnIo* aip = NULL;
   MBXml* xmlp = NULL;
   FILE *outfp = NULL;
   BlastFormattingOptions* format_options;
   EAlignView align_view;
   Boolean ungapped;

   ASSERT(format_info && format_info->format_options && 
          format_info->search_options && query_slp);

   format_options = format_info->format_options;
   align_view = format_options->align_view;
   ungapped = 
       !format_info->search_options->score_options->gapped_calculation;

   /* Pick exactly one output channel (xmlp, aip or outfp) based on the view;
      the other two stay NULL. */
   if (align_view == eAlignViewXml) {
       const Int4 kXmlFlag = 0; /* Change to BXML_INCLUDE_QUERY if inclusion
                                   of query sequence is desired in the XML
                                   output header. */
       xmlp = format_info->xmlp;
       if (!xmlp) {
           /* First call for XML output: create the XML state and cache it in
              format_info so later calls reuse it. */
           xmlp = format_info->xmlp = 
               s_MBXmlInit(format_info->aip, format_info->program_name, 
                           format_info->db_name, query_slp, kXmlFlag, 
                           sum_returns->search_params);
       }
   } else if (align_view == eAlignViewAsnText || 
              align_view == eAlignViewAsnBinary)
       aip = format_info->aip; 
   else 
       outfp = format_info->outfp;

   align_type = 
       GetOldAlignType(format_info->search_options->program, &db_is_na);

   if (format_info->db_name) {
       /* Enable fetching from the BLAST database. */
      ReadDBBioseqFetchEnable ("blast", format_info->db_name, db_is_na, TRUE);
      /* If database is translated, set the genetic code for translation. */
      if (Blast_SubjectIsTranslated(format_info->search_options->program)) {
          ReadDBBioseqSetDbGeneticCode(format_info->search_options->
                                       db_options->genetic_code);
      }
   }

   if(format_info->search_options->score_options->is_ooframe) {
        ErrPostEx(SEV_WARNING, 0, 0, 
         "Out-of-frame option selected, Expect values are only approximate and calculated not assuming out-of-frame alignments");
   }


   slp = query_slp;
   mask_loc = mask_loc_head;
  
   /* One iteration per query: the Seq-align array and the query location
      chain are advanced together, stopping at whichever ends first. */
   for (query_index=0; query_index<seqalign_arr->num_queries && slp; query_index++, slp=slp->next)
   {
      Bioseq* bsp = NULL;
      SeqAlignPtr seqalign = seqalign_arr->array[query_index];
      /* Find which query the current SeqAlign is for */
      /* NOTE(review): this is called before the NULL check on seqalign
         below; the comment in the XML branch suggests it yields NULL for a
         NULL Seq-align - confirm against TxGetQueryIdFromSeqAlign. */
      SeqId* query_id = TxGetQueryIdFromSeqAlign(seqalign);
      if (seqalign == NULL)
      {
            if (align_view < eAlignViewXml)
                s_AcknowledgeEmptyResults(slp, format_options, format_info, outfp);  /* this query has no results. */
            else if (align_view == eAlignViewXml)
            {
                /* Retrieve this query's Bioseq */
                Iteration* iterp;
                /* Call to TxGetQueryIdFromSeqAlign returned NULL. */
                query_id = SeqLocId(slp);
      		bsp = BioseqLockById(query_id);
                iterp = s_XMLBuildOneQueryIteration(NULL, sum_returns, FALSE, ungapped, 
                                         query_index+1+format_info->num_formatted,
                                         "No hits found", bsp, NULL);
                IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
                AsnIoFlush(xmlp->aip);
                IterationFree(iterp);
      		BioseqUnlock(bsp);
            }
            else if (align_view == eAlignViewTabularWithComments)
            {
                 query_id = SeqLocId(slp);
      		 bsp = BioseqLockById(query_id);
                 PrintTabularOutputHeader(format_info->db_name, bsp, NULL, 
                                     format_info->program_name,
                                     0, format_options->believe_query, outfp);
      		 BioseqUnlock(bsp);
            }
            /* Nothing more to format for a query without alignments. */
            continue;
      }
      format_info->is_seqalign_null = FALSE; /* reset flag, at least one query has seqalign */

      /* Find the masking location for this query. Initialize next_mask_loc
	 to the current start of the chain, in case nothing for this query 
	 will be found. */
      next_mask_loc = mask_loc;
      for ( ; mask_loc; mask_loc = mask_loc->next) {
         mask_slp = (SeqLoc*) mask_loc->data.ptrvalue;
         if (SeqIdComp(query_id, SeqLocId(mask_slp)) == SIC_YES) {
            break;
         }
      }
      /* Unlink the masking location for this query and save the next one */
      if (mask_loc) {
         /* Advance to the last consecutive node that still belongs to this
            query, then cut the chain right after it. */
         for (next_mask_loc = mask_loc; next_mask_loc->next; 
              next_mask_loc = next_mask_loc->next) {
            mask_slp = (SeqLoc*) next_mask_loc->next->data.ptrvalue;
            if (SeqIdComp(query_id, SeqLocId(mask_slp))
                != SIC_YES) {
               break;
            }
         }
         tmp_loc = next_mask_loc;
         next_mask_loc = next_mask_loc->next;
         tmp_loc->next = NULL;
      }

      /* On the next iteration we can start from the next query */

      /* Retrieve this query's Bioseq */
      bsp = BioseqLockById(query_id);

      if (align_view < eAlignViewXml) {
         if (format_info->head_on_every_query == TRUE)
             BLAST_PrintOutputHeader(format_info);

         init_buff_ex(70);
         AcknowledgeBlastQuery(bsp, 70, outfp, 
            format_options->believe_query, format_options->html);
         free_buff();

         if (format_info->head_on_every_query == TRUE)
         {
             s_BLAST_PrintDatabaseInfo(format_info);
             fprintf(format_info->outfp, "%s", "Searching..................................................done\n\n");
         }
      }
      if (align_view == eAlignViewTabular || 
          align_view == eAlignViewTabularWithComments) {
         if (align_view == eAlignViewTabularWithComments)
            PrintTabularOutputHeader(format_info->db_name, bsp, NULL, 
                                     format_info->program_name,
                                     0, format_options->believe_query, outfp);
         
         BlastPrintTabulatedResults(seqalign, bsp, NULL, 
            format_options->number_of_alignments, format_info->program_name, 
            ungapped, format_options->believe_query, 0, 0, 
            outfp, (Boolean)(align_view == eAlignViewTabularWithComments));
      } else if(align_view == eAlignViewXml) {
         Iteration* iterp;
         
         ASSERT(xmlp && xmlp->aip);
         /* The index of this "query iteration" is the query_index in the 
            current formatting round, plus the number of previously formatted
            queries. */
         iterp = 
             s_XMLBuildOneQueryIteration(seqalign, sum_returns, FALSE, 
                                         ungapped, 
                                         query_index+1+format_info->num_formatted,
                                         NULL, bsp, mask_loc);
         IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
         AsnIoFlush(xmlp->aip);
         IterationFree(iterp);
      } else {
         /* ASN.1 and text alignment views: wrap the Seq-align in a temporary
            Seq-annot that borrows (does not own) the alignment data. */
         SeqAnnot* seqannot = SeqAnnotNew();
         seqannot->type = 2;
         AddAlignInfoToSeqAnnot(seqannot, align_type);
         seqannot->data = seqalign;
         if (aip) {
            SeqAnnotAsnWrite((SeqAnnot*) seqannot, aip, NULL);
            AsnIoReset(aip);
         } 
         if (outfp) {
            BlastPruneSapStruct* prune;
            Int4** matrix = s_LoadMatrix(sum_returns->search_params->matrix);
            ObjMgrSetHold();
            init_buff_ex(85);
            PrintDefLinesFromSeqAlignEx2(seqalign, 80, outfp, 
               format_options->print_options, FIRST_PASS, NULL,
               format_options->number_of_descriptions, NULL, NULL);
            free_buff();
            
            /** @todo FIXME: note that by calling BlastPruneHitsFromSeqAlign
             * we're making a COPY of the seqalign to print it out! Clearly
             * this could use a better design */
            prune = BlastPruneHitsFromSeqAlign(seqalign, 
                       format_options->number_of_alignments, NULL);
            seqannot->data = prune->sap;

            if(format_info->search_options->score_options->is_ooframe) {
               OOFShowBlastAlignment(prune->sap, mask_loc, outfp, 
                                     format_options->align_options, NULL);
            } else if (align_view != eAlignViewPairwise) {
               ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, 
                  format_options->align_options, matrix, mask_loc, NULL);
            } else {
               ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, 
                  format_options->align_options, matrix, mask_loc, 
                  FormatScoreFunc);
            }
            s_DeleteMatrix(matrix);
            /* Point the annot back at the original alignment before the
               pruned copy is destroyed. */
            seqannot->data = seqalign;
            prune = BlastPruneSapStructDestruct(prune);
            ObjMgrClearHold();
         }
         /* Set data to NULL, because we do not free Seq-align here. */
         seqannot->data = NULL;
         seqannot = SeqAnnotFree(seqannot);
      }
      BioseqUnlock(bsp);
      /* Relink the mask locations so chain can be freed in the end.
       The 'tmp_loc' variable points to the location that was unlinked. */
      if (tmp_loc)
          tmp_loc->next = next_mask_loc;
      
      mask_loc = next_mask_loc;
      ObjMgrFreeCache(0);

   } /* End loop on seqaligns for different queries */

   /* close BlastOutput_iterations opened in s_MBXmlInit; Rt ticket # 15135151 */
   if((format_info->is_seqalign_null==TRUE) && (align_view == eAlignViewXml)) {
     /* extra output only if no hits at all, otherwise "for loop" logic should take care*/
     Iteration* iterp;    
     iterp = IterationNew();
     iterp->iter_num = 1;
     iterp->stat = s_XMLBuildStatistics(sum_returns, ungapped);

     ASSERT(xmlp && xmlp->aip);
     IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
     AsnIoFlush(xmlp->aip);
     IterationFree(iterp);

   }

   if (format_info->db_name) {
       /* Free the database translation tables, if applicable. */
       TransTableFreeAll();
       ReadDBBioseqFetchDisable();
   }

   /* Update the count of the formatted queries. */
   format_info->num_formatted += num_queries;

   return 0;
}
Ejemplo n.º 7
0
/** Create an SBlastOptions wrapper populated with the default options for
 * the named program.
 *
 * The program name is converted to a program type with BlastProgram2Number;
 * default option structures are created with BLAST_InitDefaultOptions and
 * attached to a newly allocated SBlastOptions. For translated-subject
 * programs (and eBlastTypeRpsTblastn) the database genetic code is
 * registered with GenCodeSingletonAdd. The filter string is set to "T".
 *
 * @param program_name   Name of the BLAST program [in]
 * @param options_out    Receives the new options wrapper; set to NULL when
 *                       default initialization or the wrapper allocation
 *                       fails. Must not be NULL [out]
 * @param extra_returns  Error messages are written to its error field.
 *                       Must not be NULL [in|out]
 * @return 0 on success; -1 for NULL arguments, an unsupported program name
 *         or allocation failure; otherwise the non-zero status returned by
 *         BLAST_InitDefaultOptions.
 */
Int2 SBlastOptionsNew(const char* program_name, SBlastOptions** options_out,
                      Blast_SummaryReturn* extra_returns)
{
   QuerySetUpOptions* query_options=NULL;	
   LookupTableOptions* lookup_options=NULL;
   BlastInitialWordOptions* word_options=NULL;
   BlastScoringOptions* score_options=NULL;
   BlastExtensionOptions* ext_options=NULL;
   BlastHitSavingOptions* hit_options=NULL;
   BlastEffectiveLengthsOptions* eff_len_options=NULL;
   PSIBlastOptions* psi_options = NULL;
   BlastDatabaseOptions* db_options = NULL;
   SBlastOptions* options;
   EBlastProgramType program = eBlastTypeUndefined;
   Int2 status = 0;

   if (!options_out || !extra_returns)
       return -1;

   BlastProgram2Number(program_name, &program);
   if (program == eBlastTypeUndefined) {
       char message[256];

       /* The program name comes from the caller and may be arbitrarily long
          (or NULL). Cap the echoed name so that the complete message,
          including the list of supported programs, always fits in the
          buffer, and bound the write with snprintf as a second line of
          defense. */
       snprintf(message, sizeof(message),
               "Program name %.100s is not supported. The supported programs "
               "are blastn, blastp, blastx, tblastn, tblastx, rpsblast, "
               "rpstblastn\n", program_name ? program_name : "(null)");
       SBlastMessageWrite(&extra_returns->error, SEV_ERROR, message, NULL, FALSE);
       return -1;
   }

   status = 
       BLAST_InitDefaultOptions(program, &lookup_options, &query_options, 
           &word_options, &ext_options, &hit_options, &score_options, 
           &eff_len_options, &psi_options, &db_options);
   
   if (status) {
       *options_out = NULL;
       SBlastMessageWrite(&extra_returns->error, SEV_ERROR, "Failed to initialize default options\n", NULL, FALSE);
       return status;
   }

   if (Blast_SubjectIsTranslated(program) || program == eBlastTypeRpsTblastn) {
        Uint1* gc = NULL;
        BLAST_GeneticCodeFind(db_options->genetic_code, &gc);
        GenCodeSingletonAdd(db_options->genetic_code, gc);
        free(gc);
   }
   
   *options_out = options = (SBlastOptions*) calloc(1, sizeof(SBlastOptions));
   if (options == NULL) {
       /* Do not dereference a failed allocation; *options_out is already
          NULL at this point.
          NOTE(review): the option structures created by
          BLAST_InitDefaultOptions above are not released on this path;
          free them here once the matching destructors are confirmed. */
       SBlastMessageWrite(&extra_returns->error, SEV_ERROR, "Failed to allocate memory for search options\n", NULL, FALSE);
       return -1;
   }

   options->program = program;
   options->query_options = query_options;
   options->lookup_options = lookup_options;
   options->word_options = word_options;
   options->ext_options = ext_options;
   options->score_options = score_options;
   options->hit_options = hit_options;
   options->eff_len_options = eff_len_options;
   options->psi_options = psi_options;
   options->db_options = db_options;
   options->num_cpus = 1;
   options->believe_query = FALSE;

   /* Set default filter string to low complexity filtering. */
   SBlastOptionsSetFilterString(options, "T");

   return status;
}