Ejemplo n.º 1
0
/*
 * UniqueOrfs
 *
 * Replaces the list at *pslpFound with a freshly built list.  Each element
 * of the input is duplicated into the new list unless SeqLocMatch reports
 * a match between the list built so far and that element (presumably
 * "already present" -- confirm against SeqLocMatch).  Every node of the
 * original list is then freed and *pslpFound is set to the new list
 * (NULL when the input was empty).
 */
extern void UniqueOrfs (SeqLocPtr PNTR pslpFound)
{
  SeqLocPtr cur, following;
  SeqLocPtr kept = NULL;
  SeqIdPtr  sip;

  /* pass 1: copy the elements that are not matched by the list so far */
  for (cur = *pslpFound; cur != NULL; cur = cur->next)
  {
    if (SeqLocMatch (kept, cur))
      continue;
    SeqLocLink (&kept, SeqLocDup (cur));
  }

  /* pass 2: release the original list, node by node */
  for (cur = *pslpFound; cur != NULL; cur = following)
  {
    following = cur->next;      /* saved before the node is freed */
    sip = SeqLocId (cur);
    if (sip != NULL)
      sip->next = SeqIdSetFree (sip->next);   /* drop ids chained after the first */
    SeqLocFree (cur);
  }

  *pslpFound = kept;
}
Ejemplo n.º 2
0
/*
 * MakeGlobalTable
 *
 * Gather callback.  For the first Bioseq item seen while the global codon
 * table is still unset, adds every location in gcdsp->slpGlobal to
 * gcdsp->tableGlobal (created on demand) and counts each one in
 * gcdsp->globalcount.
 *
 * Returns FALSE when the context or its user data is missing, TRUE in
 * every other case (including the "nothing to do" cases).
 */
static Boolean MakeGlobalTable (GatherContextPtr gcp)
{
  Gather_CDSPtr ctx;
  BioseqPtr     bioseq;
  SeqLocPtr     cur, copy;

  if (gcp == NULL)
    return FALSE;

  ctx = (Gather_CDSPtr) gcp->userdata;
  if (ctx == NULL)
    return FALSE;

  if (gcp->thistype != OBJ_BIOSEQ)
    return TRUE;

  bioseq = (BioseqPtr) (gcp->thisitem);
  if (bioseq == NULL)
    return TRUE;

  /* table already built on an earlier call */
  if (ctx->tableGlobal != NULL)
    return TRUE;

  for (cur = ctx->slpGlobal; cur != NULL; cur = cur->next)
  {
    ctx->globalcount++;
    if (ctx->tableGlobal == NULL)
      ctx->tableGlobal = NewCodonTable ();
    /* work on a detached single-node copy of the location */
    copy = SeqLocDup (cur);
    AddSeqLocToCodonTable (ctx->tableGlobal, bioseq, copy, TRUE);
    SeqLocFree (copy);
  }
  return TRUE;
}
Ejemplo n.º 3
0
/*
 * SeqLocDup
 *
 * Returns a copy of slpold produced by AsnIoMemCopy, detached from any
 * list: every node that the copy carries after its first one is freed and
 * the copy's next pointer is cleared.
 *
 * Returns NULL when slpold is NULL or when the copy could not be made.
 * The caller owns the returned location.
 */
extern SeqLocPtr SeqLocDup (SeqLocPtr slpold)
{
  SeqLocPtr slpnew, slpn, slp;
  SeqIdPtr  id;

  if (slpold == NULL)
    return NULL;

  slpnew = (SeqLocPtr) AsnIoMemCopy ((Pointer) slpold,
                                     (AsnReadFunc) SeqLocAsnRead,
                                     (AsnWriteFunc) SeqLocAsnWrite);
  /* the copy can fail; do not dereference a NULL result */
  if (slpnew == NULL)
    return NULL;

  /* discard whatever chain came along with the copy */
  slp = slpnew->next;
  while (slp != NULL)
  {
    slpn = slp->next;
    id = SeqLocId (slp);
    if (id != NULL)
      id->next = SeqIdSetFree (id->next);
    SeqLocFree (slp);
    slp = slpn;
  }
  slpnew->next = NULL;

  return slpnew;
}
Ejemplo n.º 4
0
/*
 * CheckOverlap
 *
 * Filters the list slp in place: every location whose SeqLocStop value is
 * less than ctermsig is unlinked and freed; the remaining nodes keep their
 * relative order.
 *
 * slp       head of the list to filter (ownership of removed nodes is
 *           taken; kept nodes are returned)
 * ctermsig  cutoff position; 0 disables filtering and returns slp as is
 *
 * Returns the head of the filtered list, or NULL when nothing is kept.
 *
 * The list is relinked as nodes are removed, so a kept node never points
 * at a freed neighbour (the previous version only tracked the head and
 * left such dangling next pointers behind).
 */
extern SeqLocPtr CheckOverlap (SeqLocPtr slp, Int4 ctermsig)
{
  SeqLocPtr slphead = NULL, slptail = NULL, slpnext;
  Int4      stop;

  if (ctermsig == 0)
    return slp;

  while (slp != NULL)
  {
    slpnext = slp->next;
    stop = SeqLocStop (slp);
    if (stop < ctermsig)
    {
      /* detach before freeing so only this node is released */
      slp->next = NULL;
      SeqLocFree (slp);
    }
    else
    {
      if (slptail == NULL)
        slphead = slp;
      else
        slptail->next = slp;
      slptail = slp;
    }
    slp = slpnext;
  }

  /* the last kept node may have been followed by removed ones */
  if (slptail != NULL)
    slptail->next = NULL;

  return slphead;
}
Ejemplo n.º 5
0
/*
 * BiasScoreBioseq
 *
 * Slides a window along bsp in steps of three residues, starting at
 * offset xframe on strand xstrand, and records Confide (window codon
 * table, tableGlobal) for each step.
 *
 * bsp            nucleic-acid Bioseq to scan
 * tableGlobal    reference codon table passed to Confide
 * tripletwindow  window length in residues
 * xframe         starting offset (callers in this file pass 0..2)
 * xstrand        strand stored in the scanning interval
 *
 * Returns a MemNew-allocated array with
 * (bsp->length + 3 - xframe - tripletwindow) / 3 entries that the caller
 * must free, or NULL when bsp is NULL, is not nucleic acid, is shorter
 * than the window, or an allocation fails.
 */
extern FloatHiPtr BiasScoreBioseq (BioseqPtr bsp, Int4Ptr tableGlobal,
                                   Int4 tripletwindow, Int4 xframe,
                                   Uint1 xstrand)
{
  FloatHiPtr score;
  Int4       iscore;

  SeqLocPtr  slp;
  SeqIntPtr  sint;
  Int4       start, stop, xstop, xwindow;
  Int4Ptr    cutp;

  if (bsp == NULL)
    return NULL;
  if (!ISA_na (bsp->mol))
    return NULL;
  if (bsp->length < tripletwindow)
    return NULL;

  /* scratch interval location reused for every table update */
  slp = ValNodeNew (NULL);
  if (slp == NULL)
    return NULL;
  sint = SeqIntNew ();
  slp->choice = SEQLOC_INT;
  slp->data.ptrvalue = sint;
  if (sint == NULL)
  {
    SeqLocFree (slp);
    return NULL;
  }

  xwindow = tripletwindow;
  xstop = (bsp->length + 3 - xframe - xwindow) / 3;
  score = (FloatHiPtr) MemNew ((size_t) (sizeof (FloatHi) * xstop));
  if (score == NULL)
  {
    slp->data.ptrvalue = (Pointer) SeqIntFree (sint);
    SeqLocFree (slp);
    return NULL;
  }
  /* the initial table covers the window minus its last codon */
  xwindow -= 3;

  start = xframe;
  stop = start + xwindow - 1;
  sint->from = start;
  sint->to = stop;
  sint->strand = xstrand;
  cutp = CodonTableFromSeqLoc (bsp, slp);

  iscore = 0;

  xstop = bsp->length - 3;
  for (start = stop + 1; start <= xstop; start += 3)
  {
    /* bring the next codon into the table, then score the full window */
    sint->from = start;
    sint->to = start + 2;
    AddSeqLocToCodonTable (cutp, bsp, slp, TRUE);
    score[iscore++] = Confide (cutp, tableGlobal);
    /* same call with FALSE on the codon at the window's trailing edge
       (presumably removes it from the table -- confirm) */
    sint->from -= xwindow;
    sint->to -= xwindow;
    AddSeqLocToCodonTable (cutp, bsp, slp, FALSE);
  }

  FreeCodonTable (cutp);
  /* free the interval first so SeqLocFree sees a NULL payload */
  slp->data.ptrvalue = (Pointer) SeqIntFree (sint);
  SeqLocFree (slp);
  return score;
}
Ejemplo n.º 6
0
/*
 * RemoveInternalOrfs
 *
 * Replaces the list at *slpFound with a new list holding duplicates of
 * only those locations that are not strictly inside another location of
 * the same list.  "Strictly inside" means both the start and the stop of
 * the candidate lie strictly between the other location's start and stop.
 * All nodes of the original list are freed.
 */
extern void RemoveInternalOrfs (SeqLocPtr PNTR slpFound)
{
  SeqLocPtr candidate, other, following;
  SeqLocPtr survivors = NULL;
  SeqIdPtr  sip;
  Int4      candStart, candStop, otherStart, otherStop;
  Boolean   isNested;

  for (candidate = *slpFound; candidate != NULL; candidate = candidate->next)
  {
    candStart = SeqLocStart (candidate);
    candStop = SeqLocStop (candidate);
    isNested = FALSE;

    for (other = *slpFound; other != NULL; other = other->next)
    {
      otherStart = SeqLocStart (other);
      otherStop = SeqLocStop (other);
      /* skip unless both ends fall strictly within the other location */
      if (candStart <= otherStart || candStart >= otherStop)
        continue;
      if (candStop <= otherStart || candStop >= otherStop)
        continue;
      isNested = TRUE;
      break;
    }

    if (!isNested)
      SeqLocLink (&survivors, SeqLocDup (candidate));
  }

  /* release the original list */
  for (candidate = *slpFound; candidate != NULL; candidate = following)
  {
    following = candidate->next;
    sip = SeqLocId (candidate);
    if (sip != NULL)
      sip->next = SeqIdSetFree (sip->next);
    SeqLocFree (candidate);
  }

  *slpFound = survivors;
}
Ejemplo n.º 7
0
/*
 * StandardMean
 *
 * Gather callback.  On the first Bioseq item seen while gcdsp->stdcount
 * is still 0, scores every location in gcdsp->slpAll with Confide against
 * gcdsp->tableGlobal and accumulates:
 *   mean      running sum of the scores (not divided here)
 *   stdcount  number of locations scored
 *   LOscore   smallest score seen (-1 is treated as "not set yet")
 *   HIscore   largest score seen
 *
 * Returns FALSE when the context or its user data is missing, TRUE
 * otherwise.
 */
static Boolean StandardMean (GatherContextPtr gcp)
{
  Gather_CDSPtr ctx;
  BioseqPtr     bioseq;
  SeqLocPtr     cur, copy;
  Int4Ptr       table;
  FloatHi       value;

  if (gcp == NULL)
    return FALSE;

  ctx = (Gather_CDSPtr) gcp->userdata;
  if (ctx == NULL)
    return FALSE;

  if (gcp->thistype != OBJ_BIOSEQ)
    return TRUE;

  bioseq = (BioseqPtr) (gcp->thisitem);
  if (bioseq == NULL)
    return TRUE;

  /* statistics were already collected on an earlier call */
  if (ctx->stdcount != 0)
    return TRUE;

  for (cur = ctx->slpAll; cur != NULL; cur = cur->next)
  {
    /* score this location on its own, in a throw-away table */
    table = NewCodonTable ();
    copy = SeqLocDup (cur);
    AddSeqLocToCodonTable (table, bioseq, copy, TRUE);
    value = Confide (table, ctx->tableGlobal);
    FreeCodonTable (table);

    ctx->mean += value;
    ctx->stdcount++;

    if (ctx->LOscore == -1 || value < ctx->LOscore)
      ctx->LOscore = value;
    if (value > ctx->HIscore)
      ctx->HIscore = value;

    SeqLocFree (copy);
  }
  return TRUE;
}
Ejemplo n.º 8
0
/*
 * RefineCodonUsage
 *
 * Gather callback.  On the first Bioseq item seen while
 * gcdsp->tableRefine is still unset, scores every location in
 * gcdsp->slpAll with Confide against gcdsp->tableGlobal.  Locations whose
 * score is below gcdsp->scorecut are counted in gcdsp->refinecount, added
 * to gcdsp->tableRefine (created on demand) and duplicated onto
 * gcdsp->slpRefine.
 *
 * Returns FALSE when the context or its user data is missing, TRUE
 * otherwise.
 */
static Boolean RefineCodonUsage (GatherContextPtr gcp)
{
  Gather_CDSPtr ctx;
  BioseqPtr     bioseq;
  SeqLocPtr     cur, copy;
  Int4Ptr       table;
  FloatHi       value;

  if (gcp == NULL)
    return FALSE;

  ctx = (Gather_CDSPtr) gcp->userdata;
  if (ctx == NULL)
    return FALSE;

  if (gcp->thistype != OBJ_BIOSEQ)
    return TRUE;

  bioseq = (BioseqPtr) (gcp->thisitem);
  if (bioseq == NULL)
    return TRUE;

  /* refinement already done on an earlier call */
  if (ctx->tableRefine != NULL)
    return TRUE;

  for (cur = ctx->slpAll; cur != NULL; cur = cur->next)
  {
    /* score this location on its own, in a throw-away table */
    table = NewCodonTable ();
    copy = SeqLocDup (cur);
    AddSeqLocToCodonTable (table, bioseq, copy, TRUE);
    value = Confide (table, ctx->tableGlobal);
    FreeCodonTable (table);

    if (value < ctx->scorecut)
    {
      ctx->refinecount++;
      if (ctx->tableRefine == NULL)
        ctx->tableRefine = NewCodonTable ();
      AddSeqLocToCodonTable (ctx->tableRefine, bioseq, copy, TRUE);
      SeqLocLink (&(ctx->slpRefine), SeqLocDup (cur));
    }

    SeqLocFree (copy);
  }
  return TRUE;
}
Ejemplo n.º 9
0
/*
 * Main_old
 *
 * Compares two sequences with BLAST and writes the report to the file
 * named by the ARG_OUT argument.  All settings are read from the global
 * myargs[] array.  Steps, in order:
 *   1. validate the program name and open the output file;
 *   2. create the options block and load both sequences with
 *      BL2SEQ_GetSequences;
 *   3. unless ARG_ACCN is set, wrap the subject in a fake Bioseq carrying
 *      a "BL_ORD_ID" general id (and the query too when its ids are not
 *      to be believed);
 *   4. copy the tuning arguments into the options block;
 *   5. run the search, on whole sequences or on locations when ARG_LOC1 or
 *      ARG_LOC2 is given;
 *   6. unpack other_returns, optionally write the SeqAnnot as ASN.1, and
 *      print either the text alignment or the tabular report;
 *   7. release everything.
 *
 * Returns 0 on success and 1 on every failure path.
 */
Int2 Main_old (void)
 
{
	
	AsnIoPtr aip;
	BioseqPtr fake_bsp = NULL, fake_subject_bsp = NULL, query_bsp = NULL, 
                  subject_bsp = NULL;
        BioseqPtr bsp1, bsp2;
	BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
	BLAST_OptionsBlkPtr options=NULL;
	Boolean seq1_is_na, seq2_is_na;
	CharPtr params_buffer=NULL;
        DbtagPtr        dbtagptr;
	Uint1 align_type;
	Uint4 align_options;
	SeqAlignPtr  seqalign;
        SeqAnnotPtr seqannot;
	SeqEntryPtr sep = NULL, sep1 = NULL;
	CharPtr program_name, blast_outputfile;
	FILE *outfp;
	ValNodePtr  mask_loc, mask_loc_start, vnp, other_returns=NULL, error_returns=NULL;
	BLAST_MatrixPtr matrix;
        Int4Ptr PNTR txmatrix;
        /* never assigned below, so the search runs without a results callback */
        int (LIBCALLBACK *handle_results)PROTO((VoidPtr search)) = NULL;
        Boolean entrez_lookup = FALSE;
        Boolean html, seqannot_output, believe_query;
        Uint1 tabular_output;
        Boolean gapped_calculation;

        entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
        html = (Boolean) myargs[ARG_HTML].intvalue;
        seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);

        blast_outputfile = myargs [ARG_OUT].strvalue;

	/* StringCmp is non-zero on mismatch: true only when no name matches */
	program_name = StringSave(myargs[ARG_PROGRAM].strvalue);
	if (StringCmp(program_name, "blastn") && 
	    StringCmp(program_name, "blastp") && 
	    StringCmp(program_name, "blastx") && 
	    StringCmp(program_name, "tblastn") && 
	    StringCmp(program_name, "tblastx")) {
		/* NOTE(review): program_name is not freed on this return */
		ErrPostEx(SEV_FATAL, 1, 0, "Program name must be blastn, blastp, blastx, tblastn or tblastx\n");
		return (1);
	}
	   
	align_type = BlastGetTypes(program_name, &seq1_is_na, &seq2_is_na);

	if ((outfp = FileOpen(blast_outputfile, "w")) == NULL)
	{
		/* NOTE(review): program_name is not freed on this return */
		ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
		return (1);
	}

        gapped_calculation = (Boolean) myargs[ARG_GAPPED].intvalue;
        /* query ids are trusted when ASN.1 output or an Entrez lookup is requested */
        believe_query = (seqannot_output || entrez_lookup); 

        options = BLASTOptionNewEx(program_name, gapped_calculation,
                                   (Boolean) myargs[ARG_USEMEGABLAST].intvalue);

        if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp,
                                &sep, &sep1, &(options->query_lcase_mask), 
                                believe_query) == FALSE)
        {
            /* NOTE(review): options, outfp and program_name are not released here */
            ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
            return (1);
        }

        if (!entrez_lookup) {
            if (!believe_query)
                fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
            
            /* shallow stand-in for the subject: descr and seq_data pointers are shared */
            fake_subject_bsp = BioseqNew();
            fake_subject_bsp->descr = subject_bsp->descr;
            fake_subject_bsp->repr = subject_bsp->repr;
            fake_subject_bsp->mol = subject_bsp->mol;
            fake_subject_bsp->length = subject_bsp->length;
            fake_subject_bsp->seq_data = subject_bsp->seq_data;
            fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
            dbtagptr = DbtagNew();
            dbtagptr->db = StringSave("BL_ORD_ID");
            dbtagptr->tag = ObjectIdNew();

            /* the subject's title becomes the string part of the general id */
            if (BioseqGetTitle(subject_bsp) != NULL)
              dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
            else
              dbtagptr->tag->str = StringSave("No definition line found");

            ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
            /* fake_bsp exists only when believe_query is FALSE */
            bsp1 = (believe_query ? query_bsp : fake_bsp);
            bsp2 = fake_subject_bsp;
        } else {
            bsp1 = query_bsp;
            bsp2 = subject_bsp;
        }

        tabular_output = (Uint1) myargs[ARG_FORMAT].intvalue; 


    	if (myargs[ARG_SEARCHSP].floatvalue)
           options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;


	options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
	options->expect_value  = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;

        /* match/mismatch scores are applied for blastn only */
        if (StringICmp("blastn", program_name) == 0)
        {
                options->penalty = myargs[ARG_MISMATCH].intvalue;
                options->reward = myargs[ARG_MATCH].intvalue;
        }

	options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;

	options->discontinuous = FALSE;

        /* a value of 0 keeps whatever the options block already holds */
        if (myargs[ARG_XDROP].intvalue != 0)
	{
               options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
	}
        if (myargs[ARG_WORDSIZE].intvalue != 0)
               options->wordsize = (Int2) myargs[ARG_WORDSIZE].intvalue;

	if (options->is_megablast_search) {
	   options->cutoff_s2 = options->wordsize*options->reward;
        }
	/* release the current matrix string before the gap parameters are set from ARG_MATRIX */
	options->matrix = MemFree(options->matrix);
        BLASTOptionSetGapParams(options, myargs[ARG_MATRIX].strvalue, 0, 0); 

        /* -1 keeps whatever the options block already holds */
        if (myargs[ARG_GAPOPEN].intvalue != -1)
              options->gap_open = myargs[ARG_GAPOPEN].intvalue;
        if (myargs[ARG_GAPEXT].intvalue != -1)
               options->gap_extend = myargs[ARG_GAPEXT].intvalue;

	options->strand_option = myargs[ARG_STRAND].intvalue;

        /* Input longest intron length is in nucleotide scale; in the lower 
           level code it will be used in protein scale */
        if (myargs[ARG_INTRON].intvalue > 0) 
           options->longest_intron = myargs[ARG_INTRON].intvalue;


        /* whole-sequence search unless a location was given for either sequence */
        if (!myargs[ARG_LOC1].strvalue && !myargs[ARG_LOC2].strvalue) {
           seqalign = BlastTwoSequencesWithCallback(bsp1, bsp2, program_name, 
              options, &other_returns, &error_returns, handle_results);
        } else {
            SeqLocPtr slp1=NULL, slp2=NULL;
            /* NOTE(review): this return releases nothing acquired so far and posts no error */
            if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, options->strand_option) == FALSE)
                return 1;
           seqalign = BlastTwoSequencesByLocWithCallback(slp1, slp2, program_name, options, &other_returns, &error_returns, handle_results, NULL);
           SeqLocFree(slp1);
           SeqLocFree(slp2);
        }

        /* print, then destroy, any error messages produced by the search */
        if (error_returns) {
           BlastErrorPrint(error_returns);
           for (vnp = error_returns; vnp; vnp = vnp->next) {
              BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
           }
           ValNodeFree(error_returns);
        }
       
        ka_params = NULL;
        ka_params_gap = NULL;
        params_buffer = NULL;
        mask_loc = NULL;
        matrix = NULL;
        txmatrix = NULL;
        /* sort the heterogeneous other_returns list by its choice tag */
        for (vnp=other_returns; vnp; vnp = vnp->next) {
           switch (vnp->choice) {
           case TXKABLK_NOGAP:
              ka_params = vnp->data.ptrvalue;
              break;
           case TXKABLK_GAP:
              ka_params_gap = vnp->data.ptrvalue;
              break;
           case TXPARAMETERS:
              params_buffer = vnp->data.ptrvalue;
              break;
           case TXMATRIX:
              matrix = vnp->data.ptrvalue;
              /* the text matrix is only needed by the non-tabular report */
              if (matrix && !tabular_output)
                 txmatrix = BlastMatrixToTxMatrix(matrix);
              break;
           case SEQLOC_MASKING_NOTSET:
           case SEQLOC_MASKING_PLUS1:
           case SEQLOC_MASKING_PLUS2:
           case SEQLOC_MASKING_PLUS3:
           case SEQLOC_MASKING_MINUS1:
           case SEQLOC_MASKING_MINUS2:
           case SEQLOC_MASKING_MINUS3:
              /* masking locations are collected on their own list, same payload pointers */
              ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
              break;
           default:
              break;
           }
        }	
        /* align_options and seqannot are initialised only inside this branch;
           the !tabular_output branch below always passes through it first */
        if (!tabular_output || seqannot_output) {
           align_options = 0;
           align_options += TXALIGN_MATRIX_VAL;
           align_options += TXALIGN_SHOW_QS;
           align_options += TXALIGN_COMPRESS;
           align_options += TXALIGN_END_NUM;
           if (StringICmp("blastx", program_name) == 0) {
              align_options += TXALIGN_BLASTX_SPECIAL;
           }
           
           if (html)
              align_options += TXALIGN_HTML;

           seqannot = SeqAnnotNew();
           seqannot->type = 2;
           AddAlignInfoToSeqAnnot(seqannot, align_type);
           seqannot->data = seqalign;
           aip = NULL;
           if (seqannot_output)
              aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w");
           
           if (aip && seqannot) {
              SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
              AsnIoReset(aip);
              aip = AsnIoClose(aip);
           }
        }
        if (!tabular_output) {    
           AcknowledgeBlastQuery(query_bsp, 70, outfp, believe_query, html);
           ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, txmatrix, mask_loc, FormatScoreFunc);
           
           seqannot = SeqAnnotFree(seqannot);
           if (txmatrix)
              txmatrix = TxMatrixDestruct(txmatrix);
           init_buff_ex(85);
        
           if (ka_params) {
              PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
           }
        
           if (ka_params_gap) {
              PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
           }
        
           PrintTildeSepLines(params_buffer, 70, outfp);
           free_buff();
        } else {
           PrintTabularOutputHeader(NULL, query_bsp, NULL, 
              program_name, 0, believe_query, outfp);

           BlastPrintTabulatedResults(seqalign, query_bsp, NULL, 
              1, program_name, !gapped_calculation,
              believe_query, 0, 0, outfp, FALSE);
           /* NOTE(review): when seqannot_output is also set, the seqannot made
              above is not freed and still points at this seqalign */
           SeqAlignSetFree(seqalign);
        }

        matrix = BLAST_MatrixDestruct(matrix);
        MemFree(ka_params);
        MemFree(ka_params_gap);
        MemFree(params_buffer);
    
        /* free each masking location set, then the list nodes themselves */
        mask_loc_start = mask_loc;
        while (mask_loc) {
           SeqLocSetFree(mask_loc->data.ptrvalue);
           mask_loc = mask_loc->next;
        }
        ValNodeFree(mask_loc_start);
        
        fake_bsp = BlastDeleteFakeBioseq(fake_bsp);

        other_returns = ValNodeFree(other_returns);
    options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask);
	options = BLASTOptionDelete(options);
	MemFree(program_name);
	FileClose(outfp);

        /* with an Entrez lookup the Bioseqs are freed directly; otherwise
           their SeqEntry wrappers are freed */
        if (entrez_lookup) {
           BioseqFree(query_bsp);
           BioseqFree(subject_bsp);
        } else {
           SeqEntryFree(sep);
           SeqEntryFree(sep1);
        }
	return 0;
}
Ejemplo n.º 10
0
/*
 * ConsignProc
 *
 * Button callback that builds and shows the "Consign" graph window for
 * the Bioseq stored in the button's XOS extra data.  Steps, in order:
 *   1. read the pattern list from xosp->pattern_file;
 *   2. collect ORFs (GetOrfList, ClearNonMetOrfs); every ORF at least
 *      xosp->minimumseed residues long is translated and tested against
 *      each pattern, and matching ORFs are appended to xosp->slps;
 *   3. copy every ORF with a non-zero choice to xosp->slpa and free the
 *      ORF list;
 *   4. obtain xosp->slpb from FindSimilarBiasOrfs and build a codon table
 *      from it;
 *   5. for each of the six frames, compute the bias scores, expand them to
 *      one value per residue, wrap them in a SeqGraph and add the graph
 *      to the picture;
 *   6. draw the ORF classes per frame and a colour legend;
 *   7. create the window and viewer and show them.
 *
 * Returns nothing; errors are reported with ErrPostEx/ErrShow.
 */
static void ConsignProc (ButtoN b)
{
  XOSPtr         xosp;
  XISPtr         xisp;

  ComPatPtr      cpp, cpph;
  ValNodePtr     orflist;
  SeqLocPtr      slp, slpn;
  Int4           start, stop;
  Uint1          strand;
  SeqPortPtr     spp;
  Uint1Ptr       aaseq;
  Int4           ntpos, aapos;
  Uint1          cdn[3];

  SeqAlignPtr    sap, sapn;

  FloatHi        probcut;
  Int4           clustmin, findmin;

  Int4           i, n, endpos, XLength, XScale, shift;
  Int4           iframe, frame, top, orftop[6];
  FloatHiPtr     score, expandscore;
  FloatHi        maxscore;
  Int4Ptr        tableGlobal;

  SeqGraphPtr  sgp, sgpn;
  WindoW       w;
  VieweR       v;
  GrouP        g;
  SegmenT      seg;
  GraphSentPtr gsp;
  Char         numberbuffer[32];

  if ((xosp = (XOSPtr) GetObjectExtra (b)) == NULL)
    return;

  if (xosp->bsp == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "No Bioseq");
    ErrShow ();
    return;
  }

  WatchCursor ();
  /* cpph keeps the head of the pattern list; cpp is the iteration cursor */
  cpph = cpp = ReadPrositePattern (xosp->pattern_file, TRUE, -1, NULL, NULL);
  if (cpph == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101,
               "read failed %s", xosp->pattern_file);
    ErrShow ();
    ArrowCursor ();
    return;
  }

  xosp->orflist =  GetOrfList (xosp->bsp, (Int2) (xosp->orfcut));
  xosp->orflist =  ClearNonMetOrfs (xosp->orflist);
  orflist = xosp->orflist;
  while (orflist != NULL)
  {
    slp = (SeqLocPtr) orflist->data.ptrvalue;
    /* entries with choice 0 are skipped */
    if (slp->choice == 0)
    {
      orflist = orflist->next;
      continue;
    }
    /* for a mix, work on the first location inside it */
    if (slp->choice == SEQLOC_MIX)
      slp = (SeqLocPtr) slp->data.ptrvalue;
    start = SeqLocStart (slp);
    stop = SeqLocStop (slp);
    strand = SeqLocStrand (slp);
    /* net effect: strand is always Seq_strand_both from here on */
    if (strand != Seq_strand_both)
      strand = Seq_strand_both;
    if (stop - start + 1 >= xosp->minimumseed)
    {
      spp = SeqPortNew (xosp->bsp, start, stop, strand, Seq_code_ncbi4na);
      /* one slot per codon plus one for the terminating 0 */
      aaseq = (Uint1Ptr) MemNew ((size_t)
                                 (sizeof (Uint1) * (((stop-start)/3)+2)));
      ntpos = start;
      aapos = 0;
      /* first codon only: translated with xosp->gcdi */
      while (ntpos < start+3)
      {
        cdn[0] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[1] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[2] = SeqPortGetResidue (spp);
        ntpos++;
        aaseq[aapos] = AAForCodon (cdn, xosp->gcdi);
        aapos++;
      }
      /* remaining codons: translated with xosp->gcd */
      while (ntpos <= stop)
      {
        cdn[0] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[1] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[2] = SeqPortGetResidue (spp);
        ntpos++;
        aaseq[aapos] = AAForCodon (cdn, xosp->gcd);
        aapos++;
      }
      SeqPortFree (spp);
      aaseq[aapos] = 0;
      /* try each pattern in turn and stop at the first one that matches */
      cpp = cpph;
      while (cpp != NULL)
      {
        sap = PatternMatch (aaseq, 0, Seq_strand_plus, SeqLocId (slp),
                            cpp, 0, Seq_strand_unknown, FALSE);
        if (sap != NULL)
          break;
        cpp = cpp->nextpattern;
      }
      MemFree (aaseq);
      /* a match makes this ORF a seed */
      if (sap != NULL)
      {
        SeqLocLink (&(xosp->slps), SeqLocDup (slp));
      }
      /* the alignments themselves are not kept */
      while (sap != NULL)
      {
        sapn = sap->next;
        SeqAlignFree (sap);
        sap = sapn;
      }
    }
    orflist = orflist->next;
  }
  ComPatFree (cpph);

  /* second pass: copy the ORFs to xosp->slpa, then free the originals */
  orflist = xosp->orflist;
  while (orflist != NULL)
  {
    slp = (SeqLocPtr) orflist->data.ptrvalue;
    if (slp->choice > 0)
      SeqLocLink (&(xosp->slpa), SeqLocDup (slp));
    while (slp != NULL)
    {
      slpn = slp->next;
      SeqLocFree (slp);
      slp = slpn;
    }
    orflist->data.ptrvalue = NULL;
    orflist = orflist->next;
  }
  xosp->orflist = ValNodeFree (xosp->orflist);

  probcut = xosp->probcut;
  clustmin = xosp->clustmin;
  findmin = xosp->findmin;

  xosp->slpb = FindSimilarBiasOrfs (xosp->sep, probcut, clustmin, findmin,
                                    xosp->slps, xosp->slpa);

  tableGlobal = CodonTableFromSeqLoc (xosp->bsp, xosp->slpb);
  seg = NULL;
  top = 0;
  xisp = (XISPtr) MemNew (sizeof (XIS));
  /* iframe counts 0..5; frame cycles 0,1,2 and restarts for the minus strand */
  frame = 0;
  for (iframe = 0; iframe < 6; iframe++)
  {
    /* same expression as the score-array length computed in BiasScoreBioseq */
    endpos = (xosp->bsp->length + 3 - frame - xosp->window) / 3;
    if (iframe < 3)
      score = BiasScoreBioseq (xosp->bsp, tableGlobal, xosp->window,
                               frame, Seq_strand_plus);
    else
      score = BiasScoreBioseq (xosp->bsp, tableGlobal, xosp->window,
                               frame, Seq_strand_minus);
    maxscore = 0.0;
    for (i = 0; i < endpos; i++)
      if (score[i] > maxscore)
        maxscore = score[i];
    /* expand to one value per residue: the first window/2 positions and
       everything past the last score are filled with maxscore */
    expandscore = (FloatHiPtr) MemNew (sizeof (FloatHi) * xosp->bsp->length);
    for (i = 0; i < xosp->window/2; i++)
      expandscore[i] = maxscore;
    n = 0;
    while (i < xosp->bsp->length)
    {
      if (n < endpos)
        expandscore[i] = score[n];
      else
        expandscore[i] = maxscore;
      i++;
      /* move to the next score whenever i reaches a multiple of 3 */
      if (i%3 == 0)
        n++;
    }
    MemFree (score);
    score = expandscore;
    /* append the new graph to the end of xisp->sgp */
    sgp = SeqGraphNew ();
    if (xisp->sgp == NULL)
    {
      xisp->sgp = sgp;
    }
    else
    {
      sgpn = xisp->sgp;
      while (sgpn->next != NULL)
        sgpn = sgpn->next;
      sgpn->next = sgp;
    }
    /* XScale = ceil (length / min (length, 1200)) */
    XLength = xosp->bsp->length;
    if (XLength > 1200)
      XLength = 1200;
    XScale = xosp->bsp->length / XLength;
    if (xosp->bsp->length % XLength != 0)
      XScale++;
    sgp->loc = SeqLocIntNew (0, xosp->bsp->length-1, xosp->bsp->strand,
                             xosp->bsp->id);
    sgp->flags[2] = 1;
    sgp->numval = xosp->bsp->length;
    /* the graph takes over the expanded score array */
    sgp->values = (Pointer) score;
    sgp->max.realvalue = maxscore;
    sgp->min.realvalue = 0.0;
    sgp->flags[1] = 1;
    sgp->a = 4.0;
    sgp->b = 0.0;
    if (seg == NULL)
      seg = CreatePicture ();
    if ((gsp = AddGraphSentinelToPicture (sgp, xosp->bsp, seg, 0,
                                          top, 0, NULL)) != NULL)
    {
      /* axis labels: "1" at the left edge, the sequence length at the right */
      sprintf (numberbuffer, "%ld", 1L);
      AddLabel (seg, gsp->box.left, gsp->bottom-20,
                numberbuffer, SMALL_TEXT, 0, MIDDLE_CENTER, 0);
      sprintf (numberbuffer, "%ld", (long) xosp->bsp->length);
      AddLabel (seg, gsp->box.left+xosp->bsp->length, gsp->bottom-20,
                numberbuffer, SMALL_TEXT, 0, MIDDLE_CENTER, 0);
    }
    /* remember where this frame's ORF bars go, then move down for the next graph */
    shift = (Int4) (maxscore*sgp->a);
    orftop[iframe] = top - shift - 38;
    top -= (shift+56);
    frame++;
    if (frame == 3)
    {
      /* extra gap between the plus-strand and minus-strand groups */
      top -= 24;
      frame = 0;
    }
  }
  /* draw each ORF class under its frame's graph; shift stacks the classes */
  frame = 0;
  for (iframe = 0; iframe < 6; iframe++)
  {
    if (iframe < 3)
      strand = Seq_strand_plus;
    else
      strand = Seq_strand_minus;
    shift = 0;
    if (xosp->slpa != NULL)
    {
      AddOrfClass (xosp->slpa, seg, orftop, iframe, frame,
                   shift, strand, YELLOW_COLOR, 5);
      shift += 4;
    }
    if (xosp->slpk != NULL)
    {
      AddOrfClass (xosp->slpk, seg, orftop, iframe, frame,
                   shift, strand, GREEN_COLOR, 5);
      shift += 4;
    }
    if (xosp->slpb != NULL)
    {
      AddOrfClass (xosp->slpb, seg, orftop, iframe, frame,
                   shift, strand, BLUE_COLOR, 5);
      shift += 4;
    }
    if (xosp->slps != NULL)
    {
      AddOrfClass (xosp->slps, seg, orftop, iframe, frame,
                   shift, strand, RED_COLOR, 5);
    }
    frame++;
    if (frame == 3)
      frame = 0;
  }
  MemFree (tableGlobal);

  /* legend: one coloured sample line plus a label per non-empty ORF class */
  start = 20;
  stop = 20 + (50*XScale);
  top = orftop[5] - 40;
  if (xosp->slpa != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  YELLOW_COLOR, SOLID_LINE, SOLID_SHADING, 5,
                  0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH,
                  0);
    AddLabel (seg, stop+(20*XScale), top,
              "All Met-init'd ORFs equal to or greater than 50 codons",
              SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slpk != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  GREEN_COLOR, SOLID_LINE, SOLID_SHADING, 5,
                  0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH,
                  0);
    AddLabel (seg, stop+(20*XScale), top, "Annotated (reported) ORFs",
              SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slpb != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLUE_COLOR, SOLID_LINE, SOLID_SHADING, 5,
                  0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH,
                  0);
    AddLabel (seg, stop+(20*XScale), top, "Similar codon usage bias ORFs to seed ORFs",
              SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slps != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  RED_COLOR, SOLID_LINE, SOLID_SHADING, 5,
                  0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH,
                  0);
    AddLabel (seg, stop+(20*XScale), top, "Pattern match seed ORFs",
              SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }

  xisp->picture = seg;

  /* the window receives xisp (graphs and picture) as its extra data,
     registered together with CleanUpGraphWindow */
  w = FixedWindow (10, 10, 640, 720, "Consign", CloseGraphWindowProc);
  SetObjectExtra (w, xisp, CleanUpGraphWindow);
  g = HiddenGroup (w, -1, 0, NULL);
  v = CreateViewer (g, 560, 640, TRUE, TRUE);
  AttachPicture (v, seg, INT4_MIN, INT4_MAX, UPPER_LEFT, XScale, 1, NULL);
  PushButton (g, "Close", CloseGraphWindowButton);
  RealizeWindow (w);
  ArrowCursor ();
  Show (w);

  return;
}
Ejemplo n.º 11
0
/*
 * GetThisBioseq
 *
 * Loads a new SeqEntry into xosp, either from Entrez (when xosp->gi > 0)
 * or from the FASTA file xosp->filename, then:
 *   - runs GatherSeqEntry with the GetBioseq callback on it;
 *   - looks up the genetic code from the Bioseq's (or the enclosing
 *     set's) source descriptor, defaulting to code 1, and stores the
 *     translation tables in xosp->gcd and xosp->gcdi;
 *   - for Entrez entries, frees xosp->slpk and re-gathers with the
 *     GatherKnownOrfs callback;
 *   - clears the loaded state again when no Bioseq was found or the
 *     Bioseq is not nucleic acid.
 *
 * Errors are reported with ErrPostEx/ErrShow; nothing is returned.
 *
 * NOTE(review): xosp->sep is freed up front, yet xosp->bsp is left
 * untouched on the early returns and in the "No SeqEntry" branch.  If bsp
 * points into the freed entry it is stale there -- confirm against
 * CleanUpXOS and the GetBioseq callback.
 */
static void GetThisBioseq (XOSPtr xosp)
{
  GatherScopePtr  gsp;
  CharPtr         filename;
  Int4            gi;
  FILE            *fiop;
  Boolean         flagHaveNet;
  SeqEntryPtr     sep;
  ValNodePtr      vnp;
  Int2            gcode;
  GeneticCodePtr  gcp;
  SeqLocPtr       slp, slpn;
  SeqIdPtr        id;

  gsp = xosp->gsp;

  fiop = NULL;
  filename = xosp->filename;
  gi = xosp->gi;
  xosp->sep = SeqEntryFree (xosp->sep);

  if (gi > 0)
  {
    if (!EntrezInit ("cnsgnv", FALSE, &flagHaveNet))
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
                 "Entrez init failed");
      ErrShow ();
      return;
    }
  }

  /* source of the entry: Entrez by gi, else the FASTA file, else nothing */
  if (gi > 0)
  {
    sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_NUC_PROT);
  }
  else if (filename != NULL)
  {
    if ((fiop = FileOpen (filename, "r")) == NULL)
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                 "Failed to open FastA file");
      ErrShow ();
      return;
    }
    sep = FastaToSeqEntry (fiop, TRUE);
    AddBioSourceToSeqEntry (sep);
  }
  else
  {
    sep = NULL;
  }

  if (sep == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
               "No SeqEntry");
    ErrShow ();
  }
  else
  {
    CleanUpXOS (xosp);
    xosp->sep = sep;
    xosp->gi = gi;
    xosp->filename = StringSave (filename);
    GatherSeqEntry (sep, (Pointer) xosp, GetBioseq, (Pointer) gsp);
  }

  if (xosp->bsp != NULL)
  {
    /* genetic code: first from the Bioseq's own source descriptor */
    vnp = xosp->bsp->descr;
    gcode = 0;
    while (vnp != NULL)
    {
      if (vnp->choice == Seq_descr_source)
      {
        gcode = BioSourceToGeneticCode ((BioSourcePtr) vnp->data.ptrvalue);
        break;
      }
      vnp = vnp->next;
    }
    /* then from the enclosing set's descriptors (choice 2 is handled as a
       BioseqSet).  sep is NULL after the "No SeqEntry" branch, so it must
       be checked before it is dereferenced. */
    if (gcode == 0 && sep != NULL && sep->choice == 2)
    {
      vnp = ((BioseqSetPtr) (sep->data.ptrvalue))->descr;
      while (vnp != NULL)
      {
        if (vnp->choice == Seq_descr_source)
        {
          gcode = BioSourceToGeneticCode ((BioSourcePtr) vnp->data.ptrvalue);
          break;
        }
        vnp = vnp->next;
      }
    }
    if (gcode == 0)
      gcode = 1; /* standard */
    gcp = GeneticCodeFind (gcode, NULL);
    if (gcp != NULL)
    {
      xosp->gcd = xosp->gcdi = NULL;
      vnp = (ValNodePtr) gcp->data.ptrvalue;
      while (vnp != NULL)
      {
        if (vnp->choice == 6)       /* sncbieaa */
          xosp->gcdi = (CharPtr) vnp->data.ptrvalue;
        else if (vnp->choice == 3)  /* ncbieaa */
          xosp->gcd = (CharPtr) vnp->data.ptrvalue;
        vnp = vnp->next;
      }
    }
    /* without a start-codon table, fall back to the ordinary one */
    if (xosp->gcdi == NULL)
      xosp->gcdi = xosp->gcd;
    if (xosp->gcdi == NULL)
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 105,
                 "Could not get genetic code for translation");
      ErrShow ();
      xosp->bsp = NULL;
    }
  }

  if (gi > 0)
  {
    /* drop the previous known-ORF list; slp is NULL once the loop ends */
    slp = xosp->slpk;
    while (slp != NULL)
    {
      slpn = slp->next;
      id = SeqLocId (slp);
      if (id != NULL)
        id->next = SeqIdSetFree (id->next);
      SeqLocFree (slp);
      slp = slpn;
    }
    xosp->slpk = slp;
    GatherSeqEntry (sep, (Pointer) xosp, GatherKnownOrfs, (Pointer) gsp);
  }

  if (gi > 0)
    EntrezFini ();
  else
    FileClose (fiop);

  if (xosp->bsp != NULL)
  {
    if (!ISA_na (xosp->bsp->mol))
    {
      xosp->sep = SeqEntryFree (xosp->sep);
      xosp->bsp = NULL;
      xosp->filename = (CharPtr) MemFree (xosp->filename);
      xosp->gi = 0;
      ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "Not nucleic acid Bioseq");
      ErrShow ();
    }
  }
  else
  {
    xosp->sep = SeqEntryFree (xosp->sep);
    xosp->filename = (CharPtr) MemFree (xosp->filename);
    xosp->gi = 0;
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "No Bioseq");
    ErrShow ();
  }
  return;
}
Ejemplo n.º 12
0
static Int2 Main_old (void)
 
{
   AsnIoPtr aip, xml_aip = NULL;
   BioseqPtr query_bsp, PNTR query_bsp_array;
   BioSourcePtr source;
   BLAST_MatrixPtr matrix;
   BLAST_OptionsBlkPtr options;
   BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
   BlastPruneSapStructPtr prune;
   Boolean db_is_na, query_is_na, show_gi, believe_query=FALSE;
   Boolean html=FALSE;
   CharPtr params_buffer=NULL;
   Int4 number_of_descriptions, number_of_alignments;
   SeqAlignPtr  seqalign, PNTR seqalign_array;
   SeqAnnotPtr seqannot;
   SeqEntryPtr PNTR sepp;
   TxDfDbInfoPtr dbinfo=NULL, dbinfo_head;
   Uint1 align_type, align_view, out_type;
   Uint4 align_options, print_options;
   ValNodePtr mask_loc, mask_loc_start, next_mask_loc;
   ValNodePtr vnp, other_returns, error_returns;
   
   CharPtr blast_program, blast_database, blast_inputfile, blast_outputfile;
   FILE *infp, *outfp, *mqfp=NULL;
   Int4 index, num_bsps, total_length, total_processed = 0;
   Int2 ctr = 1;
   Char prefix[2];
   SeqLocPtr last_mask, mask_slp;
   Boolean done, hits_found;
   Boolean lcase_masking;
   MBXmlPtr mbxp = NULL;
   Boolean traditional_formatting;

    blast_program = "blastn";
    blast_database = myargs [ARG_DB].strvalue;
    blast_inputfile = myargs [ARG_QUERY].strvalue;
    blast_outputfile = myargs [ARG_OUT].strvalue;
    if (myargs[ARG_HTML].intvalue)
        html = TRUE;

    if ((infp = FileOpen(blast_inputfile, "r")) == NULL) {
       ErrPostEx(SEV_FATAL, 1, 0, "mgblast: Unable to open input file %s\n", blast_inputfile);
       return (1);
    }

    align_view = (Int1) myargs[ARG_FORMAT].intvalue;
    /* Geo mod: 
      -- replaced myargs[ARG_OUTTYPE].intvalue with out_type from now on
    */
    out_type=(Int1) myargs[ARG_OUTTYPE].intvalue;
    if (out_type==MGBLAST_FLTHITS || out_type==MGBLAST_HITGAPS) {
      align_view = 12 + (out_type-MGBLAST_FLTHITS ); 
      out_type=MBLAST_ALIGNMENTS;
      //Attention: 12 MUST be the -m mgblast tab option for MGBLAST_FLTHITS format
      // and MGBLAST_HITGAPS = MGBLAST_FLTHITS+1
       if (align_view>12) { // this is MGBLAST_HITGAPS output
            gap_Info=TRUE;
            if (dbgaps_buf==NULL)
                  dbgaps_buf=(CharPtr) Malloc(dbgaps_bufsize + 1);
            if (qgaps_buf==NULL) 
                qgaps_buf=(CharPtr) Malloc(qgaps_bufsize + 1);
            }
      }

    outfp = NULL;

    traditional_formatting = 
        (out_type == MBLAST_ALIGNMENTS ||
         out_type == MBLAST_DELAYED_TRACEBACK);

    if ((!traditional_formatting ||
            (align_view != 7 && align_view != 10 && align_view != 11)) && 
            blast_outputfile != NULL) {
       if ((outfp = FileOpen(blast_outputfile, "w")) == NULL) {
          ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
          return (1);
       }
    }

    //align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na);
    align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na);
    /*
    if (!traditional_formatting)
        believe_query = TRUE;
    else
        believe_query = (Boolean) myargs[ARG_BELIEVEQUERY].intvalue;
    */
    //Geo mod: 
    believe_query=FALSE;
    //If ASN.1 output is requested and believe_query is not set to TRUE,
    //   exit with an error.    
    if (!believe_query && (myargs[ARG_ASNOUT].strvalue ||
                           align_view == 10 || align_view == 11)) {
        ErrPostEx(SEV_FATAL, 1, 0, 
                  "-J option must be TRUE to produce ASN.1 output; before "
                  "changing -J to TRUE please also ensure that all query "
                  "sequence identifiers are unique");
        return -1;
    }
        
    options = BLASTOptionNewEx(blast_program, TRUE, TRUE);
    if (options == NULL)
        return 3;

    options->do_sum_stats = FALSE;
    options->is_neighboring = FALSE;
        options->expect_value  = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;
    number_of_descriptions = myargs[ARG_DESCRIPTIONS].intvalue;    
    number_of_alignments = myargs[ARG_ALIGNMENTS].intvalue;    
    options->hitlist_size = MAX(number_of_descriptions, number_of_alignments);

    if (myargs[ARG_XDROP].intvalue != 0)
           options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
    if (myargs[ARG_XDROP_UNGAPPED].intvalue != 0)
           options->dropoff_2nd_pass = myargs[ARG_XDROP_UNGAPPED].intvalue;
        if (myargs[ARG_XDROP_FINAL].intvalue != 0)
           options->gap_x_dropoff_final = myargs[ARG_XDROP_FINAL].intvalue;

    if (StringICmp(myargs[ARG_FILTER].strvalue, "T") == 0)
       options->filter_string = StringSave("D");
    else
       options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
    
    show_gi = (Boolean) myargs[ARG_SHOWGIS].intvalue;
    options->penalty = myargs[ARG_MISMATCH].intvalue;
    options->reward = myargs[ARG_MATCH].intvalue;
        if (myargs[ARG_GAPOPEN].intvalue >= 0)
        options->gap_open = myargs[ARG_GAPOPEN].intvalue;
        if (myargs[ARG_GAPEXT].intvalue >= 0)
        options->gap_extend = myargs[ARG_GAPEXT].intvalue;

    if (options->gap_open == 0 && options->reward % 2 == 0 && 
        options->gap_extend == options->reward / 2 - options->penalty)
       /* This is the default value */
    options->gap_extend = 0;

    options->genetic_code = 1;
    options->db_genetic_code = 1; /* Default; it's not needed here anyway */
    options->number_of_cpus = myargs[ARG_THREADS].intvalue;
    if (myargs[ARG_WORDSIZE].intvalue != 0)
           options->wordsize = myargs[ARG_WORDSIZE].intvalue;
        if (myargs[ARG_MINSCORE].intvalue == 0)
           options->cutoff_s2 = options->wordsize*options->reward;
        else 
           options->cutoff_s2 = myargs[ARG_MINSCORE].intvalue;

        options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;
        options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;

    options->perform_culling = FALSE;
    /* Kludge */
    options->block_width  = myargs[ARG_MAXPOS].intvalue;

    options->strand_option = myargs[ARG_STRAND].intvalue;
        options->window_size = myargs[ARG_WINDOW].intvalue;
#ifdef DO_NOT_SUPPRESS_BLAST_OP        
        options->mb_template_length = myargs[ARG_TEMPL_LEN].intvalue;
        if (myargs[ARG_TEMPL_LEN].intvalue != 0)
            options->mb_one_base_step = (Boolean) myargs[ARG_EVERYBASE].intvalue;
        options->mb_disc_type = myargs[ARG_TEMPL_TYPE].intvalue;
#endif
        lcase_masking = (Boolean) myargs[ARG_LCASE].intvalue;
        /* Allow dynamic programming gapped extension only with affine 
           gap scores */
        if (options->gap_open != 0 || options->gap_extend != 0)
           options->mb_use_dyn_prog = (Boolean) myargs[ARG_DYNAMIC].intvalue;

        print_options = 0;
        align_options = 0;
        align_options += TXALIGN_COMPRESS;
        align_options += TXALIGN_END_NUM;
        if (show_gi) {
       align_options += TXALIGN_SHOW_GI;
       print_options += TXALIGN_SHOW_GI;
        }
            
        if (align_view) {
       align_options += TXALIGN_MASTER;
       if (align_view == 1 || align_view == 3)
          align_options += TXALIGN_MISMATCH;
       if (align_view == 3 || align_view == 4 || align_view == 6)
          align_options += TXALIGN_FLAT_INS;
       if (align_view == 5 || align_view == 6)
          align_options += TXALIGN_BLUNT_END;
        } else {
       align_options += TXALIGN_MATRIX_VAL;
       align_options += TXALIGN_SHOW_QS;
    }

    if (html) {
       align_options += TXALIGN_HTML;
       print_options += TXALIGN_HTML;
    }

    if (myargs[ARG_GILIST].strvalue)
       options->gifile = StringSave(myargs[ARG_GILIST].strvalue);
   
    if (out_type == MBLAST_ENDPOINTS)
      options->no_traceback = 1;
   else if (out_type == MBLAST_DELAYED_TRACEBACK)
       options->no_traceback = 2;
    else
       options->no_traceback = 0;

    options->megablast_full_deflines = (Boolean) myargs[ARG_FULLID].intvalue;
    options->perc_identity = (FloatLo) myargs[ARG_PERC_IDENT].floatvalue;
    options->hsp_num_max = myargs[ARG_MAXHSP].intvalue;

    if (!believe_query)
           options->megablast_full_deflines = TRUE;
        /*if (options->megablast_full_deflines)
          believe_query = FALSE;*/

    query_bsp_array = (BioseqPtr PNTR) MemNew((MAX_NUM_QUERIES+1)*sizeof(BioseqPtr));
    sepp = (SeqEntryPtr PNTR) MemNew(MAX_NUM_QUERIES*sizeof(SeqEntryPtr));

    StrCpy(prefix, "");

    global_fp = outfp;
        options->output = outfp;

    if (traditional_formatting) {
       if (align_view < 7) {
              if (html) {
                 fprintf(outfp, "<HTML>\n<TITLE>MEGABLAST Search Results</TITLE>\n");
                 fprintf(outfp, "<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" "
                         "VLINK=\"#660099\" ALINK=\"#660099\">\n");
                 fprintf(outfp, "<PRE>\n");
              }
              init_buff_ex(90);
              BlastPrintVersionInfo("mgblast", html, outfp);
              fprintf(outfp, "\n");
              MegaBlastPrintReference(html, 90, outfp);
              fprintf(outfp, "\n");
              
              if(!PrintDbInformation(blast_database, !db_is_na, 70, outfp, html))
                 return 1;
              
              free_buff();
    
#ifdef OS_UNIX
              fprintf(global_fp, "%s", "Searching");
#endif
           }
    }
    
        aip = NULL;
        if (myargs[ARG_ASNOUT].strvalue != NULL) {
           if ((aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w")) == NULL) {
              ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", myargs[ARG_ASNOUT].strvalue);
              return 1;
           }
        }
        else if (align_view == 10 || align_view == 11)
        {
            const char* mode = (align_view == 10) ? "w" : "wb";
            if ((aip = AsnIoOpen (blast_outputfile, (char*) mode)) == NULL) {
                    ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
                    return 1;
            }
        }


        if (align_view == 7) {
           xml_aip = AsnIoOpen(blast_outputfile, "wx");
        }

        if (myargs[ARG_QUERYLOC].strvalue) {       
            Int4 start, end;
            Megablast_GetLoc(myargs[ARG_QUERYLOC].strvalue, &start, &end);
            options->required_start = start - 1;
            options->required_end = end -1;
        }

    done = FALSE;
    while (!done) {
       num_bsps = 0;
       total_length = 0;
       done = TRUE;
       SeqMgrHoldIndexing(TRUE);
       mask_slp = last_mask = NULL;
   
       while ((sepp[num_bsps]=FastaToSeqEntryForDb(infp, query_is_na, NULL,
                               believe_query, prefix, &ctr, 
                               &mask_slp)) != NULL) {
              if (!lcase_masking) /* Lower case ignored */
                 mask_slp = SeqLocFree(mask_slp);
         if (mask_slp) {
           if (!last_mask)
              options->query_lcase_mask = last_mask = mask_slp;
           else {
              last_mask->next = mask_slp;
              last_mask = last_mask->next;
              }
           mask_slp = NULL;
           }
          query_bsp = NULL;
         SeqEntryExplore(sepp[num_bsps], &query_bsp, FindNuc);
         //debug:
         /*
         char query_buffer[255];
         SeqIdWrite(query_bsp->id, query_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
         fprintf(stderr, "===> query_buf=%s\n", query_buffer);
         */
         if (query_bsp == NULL) {
           ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
           return 2;
          }
          
          source = BioSourceNew();
          source->org = OrgRefNew();
          source->org->orgname = OrgNameNew();
          source->org->orgname->gcode = options->genetic_code;
          ValNodeAddPointer(&(query_bsp->descr), Seq_descr_source, source);
          
          query_bsp_array[num_bsps++] = query_bsp;
          
          total_length += query_bsp->length;
          if (total_length > myargs[ARG_MAXQUERY].intvalue || 
          num_bsps >= MAX_NUM_QUERIES) {
         done = FALSE;
         break;
          }
       }

           if (num_bsps == 0)
               break;

       SeqMgrHoldIndexing(FALSE);
       other_returns = NULL;
       error_returns = NULL;
       
       if (out_type==MBLAST_ENDPOINTS) 
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                             blast_database, options,
                             &other_returns, &error_returns,
                             dummy_callback, NULL, NULL, 0, 
                             MegaBlastPrintEndpoints);
       else if (out_type==MBLAST_SEGMENTS) 
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                             blast_database, options,
                             &other_returns, &error_returns,
                             dummy_callback, NULL, NULL, 0,
                             MegaBlastPrintSegments);
       else if (out_type==MBLAST_ALIGN_INFO) {
              /* -- Geo mod: do not print header
              PrintTabularOutputHeader(blast_database, 
                                       (num_bsps==1) ? query_bsp_array[0] : NULL,
                                       NULL, "megablast", 0, believe_query,
                                       global_fp);*/
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                             blast_database, options,
                             &other_returns, &error_returns,
                             dummy_callback, NULL, NULL, 0,
                                MegaBlastPrintAlignInfo);
       } else if (out_type==MBLAST_ALIGNMENTS) {
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                  blast_database, options, &other_returns, 
                                  &error_returns, align_view < 7 ? tick_callback : NULL,
                                  NULL, NULL, 0, NULL);
          }
       
#ifdef OS_UNIX
       fflush(global_fp);
#endif

       if (error_returns) {
             BlastErrorPrint(error_returns);
              for (vnp = error_returns; vnp; vnp = vnp->next) {
                 BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
              }
              ValNodeFree(error_returns);
           }
              
              
       if (traditional_formatting) {
          dbinfo = NULL;
          ka_params = NULL;
          ka_params_gap = NULL;
          params_buffer = NULL;
          mask_loc = NULL;
          matrix = NULL;
          for (vnp=other_returns; vnp; vnp = vnp->next) {
           switch (vnp->choice) {
           case TXDBINFO:
              dbinfo = vnp->data.ptrvalue;
              break;
           case TXKABLK_NOGAP:
              ka_params = vnp->data.ptrvalue;
              break;
           case TXKABLK_GAP:
              ka_params_gap = vnp->data.ptrvalue;
              break;
           case TXPARAMETERS:
              params_buffer = vnp->data.ptrvalue;
              break;
           case TXMATRIX:
              matrix = vnp->data.ptrvalue;
              break;
           case SEQLOC_MASKING_NOTSET:
           case SEQLOC_MASKING_PLUS1:
           case SEQLOC_MASKING_PLUS2:
           case SEQLOC_MASKING_PLUS3:
           case SEQLOC_MASKING_MINUS1:
           case SEQLOC_MASKING_MINUS2:
           case SEQLOC_MASKING_MINUS3:
              ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
              break;
           default:
              break;
           }
          }    
          
#ifdef OS_UNIX
        if(align_view < 7) {
                 fprintf(global_fp, "%s\n", " done");
              }
#endif
          
        if (myargs[ARG_MASKEDQUERY].strvalue) {
                 if ((mqfp = FileOpen(myargs[ARG_MASKEDQUERY].strvalue, "w")) == NULL)
                    ErrPostEx(SEV_WARNING, 1, 0, "Unable to open file %s for masked query\n",
                              myargs[ARG_MASKEDQUERY].strvalue);
              }

        hits_found = FALSE;

        mask_loc_start = next_mask_loc = mask_loc;
        mask_loc = NULL;

        if (align_view == 7) {
           mbxp = PSIXmlInit(xml_aip, "megablast", blast_database, 
                             options, query_bsp_array[0], 0);
           }

        if (seqalign_array) { //results returned back for processing
             ReadDBBioseqFetchEnable ("megablast", blast_database, db_is_na, TRUE);
             for (index=0; index<num_bsps; index++) {
                    seqalign = seqalign_array[index];
                    if (next_mask_loc && 
                        SeqIdComp(SeqLocId((SeqLocPtr)next_mask_loc->data.ptrvalue), 
                                  query_bsp_array[index]->id) == SIC_YES) {
                       mask_loc = (SeqLocPtr) 
                       MemDup(next_mask_loc, sizeof(SeqLoc));
                       next_mask_loc = next_mask_loc->next;
                       mask_loc->next = NULL;
                    }
                    if (mqfp) {
                       /* convert mask locations from all sources into
                          a single seqloc */
                       mask_slp = NULL;
                       if (mask_loc) 
                          mask_slp = blastMergeFilterLocs(mask_slp, 
                              (SeqLocPtr)mask_loc->data.ptrvalue,
                              FALSE, 0, 0);
                       PrintMaskedSequence(query_bsp_array[index], mask_slp,
                                           mqfp, 50, lcase_masking);
                       SeqLocSetFree(mask_slp);
                       }
                    if (seqalign==NULL) {
                       mask_loc = MemFree(mask_loc);
                       continue;
                    }
                    hits_found = TRUE;
                    if (align_view < 7) {
                       init_buff_ex(70);
                       AcknowledgeBlastQuery(query_bsp_array[index], 70, outfp, 
                                             believe_query, html);
                       free_buff();
                       }
                    if (align_view == 8 || align_view == 9) {
                       if (align_view == 9)
                          PrintTabularOutputHeader(blast_database, 
                             query_bsp_array[index], NULL, blast_program, 0,
                             believe_query, global_fp);
                       /* debug:
                       char qbuf[512];
                       strcpy(qbuf, BioseqGetTitle(query_bsp_array[index]));
                       fprintf(stderr, "---> Here: query title=%s\n", qbuf);
                       */
                       BlastPrintTabulatedResults(seqalign, 
                           query_bsp_array[index], NULL, number_of_alignments,
                            blast_program, !options->gapped_calculation, 
                            believe_query, 0, 0, 
                            global_fp, (align_view == 9));
                            

                       ObjMgrFreeCache(0);

                       SeqAlignSetFree(seqalign);
                       mask_loc = MemFree(mask_loc);
                       continue;
                    } 
                       //Geo mod:   
                   else if (align_view>=12)  {
                        MGBlastPrintTab(seqalign, 
                            query_bsp_array[index], number_of_alignments,
                            !options->gapped_calculation, 
                            global_fp);
                        ObjMgrFreeCache(0);

                        SeqAlignSetFree(seqalign);
                        mask_loc = MemFree(mask_loc);
                        continue;
                        }
                    else if(align_view == 7) {
                       IterationPtr iterp;

                       iterp = BXMLBuildOneQueryIteration(seqalign, 
                                  NULL, FALSE, 
                                  !options->gapped_calculation, index, 
                                  NULL, query_bsp_array[index], mask_loc);
                       IterationAsnWrite(iterp, mbxp->aip, mbxp->atp);
                       AsnIoFlush(mbxp->aip);
                       IterationFree(iterp);
                       SeqAlignSetFree(seqalign);
                       mask_loc = MemFree(mask_loc);
                       continue;
                    }
                    seqannot = SeqAnnotNew();
                    seqannot->type = 2;
                    AddAlignInfoToSeqAnnot(seqannot, align_type);
                    seqannot->data = seqalign;
                    if (aip) {
                       SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
                       AsnIoReset(aip);
                    }
                    if (outfp) { /* Uncacheing causes problems with ordinal nos. vs. gi's. */
                       prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_descriptions, NULL);
                       ObjMgrSetHold();
                       init_buff_ex(85);
                       PrintDefLinesFromSeqAlign(prune->sap, 80,
                                                 outfp, print_options, FIRST_PASS, NULL);
                       free_buff();
                       
                       prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_alignments, prune);
                       seqannot->data = prune->sap;
                       if (align_view != 0)
                          ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL,
                                                 NULL, align_options, NULL, 
                                                 mask_loc, NULL);
                       else
                          ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, NULL, mask_loc, FormatScoreFunc);
                       seqannot->data = seqalign;
                       prune = BlastPruneSapStructDestruct(prune);
                       ObjMgrClearHold();
                       ObjMgrFreeCache(0);
                    }
                    seqannot = SeqAnnotFree(seqannot);
                    mask_loc = MemFree(mask_loc);
                 } /* End loop on seqaligns for different queries */
                 ReadDBBioseqFetchDisable();
              } 

              if (mbxp != NULL) {
                 MBXmlClose(mbxp, other_returns, !options->gapped_calculation);
              }

              if (mqfp)
                 FileClose(mqfp);

              if (!hits_found && align_view < 7)
                 fprintf(outfp, "\n\n ***** No hits found ******\n\n");

              matrix = BLAST_MatrixDestruct(matrix);
          
              if(html) 
                 fprintf(outfp, "<PRE>\n");
              init_buff_ex(85);
              dbinfo_head = dbinfo;
              if(align_view < 7) {
                 while (dbinfo) {
                    PrintDbReport(dbinfo, 70, outfp);
                    dbinfo = dbinfo->next;
                 }
              }
              dbinfo_head = TxDfDbInfoDestruct(dbinfo_head);
              
              if (ka_params) {
                 if(align_view < 7)
                    PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
                 MemFree(ka_params);
              }
              if (ka_params_gap) {
                 if(align_view < 7)
                    PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
                 MemFree(ka_params_gap);
              }
              if(align_view < 7)
                 PrintTildeSepLines(params_buffer, 70, outfp);
              MemFree(params_buffer);
              free_buff();
              mask_loc = mask_loc_start;
              while (mask_loc) {
                 SeqLocSetFree(mask_loc->data.ptrvalue);
                 mask_loc = mask_loc->next;
              }
              ValNodeFree(mask_loc_start);
       } else { //not traditional formatting
          /* Just destruct all other_returns parts */
          for (vnp=other_returns; vnp; vnp = vnp->next) {
         switch (vnp->choice) {
         case TXDBINFO:
            TxDfDbInfoDestruct(vnp->data.ptrvalue);
            break;
         case TXKABLK_NOGAP:
         case TXKABLK_GAP:
         case TXPARAMETERS:
            MemFree(vnp->data.ptrvalue);
            break;
         case TXMATRIX:
            BLAST_MatrixDestruct(vnp->data.ptrvalue);
            break;
         case SEQLOC_MASKING_NOTSET:
         case SEQLOC_MASKING_PLUS1:
         case SEQLOC_MASKING_PLUS2:
         case SEQLOC_MASKING_PLUS3:
         case SEQLOC_MASKING_MINUS1:
         case SEQLOC_MASKING_MINUS2:
         case SEQLOC_MASKING_MINUS3:
                    mask_loc = vnp->data.ptrvalue;
                    SeqLocSetFree(mask_loc);
         default:
            break;
         }
          }
       }
       other_returns = ValNodeFree(other_returns);
       MemFree(seqalign_array);
           options->query_lcase_mask = 
              SeqLocSetFree(options->query_lcase_mask);

       /* Freeing SeqEntries can be very expensive, do this only if 
          this is not the last iteration of search */
       if (!done) { 
          for (index=0; index<num_bsps; index++) {
         sepp[index] = SeqEntryFree(sepp[index]);
         query_bsp_array[index] = NULL;
          }       
           }
           total_processed += num_bsps;
    } /* End of loop on complete searches */
        
        aip = AsnIoClose(aip);

        /*if (align_view == 7)
          xml_aip = AsnIoClose(xml_aip);*/

        if (align_view < 7 && html) 
           fprintf(outfp, "</PRE>\n</BODY>\n</HTML>\n");
        if (align_view < 7 && myargs[ARG_LOGINFO].intvalue)
           fprintf(outfp, "Mega BLAST run finished, processed %d queries\n",
                   total_processed);
    MemFree(query_bsp_array);
    MemFree(sepp);
    MemFree(qgaps_buf);
    MemFree(dbgaps_buf);
    options = BLASTOptionDelete(options);
    FileClose(infp);
        FileClose(outfp);
    
    return 0;
}
Ejemplo n.º 13
0
Int2 Main (void)
{
    Int2        argcount;
    Boolean     flagHaveNet;

    Int4        gi;
    SeqEntryPtr sep;
    ComPatPtr   cpp, cpph = NULL;
    SeqAlignPtr sap, sapn;
    StdSegPtr   ssp;
    SeqLocPtr   slp, slpn;
    Int4        start, stop;

    FILE        *fiop;
    Char        fastafile[256], namesfile[256];
    CharPtr     title;
    CharPtr     taxon;

    FloatHi     mw;
    ValNodePtr  namelist = NULL;

    static CharPtr pattern_file = "ncbipros.dat";
    static CharPtr protease_file = "ncbiendo.dat";
    static CharPtr names_file = "ncbipnam.dat";

    static GatherScope  gs;
    GatherScopePtr      gsp;
    static Gather_PBS   gpbs;
    Gather_PBSPtr       gpbsp;

#ifndef NO_TAX_NET
    Int4   i;
    static Char taxdata[8];
    static Gather_TaxId gti;
    Gather_TaxIdPtr     gtip;
#endif

#ifndef NO_TAX_NET
    Int2   ia=4, ib=5, ic=6, id=7, ie=8, ig=9, ih=10, ii=11;
#else
    Int2         ib=4, ic=5, id=6, ie=7, ig=8, ih=9,  ii=10;
#endif

    argcount = sizeof (myargs) / sizeof (Args);
    if (!GetArgs ("ProSiteSearch", argcount, myargs))
        return 1;

    if (myargs[0].intvalue == 0 && myargs[1].strvalue == NULL)
    {
        ErrPostEx (SEV_ERROR, TOP_ERROR, 100,
                   "No gi or FastA file given :: for help :   srchaa -");
        ErrShow ();
        exit (1);
    }

    gsp = &gs;

#ifndef NO_TAX_NET
    gtip = &gti;
#endif
    gpbsp = &gpbs;

    MemSet ((Pointer) gsp, 0, sizeof (GatherScope));
    MemSet ((Pointer) gsp->ignore, (int) (TRUE),
            (size_t) (OBJ_MAX * sizeof (Boolean)));

    gsp->ignore[OBJ_SEQDESC] = TRUE;
    gsp->ignore[OBJ_BIOSEQ] = FALSE;

    gpbsp->bsp = NULL;

    gi = myargs[0].intvalue;
    if (myargs[1].strvalue != NULL)
        StrCpy (fastafile, myargs[1].strvalue);
    else
        fastafile[0] = '\0';

    if (gi > 0)
    {
        if (!EntrezInit ("srchaa", FALSE, &flagHaveNet))
        {
            ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
                       "Entrez init failed");
            ErrShow ();
            exit (1);
        }
    }

#ifndef NO_TAX_NET
    if (myargs[ia].intvalue)
    {
        if (!TaxArchInit ())
        {
            ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                       "Taxonomy init failed");
            ErrShow ();
            exit (1);
        }
    }
#endif

    fiop = NULL;
    if (gi > 0)
    {
        sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_BIOSEQ);
    }
    else
    {
        if ((fiop = FileOpen (fastafile, "r")) == NULL)
        {
            ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                       "Failed to open FastA file: %s", fastafile);
            ErrShow ();
            exit (1);
        }
        sep = FastaToSeqEntry (fiop, FALSE);
    }

    if (sep == NULL)
    {
        ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
                   "No seqentry found");
        ErrShow ();
        exit (1);
    }

    while (sep != NULL)
    {
        gsp->ignore[OBJ_SEQDESC] = TRUE;
        gsp->ignore[OBJ_BIOSEQ] = FALSE;
        gpbsp->bsp = NULL;
        gpbsp->gi = gi;
        GatherSeqEntry (sep, (Pointer) gpbsp, GetBioseq, (Pointer) gsp);

        taxon = NULL;
#ifndef NO_TAX_NET
        if (myargs[ia].intvalue)
        {
            for (i = 0; i < 8; i++)
                taxdata[i] = '-';
            taxon = taxdata;

            gsp->ignore[OBJ_SEQDESC] = FALSE;
            gsp->ignore[OBJ_BIOSEQ] = TRUE;

            gtip->taxid = 0;
            GatherSeqEntry (sep, (Pointer) gtip, GetTaxId, (Pointer) gsp);

            if (gtip->taxid != 0)
                WhatOrg (gtip->taxid, taxon);
            else
                taxon = NULL;
        }
#endif

        if (gpbsp->bsp != NULL)
        {
            if (ISA_aa (gpbsp->bsp->mol))
            {
                if (cpph == NULL)
                {
                    namesfile[0] = '\0';
                    if (myargs[id].intvalue)
                        StrCpy (namesfile, names_file);
                    if (myargs[ie].strvalue != NULL)
                        StrCpy (namesfile, myargs[ie].strvalue);

                    if (myargs[ig].strvalue != NULL)
                    {
                        if ((cpph = CompilePattern (myargs[ig].strvalue, 1)) != NULL)
                            StrCpy (cpph->name, "User Pattern");
                    }
                    else
                    {
                        namelist = ReadPatternNames (namesfile);
                        if (myargs[ib].intvalue)
                            cpph = ReadPrositePattern (protease_file,
                                                       (Boolean) myargs[2].intvalue,
                                                       myargs[3].intvalue,
                                                       taxon, NULL);
                        else
                            cpph = ReadPrositePattern (pattern_file,
                                                       (Boolean) myargs[2].intvalue,
                                                       myargs[3].intvalue,
                                                       taxon, namelist);
                    }
                }

                if (!(Boolean) myargs[ih].intvalue)
                {
                    title = FastaTitle (gpbsp->bsp, ">", NULL);
                    printf ("%s\n", title);
                    MemFree (title);
                }
                cpp = cpph;
                while (cpp != NULL)
                {
                    sap = PatternMatchBioseq (gpbsp->bsp, cpp,
                                              (Int4)myargs[ii].intvalue);
                    if (myargs[ib].intvalue)
                    {
                        printf (">%s\n", cpp->name);
                        if (sap != NULL)
                            printf ("   Start     Stop       M.W.\n");
                    }
                    if (myargs[ib].intvalue)
                    {
                        EmbedMolecularWeightInfo (sap, gpbsp->bsp);
                        if (myargs[ic].intvalue)
                            URK_SeqAlignSortByMolWt (&sap);
                        while (sap != NULL)
                        {
                            ssp = (StdSegPtr) sap->segs;
                            slp = ssp->loc;
                            start = SeqLocStart (slp);
                            stop = SeqLocStop (slp);
                            mw = ssp->scores->value.realvalue;
                            printf ("%8ld %8ld    %9.2f\n",
                                    (long) start+1, (long) stop+1, mw);
                            sapn = sap->next;
                            SeqAlignFree (sap);
                            sap = sapn;
                        }
                    }
                    else
                    {
                        slp = MatchSa2Sl (&sap);
                        if (myargs[ih].intvalue && slp != NULL)
                        {
                            title = FastaTitle (gpbsp->bsp, ">", NULL);
                            printf ("%s\n", title);
                            MemFree (title);
                        }
                        while (slp != NULL)
                        {
                            start = SeqLocStart (slp);
                            stop = SeqLocStop (slp);
                            printf ("%8ld %8ld    %s\n",
                                    (long) start+1, (long) stop+1, cpp->name);
                            slpn = slp->next;
                            SeqLocFree (slp);
                            slp = slpn;
                        }
                    }
                    cpp = cpp->nextpattern;
                }
            }
            else
            {
                ErrPostEx (SEV_ERROR, TOP_ERROR, 106,
                           "Not a protein bioseq");
                ErrShow ();
                exit (1);
            }
        }
        else
        {
            ErrPostEx (SEV_ERROR, TOP_ERROR, 105,
                       "No bioseq found");
            ErrShow ();
            exit (1);
        }
        SeqEntryFree (sep);
        sep = NULL;
        if (fiop != NULL)
            sep = FastaToSeqEntry (fiop, FALSE);
    }

    ComPatFree (cpph);
    ValNodeFreeData (namelist);
    FileClose (fiop);
    if (gi > 0)
        EntrezFini ();
#ifndef NO_TAX_NET
    if (myargs[ia].intvalue)
        TaxArchFini ();
#endif
    return 0;
}
Ejemplo n.º 14
0
/*
 * GatherCDSFree
 *
 * Tears down a Gather_CDS work structure and everything hanging off it:
 * the two codon tables (tableGlobal, tableRefine) and the five SeqLoc
 * chains (slpGlobal, slpRefine, slpAll, slpHit, slpFound), released in
 * that order.
 *
 * gcdsp   structure to release; NULL is accepted and simply returns NULL.
 *
 * Returns whatever MemFree yields for the structure itself, cast to
 * Gather_CDSPtr, so callers can write "p = GatherCDSFree (p);".
 */
extern Gather_CDSPtr GatherCDSFree (Gather_CDSPtr gcdsp)
{
  SeqLocPtr chains[5];
  SeqLocPtr cur, following;
  SeqIdPtr  sip;
  int       k;

  if (gcdsp == NULL)
    return NULL;

  MemFree (gcdsp->tableGlobal);
  MemFree (gcdsp->tableRefine);

  /* Heads of every location chain owned by the structure, in free order. */
  chains[0] = gcdsp->slpGlobal;
  chains[1] = gcdsp->slpRefine;
  chains[2] = gcdsp->slpAll;
  chains[3] = gcdsp->slpHit;
  chains[4] = gcdsp->slpFound;

  for (k = 0; k < 5; k++)
  {
    for (cur = chains[k]; cur != NULL; cur = following)
    {
      /* Remember the successor before the node is destroyed. */
      following = cur->next;
      /* Release any ids chained after the location's first id before
         handing the node itself to SeqLocFree. */
      sip = SeqLocId (cur);
      if (sip != NULL)
        sip->next = SeqIdSetFree (sip->next);
      SeqLocFree (cur);
    }
  }

  return (Gather_CDSPtr) MemFree (gcdsp);
}
Ejemplo n.º 15
0
/*
 * FindSimilarBiasOrfs
 *
 * Iteratively searches a Seq-entry for ORFs, driving a sequence of
 * GatherSeqEntry passes (MakeGlobalTable, StandardMean, StandardDeviation,
 * RefineCodonUsage, ScanCodonUsage) over the Bioseqs in sep.
 *
 * sep           Seq-entry handed to every GatherSeqEntry pass.
 * probcut       multiplier applied to the standard deviation when the
 *               score cutoff is computed; 0.0 selects the default 0.5.
 * clustmin      minimum refinecount required before ScanCodonUsage is
 *               run; 0 selects the default 2.
 * findmin       minimum findcount required for a round's hits to be
 *               kept; 0 selects the default 4.
 * slpKnown      chain of locations; each is duplicated into the working
 *               "global" list.  The caller's chain is not modified here.
 * slpPotential  chain of locations; each is duplicated into the working
 *               "all" list.  The caller's chain is not modified here.
 *
 * Returns the accumulated "found" chain after UniqueOrfs has been applied
 * to it.  The chain is detached from the work structure before that
 * structure is freed, so the caller owns the result.  Returns NULL when
 * nothing was found or when the work structure could not be allocated.
 *
 * NOTE: the GatherScope lives in static storage, so all calls to this
 * function share it; it is re-initialized at the start of every call.
 */
extern SeqLocPtr FindSimilarBiasOrfs (SeqEntryPtr sep, FloatHi probcut,
                                      Int4 clustmin, Int4 findmin,
                                      SeqLocPtr slpKnown,
                                      SeqLocPtr slpPotential)
{
  static GatherScope  gs;
  GatherScopePtr      gsp;
  Gather_CDSPtr       gcdsp;

  Int4      gcount;
  SeqLocPtr slp, slpn;

  /* A zero argument means "use the built-in default". */
  if (probcut == 0.0)
    probcut = 0.5;
  if (clustmin == 0)
    clustmin = 2;
  if (findmin == 0)
    findmin = 4;

  gsp = &gs;
  gcdsp = GatherCDSNew ();
  /* Without a work structure nothing below can run; report "nothing
     found" instead of dereferencing a NULL pointer. */
  if (gcdsp == NULL)
    return NULL;

  /* Ignore every object type except Bioseqs during gathering. */
  MemSet ((Pointer) gsp, 0, sizeof (GatherScope));
  MemSet ((Pointer) gsp->ignore, (int) (TRUE),
          (size_t) (OBJ_MAX * sizeof (Boolean)));
  gsp->ignore[OBJ_BIOSEQ] = FALSE;

  /* Work on private copies of the caller's location chains. */
  slp = slpKnown;
  while (slp != NULL)
  {
    SeqLocLink (&gcdsp->slpGlobal, SeqLocDup (slp));
    slp = slp->next;
  }
  slp = slpPotential;
  while (slp != NULL)
  {
    SeqLocLink (&gcdsp->slpAll, SeqLocDup (slp));
    slp = slp->next;
  }

  GatherSeqEntry (sep, (Pointer) gcdsp, MakeGlobalTable, (Pointer) gsp);

  /* Each round ends with tableGlobal freed; the loop continues only if
     the table is rebuilt (see the end of the refine branch). */
  while (gcdsp->tableGlobal != NULL)
  {
    /* Reset the per-round counters and statistics. */
    gcdsp->stdcount = 0;
    gcdsp->refinecount = 0;
    gcdsp->findcount = 0;
    gcdsp->stdev = 0.0;
    gcdsp->HIscore = 0.0;
    gcdsp->LOscore = -1.0;
    gcdsp->mean = 0.0;
    gcdsp->scorecut = 0.5;

    /* Presumably StandardMean accumulates into mean/stdcount and
       StandardDeviation into stdev; they are finalized here. */
    GatherSeqEntry (sep, (Pointer) gcdsp, StandardMean, (Pointer) gsp);
    if (gcdsp->stdcount > 0)
      gcdsp->mean /= gcdsp->stdcount;
    GatherSeqEntry (sep, (Pointer) gcdsp, StandardDeviation, (Pointer) gsp);

    /* Divide by (n - 1); with fewer than two samples use zero. */
    if (gcdsp->stdcount > 1)
      gcdsp->stdev /= (gcdsp->stdcount - 1);
    else
      gcdsp->stdev = 0.0;
    gcdsp->stdev = (FloatHi) sqrt (gcdsp->stdev);
    gcdsp->scorecut = gcdsp->LOscore + (gcdsp->stdev * probcut);

    /* Discard the previous round's refine list and table.
       NOTE(review): unlike GatherCDSFree, the free loops in this function
       do not release ids chained after SeqLocId (slp) - confirm whether
       that difference is intentional. */
    slp = gcdsp->slpRefine;
    while (slp != NULL)
    {
      slpn = slp->next;
      SeqLocFree (slp);
      slp = slpn;
    }
    gcdsp->slpRefine = slp;   /* slp is NULL here */

    gcdsp->tableRefine = FreeCodonTable (gcdsp->tableRefine);
    GatherSeqEntry (sep, (Pointer) gcdsp, RefineCodonUsage, (Pointer) gsp);
    if (gcdsp->tableRefine != NULL)
    {
      if (gcdsp->refinecount >= clustmin)
      {
        gcdsp->scorecut *= 1.5; /* increase a bit to see any branch jumps */
        GatherSeqEntry (sep, (Pointer) gcdsp, ScanCodonUsage, (Pointer) gsp);
      }
      if (gcdsp->findcount < findmin)
      {
        /* Too few hits this round: throw them away. */
        slp = gcdsp->slpHit;
        while (slp != NULL)
        {
          slpn = slp->next;
          SeqLocFree (slp);
          slp = slpn;
        }
        gcdsp->slpHit = slp;   /* slp is NULL here */
      }
      else
      {
        /* Enough hits: move them onto the found list. */
        SeqLocLink (&gcdsp->slpFound, gcdsp->slpHit);
        gcdsp->slpHit = NULL;
      }
      gcount = CullGlobalOrfs (&gcdsp->slpGlobal, &gcdsp->slpRefine);
      gcdsp->tableGlobal = FreeCodonTable (gcdsp->tableGlobal);
      /* Rebuild the global table (and so run another round) only when
         the culled count differs from the current global count. */
      if (gcount != gcdsp->globalcount)
      {
        gcdsp->globalcount = 0;
        GatherSeqEntry (sep, (Pointer) gcdsp, MakeGlobalTable, (Pointer) gsp);
      }
    }
    else
    {
      /* No refine table was produced: drop the global list and table,
         which ends the loop. */
      slp = gcdsp->slpGlobal;
      while (slp != NULL)
      {
        slpn = slp->next;
        SeqLocFree (slp);
        slp = slpn;
      }
      gcdsp->slpGlobal = slp;   /* slp is NULL here */
      gcdsp->tableGlobal = FreeCodonTable (gcdsp->tableGlobal);
    }
  }

  UniqueOrfs (&gcdsp->slpFound);
  /* Detach the result so GatherCDSFree does not free it. */
  slp = gcdsp->slpFound;
  gcdsp->slpFound = NULL;
  GatherCDSFree (gcdsp);
  return slp;
}