Esempio n. 1
0
static Boolean GetGraphsProc (GatherObjectPtr gop)

{
  GphGetPtr    ggp;
  GphItemPtr   gip;
  SeqGraphPtr  sgp;

  if (gop == NULL || gop->itemtype != OBJ_SEQGRAPH) return TRUE;
  ggp = (GphGetPtr) gop->userdata;
  sgp = (SeqGraphPtr) gop->dataptr;
  if (ggp == NULL || sgp == NULL) return TRUE;
  /* only phrap or gap4 currently allowed */
  if (StringICmp (sgp->title, "Phrap Quality") == 0 ||
      StringICmp (sgp->title, "Gap4") == 0) {
    /* data type must be bytes */
    if (sgp->flags[2] == 3) {
      if (SeqIdForSameBioseq (SeqLocId (sgp->loc), SeqLocId (ggp->slp))) {
        gip = (GphItemPtr) MemNew (sizeof (GphItem));
        if (gip == NULL) return TRUE;
        gip->sgp = sgp;
        gip->left = GetOffsetInBioseq (sgp->loc, ggp->bsp, SEQLOC_LEFT_END);
        gip->right = GetOffsetInBioseq (sgp->loc, ggp->bsp, SEQLOC_RIGHT_END);
        ValNodeAddPointer (&(ggp->vnp), 0, (Pointer) gip);
      }
    }
  }
  return TRUE;
}
Esempio n. 2
0
static void CleanUpXOS (XOSPtr xosp)
{
  SeqLocPtr slp, slpn;
  SeqIdPtr  id;

  if (xosp != NULL)
  {
    xosp->gsp  = NULL;          /* is static */
    xosp->filename = (CharPtr) MemFree (xosp->filename);
    xosp->sep = SeqEntryFree (xosp->sep);
    xosp->bsp = NULL;           /* should be in seqentry */
    xosp->gcd = NULL;
    xosp->gcdi = NULL;
    slp = xosp->slpa;
    while (slp != NULL)
    {
      slpn = slp->next;
      id = SeqLocId (slp);
      if (id != NULL)
        id->next = SeqIdSetFree (id->next);
      SeqLocFree (slp);
      slp = slpn;
    }
    xosp->slpa = slp;
    slp = xosp->slpb;
    while (slp != NULL)
    {
      slpn = slp->next;
      id = SeqLocId (slp);
      if (id != NULL)
        id->next = SeqIdSetFree (id->next);
      SeqLocFree (slp);
      slp = slpn;
    }
    xosp->slpb = slp;
    slp = xosp->slps;
    while (slp != NULL)
    {
      slpn = slp->next;
      id = SeqLocId (slp);
      if (id != NULL)
        id->next = SeqIdSetFree (id->next);
      SeqLocFree (slp);
      slp = slpn;
    }
    xosp->slps = slp;
    slp = xosp->slpk;
    while (slp != NULL)
    {
      slpn = slp->next;
      id = SeqLocId (slp);
      if (id != NULL)
        id->next = SeqIdSetFree (id->next);
      SeqLocFree (slp);
      slp = slpn;
    }
    xosp->slpk = slp;
  }
  return;
}
Esempio n. 3
0
extern DustRegionPtr DustSeqLoc (SeqLocPtr slp,
                                 DustDataPtr ddp)
{
    BioseqPtr     bsp;
    Int4          start, end;
    DustRegionPtr drp;

    if (slp == NULL || ddp == NULL)
        return NULL;

    if (slp->choice != SEQLOC_INT)
    {
        ErrPostEx (SEV_ERROR, 2, 1,
                   "Will only dust a single SeqLoc interval");
        ErrShow ();
        return NULL;
    }

    if ((bsp = BioseqLockById (SeqLocId (slp))) == NULL)
    {
        ErrPostEx (SEV_ERROR, 2, 5, "Bioseq lock failure");
        ErrShow ();
        return NULL;
    }

    start = SeqLocStart (slp);
    end = SeqLocStop (slp);
    drp = DustBioseq (bsp, start, end, ddp);
    BioseqUnlock (bsp);
    return drp;
}
Esempio n. 4
0
extern void UniqueOrfs (SeqLocPtr PNTR pslpFound)
{
  SeqLocPtr slpFound, slpNew, slp;
  SeqIdPtr  id;

  slpFound = *pslpFound;
  slpNew = NULL;
  while (slpFound != NULL)
  {
    if (!SeqLocMatch (slpNew, slpFound))
      SeqLocLink (&slpNew, SeqLocDup (slpFound));
    slpFound = slpFound->next;
  }
  slpFound = *pslpFound;
  while (slpFound != NULL)
  {
    slp = slpFound->next;
    id = SeqLocId (slpFound);
    if (id != NULL)
      id->next = SeqIdSetFree (id->next);
    SeqLocFree (slpFound);
    slpFound = slp;
  }
  *pslpFound = slpNew;
  return;
}
Esempio n. 5
0
extern SeqLocPtr SeqLocDup (SeqLocPtr slpold)
{
  SeqLocPtr slpnew, slpn, slp;
  SeqIdPtr  id;

  if (slpold == NULL)
    return NULL;

  slpnew = (SeqLocPtr) AsnIoMemCopy ((Pointer) slpold,
                                     (AsnReadFunc) SeqLocAsnRead,
                                     (AsnWriteFunc) SeqLocAsnWrite);
  slp = slpnew->next;
  while (slp != NULL)
  {
    slpn = slp->next;
    id = SeqLocId (slp);
    if (id != NULL)
      id->next = SeqIdSetFree (id->next);
    SeqLocFree (slp);
    slp = slpn;
  }
  slpnew->next = NULL;

  return slpnew;
}
Esempio n. 6
0
/**********************************************************************
*
*	MapLocToAnchor(annot, slp, anchor)
*	map the current slp to a position on the anchor Bioseq
*	annot: Seq-annot that may contain the alignment of the consistent markers
*	slp: the current Bioseq
*	anchor_id: the Seq-id for the anchor Bioseq, that is the sequece map
*
************************************************************************/
SeqLocPtr MapLocToAnchor(SeqAnnotPtr annot, SeqLocPtr slp, BioseqPtr anchor_bsp)
{
    Int2 type;
    Int4Ptr x_a, x;
    Uint2 num;
    SeqIdPtr anchor_id;
    SeqLocPtr t_slp;
    Int4 start, stop;
    SeqAlignPtr align;


    if(annot == NULL || slp == NULL || anchor_bsp == NULL)
        return NULL;
    if(slp->choice != SEQLOC_PNT && slp->choice != SEQLOC_INT)
        return NULL;
    anchor_id = SeqIdFindBest(anchor_bsp->id, SEQID_GI);
    if(anchor_id == NULL)
        anchor_id = anchor_bsp->id;

    while(annot)
    {
        if(annot->type == 2)
        {
            type = GetEquivAlignType(annot);
            if(type == 1)	/*this is consistent*/
            {
                align = annot->data;
                if(!get_anchor_coordinates(align, anchor_id, SeqLocId(slp), &x_a, &x, &num))
                    return NULL;
                if(slp->choice == SEQLOC_INT)
                {
                    start = find_this_position_by_anchor (x_a, x, num, SeqLocStart(slp), anchor_bsp->length);
                    if(SeqLocStart(slp) != SeqLocStop(slp))
                        stop = find_this_position_by_anchor (x_a, x, num, SeqLocStop(slp), anchor_bsp->length);
                    else
                        stop = start;
                    t_slp = SeqLocIntNew(start, stop, Seq_strand_plus, anchor_id);
                }
                else
                {
                    start = SeqLocStart(slp);
                    start = find_this_position_by_anchor (x_a, x, num, start, anchor_bsp->length);
                    t_slp = SeqLocPntNew(start, Seq_strand_plus, anchor_id, FALSE);
                }
                MemFree(x_a);
                MemFree(x);
                return t_slp;
            }
        }

        annot = annot->next;
    }

    return NULL;
}
Esempio n. 7
0
extern void ExportSeqAnnotFeatureTable (FILE *fp, SeqAnnotPtr sap) 
{
  FeatureTableData ftd;
  BioseqPtr fake_bsp;
  SeqFeatPtr first_feat;
  SeqEntryPtr sep;
  
  if (fp == NULL || sap == NULL) return;

  /* create fake bioseq to hold annotation */
  fake_bsp = BioseqNew();
  fake_bsp->annot = sap;
  
  /* create SeqEntry for temporary bioseq to live in */
  sep = SeqEntryNew ();
  sep->choice = 1;
  sep->data.ptrvalue = fake_bsp;
  SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) fake_bsp, sep);  

  ftd.export_only_selected = FALSE;
  ftd.fp = fp;
  ftd.show_nucs = TRUE;
  ftd.show_prots = TRUE;
  ftd.suppress_protein_ids = FALSE;

  /* show sources if there are any */
  first_feat = (SeqFeatPtr) sap->data;
  while (first_feat != NULL && first_feat->data.choice != SEQFEAT_BIOSRC) {
    first_feat = first_feat->next;
  }
  if (first_feat != NULL) {
    ftd.hide_sources = FALSE;
  } else {
    ftd.hide_sources = TRUE;
  }
      
  first_feat = (SeqFeatPtr) sap->data;
  fake_bsp->id = SeqIdDup (SeqLocId (first_feat->location));
  if (first_feat->data.choice == SEQFEAT_PROT) {
    fake_bsp->mol = Seq_mol_aa;
  } else {
    fake_bsp->mol = Seq_mol_dna;
  }
  fake_bsp->repr = Seq_repr_raw;
  fake_bsp->length = FindNecessaryBioseqLength (first_feat);
  
  VisitBioseqsInSep (sep, &ftd, VSMExportFeatureTableBioseqCallback);

  fake_bsp->annot = NULL;
  fake_bsp->idx.deleteme = TRUE;
  DeleteMarkedObjects (fake_bsp->idx.entityID, 0, NULL);
 
}
Esempio n. 8
0
/*
        Performs a global alignment, producing a SeqAlign.
*/
NLM_EXTERN SeqAlignPtr
GlobalBandToSeqAlign (GlobalBandStructPtr gbsp)
{

	SeqAlignPtr seqalign;
	GapXEditBlockPtr edit_block;
	SeqLocPtr l1, l2;

	if (gbsp == NULL)
		return NULL;

	if (GlobalBandToEditBlock(gbsp) == FALSE)
	{
		return NULL;
	}

	if (gbsp->edit_block == NULL)
		return NULL;
	edit_block = gbsp->edit_block;
	l1 = gbsp->seqloc1;
	l2 = gbsp->seqloc2;
        edit_block->length1 = SeqLocLen(l1);
        edit_block->length2 = SeqLocLen(l2);
	if (SeqLocStrand(l1) == Seq_strand_minus) {
		edit_block->frame1 = -1;
	} else {
		edit_block->frame1 = 1;		
	}
	if (SeqLocStrand(l2) == Seq_strand_minus) {
		edit_block->frame2 = -1;
	} else {
		edit_block->frame2 = 1;		
	}
	seqalign = GapXEditBlockToSeqAlign(gbsp->edit_block, SeqLocId(l2),  SeqLocId(l1));

	gbsp->edit_block = GapXEditBlockDelete(gbsp->edit_block);

	return seqalign;
}
Esempio n. 9
0
/*
        Performs local alignment, producing a SeqAlign.
*/
NLM_EXTERN SeqAlignPtr
LocalBandToSeqAlign (LocalBandStructPtr lbsp)
{

	SeqAlignPtr seqalign;

	GapXEditBlockPtr edit_block;
	SeqLocPtr l1, l2;

	if (lbsp == NULL)
		return NULL;

	if (LocalBandToEditBlock(lbsp) == FALSE)
	{
		return NULL;
	}

	if (lbsp->edit_block == NULL)
		return NULL;
	edit_block = lbsp->edit_block;
	l1 = lbsp->seqloc1;
	l2 = lbsp->seqloc2;
	if (SeqLocStrand(l1) == Seq_strand_minus) {
		edit_block->frame1 = -1;
	} else {
		edit_block->frame1 = 1;		
	}
	if (SeqLocStrand(l2) == Seq_strand_minus) {
		edit_block->frame2 = -1;
	} else {
		edit_block->frame2 = 1;		
	}
	seqalign = GapXEditBlockToSeqAlign(lbsp->edit_block, SeqLocId(l2),  SeqLocId(l1));

	lbsp->edit_block = GapXEditBlockDelete(lbsp->edit_block);

	return seqalign;
}
Esempio n. 10
0
static void DoVisitCodingRegions (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  BioseqPtr      bsp;
  CharPtr        caret5, caret3;
  CSpeedFlagPtr  cfp;
  Char           id [64];
  SeqLocPtr      loc, slp;
  Boolean        partial5, partial3;
  SeqIdPtr       sip;
  Int4           start, stop;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return;
  cfp = (CSpeedFlagPtr) userdata;
  if (cfp == NULL || cfp->ofp == NULL) return;

  loc = sfp->location;
  bsp = BioseqFindFromSeqLoc (loc);
  if (bsp == NULL) return;

  StringCpy (id, "?");
  if (sfp->product != NULL) {
    sip = SeqLocId (sfp->product);
    if (sip != NULL) {
      SeqIdWrite (sip, id, PRINTID_FASTA_SHORT, sizeof (id) - 1);
    }
  }

  fprintf (cfp->ofp, "%s\n", id);
  slp = SeqLocFindNext (loc, NULL);
  while (slp != NULL) {
    start = GetOffsetInBioseq (slp, bsp, SEQLOC_START) + 1;
    stop = GetOffsetInBioseq (slp, bsp, SEQLOC_STOP) + 1;
    caret5 = "";
    caret3 = "";
    CheckSeqLocForPartial (slp, &partial5, &partial3);
    if (partial5) {
      caret5 = "<";
    }
    if (partial3) {
      caret3 = ">";
    }
    fprintf (cfp->ofp, "%s%ld\t%s%ld\n", caret5, (long) start, caret3, (long) stop);
    slp = SeqLocFindNext (loc, slp);
  }
}
Esempio n. 11
0
/** Creates a list of SeqLoc structures with data about PHI BLAST pattern 
 * occurrences, to be used as features on Query Seq-locs.
 * @param pattern_info Pattern information structure. [in]
 * @param query_seqloc Query SeqLoc, needed to retrieve Seq-id. [in]
 * @param seed_seqloc_ptr List of SeqLoc's with pattern data. [out]
 */
static Int2
s_PHIBlastCreateSeedSeqLoc(const SPHIQueryInfo* pattern_info, 
                           SeqLoc* query_seqloc, 
                           SeqLoc** seed_seqloc_ptr)
{
    Int4 index;
    for (index = 0; index < pattern_info->num_patterns; ++index) {
        const SPHIPatternInfo* this_occurrence = 
            &pattern_info->occurrences[index];
        SeqInt* si = SeqIntNew();
        si->id = SeqIdDup(SeqLocId(query_seqloc));
        si->from = this_occurrence->offset;
        si->to = this_occurrence->offset + this_occurrence->length - 1;
        ValNodeAddPointer(seed_seqloc_ptr, SEQLOC_INT, si);
    }
    return 0;
}
Esempio n. 12
0
static void LIBCALLBACK GetSeqFeat (AsnExpOptStructPtr aeosp)

{
  BioseqPtr     bsp;
  ExpStructPtr  esp;
  SeqFeatPtr    sfp;

  if (aeosp->dvp->intvalue == START_STRUCT) {
    esp = (ExpStructPtr) aeosp->data;
    sfp = (SeqFeatPtr) aeosp->the_struct;
    if (esp != NULL && esp->fp != NULL && sfp != NULL &&
        sfp->data.choice == esp->feat) {
      bsp = BioseqFind (SeqLocId (sfp->location));
      if (bsp != NULL) {
        PrintSequence (bsp, sfp, esp->fp, esp->is_na);
      }
    }
  }
}
Esempio n. 13
0
extern void RemoveInternalOrfs (SeqLocPtr PNTR slpFound)
{
  SeqLocPtr slp1, slp2, slp = NULL;
  SeqIdPtr  id;
  Int4      start1, stop1, start2, stop2;
  Boolean   flagInternal;

  slp1 = *slpFound;
  while (slp1 != NULL)
  {
    start1 = SeqLocStart (slp1);
    stop1 = SeqLocStop (slp1);
    flagInternal = FALSE;
    slp2 = *slpFound;
    while (slp2 != NULL)
    {
      start2 = SeqLocStart (slp2);
      stop2 = SeqLocStop (slp2);
      if ((start1 > start2 && start1 < stop2) &&
          (stop1 > start2 && stop1 < stop2))
      {
        flagInternal = TRUE;
        break;
      }
      slp2 = slp2->next;
    }
    if (!flagInternal)
      SeqLocLink (&slp, SeqLocDup (slp1));
    slp1 = slp1->next;
  }
  slp1 = *slpFound;
  while (slp1 != NULL)
  {
    slp2 = slp1->next;
    id = SeqLocId (slp1);
    if (id != NULL)
      id->next = SeqIdSetFree (id->next);
    SeqLocFree (slp1);
    slp1 = slp2;
  }
  *slpFound = slp;
  return;
}
Esempio n. 14
0
SBlastMessage* Blast_MessageToSBlastMessage(const Blast_Message* old, SeqLoc* query_slp, const BlastQueryInfo* query_info, Boolean believe_query)
{
    SBlastMessage* retval = NULL;
    SBlastMessage* last = NULL;
    SBlastMessage* var = NULL;

    while (old)
    {
         SeqId* sip = NULL;
         SeqLoc* slp = s_Context2SeqLoc(old->context, query_slp, query_info);

         if (slp)
           sip = SeqLocId(slp);
             
         SBlastMessageWrite(&retval, s_EBlastSeverity2ErrSev(old->severity), old->message, sip, believe_query);
         old = old->next;
    }

    /* 
    This loop removes a redundant message only if it is the next message.  It removes the second one so that
    the head of the list is not changed. 
    */
    var = retval;
    while (var)
    {
       if (s_SBlastMessageCompare(var, last) == TRUE)
       {
          last->next = var->next;
          var->next = NULL;
          SBlastMessageFree(var);
          var = last->next;
       }
       else
       {
          last = var;
          var = var->next;
       }
    }
    
    return retval;
}
Esempio n. 15
0
static Boolean IsMrnaAlignment (SeqAlignPtr align, Int4Ptr gilist, Int4 count)

{
  DenseDiagPtr  ddp;
  DenseSegPtr   dsp;
  SeqIdPtr      sip;
  StdSegPtr     ssp;
  SeqLocPtr     tloc;

  if (align == NULL) return FALSE;
  sip = NULL;
  if (align->segtype == 1) {
    ddp = (DenseDiagPtr) align->segs;
    if (ddp != NULL) {
      for (sip = ddp->id; sip != NULL; sip = sip->next) {
        if (IsSipMrna (sip, gilist, count)) return TRUE;
      }
    }
  } else if (align->segtype == 2) {
    dsp = (DenseSegPtr) align->segs;
    if (dsp != NULL) {
      for (sip = dsp->ids; sip != NULL; sip = sip->next) {
        if (IsSipMrna (sip, gilist, count)) return TRUE;
      }
    }
  } else if (align->segtype == 3) {
    ssp = (StdSegPtr) align->segs;
    if (ssp != NULL) {
      for (tloc = ssp->loc; tloc != NULL; tloc = tloc->next) {
        sip = SeqLocId (tloc);
        if (IsSipMrna (sip, gilist, count)) return TRUE;
      }
    }
  }
  return FALSE;
}
Esempio n. 16
0
/** Prints out the description of a query sequence, along
 *  with notification that the query has no hits
 * @param slp The query Seq-loc
 * @param format_options Options for formatting
 * @param outfp File to which the output will be directed
 */
static void 
s_AcknowledgeEmptyResults(SeqLoc *slp,
                          BlastFormattingOptions* format_options,
                          const BlastFormattingInfo* format_info,
                          FILE *outfp)
{
    Bioseq *bsp = BioseqLockById(SeqLocId(slp));

    if (format_info->head_on_every_query == TRUE)
        BLAST_PrintOutputHeader(format_info);

    init_buff_ex(70);
    AcknowledgeBlastQuery(bsp, 70, outfp, 
       format_options->believe_query, format_options->html);
    free_buff();
    BioseqUnlock(bsp);

    if (format_info->head_on_every_query == TRUE)
    {
        s_BLAST_PrintDatabaseInfo(format_info);
        fprintf(format_info->outfp, "%s", "Searching..................................................done\n\n");
    }
    fprintf(outfp, " ***** No hits found ******\n\n\n");
}
Esempio n. 17
0
static void GetThisBioseq (XOSPtr xosp)
{
  GatherScopePtr  gsp;
  CharPtr         filename;
  Int4            gi;
  FILE            *fiop;
  Boolean         flagHaveNet;
  SeqEntryPtr     sep;
  ValNodePtr      vnp;
  Int2            gcode;
  GeneticCodePtr  gcp;
  SeqLocPtr       slp, slpn;
  SeqIdPtr        id;

  gsp = xosp->gsp;

  fiop = NULL;
  filename = xosp->filename;
  gi = xosp->gi;
  xosp->sep = SeqEntryFree (xosp->sep);

  if (gi > 0)
  {
    if (!EntrezInit ("cnsgnv", FALSE, &flagHaveNet))
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
                 "Entrez init failed");
      ErrShow ();
      return;
    }
  }

  if (gi > 0)
  {
    sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_NUC_PROT);
  }
  else if (filename != NULL)
  {
    if ((fiop = FileOpen (filename, "r")) == NULL)
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                 "Failed to open FastA file");
      ErrShow ();
      return;
    }
    sep = FastaToSeqEntry (fiop, TRUE);
    AddBioSourceToSeqEntry (sep);
  }
  else
  {
    sep = NULL;
  }

  if (sep == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
               "No SeqEntry");
    ErrShow ();
  }
  else
  {
    CleanUpXOS (xosp);
    xosp->sep = sep;
    xosp->gi = gi;
    xosp->filename = StringSave (filename);
    GatherSeqEntry (sep, (Pointer) xosp, GetBioseq, (Pointer) gsp);
  }

  if (xosp->bsp != NULL)
  {
    vnp = xosp->bsp->descr;
    gcode = 0;
    while (vnp != NULL)
    {
      if (vnp->choice == Seq_descr_source)
      {
        gcode = BioSourceToGeneticCode ((BioSourcePtr) vnp->data.ptrvalue);
        break;
      }
      vnp = vnp->next;
    }
    if (gcode == 0 && sep->choice == 2)
    {
      vnp = ((BioseqSetPtr) (sep->data.ptrvalue))->descr;
      while (vnp != NULL)
      {
        if (vnp->choice == Seq_descr_source)
        {
          gcode = BioSourceToGeneticCode ((BioSourcePtr) vnp->data.ptrvalue);
          break;
        }
        vnp = vnp->next;
      }
    }
    if (gcode == 0)
      gcode = 1; /* standard */
    gcp = GeneticCodeFind (gcode, NULL);
    if (gcp != NULL)
    {
      xosp->gcd = xosp->gcdi = NULL;
      vnp = (ValNodePtr) gcp->data.ptrvalue;
      while (vnp != NULL)
      {
        if (vnp->choice == 6)       /* sncbieaa */
          xosp->gcdi = (CharPtr) vnp->data.ptrvalue;
        else if (vnp->choice == 3)  /* ncbieaa */
          xosp->gcd = (CharPtr) vnp->data.ptrvalue;
        vnp = vnp->next;
      }
    }
    if (xosp->gcdi == NULL)
      xosp->gcdi = xosp->gcd;
    if (xosp->gcdi == NULL)
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 105,
                 "Could not get genetic code for translation");
      ErrShow ();
      xosp->bsp = NULL;
    }
  }

  if (gi > 0)
  {
    slp = xosp->slpk;
    while (slp != NULL)
    {
      slpn = slp->next;
      id = SeqLocId (slp);
      if (id != NULL)
        id->next = SeqIdSetFree (id->next);
      SeqLocFree (slp);
      slp = slpn;
    }
    xosp->slpk = slp;
    GatherSeqEntry (sep, (Pointer) xosp, GatherKnownOrfs, (Pointer) gsp);
  }

  if (gi > 0)
    EntrezFini ();
  else
    FileClose (fiop);

  if (xosp->bsp != NULL)
  {
    if (!ISA_na (xosp->bsp->mol))
    {
      xosp->sep = SeqEntryFree (xosp->sep);
      xosp->bsp = NULL;
      xosp->filename = (CharPtr) MemFree (xosp->filename);
      xosp->gi = 0;
      ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "Not nucleic acid Bioseq");
      ErrShow ();
    }
  }
  else
  {
    xosp->sep = SeqEntryFree (xosp->sep);
    xosp->filename = (CharPtr) MemFree (xosp->filename);
    xosp->gi = 0;
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "No Bioseq");
    ErrShow ();
  }
  return;
}
Esempio n. 18
0
Int2 LIBCALLBACK PCCPredictFunc (Pointer data)
{
  OMProcControlPtr      ompcp;
  BioseqPtr             bsp = NULL;
  SeqFeatPtr            sfp = NULL;
  WindoW                w;
  GraphViewFormPtr      gvp;
  SeqIdPtr              psip;

  ompcp = (OMProcControlPtr) data;
  if (ompcp == NULL || ompcp->input_itemtype == 0)
    return OM_MSG_RET_ERROR;

  switch (ompcp->input_itemtype)
  {
    case OBJ_BIOSEQ:
      bsp = (BioseqPtr) ompcp->input_data;
      if (!ISA_aa (bsp->mol))
        return OM_MSG_RET_ERROR;
      break;
    case OBJ_SEQFEAT:
      sfp = (SeqFeatPtr) ompcp->input_data;
      break;
    default:
      return OM_MSG_RET_ERROR;
  }

  if (bsp != NULL)
  {
    w = (WindoW) CreateGraphViewForm (-50, -33, "Predict coiled-coil",
                                      bsp, GRAPH_FILTER);

    if ((gvp = (GraphViewFormPtr) GetObjectExtra (w)) == NULL)
    {
/*      w = Remove (w); */
      return OM_MSG_RET_ERROR;
    }
    else
    {
      gvp->graphtype = GRAPH_FILTER;
      gvp->window = 22;
      gvp->type = AA_PCC;
      gvp->entityID = ompcp->input_entityID;
      gvp->itemID = ompcp->input_itemID;
      if ((gvp->sgp = PCCProc (bsp, NULL, gvp->window)) == NULL)
      {
/*        w = Remove (w); */
        return OM_MSG_RET_ERROR;
      }
      else
      {
        BioseqPtrToGraphViewForm (gvp->form, gvp->sgp);
      }
    }
  }
  else if (sfp != NULL)
  {
    if (sfp->data.choice != SEQFEAT_CDREGION)
      return OM_MSG_RET_ERROR;
    psip = SeqLocId (sfp->product);
    bsp = BioseqFind (psip);
    w = (WindoW) CreateGraphViewForm (-50, -33, "Predict coiled-coil",
                                      bsp, GRAPH_FILTER);

    if ((gvp = (GraphViewFormPtr) GetObjectExtra (w)) == NULL)
    {
/*      w = Remove (w); */
      return OM_MSG_RET_ERROR;
    }
    else
    {
      gvp->graphtype = GRAPH_FILTER;
      gvp->window = 22;
      gvp->type = AA_PCC;
      gvp->entityID = ompcp->input_entityID;
      gvp->itemID = ompcp->input_itemID;
      if ((gvp->sgp = PCCProc (bsp, NULL, gvp->window)) == NULL)
      {
/*        w = Remove (w); */
        return OM_MSG_RET_ERROR;
      }
      else
      {
        BioseqPtrToGraphViewForm (gvp->form, gvp->sgp);
      }
    }
  }
  else
  {
    return OM_MSG_RET_ERROR;
  }

  Show (w);
  Select (w);
  return OM_MSG_RET_DONE;
}
Esempio n. 19
0
Int2 LIBCALLBACK HydrophobicFunc (Pointer data)
{
  OMProcControlPtr      ompcp;
  BioseqPtr             bsp = NULL;
  SeqFeatPtr            sfp = NULL;
  WindoW                w;
  GraphViewFormPtr      gvp;
  SeqIdPtr              psip;
  SeqPortPtr            spp;
  FloatHi               scr[24];
  Char                  res[24];

  ompcp = (OMProcControlPtr) data;
  if (ompcp == NULL || ompcp->input_itemtype == 0)
    return OM_MSG_RET_ERROR;

  switch (ompcp->input_itemtype)
  {
    case OBJ_BIOSEQ:
      bsp = (BioseqPtr) ompcp->input_data;
      if (!ISA_aa (bsp->mol))
        return OM_MSG_RET_ERROR;
      break;
    case OBJ_SEQFEAT:
      sfp = (SeqFeatPtr) ompcp->input_data;
      break;
    default:
      return OM_MSG_RET_ERROR;
  }

  if (bsp != NULL)
  {
    w = (WindoW) CreateGraphViewForm (-50, -33, "Kyte-Doolittle-phobicity",
                                      bsp, GRAPH_FILTER);

    if ((gvp = (GraphViewFormPtr) GetObjectExtra (w)) == NULL)
    {
/*      w = Remove (w); */
      return OM_MSG_RET_ERROR;
    }
    else
    {
      gvp->graphtype = GRAPH_FILTER;
      if (ReadAAC ("KSkyte.flt", scr, res) != 24)
        return OM_MSG_RET_ERROR;
      gvp->window = 19;
      gvp->type = AA_FILTER_COMP_KYTE;
      gvp->entityID = ompcp->input_entityID;
      gvp->itemID = ompcp->input_itemID;
      spp = SeqPortNew (bsp, 0, bsp->length-1, 0, Seq_code_iupacaa);
      gvp->sgp = FilterSeq (spp, 0, bsp->length-1, scr, res,
                            &(gvp->window), gvp->type);
      SeqPortFree (spp);
      if (gvp->sgp == NULL)
      {

/*        w = Remove (w); */
        return OM_MSG_RET_ERROR;
      }
      else
      {
        BioseqPtrToGraphViewForm (gvp->form, gvp->sgp);
      }
    }
  }
  else if (sfp != NULL)
  {
    if (sfp->data.choice != SEQFEAT_CDREGION)
      return OM_MSG_RET_ERROR;
    psip = SeqLocId (sfp->product);
    bsp = BioseqFind (psip);
    w = (WindoW) CreateGraphViewForm (-50, -33, "Kyte-Doolittle-phobicity",
                                      bsp, GRAPH_FILTER);

    if ((gvp = (GraphViewFormPtr) GetObjectExtra (w)) == NULL)
    {
/*      w = Remove (w); */
      return OM_MSG_RET_ERROR;
    }
    else
    {
      gvp->graphtype = GRAPH_FILTER;
      if (ReadAAC ("KSkyte.flt", scr, res) != 24)
        return OM_MSG_RET_ERROR;
      gvp->window = 19;
      gvp->type = AA_FILTER_COMP_KYTE;
      gvp->entityID = ompcp->input_entityID;
      gvp->itemID = ompcp->input_itemID;
      spp = SeqPortNew (bsp, 0, bsp->length-1, 0, Seq_code_iupacaa);
      gvp->sgp = FilterSeq (spp, 0, bsp->length-1, scr, res,
                            &(gvp->window), gvp->type);
      SeqPortFree (spp);
      if (gvp->sgp == NULL)
      {
/*        w = Remove (w); */
        return OM_MSG_RET_ERROR;
      }
      else
      {
        BioseqPtrToGraphViewForm (gvp->form, gvp->sgp);
      }
    }
  }
  else
  {
    return OM_MSG_RET_ERROR;
  }

  Show (w);
  Select (w);
  return OM_MSG_RET_DONE;
}
Esempio n. 20
0
extern Gather_CDSPtr GatherCDSFree (Gather_CDSPtr gcdsp)
{
  SeqLocPtr slp, slpn;
  SeqIdPtr  id;

  if (gcdsp == NULL)
    return NULL;

  MemFree (gcdsp->tableGlobal);
  MemFree (gcdsp->tableRefine);

  slp = gcdsp->slpGlobal;
  while (slp != NULL)
  {
    slpn = slp->next;
    id = SeqLocId (slp);
    if (id != NULL)
      id->next = SeqIdSetFree (id->next);
    SeqLocFree (slp);
    slp = slpn;
  }
  slp = gcdsp->slpRefine;
  while (slp != NULL)
  {
    slpn = slp->next;
    id = SeqLocId (slp);
    if (id != NULL)
      id->next = SeqIdSetFree (id->next);
    SeqLocFree (slp);
    slp = slpn;
  }
  slp = gcdsp->slpAll;
  while (slp != NULL)
  {
    slpn = slp->next;
    id = SeqLocId (slp);
    if (id != NULL)
      id->next = SeqIdSetFree (id->next);
    SeqLocFree (slp);
    slp = slpn;
  }
  slp = gcdsp->slpHit;
  while (slp != NULL)
  {
    slpn = slp->next;
    id = SeqLocId (slp);
    if (id != NULL)
      id->next = SeqIdSetFree (id->next);
    SeqLocFree (slp);
    slp = slpn;
  }
  slp = gcdsp->slpFound;
  while (slp != NULL)
  {
    slpn = slp->next;
    id = SeqLocId (slp);
    if (id != NULL)
      id->next = SeqIdSetFree (id->next);
    SeqLocFree (slp);
    slp = slpn;
  }

  return (Gather_CDSPtr) MemFree (gcdsp);
}
Esempio n. 21
0
static void DoProteins (BioseqPtr bsp, Pointer userdata)

{
  Char               buf [6];
  SeqMgrFeatContext  fcontext;
  Boolean            firstIsSig = FALSE;
  Int4               left = 0, right = 0;
  ScanDataPtr        sdp;
  SeqFeatPtr         sfp, last = NULL;
  SeqInt             sint;
  SeqPortPtr         spp;
  ValNode            vn;

  if (bsp == NULL) return;
  if (! ISA_aa (bsp->mol)) return;

  sdp = (ScanDataPtr) userdata;

  sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
  while (sfp != NULL) {
    if (fcontext.featdeftype == FEATDEF_mat_peptide_aa ||
        fcontext.featdeftype == FEATDEF_sig_peptide_aa ||
        fcontext.featdeftype == FEATDEF_transit_peptide_aa) {
      if (last != NULL) {
        if (fcontext.left <= right) {
          if (firstIsSig && fcontext.left == right &&
              fcontext.featdeftype != FEATDEF_sig_peptide_aa) {

            buf [0] = '\0';

            if (right >= 4) {
              MemSet ((Pointer) &vn, 0, sizeof (ValNode));
              vn.choice = SEQLOC_INT;
              vn.data.ptrvalue = &sint;

              MemSet ((Pointer) &sint, 0, sizeof (SeqInt));
              sint.id = SeqLocId (sfp->location);

              sint.from = right - 3;
              sint.to = right;
              sint.strand = Seq_strand_plus;

              spp = SeqPortNewByLoc (&vn, Seq_code_ncbieaa);
              if (spp != NULL) {
                SeqPortRead (spp, (BytePtr) buf, 4);
                SeqPortFree (spp);
              }
              buf [4] = '\0';
            }

            PrintFeatureMessage (sfp, sdp, "SP", buf);
          } else {
            PrintFeatureMessage (sfp, sdp, "OV", NULL);
          }
        }
      } else {
        last = sfp;
        left = fcontext.left;
        right = fcontext.right;
        if (fcontext.featdeftype == FEATDEF_sig_peptide_aa) {
          firstIsSig = TRUE;
        }
      }
    }
    sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext);
  }
}
Esempio n. 22
0
/****************************************************************************
*
*	get_anchor_coordinates(h_align, anchor_id, sip, x_anchor_pos, x_pos, num)
*
*	load the positions of framework marker of the current sequence and its
*	corresponding positions in the anchor map (normally the NCBI sequence) into
*	two arays to extrapolate the positions for map integration
*	h_align:	the alignment of consistent markers (a.k.a good alignment)
*	anchor_id:	the Seq-id for the anchor map (normally, the NCBI sequence map)
*	sip:		the Seq-id for the current sequence
*	x_anchor_pos: the array to store the positions on the anchor map
*	x_pos:		  the array to store the positions on the current map. each
*				  member of x_anchor_pos and x_pos is a matching pair
*				  both arrays are ordered
*	num:		  the total number of framework markers in the current map.
*				  This is also the size of x_anchor_pos and x_pos
*
*	return TRUE for success and FALSE for fail
*
****************************************************************************/
Boolean get_anchor_coordinates(SeqAlignPtr h_align, SeqIdPtr anchor_id, SeqIdPtr sip, Int4Ptr PNTR x_anchor_pos, Int4Ptr PNTR x_pos, Uint2Ptr num)
{
    StdSegPtr ssp;
    SeqLocPtr slp;
    SeqIdPtr id;
    Int4 n, i, j;
    Int4 a_pos, t_pos;
    SeqAlignPtr align;
    Int4Ptr anchor_pos, pos;
    Boolean change;
    Int4 temp;

    *x_anchor_pos = NULL;
    *x_pos = NULL;
    *num = 0;

    align = h_align;
    if(align == NULL)
        return FALSE;

    /* count the number of the consistent markers for the
    current sip */
    n = 0;
    while(align)
    {
        if(align->segtype == 3)
        {   /*Std-segs*/
            ssp = align->segs;
            for(slp = ssp->loc; slp != NULL; slp = slp->next)
            {
                id = SeqLocId(slp);
                if(SeqIdForSameBioseq(id, sip))
                {
                    ++n;
                    break;
                }
            }
        }
        align = align->next;
    }

    if(n == 0)
        return FALSE;
    anchor_pos = MemNew((size_t)n * sizeof(Int4));
    pos = MemNew((size_t)n * sizeof(Int4));

    /*load the data*/
    n = 0;
    align = h_align;
    while(align)
    {
        if(align->segtype == 3)
        {
            ssp = align->segs;
            a_pos = -1;
            t_pos = -1;
            for(slp = ssp->loc; slp != NULL; slp = slp->next)
            {
                id = SeqLocId(slp);
                if(SeqIdForSameBioseq(id, sip))
                    t_pos = (SeqLocStart(slp) + SeqLocStop(slp))/2;
                else if(SeqIdForSameBioseq(id, anchor_id))
                    a_pos = SeqLocStart(slp);
            }

            /*each time, load a matching pair*/
            if(t_pos >= 0 && a_pos >=0)
            {
                anchor_pos[n] = a_pos;
                pos[n] = t_pos;
                ++n;
            }
        }
        align = align->next;
    }

    /*sort the array*/
    change = TRUE;
    for(i = 0; i<n-1 && change == TRUE; ++i)
    {
        change = FALSE;
        for(j = 0; j<n-i-1; ++j)
        {
            if(anchor_pos[j] > anchor_pos[j+1])
            {
                change = TRUE;
                temp = anchor_pos[j+1];
                anchor_pos[j+1] = anchor_pos[j];
                anchor_pos[j] = temp;

                temp = pos[j+1];
                pos[j+1] = pos[j];
                pos[j] = temp;
            }
        }
    }

    /*check the linear relationship between anchor_pos and pos*/
    temp = pos[0];
    for(i = 1, j= 1; i<n; ++i)
    {
        if(pos[i] >= temp)
        {
            temp = pos[i];
            pos[j] = temp;
            anchor_pos[j] = anchor_pos[i];
            ++j;
        }
    }
    *x_anchor_pos = anchor_pos;
    *x_pos = pos;
    *num = (Uint2)j;
    return TRUE;
}
Esempio n. 23
0
static Int2 Main_old (void)
 
{
   AsnIoPtr aip, xml_aip = NULL;
   BioseqPtr query_bsp, PNTR query_bsp_array;
   BioSourcePtr source;
   BLAST_MatrixPtr matrix;
   BLAST_OptionsBlkPtr options;
   BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
   BlastPruneSapStructPtr prune;
   Boolean db_is_na, query_is_na, show_gi, believe_query=FALSE;
   Boolean html=FALSE;
   CharPtr params_buffer=NULL;
   Int4 number_of_descriptions, number_of_alignments;
   SeqAlignPtr  seqalign, PNTR seqalign_array;
   SeqAnnotPtr seqannot;
   SeqEntryPtr PNTR sepp;
   TxDfDbInfoPtr dbinfo=NULL, dbinfo_head;
   Uint1 align_type, align_view, out_type;
   Uint4 align_options, print_options;
   ValNodePtr mask_loc, mask_loc_start, next_mask_loc;
   ValNodePtr vnp, other_returns, error_returns;
   
   CharPtr blast_program, blast_database, blast_inputfile, blast_outputfile;
   FILE *infp, *outfp, *mqfp=NULL;
   Int4 index, num_bsps, total_length, total_processed = 0;
   Int2 ctr = 1;
   Char prefix[2];
   SeqLocPtr last_mask, mask_slp;
   Boolean done, hits_found;
   Boolean lcase_masking;
   MBXmlPtr mbxp = NULL;
   Boolean traditional_formatting;

    blast_program = "blastn";
    blast_database = myargs [ARG_DB].strvalue;
    blast_inputfile = myargs [ARG_QUERY].strvalue;
    blast_outputfile = myargs [ARG_OUT].strvalue;
    if (myargs[ARG_HTML].intvalue)
        html = TRUE;

    if ((infp = FileOpen(blast_inputfile, "r")) == NULL) {
       ErrPostEx(SEV_FATAL, 1, 0, "mgblast: Unable to open input file %s\n", blast_inputfile);
       return (1);
    }

    align_view = (Int1) myargs[ARG_FORMAT].intvalue;
    /* Geo mod: 
      -- replaced myargs[ARG_OUTTYPE].intvalue with out_type from now on
    */
    out_type=(Int1) myargs[ARG_OUTTYPE].intvalue;
    if (out_type==MGBLAST_FLTHITS || out_type==MGBLAST_HITGAPS) {
      align_view = 12 + (out_type-MGBLAST_FLTHITS ); 
      out_type=MBLAST_ALIGNMENTS;
      //Attention: 12 MUST be the -m mgblast tab option for MGBLAST_FLTHITS format
      // and MGBLAST_HITGAPS = MGBLAST_FLTHITS+1
       if (align_view>12) { // this is MGBLAST_HITGAPS output
            gap_Info=TRUE;
            if (dbgaps_buf==NULL)
                  dbgaps_buf=(CharPtr) Malloc(dbgaps_bufsize + 1);
            if (qgaps_buf==NULL) 
                qgaps_buf=(CharPtr) Malloc(qgaps_bufsize + 1);
            }
      }

    outfp = NULL;

    traditional_formatting = 
        (out_type == MBLAST_ALIGNMENTS ||
         out_type == MBLAST_DELAYED_TRACEBACK);

    if ((!traditional_formatting ||
            (align_view != 7 && align_view != 10 && align_view != 11)) && 
            blast_outputfile != NULL) {
       if ((outfp = FileOpen(blast_outputfile, "w")) == NULL) {
          ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
          return (1);
       }
    }

    //align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na);
    align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na);
    /*
    if (!traditional_formatting)
        believe_query = TRUE;
    else
        believe_query = (Boolean) myargs[ARG_BELIEVEQUERY].intvalue;
    */
    //Geo mod: 
    believe_query=FALSE;
    //If ASN.1 output is requested and believe_query is not set to TRUE,
    //   exit with an error.    
    if (!believe_query && (myargs[ARG_ASNOUT].strvalue ||
                           align_view == 10 || align_view == 11)) {
        ErrPostEx(SEV_FATAL, 1, 0, 
                  "-J option must be TRUE to produce ASN.1 output; before "
                  "changing -J to TRUE please also ensure that all query "
                  "sequence identifiers are unique");
        return -1;
    }
        
    options = BLASTOptionNewEx(blast_program, TRUE, TRUE);
    if (options == NULL)
        return 3;

    options->do_sum_stats = FALSE;
    options->is_neighboring = FALSE;
        options->expect_value  = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;
    number_of_descriptions = myargs[ARG_DESCRIPTIONS].intvalue;    
    number_of_alignments = myargs[ARG_ALIGNMENTS].intvalue;    
    options->hitlist_size = MAX(number_of_descriptions, number_of_alignments);

    if (myargs[ARG_XDROP].intvalue != 0)
           options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
    if (myargs[ARG_XDROP_UNGAPPED].intvalue != 0)
           options->dropoff_2nd_pass = myargs[ARG_XDROP_UNGAPPED].intvalue;
        if (myargs[ARG_XDROP_FINAL].intvalue != 0)
           options->gap_x_dropoff_final = myargs[ARG_XDROP_FINAL].intvalue;

    if (StringICmp(myargs[ARG_FILTER].strvalue, "T") == 0)
       options->filter_string = StringSave("D");
    else
       options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
    
    show_gi = (Boolean) myargs[ARG_SHOWGIS].intvalue;
    options->penalty = myargs[ARG_MISMATCH].intvalue;
    options->reward = myargs[ARG_MATCH].intvalue;
        if (myargs[ARG_GAPOPEN].intvalue >= 0)
        options->gap_open = myargs[ARG_GAPOPEN].intvalue;
        if (myargs[ARG_GAPEXT].intvalue >= 0)
        options->gap_extend = myargs[ARG_GAPEXT].intvalue;

    if (options->gap_open == 0 && options->reward % 2 == 0 && 
        options->gap_extend == options->reward / 2 - options->penalty)
       /* This is the default value */
    options->gap_extend = 0;

    options->genetic_code = 1;
    options->db_genetic_code = 1; /* Default; it's not needed here anyway */
    options->number_of_cpus = myargs[ARG_THREADS].intvalue;
    if (myargs[ARG_WORDSIZE].intvalue != 0)
           options->wordsize = myargs[ARG_WORDSIZE].intvalue;
        if (myargs[ARG_MINSCORE].intvalue == 0)
           options->cutoff_s2 = options->wordsize*options->reward;
        else 
           options->cutoff_s2 = myargs[ARG_MINSCORE].intvalue;

        options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;
        options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;

    options->perform_culling = FALSE;
    /* Kludge */
    options->block_width  = myargs[ARG_MAXPOS].intvalue;

    options->strand_option = myargs[ARG_STRAND].intvalue;
        options->window_size = myargs[ARG_WINDOW].intvalue;
#ifdef DO_NOT_SUPPRESS_BLAST_OP        
        options->mb_template_length = myargs[ARG_TEMPL_LEN].intvalue;
        if (myargs[ARG_TEMPL_LEN].intvalue != 0)
            options->mb_one_base_step = (Boolean) myargs[ARG_EVERYBASE].intvalue;
        options->mb_disc_type = myargs[ARG_TEMPL_TYPE].intvalue;
#endif
        lcase_masking = (Boolean) myargs[ARG_LCASE].intvalue;
        /* Allow dynamic programming gapped extension only with affine 
           gap scores */
        if (options->gap_open != 0 || options->gap_extend != 0)
           options->mb_use_dyn_prog = (Boolean) myargs[ARG_DYNAMIC].intvalue;

        print_options = 0;
        align_options = 0;
        align_options += TXALIGN_COMPRESS;
        align_options += TXALIGN_END_NUM;
        if (show_gi) {
       align_options += TXALIGN_SHOW_GI;
       print_options += TXALIGN_SHOW_GI;
        }
            
        if (align_view) {
       align_options += TXALIGN_MASTER;
       if (align_view == 1 || align_view == 3)
          align_options += TXALIGN_MISMATCH;
       if (align_view == 3 || align_view == 4 || align_view == 6)
          align_options += TXALIGN_FLAT_INS;
       if (align_view == 5 || align_view == 6)
          align_options += TXALIGN_BLUNT_END;
        } else {
       align_options += TXALIGN_MATRIX_VAL;
       align_options += TXALIGN_SHOW_QS;
    }

    if (html) {
       align_options += TXALIGN_HTML;
       print_options += TXALIGN_HTML;
    }

    if (myargs[ARG_GILIST].strvalue)
       options->gifile = StringSave(myargs[ARG_GILIST].strvalue);
   
    if (out_type == MBLAST_ENDPOINTS)
      options->no_traceback = 1;
   else if (out_type == MBLAST_DELAYED_TRACEBACK)
       options->no_traceback = 2;
    else
       options->no_traceback = 0;

    options->megablast_full_deflines = (Boolean) myargs[ARG_FULLID].intvalue;
    options->perc_identity = (FloatLo) myargs[ARG_PERC_IDENT].floatvalue;
    options->hsp_num_max = myargs[ARG_MAXHSP].intvalue;

    if (!believe_query)
           options->megablast_full_deflines = TRUE;
        /*if (options->megablast_full_deflines)
          believe_query = FALSE;*/

    query_bsp_array = (BioseqPtr PNTR) MemNew((MAX_NUM_QUERIES+1)*sizeof(BioseqPtr));
    sepp = (SeqEntryPtr PNTR) MemNew(MAX_NUM_QUERIES*sizeof(SeqEntryPtr));

    StrCpy(prefix, "");

    global_fp = outfp;
        options->output = outfp;

    if (traditional_formatting) {
       if (align_view < 7) {
              if (html) {
                 fprintf(outfp, "<HTML>\n<TITLE>MEGABLAST Search Results</TITLE>\n");
                 fprintf(outfp, "<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" "
                         "VLINK=\"#660099\" ALINK=\"#660099\">\n");
                 fprintf(outfp, "<PRE>\n");
              }
              init_buff_ex(90);
              BlastPrintVersionInfo("mgblast", html, outfp);
              fprintf(outfp, "\n");
              MegaBlastPrintReference(html, 90, outfp);
              fprintf(outfp, "\n");
              
              if(!PrintDbInformation(blast_database, !db_is_na, 70, outfp, html))
                 return 1;
              
              free_buff();
    
#ifdef OS_UNIX
              fprintf(global_fp, "%s", "Searching");
#endif
           }
    }
    
        aip = NULL;
        if (myargs[ARG_ASNOUT].strvalue != NULL) {
           if ((aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w")) == NULL) {
              ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", myargs[ARG_ASNOUT].strvalue);
              return 1;
           }
        }
        else if (align_view == 10 || align_view == 11)
        {
            const char* mode = (align_view == 10) ? "w" : "wb";
            if ((aip = AsnIoOpen (blast_outputfile, (char*) mode)) == NULL) {
                    ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
                    return 1;
            }
        }


        if (align_view == 7) {
           xml_aip = AsnIoOpen(blast_outputfile, "wx");
        }

        if (myargs[ARG_QUERYLOC].strvalue) {       
            Int4 start, end;
            Megablast_GetLoc(myargs[ARG_QUERYLOC].strvalue, &start, &end);
            options->required_start = start - 1;
            options->required_end = end -1;
        }

    done = FALSE;
    while (!done) {
       num_bsps = 0;
       total_length = 0;
       done = TRUE;
       SeqMgrHoldIndexing(TRUE);
       mask_slp = last_mask = NULL;
   
       while ((sepp[num_bsps]=FastaToSeqEntryForDb(infp, query_is_na, NULL,
                               believe_query, prefix, &ctr, 
                               &mask_slp)) != NULL) {
              if (!lcase_masking) /* Lower case ignored */
                 mask_slp = SeqLocFree(mask_slp);
         if (mask_slp) {
           if (!last_mask)
              options->query_lcase_mask = last_mask = mask_slp;
           else {
              last_mask->next = mask_slp;
              last_mask = last_mask->next;
              }
           mask_slp = NULL;
           }
          query_bsp = NULL;
         SeqEntryExplore(sepp[num_bsps], &query_bsp, FindNuc);
         //debug:
         /*
         char query_buffer[255];
         SeqIdWrite(query_bsp->id, query_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
         fprintf(stderr, "===> query_buf=%s\n", query_buffer);
         */
         if (query_bsp == NULL) {
           ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
           return 2;
          }
          
          source = BioSourceNew();
          source->org = OrgRefNew();
          source->org->orgname = OrgNameNew();
          source->org->orgname->gcode = options->genetic_code;
          ValNodeAddPointer(&(query_bsp->descr), Seq_descr_source, source);
          
          query_bsp_array[num_bsps++] = query_bsp;
          
          total_length += query_bsp->length;
          if (total_length > myargs[ARG_MAXQUERY].intvalue || 
          num_bsps >= MAX_NUM_QUERIES) {
         done = FALSE;
         break;
          }
       }

           if (num_bsps == 0)
               break;

       SeqMgrHoldIndexing(FALSE);
       other_returns = NULL;
       error_returns = NULL;
       
       if (out_type==MBLAST_ENDPOINTS) 
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                             blast_database, options,
                             &other_returns, &error_returns,
                             dummy_callback, NULL, NULL, 0, 
                             MegaBlastPrintEndpoints);
       else if (out_type==MBLAST_SEGMENTS) 
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                             blast_database, options,
                             &other_returns, &error_returns,
                             dummy_callback, NULL, NULL, 0,
                             MegaBlastPrintSegments);
       else if (out_type==MBLAST_ALIGN_INFO) {
              /* -- Geo mod: do not print header
              PrintTabularOutputHeader(blast_database, 
                                       (num_bsps==1) ? query_bsp_array[0] : NULL,
                                       NULL, "megablast", 0, believe_query,
                                       global_fp);*/
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                             blast_database, options,
                             &other_returns, &error_returns,
                             dummy_callback, NULL, NULL, 0,
                                MegaBlastPrintAlignInfo);
       } else if (out_type==MBLAST_ALIGNMENTS) {
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                  blast_database, options, &other_returns, 
                                  &error_returns, align_view < 7 ? tick_callback : NULL,
                                  NULL, NULL, 0, NULL);
          }
       
#ifdef OS_UNIX
       fflush(global_fp);
#endif

       if (error_returns) {
             BlastErrorPrint(error_returns);
              for (vnp = error_returns; vnp; vnp = vnp->next) {
                 BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
              }
              ValNodeFree(error_returns);
           }
              
              
       if (traditional_formatting) {
          dbinfo = NULL;
          ka_params = NULL;
          ka_params_gap = NULL;
          params_buffer = NULL;
          mask_loc = NULL;
          matrix = NULL;
          for (vnp=other_returns; vnp; vnp = vnp->next) {
           switch (vnp->choice) {
           case TXDBINFO:
              dbinfo = vnp->data.ptrvalue;
              break;
           case TXKABLK_NOGAP:
              ka_params = vnp->data.ptrvalue;
              break;
           case TXKABLK_GAP:
              ka_params_gap = vnp->data.ptrvalue;
              break;
           case TXPARAMETERS:
              params_buffer = vnp->data.ptrvalue;
              break;
           case TXMATRIX:
              matrix = vnp->data.ptrvalue;
              break;
           case SEQLOC_MASKING_NOTSET:
           case SEQLOC_MASKING_PLUS1:
           case SEQLOC_MASKING_PLUS2:
           case SEQLOC_MASKING_PLUS3:
           case SEQLOC_MASKING_MINUS1:
           case SEQLOC_MASKING_MINUS2:
           case SEQLOC_MASKING_MINUS3:
              ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
              break;
           default:
              break;
           }
          }    
          
#ifdef OS_UNIX
        if(align_view < 7) {
                 fprintf(global_fp, "%s\n", " done");
              }
#endif
          
        if (myargs[ARG_MASKEDQUERY].strvalue) {
                 if ((mqfp = FileOpen(myargs[ARG_MASKEDQUERY].strvalue, "w")) == NULL)
                    ErrPostEx(SEV_WARNING, 1, 0, "Unable to open file %s for masked query\n",
                              myargs[ARG_MASKEDQUERY].strvalue);
              }

        hits_found = FALSE;

        mask_loc_start = next_mask_loc = mask_loc;
        mask_loc = NULL;

        if (align_view == 7) {
           mbxp = PSIXmlInit(xml_aip, "megablast", blast_database, 
                             options, query_bsp_array[0], 0);
           }

        if (seqalign_array) { //results returned back for processing
             ReadDBBioseqFetchEnable ("megablast", blast_database, db_is_na, TRUE);
             for (index=0; index<num_bsps; index++) {
                    seqalign = seqalign_array[index];
                    if (next_mask_loc && 
                        SeqIdComp(SeqLocId((SeqLocPtr)next_mask_loc->data.ptrvalue), 
                                  query_bsp_array[index]->id) == SIC_YES) {
                       mask_loc = (SeqLocPtr) 
                       MemDup(next_mask_loc, sizeof(SeqLoc));
                       next_mask_loc = next_mask_loc->next;
                       mask_loc->next = NULL;
                    }
                    if (mqfp) {
                       /* convert mask locations from all sources into
                          a single seqloc */
                       mask_slp = NULL;
                       if (mask_loc) 
                          mask_slp = blastMergeFilterLocs(mask_slp, 
                              (SeqLocPtr)mask_loc->data.ptrvalue,
                              FALSE, 0, 0);
                       PrintMaskedSequence(query_bsp_array[index], mask_slp,
                                           mqfp, 50, lcase_masking);
                       SeqLocSetFree(mask_slp);
                       }
                    if (seqalign==NULL) {
                       mask_loc = MemFree(mask_loc);
                       continue;
                    }
                    hits_found = TRUE;
                    if (align_view < 7) {
                       init_buff_ex(70);
                       AcknowledgeBlastQuery(query_bsp_array[index], 70, outfp, 
                                             believe_query, html);
                       free_buff();
                       }
                    if (align_view == 8 || align_view == 9) {
                       if (align_view == 9)
                          PrintTabularOutputHeader(blast_database, 
                             query_bsp_array[index], NULL, blast_program, 0,
                             believe_query, global_fp);
                       /* debug:
                       char qbuf[512];
                       strcpy(qbuf, BioseqGetTitle(query_bsp_array[index]));
                       fprintf(stderr, "---> Here: query title=%s\n", qbuf);
                       */
                       BlastPrintTabulatedResults(seqalign, 
                           query_bsp_array[index], NULL, number_of_alignments,
                            blast_program, !options->gapped_calculation, 
                            believe_query, 0, 0, 
                            global_fp, (align_view == 9));
                            

                       ObjMgrFreeCache(0);

                       SeqAlignSetFree(seqalign);
                       mask_loc = MemFree(mask_loc);
                       continue;
                    } 
                       //Geo mod:   
                   else if (align_view>=12)  {
                        MGBlastPrintTab(seqalign, 
                            query_bsp_array[index], number_of_alignments,
                            !options->gapped_calculation, 
                            global_fp);
                        ObjMgrFreeCache(0);

                        SeqAlignSetFree(seqalign);
                        mask_loc = MemFree(mask_loc);
                        continue;
                        }
                    else if(align_view == 7) {
                       IterationPtr iterp;

                       iterp = BXMLBuildOneQueryIteration(seqalign, 
                                  NULL, FALSE, 
                                  !options->gapped_calculation, index, 
                                  NULL, query_bsp_array[index], mask_loc);
                       IterationAsnWrite(iterp, mbxp->aip, mbxp->atp);
                       AsnIoFlush(mbxp->aip);
                       IterationFree(iterp);
                       SeqAlignSetFree(seqalign);
                       mask_loc = MemFree(mask_loc);
                       continue;
                    }
                    seqannot = SeqAnnotNew();
                    seqannot->type = 2;
                    AddAlignInfoToSeqAnnot(seqannot, align_type);
                    seqannot->data = seqalign;
                    if (aip) {
                       SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
                       AsnIoReset(aip);
                    }
                    if (outfp) { /* Uncacheing causes problems with ordinal nos. vs. gi's. */
                       prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_descriptions, NULL);
                       ObjMgrSetHold();
                       init_buff_ex(85);
                       PrintDefLinesFromSeqAlign(prune->sap, 80,
                                                 outfp, print_options, FIRST_PASS, NULL);
                       free_buff();
                       
                       prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_alignments, prune);
                       seqannot->data = prune->sap;
                       if (align_view != 0)
                          ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL,
                                                 NULL, align_options, NULL, 
                                                 mask_loc, NULL);
                       else
                          ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, NULL, mask_loc, FormatScoreFunc);
                       seqannot->data = seqalign;
                       prune = BlastPruneSapStructDestruct(prune);
                       ObjMgrClearHold();
                       ObjMgrFreeCache(0);
                    }
                    seqannot = SeqAnnotFree(seqannot);
                    mask_loc = MemFree(mask_loc);
                 } /* End loop on seqaligns for different queries */
                 ReadDBBioseqFetchDisable();
              } 

              if (mbxp != NULL) {
                 MBXmlClose(mbxp, other_returns, !options->gapped_calculation);
              }

              if (mqfp)
                 FileClose(mqfp);

              if (!hits_found && align_view < 7)
                 fprintf(outfp, "\n\n ***** No hits found ******\n\n");

              matrix = BLAST_MatrixDestruct(matrix);
          
              if(html) 
                 fprintf(outfp, "<PRE>\n");
              init_buff_ex(85);
              dbinfo_head = dbinfo;
              if(align_view < 7) {
                 while (dbinfo) {
                    PrintDbReport(dbinfo, 70, outfp);
                    dbinfo = dbinfo->next;
                 }
              }
              dbinfo_head = TxDfDbInfoDestruct(dbinfo_head);
              
              if (ka_params) {
                 if(align_view < 7)
                    PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
                 MemFree(ka_params);
              }
              if (ka_params_gap) {
                 if(align_view < 7)
                    PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
                 MemFree(ka_params_gap);
              }
              if(align_view < 7)
                 PrintTildeSepLines(params_buffer, 70, outfp);
              MemFree(params_buffer);
              free_buff();
              mask_loc = mask_loc_start;
              while (mask_loc) {
                 SeqLocSetFree(mask_loc->data.ptrvalue);
                 mask_loc = mask_loc->next;
              }
              ValNodeFree(mask_loc_start);
       } else { //not traditional formatting
          /* Just destruct all other_returns parts */
          for (vnp=other_returns; vnp; vnp = vnp->next) {
         switch (vnp->choice) {
         case TXDBINFO:
            TxDfDbInfoDestruct(vnp->data.ptrvalue);
            break;
         case TXKABLK_NOGAP:
         case TXKABLK_GAP:
         case TXPARAMETERS:
            MemFree(vnp->data.ptrvalue);
            break;
         case TXMATRIX:
            BLAST_MatrixDestruct(vnp->data.ptrvalue);
            break;
         case SEQLOC_MASKING_NOTSET:
         case SEQLOC_MASKING_PLUS1:
         case SEQLOC_MASKING_PLUS2:
         case SEQLOC_MASKING_PLUS3:
         case SEQLOC_MASKING_MINUS1:
         case SEQLOC_MASKING_MINUS2:
         case SEQLOC_MASKING_MINUS3:
                    mask_loc = vnp->data.ptrvalue;
                    SeqLocSetFree(mask_loc);
         default:
            break;
         }
          }
       }
       other_returns = ValNodeFree(other_returns);
       MemFree(seqalign_array);
           options->query_lcase_mask = 
              SeqLocSetFree(options->query_lcase_mask);

       /* Freeing SeqEntries can be very expensive, do this only if 
          this is not the last iteration of search */
       if (!done) { 
          for (index=0; index<num_bsps; index++) {
         sepp[index] = SeqEntryFree(sepp[index]);
         query_bsp_array[index] = NULL;
          }       
           }
           total_processed += num_bsps;
    } /* End of loop on complete searches */
        
        aip = AsnIoClose(aip);

        /*if (align_view == 7)
          xml_aip = AsnIoClose(xml_aip);*/

        if (align_view < 7 && html) 
           fprintf(outfp, "</PRE>\n</BODY>\n</HTML>\n");
        if (align_view < 7 && myargs[ARG_LOGINFO].intvalue)
           fprintf(outfp, "Mega BLAST run finished, processed %d queries\n",
                   total_processed);
    MemFree(query_bsp_array);
    MemFree(sepp);
    MemFree(qgaps_buf);
    MemFree(dbgaps_buf);
    options = BLASTOptionDelete(options);
    FileClose(infp);
        FileClose(outfp);
    
    return 0;
}
Esempio n. 24
0
Int2 Main()
{
   AsnIoPtr           aip;
   BioseqPtr          bsp;
   Pointer            dataptr;
   Uint2              datatype;
   Boolean            found;
   SPI_mRNAToHerdPtr  h_head;
   SPI_mRNAToHerdPtr  h_prev;
   SPI_mRNAToHerdPtr  hptr;
   FILE               *ifp;
   Boolean            isGIlist;
   Char               line[60];
   Boolean            lowercase;
   SeqLocPtr          lcaseloc;
   FILE               *ofp;
   FILE               *ofp2;
   SeqAlignPtr        sap;
   SeqAnnotPtr        sanp;
   SeqEntryPtr        sep;
   FILE               *sfp;
   SeqIdPtr           sip;
   SeqLocPtr          slp;
   SPI_bsinfoPtr      spig;
   SPI_bsinfoPtr      spig_head;
   SPI_bsinfoPtr      spig_prev;
   SPI_bsinfoPtr      spim;
   SPI_bsinfoPtr      spim_head;
   SPI_bsinfoPtr      spim_prev;
   SPI_OptionsPtr     spot;
   SPI_RegionInfoPtr  srip = NULL;
   SPI_RegionInfoPtr  srip_head;
   SPI_RegionInfoPtr  srip_prev;
   CharPtr            str;
   CharPtr            txt;

   ID1BioseqFetchEnable("spidey", FALSE);
   LocalSeqFetchInit(FALSE);
   /* standard setup */
   ErrSetFatalLevel (SEV_MAX);
   ErrClearOptFlags (EO_SHOW_USERSTR);
   UseLocalAsnloadDataAndErrMsg ();
   ErrPathReset ();
   if (! AllObjLoad ())
   {
      Message (MSG_FATAL, "AllObjLoad failed");
      return 1;
   }
   if (! SubmitAsnLoad ())
   {
      Message (MSG_FATAL, "SubmitAsnLoad failed");
      return 1;
   }
   if (! FeatDefSetLoad ())
   {
      Message (MSG_FATAL, "FeatDefSetLoad failed");
      return 1;
   }
   if (! SeqCodeSetLoad ())
   {
      Message (MSG_FATAL, "SeqCodeSetLoad failed");
      return 1;
   }
   if (! GeneticCodeTableLoad ())
   {
      Message (MSG_FATAL, "GeneticCodeTableLoad failed");
      return 1;
   }
   if (!GetArgs("SPIDEY", NUMARGS, myargs))
      return 0;
   /* set the error message level high to suppress warnings from BLAST */
   isGIlist = (Boolean)myargs[MYARGGILIST].intvalue;
   txt = myargs[MYARGGENFILE].strvalue;
   ifp = FileOpen(txt, "r");
   spig_head = NULL;
   if (ifp == NULL)
   {
      bsp = SPI_GetBspFromGIOrAcc(txt);
      if (bsp == NULL)
      {
         ErrPostEx(SEV_ERROR, 0, 0, "Can't open genomic input file\n");
         return -1;
      } else
      {
         spig_head = (SPI_bsinfoPtr)MemNew(sizeof(SPI_bsinfo));
         spig_head->bsp = bsp;
      }
   }
   if (spig_head == NULL)
   {
      spig_prev = NULL;
      /* read in the genomic sequence(s) first and put them into bsinfo structures */
      while ((dataptr = ReadAsnFastaOrFlatFile (ifp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE)) != NULL)
      {
         if (datatype == OBJ_BIOSEQ)
         {
            spig = (SPI_bsinfoPtr)MemNew(sizeof(SPI_bsinfo));
            spig->bsp = (BioseqPtr)dataptr;
            if (spig_head == NULL)
               spig_head = spig_prev = spig;
            else
            {
               spig_prev->next = spig;
               spig_prev = spig;
            }
         } else if (datatype == OBJ_SEQENTRY)
         {
            sep = (SeqEntryPtr)dataptr;
            SeqEntryExplore(sep, &spig_head, SPI_FindAllNuc);
         }
      }
      FileClose(ifp);
   }
   if (spig_head == NULL)
   {
      ErrPostEx(SEV_ERROR, 0, 0, "No valid bioseqs in genomic file\n");
      return -1;
   } else if (ISA_aa(spig_head->bsp->mol))
   {
      ErrPostEx(SEV_ERROR, 0, 0, "At least one of the genomic sequences appears to be a protein.\n");
      return -1;
   }
   if (spig_head->next != NULL)
   {
      ErrPostEx(SEV_ERROR, 0, 0, "This version can only process one genomic sequence at a time.  Only the first sequence in this file will be used.\n");
      spig_head->next = NULL;
   }
   spim_head = spim_prev = NULL;
   txt = myargs[MYARGMRNAFILE].strvalue;
   ifp = FileOpen(txt, "r");
   if (ifp == NULL)
   {
      bsp = SPI_GetBspFromGIOrAcc(txt);
      if (bsp == NULL)
      {
         ErrPostEx(SEV_ERROR, 0, 0, "Can't open mRNA input file\n");
         return -1;
      } else
      {
         spim_head = (SPI_bsinfoPtr)MemNew(sizeof(SPI_bsinfo));
         spim_head->bsp = bsp;
      }
   }
   if (spim_head == NULL)
   {
      lowercase = (Boolean)myargs[MYARGMASKED].intvalue;
      lcaseloc = NULL;
      /* if the mRNA has lowercase masking, read it in carefully to record the masking */
      if (lowercase == TRUE)
      {
         while ((sep = FastaToSeqEntryForDb(ifp, TRUE, NULL, TRUE, NULL, NULL, &lcaseloc)) != NULL)
         {
            SeqEntryExplore(sep, &spim_head, SPI_FindAllNuc);
            if (lcaseloc != NULL)  /* put masking info into the bsinfo structure */
            {
               spim = spim_head;
               sip = SeqLocId(lcaseloc);
               found = FALSE;
               while (spim != NULL && !found)
               {
                  if (SeqIdComp(sip, spim->bsp->id) == SIC_YES)
                  {
                     found = TRUE;
                     spim->lcaseloc = lcaseloc;
                  }
                  spim = spim->next;
               }
               lcaseloc = NULL;
            }
         }
      } else if (isGIlist) /* mRNA file is a list of GIs, must fetch the bioseqs */
      {
         str = ReadALine(line, sizeof(line), ifp);
         while (str != NULL)
         {
            bsp = SPI_GetBspFromGIOrAcc(str);
            if (bsp != NULL)
            {
               spim = (SPI_bsinfoPtr)MemNew(sizeof(SPI_bsinfo));
               spim->bsp = bsp;
               if (spim_head == NULL)
                  spim_head = spim_prev = spim;
               else
               {
                  spim_prev->next = spim;
                  spim_prev = spim;
               }
            }
            str = ReadALine(line, sizeof(line), ifp);
         }
      } else /* mRNAs are FASTA or ASN.1, read them all in */
      {
         while ((dataptr = ReadAsnFastaOrFlatFile (ifp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE)) != NULL)
         {
            if (datatype == OBJ_BIOSEQ)
            {
               spim = (SPI_bsinfoPtr)MemNew(sizeof(SPI_bsinfo));
               spim->bsp = (BioseqPtr)dataptr;
               if (spim_head == NULL)
                  spim_head = spim_prev = spim;
               else
               {
                  spim_prev->next = spim;
                  spim_prev = spim;
               }
            } else if (datatype == OBJ_SEQENTRY)
            {
               sep = (SeqEntryPtr)dataptr;
               SeqEntryExplore(sep, &spim_head, SPI_FindAllNuc);
            }
         }
      }
      FileClose(ifp);
   }
   if (spim_head == NULL)
   {
      ErrPostEx(SEV_ERROR, 0, 0, "No valid bioseqs in mRNA file\n");
      return -1;
   } else if (ISA_aa(spim_head->bsp->mol))
   {
      ErrPostEx(SEV_ERROR, 0, 0, "At least one of the mRNA sequences appears to be a protein\n");
      return -1;
   }
   txt = myargs[MYARGTABLE].strvalue;
   if (txt != NULL)
   {
      ifp = FileOpen(txt, "r");
      if (ifp == NULL)
      {
         ErrPostEx(SEV_ERROR, 0, 0, "Unable to open table file\n");
         return -1;
      }
      SPI_ReadFeatureTable(ifp, spim_head);
      spim = spim_head;
      while (spim != NULL)
      {
         if (spim->lcaseloc != NULL)
         {
            slp = (SeqLocPtr)ValNodeNew(NULL);
            slp->choice = SEQLOC_MIX;
            slp->data.ptrvalue = (Pointer)spim->lcaseloc;
            spim->lcaseloc = slp;
         }
         spim = spim->next;
      }
   }
   spim = spim_head;
   spot = (SPI_OptionsPtr)MemNew(sizeof(SPI_Options));
   spot->printaln = myargs[MYARGPRALIGN].intvalue;
   txt = myargs[MYARGOUTFILE].strvalue;
   ofp = FileOpen(txt, "w");
   if (ofp == NULL && spot->printaln != 3)
   {
      ErrPostEx(SEV_ERROR, 0, 0, "Unable to open output file\n");
      return -1;
   }
   if (spot->printaln >= 2)
   {
      txt = myargs[MYARGALNFILE].strvalue;
      ofp2 = FileOpen(txt, "a");
      if (ofp2 == NULL)
      {
         ErrPostEx(SEV_ERROR, 0, 0, "Unable to open output file 2\n");
         return -1;
      }
   } else
      ofp2 = NULL;
   /** ErrSetMessageLevel(SEV_MAX); **/
   spot->firstpasseval = myargs[MYARG1STEVAL].floatvalue;
   spot->secpasseval = myargs[MYARG2NDEVAL].floatvalue;
   spot->thirdpasseval = myargs[MYARG3RDEVAL].floatvalue;
   spot->numreturns = myargs[MYARGNUMMOD].intvalue;
   spot->idcutoff = myargs[MYARGIDCUT].intvalue;
   spot->lencutoff = myargs[MYARGLENCUT].intvalue;
   spot->interspecies = (Boolean)myargs[MYARGSPEC].intvalue;
   spot->printasn = (Boolean)myargs[MYARGASN].intvalue;
   spot->fetchcds = (Boolean)myargs[MYARGGETCDS].intvalue;
   /*spot->ace = (Boolean)myargs[MYARGACEDB].intvalue;*/
   spot->from = myargs[MYARGFROM].intvalue;
   spot->to = myargs[MYARGTO].intvalue;
   spot->makemult = (Boolean)myargs[MYARGMULT].intvalue;
   /*KSK*/
   spot->bigintron = (Boolean)myargs[MYARGXL].intvalue; 
   spot->bigintron_size = myargs[MYARGXL_SIZE].intvalue;
   spot->repeat_db_file = myargs[MYARGREPDB].strvalue;
   txt = myargs[MYARGORG].strvalue;
   if (!StringICmp(txt, "d") || !StringICmp(txt, "D")){
       spot->organism = SPI_FLY;
   }
   else if (!StringICmp(txt, "p") || !StringICmp(txt, "P")){
       spot->organism = SPI_PLANT;
   }
   else if (!StringICmp(txt, "c") || !StringICmp(txt, "C")){
       spot->organism = SPI_CELEGANS;
   }
   else if (!StringICmp(txt, "m") || !StringICmp(txt, "M")){
       spot->organism = SPI_DICTY;
   }
   else {
       spot->organism = SPI_VERTEBRATE;
   }
   sap = NULL;
   if (spot->printasn)
      spot->sap_head = &sap;
   txt = myargs[MYARGSTRAND].strvalue;
   if (txt != NULL)
   {
      if (StrChr(txt, 'p') || StrChr(txt, 'P'))
         spot->strand = Seq_strand_plus;
      else
         spot->strand = Seq_strand_minus;
   } else
      spot->strand = Seq_strand_both;
   /*txt = myargs[MYARGDRAFTFILE].strvalue;
   if (txt != NULL)
      spot->draftfile = StringSave(txt);*/
   txt = myargs[MYARGDSPLICE].strvalue;
   if (txt != NULL)
   {
      sfp = FileOpen(txt, "r");
      SPI_GetSpliceInfo(spot, sfp, TRUE);
      FileClose(sfp);
   }
   txt = myargs[MYARGASPLICE].strvalue;
   if (txt != NULL)
   {
      sfp = FileOpen(txt, "r");
      SPI_GetSpliceInfo(spot, sfp, FALSE);
      FileClose(sfp);
   }
   h_head = h_prev = NULL;
   srip_head = srip_prev = NULL;
   while (spim != NULL)
   {
      spot->lcaseloc = spim->lcaseloc;
      if (spot->draftfile == NULL)
         srip = SPI_AlnSinglemRNAToGen(spig_head, spim, ofp, ofp2, spot);
      else
      {
         hptr = SPI_AlnSinglemRNAToPieces(spig_head, spim, ofp, ofp2, spot);
         if (h_head != NULL)
         {
            h_prev->next = hptr;
            h_prev = hptr;
         } else
            h_head = h_prev = hptr;
      }
      if (srip != NULL)
      {
         if (srip_head != NULL)
         {
            srip_prev->next = srip;
            srip_prev = srip;
         } else
            srip_head = srip_prev = srip;
      }
      spim = spim->next;
   }
   if (spot->makemult)
   {
      SPI_MakeMultipleAlignment(srip_head);
      SPI_PrintMultipleAlignment(srip_head, FALSE, spig_head->bsp, ofp);
      SPI_RegionListFree(srip_head);
   } else
      SPI_RegionListFree(srip_head);
   /* create the ASN.1 output, if requested; need to use the continuous alignment */
   /* that was generated */
   if (spot->printasn && *(spot->sap_head) != NULL && spot->draftfile == NULL)
   {
      sanp = SeqAnnotForSeqAlign(*(spot->sap_head));
      txt = myargs[MYARGASNFILE].strvalue;
      aip = AsnIoOpen(txt, "w");
      SeqAnnotAsnWrite(sanp, aip, NULL);
      AsnIoClose(aip);
      SeqAlignSetFree(*(spot->sap_head));
   }
   FileClose(ofp);
   FileClose(ofp2);
   SPI_OptionsFree(spot);
   SPI_bsinfoFreeList(spim_head);
   SPI_bsinfoFreeList(spig_head);
   LocalSeqFetchDisable();
   ID1BioseqFetchDisable();
   return 0;
}
Esempio n. 25
0
SeqLocPtr SeqLocDust (SeqLocPtr this_slp,
		      Int2 level, Int2 window, Int2 minwin, Int2 linker)
{
	SeqLocPtr	next_slp, slp = NULL;
	ValNodePtr	vnp = NULL;

	SeqIdPtr	id;
	BioseqPtr	bsp;
	SeqPortPtr	spp;

	DREGION	PNTR reg, PNTR regold;
	Int4 nreg;
	Int4 start, end, l;
	Int2 loopDustMax = 0;

/* error msg stuff */
	ErrSetOptFlags (EO_MSG_CODES);

	if (!this_slp)
	{
		ErrPostEx (SEV_ERROR, 2, 1,
			  "no sequence location given for dusting");
                ErrShow ();
		return slp;
	}

/* place for dusted regions */
	regold = reg = MemNew (sizeof (DREGION));
	if (!reg)
	{
		ErrPostEx (SEV_FATAL, 2, 2,
			   "memory allocation error");
                ErrShow ();
		return slp;
	}
	reg->from = 0;
	reg->to = 0;
	reg->next = NULL;

/* count seqlocs */
	next_slp = NULL;
	while ((next_slp = SeqLocFindNext (this_slp, next_slp)) != NULL)
			loopDustMax++;
	if (!loopDustMax)
	{
		ErrPostEx (SEV_ERROR, 2, 3,
			   "can not find next seq loc");
                ErrShow ();
	}

/* loop for dusting as needed */
	next_slp = NULL;
	while ((next_slp = SeqLocFindNext (this_slp, next_slp)) != NULL)
	{
/* offsets into actual sequence */
		start = SeqLocStart (next_slp);
		end = SeqLocStop (next_slp);

/* if all goes okay should get a seqport pointer */
		id = SeqLocId (next_slp);
		if (!id)
		{
			ErrPostEx (SEV_ERROR, 2, 4,
				  "no bioseq id");
			ErrShow ();
			continue;
		}
		bsp = BioseqLockById (id);
		if (!bsp)
		{
			ErrPostEx (SEV_ERROR, 2, 5,
				  "no bioseq");
			ErrShow ();
			continue;
		}
		if (!ISA_na (bsp->mol))
		{
			ErrPostEx (SEV_WARNING, 2, 6,
				  "not nucleic acid");
			ErrShow ();
			BioseqUnlock (bsp);
			continue;
		}
		spp = SeqPortNew (bsp, start, end, 0, Seq_code_ncbi2na);
		BioseqUnlock (bsp);
		if (!spp)
		{
			ErrPostEx (SEV_ERROR, 2, 7,
				   "sequence port open failed");
			ErrShow ();
			continue;
		}

		l = spp->totlen;
		nreg = dust_segs (l, spp, start, reg,
				  (Int4)level, (Int4)window, (Int4)minwin, (Int4)linker);
		slp = slpDust (spp, slp, id, &vnp,
			       reg, nreg, loopDustMax);
/* find tail - this way avoids referencing the pointer */
		while (reg->next) reg = reg->next;

		SeqPortFree (spp);
	}

/* clean up memory */
	reg = regold;
	while (reg)
	{
		regold = reg;
		reg = reg->next;
		MemFree (regold);
	}

	return slp;
}
Esempio n. 26
0
/******************************************************************
*
*	aa_to_dnaloc(sfp, aa_start, aa_stop)
*	map the amino acid sequence to a list of Seq-locs in the 
*	DNA sequence
*
******************************************************************/
NLM_EXTERN SeqLocPtr aa_to_dnaloc(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
  Int4 frame_offset, start_offset;	/*for determine the reading frame*/
  SeqLocPtr slp = NULL;
  CdRegionPtr crp;
  SeqLocPtr dna_loc, loc;			/*for the dna location*/

  Boolean is_end;			/**is the end for process reached?**/
  Int4 p_start=0, p_stop=0;		/**protein start & stop in defined
					corresponding CdRegion Seq-loc**/
  Int4 cur_pos;			/**current protein position in process**/
  Int4 cd_len;		/**length of the cDNA for the coding region**/

  Boolean is_new;		/**Is cur_pos at the begin of new exon?**/
  Boolean end_partial;		/*the end of aa is a partial codon*/
  Int4 d_start, d_stop;		/*the start and the stop of the DNA sequence*/
  Int4 offset;			/*offset from the start of the current exon*/
  Int4 aa_len;
  Uint1 strand;




   if(sfp->data.choice !=3)
	return NULL;


   crp = sfp->data.value.ptrvalue;
   if(!crp)
	return NULL;
   if(crp->frame>0)
	frame_offset = crp->frame-1;
   else
	frame_offset = 0;
   start_offset = frame_offset;


   cur_pos= aa_start;
   cd_len = 0;
   is_end = FALSE;
   p_start = 0;
   slp = NULL;
   dna_loc= NULL;
   while(!is_end && ((slp = SeqLocFindNext(sfp->location, slp))!=NULL))
   {
	cd_len += SeqLocLen(slp);
	end_partial = ((cd_len - start_offset)%3 != 0);
	p_stop = (cd_len - start_offset)/3 -1;
	if(end_partial)
	   ++p_stop;
	if(p_stop > aa_stop || (p_stop == aa_stop && !end_partial))
	{
	   p_stop = aa_stop;		/**check if the end is reached**/
	   is_end = TRUE;
	}

	if(p_stop >= cur_pos)	/*get the exon*/
	{
		is_new = (p_start == cur_pos);	/*start a new exon?*/
		if(is_new)	/**special case of the first partial**/
		   offset = 0;
		else
		{
		   if(frame_offset && p_start >0)
			++p_start;
		   offset = 3*(cur_pos - p_start) + frame_offset;
		}
		strand = SeqLocStrand(slp);
		if(strand == Seq_strand_minus)
		   d_start = SeqLocStop(slp) - offset;
		else
		   d_start = SeqLocStart(slp) + offset;

		d_stop = d_start;
		aa_len = MIN(p_stop, aa_stop) - cur_pos +1;
		if(strand == Seq_strand_minus)
		{
			d_stop -= 3*aa_len;
			d_stop = MAX(d_stop, SeqLocStart(slp));
			loc = SeqLocIntNew(d_stop, d_start, strand, SeqLocId(slp));
		}
		else
		{
			d_stop += 3*aa_len;
			d_stop = MIN(d_stop, SeqLocStop(slp));
			loc = SeqLocIntNew(d_start, d_stop, strand, SeqLocId(slp));
		}
		ValNodeLink(&dna_loc, loc);

		if(end_partial)
			cur_pos = p_stop;
		else
			cur_pos = p_stop+1;
	}



	if(end_partial)
	    p_start = p_stop;
	else
	    p_start = p_stop +1;

	frame_offset = (cd_len - start_offset)%3;
	 if(frame_offset >0)
	    frame_offset = 3-frame_offset;

   }/**end of while(slp && !is_end) **/
   return dna_loc;

}
Esempio n. 27
0
/*********************************************************************
*
*	make_cds_paragraph(sfp, aa_start, aa_stop)
*	return a buffer for the display of 3-codon under one amino 
*	acid format. It also includes the new line characters 
*	This is what Jonathan K. desires to have for the sequin 
*	doc object
*	aa_start, aa_stop: start and stop in the amino acid sequence
*
*********************************************************************/
NLM_EXTERN CharPtr make_cds_paragraph(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
	BioseqPtr pbsp;
	SeqPortPtr spp;
	ValNodePtr cvp_node, curr;
	CodonVectorPtr cvp;
	CharPtr docbuf = NULL;
	Int4 num, buf_size;
	Uint1 residue;
	Char p_name[30];
	Int4 space_len, i;
	CharPtr buf;
	Int4 pos;
	Int4 max_len = 150;
	Boolean extra_space;

	if(sfp == NULL || sfp->data.choice !=3)
		return NULL;
	if(sfp->product == NULL)
		return NULL;
	pbsp = BioseqLockById(SeqLocId(sfp->product));
	if(pbsp == NULL)
		return NULL;

	cvp_node = aa_to_codon(sfp, aa_start, aa_stop);
	num = 1;
	for(curr = cvp_node; curr !=NULL; curr = curr->next)
		num +=3;
	buf_size = num * max_len;
	/*  #ifdef WIN_16
		if(buf_size > 10000)
		{
			Message(MSG_ERROR, "Can not allocate enough space ");
			return NULL;
		}
	#endif
	*/

	docbuf = MemNew((size_t)(buf_size) * sizeof(Char));

	MuskSeqIdWrite(pbsp->id, p_name, B_SPACE, PRINTID_TEXTID_ACCESSION, TRUE, FALSE);
	/*SeqIdWrite (pbsp->id, p_name, PRINTID_FASTA_SHORT, 10);*/
	pos = 0;
	pos+= print_label_to_buffer(docbuf+pos, p_name, (aa_start+1), 0, FALSE, 
		FALSE, B_SPACE, POS_SPACE);

	/*print the amino acid sequence into buffer*/
	spp = SeqPortNew(pbsp, aa_start, aa_stop, Seq_strand_plus, Seq_code_ncbieaa);
	while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF )
		docbuf[pos++] = residue;
	docbuf[pos++] = '\n';
	SeqPortFree(spp);

	for(curr = cvp_node; curr !=NULL; curr = curr->next)
	{
		cvp = curr->data.ptrvalue;
		SeqIdWrite (cvp->sip, p_name, PRINTID_FASTA_SHORT, 10);
		extra_space = (cvp->aa_index == 0);
		for(i=0; i<3; ++i)
		{
			space_len = cvp->aa_index;
			buf = cvp->buf[i] + cvp->aa_index;
			if(i == cvp->frame)
			{
				pos+= print_label_to_buffer(docbuf+pos, p_name, 
					cvp->dna_pos, cvp->strand, extra_space, FALSE, B_SPACE, POS_SPACE);
			}
			else
				pos+= print_label_to_buffer(docbuf+pos, NULL, -1, 
				0, extra_space, FALSE, B_SPACE, POS_SPACE);
			sprintf(docbuf+pos, "%s\n", buf);
			pos += (StringLen(buf) +1);

		}
	}

	docbuf[pos++] = '\n';
	docbuf[pos] = '\0';
	
	free_cvp_list(cvp_node);
	BioseqUnlock(pbsp);

	return docbuf;
}
Esempio n. 28
0
/******************************************************************
*
*	aa_to_codon(sfp, aa_start, aa_stop)
*	generate a list of CodonVecotr to show the codons of an 
*	amino acid sequence
*	sfp: the Seq-feat for cds
*	aa_start: the start position of protein sequence
*	aa_stop the stop position of protein sequence
*
******************************************************************/
NLM_EXTERN ValNodePtr aa_to_codon(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
  BioseqPtr bsp;

  Int4 frame_offset, start_offset;
  SeqLocPtr slp = NULL;
  SeqLocPtr cdloc;
  CdRegionPtr crp;
  Uint1 frame;

  Boolean is_end;			/**is the end for process reached?**/
  Int4 p_start=0, p_stop=0;		/**protein start & stop in defined
					corresponding CdRegion Seq-loc**/

  Int4 line_len;
  Int4 cur_pos;			/**current protein position in process**/
  Int4 cd_len;		/**length of the cDNA for the coding region**/

  Int2 i, j;
  Int2 k, n;
  CharPtr PNTR buf;

  Boolean is_new;		/**Is cur_pos at the begin of new Seq-loc?**/
  CharPtr temp;

  SeqPortPtr spp;
  Uint1 residue;

  Boolean end_partial;
  Int4 d_start, seq_pos;
  Int2 pos;

  ValNodePtr head= NULL;
  CodonVectorPtr cvp;
  Boolean prt_stop_codon;
  Uint2 exon;




   if(sfp->data.choice !=3)
	return NULL;

   crp = sfp->data.value.ptrvalue;
   if(!crp)
	return NULL;
   frame = crp->frame;
   cdloc = sfp->location;
   if(cdloc == NULL )
	return NULL;

   if(frame>0)
	frame_offset = frame-1;
   else
	frame_offset = 0;
   start_offset = frame_offset;

   prt_stop_codon = (aa_stop == SeqLocStop(sfp->product));
   line_len = (aa_stop - aa_start + 1) + 1;
					/* +1 for the possible partial start codon*/
   if(prt_stop_codon)/*can be either as a stop codon or partial stop*/
	++line_len;
   buf = MemNew((size_t)3 * sizeof(CharPtr));
   for(i =0; i<3; ++i)
	buf[i] = MemNew((size_t)(line_len + 1) * sizeof (Char));
		

   cur_pos= aa_start;
   cd_len = 0;
   is_end = FALSE;
   p_start = 0;
   slp = NULL;
   exon = 0;
   while(!is_end && ((slp = SeqLocFindNext(cdloc, slp))!=NULL))
   {
	++exon;
	cd_len += SeqLocLen(slp);
	end_partial = ((cd_len - start_offset)%3 != 0);
	p_stop = (cd_len - start_offset)/3 -1;
	if(end_partial)
	   ++p_stop;
	if(p_stop > aa_stop || (p_stop == aa_stop && !end_partial))
	{
	   p_stop = aa_stop;		/**check if the end is reached**/
	   is_end = TRUE;
	}

	if(p_stop >= cur_pos)	/*get the exon*/
	{
	   bsp = BioseqLockById(SeqLocId(slp));
	   if(bsp)
	   {
		is_new = (p_start == cur_pos);	/*start a new exon?*/
		cvp = MemNew(sizeof(CodonVector));
		cvp->sip = SeqIdDup(find_sip(bsp->id));
		cvp->strand = SeqLocStrand(slp);
		cvp->exonCount = exon;
		if(is_new)
		{
			if(frame_offset == 0)
				cvp->frame = 0;
			else
				cvp->frame = 3- (Uint1)frame_offset;
		}
		else
			cvp->frame = 0;
		if(cur_pos==0 && frame_offset > 0)	/*partial start codon*/
			cvp->aa_index = 0;
		else
			cvp->aa_index = 1;
		if(is_new)	/**special case of the first partial**/
		   d_start = SeqLocStart(slp);
		else
		{
		   if(frame_offset && p_start >0)
			++p_start;
		   d_start = SeqLocStart(slp) + 3*(cur_pos - p_start) + frame_offset;
		}
	    /**p_start is the start position of aa in the current Seq-loc
	       cur_pos is the current aa that is in process. The offset will
	       help to located the position on the DNA Seq-loc for translation
	       d_start is the position of the starting DNA in the coordinates
	       of DNA segment, used for mark the sequence
	       **/

		seq_pos = d_start - SeqLocStart(slp);	/**the pos in spp**/
		if(SeqLocStrand(slp)== Seq_strand_minus)
		   d_start = SeqLocStop(slp) - seq_pos;
		cvp->dna_pos = d_start;

		n = (Int2)cur_pos - (Int2)aa_start + cvp->aa_index;	/*position in buffer*/
		for(i =0; i<3; ++i)
			make_empty(buf[i], (Int2)line_len);
		spp = SeqPortNewByLoc(slp, Seq_code_iupacna);
		SeqPortSeek(spp, seq_pos, SEEK_SET);
		/**store the partial codons**/
		if(is_new && frame_offset > 0)
		{
		   k = (Int2)frame_offset;
		   while(k > 0)
		   {
			residue = SeqPortGetResidue(spp);
			temp = buf[3-k];	/**the position**/
			pos = n;
			temp[pos] = TO_LOWER(residue);
			--k;
		   }
		   ++n;
		   if(cur_pos!=0)
			++cur_pos;
		}


	     	/**load  the codons**/
		k =0;
		while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF && cur_pos <= p_stop)
		{
		   j= (Uint1)k%3;
		   temp = buf[j];
		   temp[n] = TO_LOWER(residue);
		   if(j ==2)
		   {		/**the last base**/
			++n;
		 	if(!prt_stop_codon|| !is_end) /*for the last codon*/
			/**prt_end controls to print the whole loc**/
		   	   ++cur_pos;
		   }
		   ++k;
		}	/**end of while**/

		SeqPortFree(spp);

		for(i =0; i<3; ++i)
		   cvp->buf[i] = StringSave(buf[i]);
		ValNodeAddPointer(&head, 0, (Pointer)cvp);

		BioseqUnlock(bsp);
	   }/*end of if(bsp)*/
	}/**end of if for matched intervals**/

	if(end_partial)
	    p_start = p_stop;
	else
	    p_start = p_stop +1;

	frame_offset = (cd_len - start_offset)%3;
	 if(frame_offset >0)
	    frame_offset = 3-frame_offset;

   }/**end of while(slp && !is_end) **/

   for(i=0; i<3; ++i)
	MemFree(buf[i]);
   MemFree(buf);

   return head;
}
Esempio n. 29
0
static void ConsignProc (ButtoN b)
{
  XOSPtr         xosp;
  XISPtr         xisp;

  ComPatPtr      cpp, cpph;
  ValNodePtr     orflist;
  SeqLocPtr      slp, slpn;
  Int4           start, stop;
  Uint1          strand;
  SeqPortPtr     spp;
  Uint1Ptr       aaseq;
  Int4           ntpos, aapos;
  Uint1          cdn[3];

  SeqAlignPtr    sap, sapn;

  FloatHi        probcut;
  Int4           clustmin, findmin;

  Int4           i, n, endpos, XLength, XScale, shift;
  Int4           iframe, frame, top, orftop[6];
  FloatHiPtr     score, expandscore;
  FloatHi        maxscore;
  Int4Ptr        tableGlobal;

  SeqGraphPtr  sgp, sgpn;
  WindoW       w;
  VieweR       v;
  GrouP        g;
  SegmenT      seg;
  GraphSentPtr gsp;
  Char         numberbuffer[32];

  if ((xosp = (XOSPtr) GetObjectExtra (b)) == NULL)
    return;

  if (xosp->bsp == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "No Bioseq");
    ErrShow ();
    return;
  }

  WatchCursor ();
  cpph = cpp = ReadPrositePattern (xosp->pattern_file, TRUE, -1, NULL, NULL);
  if (cpph == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101,
               "read failed %s", xosp->pattern_file);
    ErrShow ();
    ArrowCursor ();
    return;
  }

  xosp->orflist =  GetOrfList (xosp->bsp, (Int2) (xosp->orfcut));
  xosp->orflist =  ClearNonMetOrfs (xosp->orflist);
  orflist = xosp->orflist;
  while (orflist != NULL)
  {
    slp = (SeqLocPtr) orflist->data.ptrvalue;
    if (slp->choice == 0)
    {
      orflist = orflist->next;
      continue;
    }
    if (slp->choice == SEQLOC_MIX)
      slp = (SeqLocPtr) slp->data.ptrvalue;
    start = SeqLocStart (slp);
    stop = SeqLocStop (slp);
    strand = SeqLocStrand (slp);
    if (strand != Seq_strand_both)
      strand = Seq_strand_both;
    if (stop - start + 1 >= xosp->minimumseed)
    {
      spp = SeqPortNew (xosp->bsp, start, stop, strand, Seq_code_ncbi4na);
      aaseq = (Uint1Ptr) MemNew ((size_t)
                                 (sizeof (Uint1) * (((stop-start)/3)+2)));
      ntpos = start;
      aapos = 0;
      while (ntpos < start+3)
      {
        cdn[0] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[1] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[2] = SeqPortGetResidue (spp);
        ntpos++;
        aaseq[aapos] = AAForCodon (cdn, xosp->gcdi);
        aapos++;
      }
      while (ntpos <= stop)
      {
        cdn[0] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[1] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[2] = SeqPortGetResidue (spp);
        ntpos++;
        aaseq[aapos] = AAForCodon (cdn, xosp->gcd);
        aapos++;
      }
      SeqPortFree (spp);
      aaseq[aapos] = 0;
      cpp = cpph;
      while (cpp != NULL)
      {
        sap = PatternMatch (aaseq, 0, Seq_strand_plus, SeqLocId (slp),
                            cpp, 0, Seq_strand_unknown, FALSE);
        if (sap != NULL)
          break;
        cpp = cpp->nextpattern;
      }
      MemFree (aaseq);
      if (sap != NULL)
      {
        SeqLocLink (&(xosp->slps), SeqLocDup (slp));
      }
      while (sap != NULL)
      {
        sapn = sap->next;
        SeqAlignFree (sap);
        sap = sapn;
      }
    }
    orflist = orflist->next;
  }
  ComPatFree (cpph);

  orflist = xosp->orflist;
  while (orflist != NULL)
  {
    slp = (SeqLocPtr) orflist->data.ptrvalue;
    if (slp->choice > 0)
      SeqLocLink (&(xosp->slpa), SeqLocDup (slp));
    while (slp != NULL)
    {
      slpn = slp->next;
      SeqLocFree (slp);
      slp = slpn;
    }
    orflist->data.ptrvalue = NULL;
    orflist = orflist->next;
  }
  xosp->orflist = ValNodeFree (xosp->orflist);

  probcut = xosp->probcut;
  clustmin = xosp->clustmin;
  findmin = xosp->findmin;

  xosp->slpb = FindSimilarBiasOrfs (xosp->sep, probcut, clustmin, findmin,
                                    xosp->slps, xosp->slpa);

  tableGlobal = CodonTableFromSeqLoc (xosp->bsp, xosp->slpb);
  seg = NULL;
  top = 0;
  xisp = (XISPtr) MemNew (sizeof (XIS));
  frame = 0;
  for (iframe = 0; iframe < 6; iframe++)
  {
    endpos = (xosp->bsp->length + 3 - frame - xosp->window) / 3;
    if (iframe < 3)
      score = BiasScoreBioseq (xosp->bsp, tableGlobal, xosp->window,
                               frame, Seq_strand_plus);
    else
      score = BiasScoreBioseq (xosp->bsp, tableGlobal, xosp->window,
                               frame, Seq_strand_minus);
    maxscore = 0.0;
    for (i = 0; i < endpos; i++)
      if (score[i] > maxscore)
        maxscore = score[i];
    expandscore = (FloatHiPtr) MemNew (sizeof (FloatHi) * xosp->bsp->length);
    for (i = 0; i < xosp->window/2; i++)
      expandscore[i] = maxscore;
    n = 0;
    while (i < xosp->bsp->length)
    {
      if (n < endpos)
        expandscore[i] = score[n];
      else
        expandscore[i] = maxscore;
      i++;
      if (i%3 == 0)
        n++;
    }
    MemFree (score);
    score = expandscore;
    sgp = SeqGraphNew ();
    if (xisp->sgp == NULL)
    {
      xisp->sgp = sgp;
    }
    else
    {
      sgpn = xisp->sgp;
      while (sgpn->next != NULL)
        sgpn = sgpn->next;
      sgpn->next = sgp;
    }
    XLength = xosp->bsp->length;
    if (XLength > 1200)
      XLength = 1200;
    XScale = xosp->bsp->length / XLength;
    if (xosp->bsp->length % XLength != 0)
      XScale++;
    sgp->loc = SeqLocIntNew (0, xosp->bsp->length-1, xosp->bsp->strand,
                             xosp->bsp->id);
    sgp->flags[2] = 1;
    sgp->numval = xosp->bsp->length;
    sgp->values = (Pointer) score;
    sgp->max.realvalue = maxscore;
    sgp->min.realvalue = 0.0;
    sgp->flags[1] = 1;
    sgp->a = 4.0;
    sgp->b = 0.0;
    if (seg == NULL)
      seg = CreatePicture ();
    if ((gsp = AddGraphSentinelToPicture (sgp, xosp->bsp, seg, 0,
                                          top, 0, NULL)) != NULL)
    {
      sprintf (numberbuffer, "%ld", 1L);
      AddLabel (seg, gsp->box.left, gsp->bottom-20,
                numberbuffer, SMALL_TEXT, 0, MIDDLE_CENTER, 0);
      sprintf (numberbuffer, "%ld", (long) xosp->bsp->length);
      AddLabel (seg, gsp->box.left+xosp->bsp->length, gsp->bottom-20,
                numberbuffer, SMALL_TEXT, 0, MIDDLE_CENTER, 0);
    }
    shift = (Int4) (maxscore*sgp->a);
    orftop[iframe] = top - shift - 38;
    top -= (shift+56);
    frame++;
    if (frame == 3)
    {
      top -= 24;
      frame = 0;
    }
  }
  frame = 0;
  for (iframe = 0; iframe < 6; iframe++)
  {
    if (iframe < 3)
      strand = Seq_strand_plus;
    else
      strand = Seq_strand_minus;
    shift = 0;
    if (xosp->slpa != NULL)
    {
      AddOrfClass (xosp->slpa, seg, orftop, iframe, frame,
                   shift, strand, YELLOW_COLOR, 5);
      shift += 4;
    }
    if (xosp->slpk != NULL)
    {
      AddOrfClass (xosp->slpk, seg, orftop, iframe, frame,
                   shift, strand, GREEN_COLOR, 5);
      shift += 4;
    }
    if (xosp->slpb != NULL)
    {
      AddOrfClass (xosp->slpb, seg, orftop, iframe, frame,
                   shift, strand, BLUE_COLOR, 5);
      shift += 4;
    }
    if (xosp->slps != NULL)
    {
      AddOrfClass (xosp->slps, seg, orftop, iframe, frame,
                   shift, strand, RED_COLOR, 5);
    }
    frame++;
    if (frame == 3)
      frame = 0;
  }
  MemFree (tableGlobal);

  start = 20;
  stop = 20 + (50*XScale);
  top = orftop[5] - 40;
  if (xosp->slpa != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  YELLOW_COLOR, SOLID_LINE, SOLID_SHADING, 5,
                  0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH,
                  0);
    AddLabel (seg, stop+(20*XScale), top,
              "All Met-init'd ORFs equal to or greater than 50 codons",
              SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slpk != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  GREEN_COLOR, SOLID_LINE, SOLID_SHADING, 5,
                  0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH,
                  0);
    AddLabel (seg, stop+(20*XScale), top, "Annotated (reported) ORFs",
              SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slpb != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLUE_COLOR, SOLID_LINE, SOLID_SHADING, 5,
                  0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH,
                  0);
    AddLabel (seg, stop+(20*XScale), top, "Similar codon usage bias ORFs to seed ORFs",
              SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slps != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  RED_COLOR, SOLID_LINE, SOLID_SHADING, 5,
                  0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH,
                  0);
    AddLabel (seg, stop+(20*XScale), top, "Pattern match seed ORFs",
              SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }

  xisp->picture = seg;

  w = FixedWindow (10, 10, 640, 720, "Consign", CloseGraphWindowProc);
  SetObjectExtra (w, xisp, CleanUpGraphWindow);
  g = HiddenGroup (w, -1, 0, NULL);
  v = CreateViewer (g, 560, 640, TRUE, TRUE);
  AttachPicture (v, seg, INT4_MIN, INT4_MAX, UPPER_LEFT, XScale, 1, NULL);
  PushButton (g, "Close", CloseGraphWindowButton);
  RealizeWindow (w);
  ArrowCursor ();
  Show (w);

  return;
}
Esempio n. 30
0
Int2 BLAST_FormatResults(SBlastSeqalignArray* seqalign_arr, Int4 num_queries, 
        SeqLoc* query_slp, SeqLoc* mask_loc_head, 
        BlastFormattingInfo* format_info,
        Blast_SummaryReturn* sum_returns)
{  
   SeqLoc* mask_loc;
   SeqLoc* next_mask_loc = NULL;
   SeqLoc* tmp_loc = NULL;
   Uint1 align_type;
   Boolean db_is_na;
   Int4 query_index;
   SeqLoc* slp;
   SeqLoc* mask_slp;
   AsnIo* aip = NULL;
   MBXml* xmlp = NULL;
   FILE *outfp = NULL;
   BlastFormattingOptions* format_options;
   EAlignView align_view;
   Boolean ungapped;

   ASSERT(format_info && format_info->format_options && 
          format_info->search_options && query_slp);

   format_options = format_info->format_options;
   align_view = format_options->align_view;
   ungapped = 
       !format_info->search_options->score_options->gapped_calculation;

   if (align_view == eAlignViewXml) {
       const Int4 kXmlFlag = 0; /* Change to BXML_INCLUDE_QUERY if inclusion
                                   of query sequence is desired in the XML
                                   output header. */
       xmlp = format_info->xmlp;
       if (!xmlp) {
           xmlp = format_info->xmlp = 
               s_MBXmlInit(format_info->aip, format_info->program_name, 
                           format_info->db_name, query_slp, kXmlFlag, 
                           sum_returns->search_params);
       }
   } else if (align_view == eAlignViewAsnText || 
              align_view == eAlignViewAsnBinary)
       aip = format_info->aip; 
   else 
       outfp = format_info->outfp;

   align_type = 
       GetOldAlignType(format_info->search_options->program, &db_is_na);

   if (format_info->db_name) {
       /* Enable fetching from the BLAST database. */
      ReadDBBioseqFetchEnable ("blast", format_info->db_name, db_is_na, TRUE);
      /* If database is translated, set the genetic code for tranlation. */
      if (Blast_SubjectIsTranslated(format_info->search_options->program)) {
          ReadDBBioseqSetDbGeneticCode(format_info->search_options->
                                       db_options->genetic_code);
      }
   }

   if(format_info->search_options->score_options->is_ooframe) {
        ErrPostEx(SEV_WARNING, 0, 0, 
         "Out-of-frame option selected, Expect values are only approximate and calculated not assuming out-of-frame alignments");
   }


   slp = query_slp;
   mask_loc = mask_loc_head;
  
   for (query_index=0; query_index<seqalign_arr->num_queries && slp; query_index++, slp=slp->next)
   {
      Bioseq* bsp = NULL;
      SeqAlignPtr seqalign = seqalign_arr->array[query_index];
      /* Find which query the current SeqAlign is for */
      SeqId* query_id = TxGetQueryIdFromSeqAlign(seqalign);
      if (seqalign == NULL)
      {
            if (align_view < eAlignViewXml)
                s_AcknowledgeEmptyResults(slp, format_options, format_info, outfp);  /* this query has no results. */
            else if (align_view == eAlignViewXml)
            {
                /* Retrieve this query's Bioseq */
                Iteration* iterp;
                /* Call to TxGetQueryIdFromSeqAlign returned NULL. */
                query_id = SeqLocId(slp);
      		bsp = BioseqLockById(query_id);
                iterp = s_XMLBuildOneQueryIteration(NULL, sum_returns, FALSE, ungapped, 
                                         query_index+1+format_info->num_formatted,
                                         "No hits found", bsp, NULL);
                IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
                AsnIoFlush(xmlp->aip);
                IterationFree(iterp);
      		BioseqUnlock(bsp);
            }
            else if (align_view == eAlignViewTabularWithComments)
            {
                 query_id = SeqLocId(slp);
      		 bsp = BioseqLockById(query_id);
                 PrintTabularOutputHeader(format_info->db_name, bsp, NULL, 
                                     format_info->program_name,
                                     0, format_options->believe_query, outfp);
      		 BioseqUnlock(bsp);
            }
            continue;
      }
      format_info->is_seqalign_null = FALSE; /* reset flag, at least one query has seqalign */

      /* Find the masking location for this query. Initialize next_mask_loc
	 to the current start of the chain, in case nothing for this query 
	 will be found. */
      next_mask_loc = mask_loc;
      for ( ; mask_loc; mask_loc = mask_loc->next) {
         mask_slp = (SeqLoc*) mask_loc->data.ptrvalue;
         if (SeqIdComp(query_id, SeqLocId(mask_slp)) == SIC_YES) {
            break;
         }
      }
      /* Unlink the masking location for this query and save the next one */
      if (mask_loc) {
         for (next_mask_loc = mask_loc; next_mask_loc->next; 
              next_mask_loc = next_mask_loc->next) {
            mask_slp = (SeqLoc*) next_mask_loc->next->data.ptrvalue;
            if (SeqIdComp(query_id, SeqLocId(mask_slp))
                != SIC_YES) {
               break;
            }
         }
         tmp_loc = next_mask_loc;
         next_mask_loc = next_mask_loc->next;
         tmp_loc->next = NULL;
      }

      /* On the next iteration we can start from the next query */

      /* Retrieve this query's Bioseq */
      bsp = BioseqLockById(query_id);

      if (align_view < eAlignViewXml) {
         if (format_info->head_on_every_query == TRUE)
             BLAST_PrintOutputHeader(format_info);

         init_buff_ex(70);
         AcknowledgeBlastQuery(bsp, 70, outfp, 
            format_options->believe_query, format_options->html);
         free_buff();

         if (format_info->head_on_every_query == TRUE)
         {
             s_BLAST_PrintDatabaseInfo(format_info);
             fprintf(format_info->outfp, "%s", "Searching..................................................done\n\n");
         }
      }
      if (align_view == eAlignViewTabular || 
          align_view == eAlignViewTabularWithComments) {
         if (align_view == eAlignViewTabularWithComments)
            PrintTabularOutputHeader(format_info->db_name, bsp, NULL, 
                                     format_info->program_name,
                                     0, format_options->believe_query, outfp);
         
         BlastPrintTabulatedResults(seqalign, bsp, NULL, 
            format_options->number_of_alignments, format_info->program_name, 
            ungapped, format_options->believe_query, 0, 0, 
            outfp, (Boolean)(align_view == eAlignViewTabularWithComments));
      } else if(align_view == eAlignViewXml) {
         Iteration* iterp;
         
         ASSERT(xmlp && xmlp->aip);
         /* The index of this "query iteration" is the query_index in the 
            current formatting round, plus the number of previously formatted
            queries. */
         iterp = 
             s_XMLBuildOneQueryIteration(seqalign, sum_returns, FALSE, 
                                         ungapped, 
                                         query_index+1+format_info->num_formatted,
                                         NULL, bsp, mask_loc);
         IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
         AsnIoFlush(xmlp->aip);
         IterationFree(iterp);
      } else {
         SeqAnnot* seqannot = SeqAnnotNew();
         seqannot->type = 2;
         AddAlignInfoToSeqAnnot(seqannot, align_type);
         seqannot->data = seqalign;
         if (aip) {
            SeqAnnotAsnWrite((SeqAnnot*) seqannot, aip, NULL);
            AsnIoReset(aip);
         } 
         if (outfp) {
            BlastPruneSapStruct* prune;
            Int4** matrix = s_LoadMatrix(sum_returns->search_params->matrix);
            ObjMgrSetHold();
            init_buff_ex(85);
            PrintDefLinesFromSeqAlignEx2(seqalign, 80, outfp, 
               format_options->print_options, FIRST_PASS, NULL,
               format_options->number_of_descriptions, NULL, NULL);
            free_buff();
            
            /** @todo FIXME: note that by calling BlastPruneHitsFromSeqAlign
             * we're making a COPY of the seqalign to print it out! Clearly
             * this could use a better design */
            prune = BlastPruneHitsFromSeqAlign(seqalign, 
                       format_options->number_of_alignments, NULL);
            seqannot->data = prune->sap;

            if(format_info->search_options->score_options->is_ooframe) {
               OOFShowBlastAlignment(prune->sap, mask_loc, outfp, 
                                     format_options->align_options, NULL);
            } else if (align_view != eAlignViewPairwise) {
               ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, 
                  format_options->align_options, matrix, mask_loc, NULL);
            } else {
               ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, 
                  format_options->align_options, matrix, mask_loc, 
                  FormatScoreFunc);
            }
            s_DeleteMatrix(matrix);
            seqannot->data = seqalign;
            prune = BlastPruneSapStructDestruct(prune);
            ObjMgrClearHold();
         }
         /* Set data to NULL, because we do not free Seq-align here. */
         seqannot->data = NULL;
         seqannot = SeqAnnotFree(seqannot);
      }
      BioseqUnlock(bsp);
      /* Relink the mask locations so chain can be freed in the end.
       The 'tmp_loc' variable points to the location that was unlinked. */
      if (tmp_loc)
          tmp_loc->next = next_mask_loc;
      
      mask_loc = next_mask_loc;
      ObjMgrFreeCache(0);

   } /* End loop on seqaligns for different queries */

   /* close BlastOutput_iterations openned in s_MBXmlInit; Rt ticket # 15135151 */
   if((format_info->is_seqalign_null==TRUE) && (align_view == eAlignViewXml)) {
     /* extra output only if no hits at all, otherwise "for loop" logic should take care*/
     Iteration* iterp;    
     iterp = IterationNew();
     iterp->iter_num = 1;
     iterp->stat = s_XMLBuildStatistics(sum_returns, ungapped);

     ASSERT(xmlp && xmlp->aip);
     IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
     AsnIoFlush(xmlp->aip);
     IterationFree(iterp);

   }

   if (format_info->db_name) {
       /* Free the database translation tables, if applicable. */
       TransTableFreeAll();
       ReadDBBioseqFetchDisable();
   }

   /* Update the count of the formatted queries. */
   format_info->num_formatted += num_queries;

   return 0;
}