Example #1
0
/******************************************************************
*
*	aa_to_codon(sfp, aa_start, aa_stop)
*	generate a list of CodonVecotr to show the codons of an 
*	amino acid sequence
*	sfp: the Seq-feat for cds
*	aa_start: the start position of protein sequence
*	aa_stop the stop position of protein sequence
*
******************************************************************/
NLM_EXTERN ValNodePtr aa_to_codon(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
  BioseqPtr bsp;

  Int4 frame_offset, start_offset;
  SeqLocPtr slp = NULL;
  SeqLocPtr cdloc;
  CdRegionPtr crp;
  Uint1 frame;

  Boolean is_end;			/**is the end for process reached?**/
  Int4 p_start=0, p_stop=0;		/**protein start & stop in defined
					corresponding CdRegion Seq-loc**/

  Int4 line_len;
  Int4 cur_pos;			/**current protein position in process**/
  Int4 cd_len;		/**length of the cDNA for the coding region**/

  Int2 i, j;
  Int2 k, n;
  CharPtr PNTR buf;

  Boolean is_new;		/**Is cur_pos at the begin of new Seq-loc?**/
  CharPtr temp;

  SeqPortPtr spp;
  Uint1 residue;

  Boolean end_partial;
  Int4 d_start, seq_pos;
  Int2 pos;

  ValNodePtr head= NULL;
  CodonVectorPtr cvp;
  Boolean prt_stop_codon;
  Uint2 exon;




   if(sfp->data.choice !=3)
	return NULL;

   crp = sfp->data.value.ptrvalue;
   if(!crp)
	return NULL;
   frame = crp->frame;
   cdloc = sfp->location;
   if(cdloc == NULL )
	return NULL;

   if(frame>0)
	frame_offset = frame-1;
   else
	frame_offset = 0;
   start_offset = frame_offset;

   prt_stop_codon = (aa_stop == SeqLocStop(sfp->product));
   line_len = (aa_stop - aa_start + 1) + 1;
					/* +1 for the possible partial start codon*/
   if(prt_stop_codon)/*can be either as a stop codon or partial stop*/
	++line_len;
   buf = MemNew((size_t)3 * sizeof(CharPtr));
   for(i =0; i<3; ++i)
	buf[i] = MemNew((size_t)(line_len + 1) * sizeof (Char));
		

   cur_pos= aa_start;
   cd_len = 0;
   is_end = FALSE;
   p_start = 0;
   slp = NULL;
   exon = 0;
   while(!is_end && ((slp = SeqLocFindNext(cdloc, slp))!=NULL))
   {
	++exon;
	cd_len += SeqLocLen(slp);
	end_partial = ((cd_len - start_offset)%3 != 0);
	p_stop = (cd_len - start_offset)/3 -1;
	if(end_partial)
	   ++p_stop;
	if(p_stop > aa_stop || (p_stop == aa_stop && !end_partial))
	{
	   p_stop = aa_stop;		/**check if the end is reached**/
	   is_end = TRUE;
	}

	if(p_stop >= cur_pos)	/*get the exon*/
	{
	   bsp = BioseqLockById(SeqLocId(slp));
	   if(bsp)
	   {
		is_new = (p_start == cur_pos);	/*start a new exon?*/
		cvp = MemNew(sizeof(CodonVector));
		cvp->sip = SeqIdDup(find_sip(bsp->id));
		cvp->strand = SeqLocStrand(slp);
		cvp->exonCount = exon;
		if(is_new)
		{
			if(frame_offset == 0)
				cvp->frame = 0;
			else
				cvp->frame = 3- (Uint1)frame_offset;
		}
		else
			cvp->frame = 0;
		if(cur_pos==0 && frame_offset > 0)	/*partial start codon*/
			cvp->aa_index = 0;
		else
			cvp->aa_index = 1;
		if(is_new)	/**special case of the first partial**/
		   d_start = SeqLocStart(slp);
		else
		{
		   if(frame_offset && p_start >0)
			++p_start;
		   d_start = SeqLocStart(slp) + 3*(cur_pos - p_start) + frame_offset;
		}
	    /**p_start is the start position of aa in the current Seq-loc
	       cur_pos is the current aa that is in process. The offset will
	       help to located the position on the DNA Seq-loc for translation
	       d_start is the position of the starting DNA in the coordinates
	       of DNA segment, used for mark the sequence
	       **/

		seq_pos = d_start - SeqLocStart(slp);	/**the pos in spp**/
		if(SeqLocStrand(slp)== Seq_strand_minus)
		   d_start = SeqLocStop(slp) - seq_pos;
		cvp->dna_pos = d_start;

		n = (Int2)cur_pos - (Int2)aa_start + cvp->aa_index;	/*position in buffer*/
		for(i =0; i<3; ++i)
			make_empty(buf[i], (Int2)line_len);
		spp = SeqPortNewByLoc(slp, Seq_code_iupacna);
		SeqPortSeek(spp, seq_pos, SEEK_SET);
		/**store the partial codons**/
		if(is_new && frame_offset > 0)
		{
		   k = (Int2)frame_offset;
		   while(k > 0)
		   {
			residue = SeqPortGetResidue(spp);
			temp = buf[3-k];	/**the position**/
			pos = n;
			temp[pos] = TO_LOWER(residue);
			--k;
		   }
		   ++n;
		   if(cur_pos!=0)
			++cur_pos;
		}


	     	/**load  the codons**/
		k =0;
		while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF && cur_pos <= p_stop)
		{
		   j= (Uint1)k%3;
		   temp = buf[j];
		   temp[n] = TO_LOWER(residue);
		   if(j ==2)
		   {		/**the last base**/
			++n;
		 	if(!prt_stop_codon|| !is_end) /*for the last codon*/
			/**prt_end controls to print the whole loc**/
		   	   ++cur_pos;
		   }
		   ++k;
		}	/**end of while**/

		SeqPortFree(spp);

		for(i =0; i<3; ++i)
		   cvp->buf[i] = StringSave(buf[i]);
		ValNodeAddPointer(&head, 0, (Pointer)cvp);

		BioseqUnlock(bsp);
	   }/*end of if(bsp)*/
	}/**end of if for matched intervals**/

	if(end_partial)
	    p_start = p_stop;
	else
	    p_start = p_stop +1;

	frame_offset = (cd_len - start_offset)%3;
	 if(frame_offset >0)
	    frame_offset = 3-frame_offset;

   }/**end of while(slp && !is_end) **/

   for(i=0; i<3; ++i)
	MemFree(buf[i]);
   MemFree(buf);

   return head;
}
Example #2
0
/*********************************************************************
*
*	make_cds_paragraph(sfp, aa_start, aa_stop)
*	return a buffer for the display of 3-codon under one amino 
*	acid format. It also includes the new line characters 
*	This is what Jonathan K. desires to have for the sequin 
*	doc object
*	aa_start, aa_stop: start and stop in the amino acid sequence
*
*********************************************************************/
NLM_EXTERN CharPtr make_cds_paragraph(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
	BioseqPtr pbsp;
	SeqPortPtr spp;
	ValNodePtr cvp_node, curr;
	CodonVectorPtr cvp;
	CharPtr docbuf = NULL;
	Int4 num, buf_size;
	Uint1 residue;
	Char p_name[30];
	Int4 space_len, i;
	CharPtr buf;
	Int4 pos;
	Int4 max_len = 150;
	Boolean extra_space;

	if(sfp == NULL || sfp->data.choice !=3)
		return NULL;
	if(sfp->product == NULL)
		return NULL;
	pbsp = BioseqLockById(SeqLocId(sfp->product));
	if(pbsp == NULL)
		return NULL;

	cvp_node = aa_to_codon(sfp, aa_start, aa_stop);
	num = 1;
	for(curr = cvp_node; curr !=NULL; curr = curr->next)
		num +=3;
	buf_size = num * max_len;
	/*  #ifdef WIN_16
		if(buf_size > 10000)
		{
			Message(MSG_ERROR, "Can not allocate enough space ");
			return NULL;
		}
	#endif
	*/

	docbuf = MemNew((size_t)(buf_size) * sizeof(Char));

	MuskSeqIdWrite(pbsp->id, p_name, B_SPACE, PRINTID_TEXTID_ACCESSION, TRUE, FALSE);
	/*SeqIdWrite (pbsp->id, p_name, PRINTID_FASTA_SHORT, 10);*/
	pos = 0;
	pos+= print_label_to_buffer(docbuf+pos, p_name, (aa_start+1), 0, FALSE, 
		FALSE, B_SPACE, POS_SPACE);

	/*print the amino acid sequence into buffer*/
	spp = SeqPortNew(pbsp, aa_start, aa_stop, Seq_strand_plus, Seq_code_ncbieaa);
	while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF )
		docbuf[pos++] = residue;
	docbuf[pos++] = '\n';
	SeqPortFree(spp);

	for(curr = cvp_node; curr !=NULL; curr = curr->next)
	{
		cvp = curr->data.ptrvalue;
		SeqIdWrite (cvp->sip, p_name, PRINTID_FASTA_SHORT, 10);
		extra_space = (cvp->aa_index == 0);
		for(i=0; i<3; ++i)
		{
			space_len = cvp->aa_index;
			buf = cvp->buf[i] + cvp->aa_index;
			if(i == cvp->frame)
			{
				pos+= print_label_to_buffer(docbuf+pos, p_name, 
					cvp->dna_pos, cvp->strand, extra_space, FALSE, B_SPACE, POS_SPACE);
			}
			else
				pos+= print_label_to_buffer(docbuf+pos, NULL, -1, 
				0, extra_space, FALSE, B_SPACE, POS_SPACE);
			sprintf(docbuf+pos, "%s\n", buf);
			pos += (StringLen(buf) +1);

		}
	}

	docbuf[pos++] = '\n';
	docbuf[pos] = '\0';
	
	free_cvp_list(cvp_node);
	BioseqUnlock(pbsp);

	return docbuf;
}
Example #3
0
static void ConsignProc (ButtoN b)
{
  XOSPtr         xosp;
  XISPtr         xisp;

  ComPatPtr      cpp, cpph;
  ValNodePtr     orflist;
  SeqLocPtr      slp, slpn;
  Int4           start, stop;
  Uint1          strand;
  SeqPortPtr     spp;
  Uint1Ptr       aaseq;
  Int4           ntpos, aapos;
  Uint1          cdn[3];

  SeqAlignPtr    sap, sapn;

  FloatHi        probcut;
  Int4           clustmin, findmin;

  Int4           i, n, endpos, XLength, XScale, shift;
  Int4           iframe, frame, top, orftop[6];
  FloatHiPtr     score, expandscore;
  FloatHi        maxscore;
  Int4Ptr        tableGlobal;

  SeqGraphPtr  sgp, sgpn;
  WindoW       w;
  VieweR       v;
  GrouP        g;
  SegmenT      seg;
  GraphSentPtr gsp;
  Char         numberbuffer[32];

  if ((xosp = (XOSPtr) GetObjectExtra (b)) == NULL)
    return;

  if (xosp->bsp == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "No Bioseq");
    ErrShow ();
    return;
  }

  WatchCursor ();
  cpph = cpp = ReadPrositePattern (xosp->pattern_file, TRUE, -1, NULL, NULL);
  if (cpph == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101,
               "read failed %s", xosp->pattern_file);
    ErrShow ();
    ArrowCursor ();
    return;
  }

  xosp->orflist =  GetOrfList (xosp->bsp, (Int2) (xosp->orfcut));
  xosp->orflist =  ClearNonMetOrfs (xosp->orflist);
  orflist = xosp->orflist;
  while (orflist != NULL)
  {
    slp = (SeqLocPtr) orflist->data.ptrvalue;
    if (slp->choice == 0)
    {
      orflist = orflist->next;
      continue;
    }
    if (slp->choice == SEQLOC_MIX)
      slp = (SeqLocPtr) slp->data.ptrvalue;
    start = SeqLocStart (slp);
    stop = SeqLocStop (slp);
    strand = SeqLocStrand (slp);
    if (strand != Seq_strand_both)
      strand = Seq_strand_both;
    if (stop - start + 1 >= xosp->minimumseed)
    {
      spp = SeqPortNew (xosp->bsp, start, stop, strand, Seq_code_ncbi4na);
      aaseq = (Uint1Ptr) MemNew ((size_t)
                                 (sizeof (Uint1) * (((stop-start)/3)+2)));
      ntpos = start;
      aapos = 0;
      while (ntpos < start+3)
      {
        cdn[0] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[1] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[2] = SeqPortGetResidue (spp);
        ntpos++;
        aaseq[aapos] = AAForCodon (cdn, xosp->gcdi);
        aapos++;
      }
      while (ntpos <= stop)
      {
        cdn[0] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[1] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[2] = SeqPortGetResidue (spp);
        ntpos++;
        aaseq[aapos] = AAForCodon (cdn, xosp->gcd);
        aapos++;
      }
      SeqPortFree (spp);
      aaseq[aapos] = 0;
      cpp = cpph;
      while (cpp != NULL)
      {
        sap = PatternMatch (aaseq, 0, Seq_strand_plus, SeqLocId (slp),
                            cpp, 0, Seq_strand_unknown, FALSE);
        if (sap != NULL)
          break;
        cpp = cpp->nextpattern;
      }
      MemFree (aaseq);
      if (sap != NULL)
      {
        SeqLocLink (&(xosp->slps), SeqLocDup (slp));
      }
      while (sap != NULL)
      {
        sapn = sap->next;
        SeqAlignFree (sap);
        sap = sapn;
      }
    }
    orflist = orflist->next;
  }
  ComPatFree (cpph);

  orflist = xosp->orflist;
  while (orflist != NULL)
  {
    slp = (SeqLocPtr) orflist->data.ptrvalue;
    if (slp->choice > 0)
      SeqLocLink (&(xosp->slpa), SeqLocDup (slp));
    while (slp != NULL)
    {
      slpn = slp->next;
      SeqLocFree (slp);
      slp = slpn;
    }
    orflist->data.ptrvalue = NULL;
    orflist = orflist->next;
  }
  xosp->orflist = ValNodeFree (xosp->orflist);

  probcut = xosp->probcut;
  clustmin = xosp->clustmin;
  findmin = xosp->findmin;

  xosp->slpb = FindSimilarBiasOrfs (xosp->sep, probcut, clustmin, findmin,
                                    xosp->slps, xosp->slpa);

  tableGlobal = CodonTableFromSeqLoc (xosp->bsp, xosp->slpb);
  seg = NULL;
  top = 0;
  xisp = (XISPtr) MemNew (sizeof (XIS));
  frame = 0;
  for (iframe = 0; iframe < 6; iframe++)
  {
    endpos = (xosp->bsp->length + 3 - frame - xosp->window) / 3;
    if (iframe < 3)
      score = BiasScoreBioseq (xosp->bsp, tableGlobal, xosp->window,
                               frame, Seq_strand_plus);
    else
      score = BiasScoreBioseq (xosp->bsp, tableGlobal, xosp->window,
                               frame, Seq_strand_minus);
    maxscore = 0.0;
    for (i = 0; i < endpos; i++)
      if (score[i] > maxscore)
        maxscore = score[i];
    expandscore = (FloatHiPtr) MemNew (sizeof (FloatHi) * xosp->bsp->length);
    for (i = 0; i < xosp->window/2; i++)
      expandscore[i] = maxscore;
    n = 0;
    while (i < xosp->bsp->length)
    {
      if (n < endpos)
        expandscore[i] = score[n];
      else
        expandscore[i] = maxscore;
      i++;
      if (i%3 == 0)
        n++;
    }
    MemFree (score);
    score = expandscore;
    sgp = SeqGraphNew ();
    if (xisp->sgp == NULL)
    {
      xisp->sgp = sgp;
    }
    else
    {
      sgpn = xisp->sgp;
      while (sgpn->next != NULL)
        sgpn = sgpn->next;
      sgpn->next = sgp;
    }
    XLength = xosp->bsp->length;
    if (XLength > 1200)
      XLength = 1200;
    XScale = xosp->bsp->length / XLength;
    if (xosp->bsp->length % XLength != 0)
      XScale++;
    sgp->loc = SeqLocIntNew (0, xosp->bsp->length-1, xosp->bsp->strand,
                             xosp->bsp->id);
    sgp->flags[2] = 1;
    sgp->numval = xosp->bsp->length;
    sgp->values = (Pointer) score;
    sgp->max.realvalue = maxscore;
    sgp->min.realvalue = 0.0;
    sgp->flags[1] = 1;
    sgp->a = 4.0;
    sgp->b = 0.0;
    if (seg == NULL)
      seg = CreatePicture ();
    if ((gsp = AddGraphSentinelToPicture (sgp, xosp->bsp, seg, 0,
                                          top, 0, NULL)) != NULL)
    {
      sprintf (numberbuffer, "%ld", 1L);
      AddLabel (seg, gsp->box.left, gsp->bottom-20,
                numberbuffer, SMALL_TEXT, 0, MIDDLE_CENTER, 0);
      sprintf (numberbuffer, "%ld", (long) xosp->bsp->length);
      AddLabel (seg, gsp->box.left+xosp->bsp->length, gsp->bottom-20,
                numberbuffer, SMALL_TEXT, 0, MIDDLE_CENTER, 0);
    }
    shift = (Int4) (maxscore*sgp->a);
    orftop[iframe] = top - shift - 38;
    top -= (shift+56);
    frame++;
    if (frame == 3)
    {
      top -= 24;
      frame = 0;
    }
  }
  frame = 0;
  for (iframe = 0; iframe < 6; iframe++)
  {
    if (iframe < 3)
      strand = Seq_strand_plus;
    else
      strand = Seq_strand_minus;
    shift = 0;
    if (xosp->slpa != NULL)
    {
      AddOrfClass (xosp->slpa, seg, orftop, iframe, frame,
                   shift, strand, YELLOW_COLOR, 5);
      shift += 4;
    }
    if (xosp->slpk != NULL)
    {
      AddOrfClass (xosp->slpk, seg, orftop, iframe, frame,
                   shift, strand, GREEN_COLOR, 5);
      shift += 4;
    }
    if (xosp->slpb != NULL)
    {
      AddOrfClass (xosp->slpb, seg, orftop, iframe, frame,
                   shift, strand, BLUE_COLOR, 5);
      shift += 4;
    }
    if (xosp->slps != NULL)
    {
      AddOrfClass (xosp->slps, seg, orftop, iframe, frame,
                   shift, strand, RED_COLOR, 5);
    }
    frame++;
    if (frame == 3)
      frame = 0;
  }
  MemFree (tableGlobal);

  start = 20;
  stop = 20 + (50*XScale);
  top = orftop[5] - 40;
  if (xosp->slpa != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  YELLOW_COLOR, SOLID_LINE, SOLID_SHADING, 5,
                  0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH,
                  0);
    AddLabel (seg, stop+(20*XScale), top,
              "All Met-init'd ORFs equal to or greater than 50 codons",
              SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slpk != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  GREEN_COLOR, SOLID_LINE, SOLID_SHADING, 5,
                  0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH,
                  0);
    AddLabel (seg, stop+(20*XScale), top, "Annotated (reported) ORFs",
              SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slpb != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLUE_COLOR, SOLID_LINE, SOLID_SHADING, 5,
                  0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH,
                  0);
    AddLabel (seg, stop+(20*XScale), top, "Similar codon usage bias ORFs to seed ORFs",
              SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slps != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  RED_COLOR, SOLID_LINE, SOLID_SHADING, 5,
                  0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT),
                  BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH,
                  0);
    AddLabel (seg, stop+(20*XScale), top, "Pattern match seed ORFs",
              SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }

  xisp->picture = seg;

  w = FixedWindow (10, 10, 640, 720, "Consign", CloseGraphWindowProc);
  SetObjectExtra (w, xisp, CleanUpGraphWindow);
  g = HiddenGroup (w, -1, 0, NULL);
  v = CreateViewer (g, 560, 640, TRUE, TRUE);
  AttachPicture (v, seg, INT4_MIN, INT4_MAX, UPPER_LEFT, XScale, 1, NULL);
  PushButton (g, "Close", CloseGraphWindowButton);
  RealizeWindow (w);
  ArrowCursor ();
  Show (w);

  return;
}
Example #4
0
SeqLocPtr SeqLocDust (SeqLocPtr this_slp,
		      Int2 level, Int2 window, Int2 minwin, Int2 linker)
{
	SeqLocPtr	next_slp, slp = NULL;
	ValNodePtr	vnp = NULL;

	SeqIdPtr	id;
	BioseqPtr	bsp;
	SeqPortPtr	spp;

	DREGION	PNTR reg, PNTR regold;
	Int4 nreg;
	Int4 start, end, l;
	Int2 loopDustMax = 0;

/* error msg stuff */
	ErrSetOptFlags (EO_MSG_CODES);

	if (!this_slp)
	{
		ErrPostEx (SEV_ERROR, 2, 1,
			  "no sequence location given for dusting");
                ErrShow ();
		return slp;
	}

/* place for dusted regions */
	regold = reg = MemNew (sizeof (DREGION));
	if (!reg)
	{
		ErrPostEx (SEV_FATAL, 2, 2,
			   "memory allocation error");
                ErrShow ();
		return slp;
	}
	reg->from = 0;
	reg->to = 0;
	reg->next = NULL;

/* count seqlocs */
	next_slp = NULL;
	while ((next_slp = SeqLocFindNext (this_slp, next_slp)) != NULL)
			loopDustMax++;
	if (!loopDustMax)
	{
		ErrPostEx (SEV_ERROR, 2, 3,
			   "can not find next seq loc");
                ErrShow ();
	}

/* loop for dusting as needed */
	next_slp = NULL;
	while ((next_slp = SeqLocFindNext (this_slp, next_slp)) != NULL)
	{
/* offsets into actual sequence */
		start = SeqLocStart (next_slp);
		end = SeqLocStop (next_slp);

/* if all goes okay should get a seqport pointer */
		id = SeqLocId (next_slp);
		if (!id)
		{
			ErrPostEx (SEV_ERROR, 2, 4,
				  "no bioseq id");
			ErrShow ();
			continue;
		}
		bsp = BioseqLockById (id);
		if (!bsp)
		{
			ErrPostEx (SEV_ERROR, 2, 5,
				  "no bioseq");
			ErrShow ();
			continue;
		}
		if (!ISA_na (bsp->mol))
		{
			ErrPostEx (SEV_WARNING, 2, 6,
				  "not nucleic acid");
			ErrShow ();
			BioseqUnlock (bsp);
			continue;
		}
		spp = SeqPortNew (bsp, start, end, 0, Seq_code_ncbi2na);
		BioseqUnlock (bsp);
		if (!spp)
		{
			ErrPostEx (SEV_ERROR, 2, 7,
				   "sequence port open failed");
			ErrShow ();
			continue;
		}

		l = spp->totlen;
		nreg = dust_segs (l, spp, start, reg,
				  (Int4)level, (Int4)window, (Int4)minwin, (Int4)linker);
		slp = slpDust (spp, slp, id, &vnp,
			       reg, nreg, loopDustMax);
/* find tail - this way avoids referencing the pointer */
		while (reg->next) reg = reg->next;

		SeqPortFree (spp);
	}

/* clean up memory */
	reg = regold;
	while (reg)
	{
		regold = reg;
		reg = reg->next;
		MemFree (regold);
	}

	return slp;
}
Example #5
0
SeqLocPtr BioseqDust (BioseqPtr bsp, Int4 start, Int4 end,
		      Int2 level, Int2 window, Int2 minwin, Int2 linker)
{
	SeqLocPtr	slp = NULL;	/* initialize */
	ValNodePtr	vnp = NULL;

	SeqPortPtr	spp;

	DREGION	PNTR reg, PNTR regold;
	Int4	nreg;
	Int4 l;
	Int4 loopDustMax = 1;

/* error msg stuff */
/*	ErrSetOptFlags (EO_MSG_CODES | EO_SHOW_FILELINE | EO_BEEP); */
	ErrSetOptFlags (EO_MSG_CODES);

/* make sure bioseq is there */
	if (!bsp)
	{
		ErrPostEx (SEV_ERROR, 1, 1,
			  "no bioseq");
                ErrShow ();
		return slp;
	}
	if (!ISA_na (bsp->mol))
	{
		ErrPostEx (SEV_WARNING, 1, 2,
			  "not nucleic acid");
                ErrShow ();
		return slp;
	}

/* place for dusted regions */
	reg = MemNew (sizeof (DREGION));
	if (!reg)
	{
		ErrPostEx (SEV_FATAL, 1, 3,
			   "memory allocation error");
                ErrShow ();
		return slp;
	}
	reg->from = 0;
	reg->to = 0;
	reg->next = NULL;

/* do it */
	spp = SeqPortNew (bsp, start, end, 0, Seq_code_ncbi2na);
	if (!spp)
	{
		ErrPostEx (SEV_ERROR, 1, 4,
			   "sequence port open failed");
                ErrShow ();
		MemFree (reg);
		return slp;
	}

	l = spp->totlen;
	nreg = dust_segs (l, spp, start, reg, (Int4)level, (Int4)window, (Int4)minwin, (Int4)linker);
	slp = slpDust (spp, NULL, bsp->id, &vnp, reg, nreg, loopDustMax);

/* clean up memory */
	SeqPortFree (spp);
	while (reg)
	{
		regold = reg;
		reg = reg->next;
		MemFree (regold);
	}

	return slp;
}
Example #6
0
void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly,
              short whichSeq, short *seqnum,
              char **seq, char **seqid, long *seqlen)
{
  SeqPortPtr spp;
  SeqIdPtr bestid;
  Uint1 repr, code, residue;
  CharPtr tmp, title;
  long  outlen, outmax;
  char  localid[256], *sp;

  /* !!! this may be called several times for a single sequence
    because SeqEntryExplore looks for parts and joins them...
    assume seq, seqid, seqlen may contain data (or NULL)
  */
  if (bsp == NULL) return;
  repr = Bioseq_repr(bsp);
  if (!(repr == Seq_repr_raw || repr == Seq_repr_const)) return;

  (*seqnum)++;
  if (!(whichSeq == *seqnum || whichSeq == 0)) return;

  bestid = SeqIdFindBest(bsp->id, (Uint1) 0);
  title = BioseqGetTitle(bsp);
  if (idonly) {
    sprintf(localid, " %d)  ", *seqnum);
    tmp= localid + strlen(localid)-1;
    }
  else {
    strcpy(localid," ");
    tmp= localid;
    }
  tmp = SeqIdPrint(bestid, tmp, PRINTID_FASTA_SHORT);
  tmp = StringMove(tmp, " ");
  StringNCpy(tmp, title, 200);
/* fprintf(stderr,"BioseqRawToRaw: localid='%s'\n",localid); */

          /* < seqid is fixed storage */
  /* strcpy( *seqid, localid);  */
          /* < seqid is variable sized */
  outmax= strlen(localid) + 3;
  if (*seqid==NULL) {
    *seqid= (char*) malloc(outmax);
    if (*seqid==NULL) return;
    strcpy(*seqid, localid);
    }
  else {
    outmax += strlen(*seqid) + 2;
    *seqid= (char*) realloc( *seqid, outmax);
    if (*seqid==NULL) return;
    if (!idonly) strcat(*seqid, "; ");
    strcat(*seqid, localid);
    }

  if (idonly) {
    strcat(*seqid,"\n");
    return;
    }

  if (ISA_na(bsp->mol)) code = Seq_code_iupacna;
  else code = Seq_code_iupacaa;
  spp = SeqPortNew(bsp, 0, -1, 0, code);
  SeqPortSeek(spp, 0, SEEK_SET);

  sp= *seq;
  if (sp==NULL) {  /* this is always true now !? */
    outlen= 0;
    outmax= 500;
    sp= (char*) malloc(outmax);
    }
  else {
    outlen= strlen(sp);
    outmax= outlen + 500;
    sp= (char*) realloc( sp, outmax);
    }
  if (sp==NULL) return;

  while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
    if (outlen>=outmax) {
      outmax= outlen + 500;
      sp= (char*) realloc(sp, outmax);
      if (sp==NULL) return;
      }
    sp[outlen++] = residue;
    }
  sp= (char*) realloc(sp, outlen+1);
  if (sp!=NULL) sp[outlen]= '\0';
  *seq= sp;
  *seqlen= outlen;
  SeqPortFree(spp);
  return;
}
Example #7
0
Int2 Main(void)
{
	AsnIoPtr       aip;
	SeqEntryPtr    sep;
	BioseqPtr PNTR seqlist;
	Int4           seqnum, i, numseg, lens[10], j;
	Int2           ctr;
	SeqPortPtr     spp;
	Uint1          residue;
	FILE*          fp;
	CharPtr        title;
	Char           buffer[101];
	MonitorPtr     mon;

						/* check command line arguments */

	if ( ! GetArgs("SeqTest",NUMARG, myargs))
		return 1;

	mon = MonitorStrNew("SeqTest", 40);
	SetProgMon(StdProgMon, (Pointer)mon);

	/*
	** Load SeqEntry object loader and sequence alphabets
	*/

	if (! SeqEntryLoad()) {
		Message(MSG_ERROR, "SeqEntryLoad failed");
		return 1;
	}

	/*
	** Use the file "example.prt" as the ASN I/O stream.  This file
	** can be found in the ncbi/demo.  It is in ASN.1 Print Value format.
	*/

	if ((aip = AsnIoOpen(myargs[0].strvalue, "r")) == NULL)
		return 1;

	/*
	** Write the output to "seqtest.out".
	*/

	fp = FileOpen(myargs[1].strvalue, "w");
	fprintf(fp, "Sequence summary:\n\n");

	/*
	** Read in the whole entry into the Sequence Entry Pointer, sep.
	** Close the ASN stream, which in turn closes the input file.
	*/

	sep = SeqEntryAsnRead(aip, NULL);
	aip = AsnIoClose(aip);

	mon = MonitorFree(mon);
	SetProgMon(NULL, NULL);

	/*
	** Determine how many Bioseqs are in this SeqEntry. Allocate
	** enough memory to hold a list of pointer to all of these
	** Bioseqs.  Invoke an Explore function to "visit"each Bioseq.
	** We are allowed to pass one pointer for use by the exploring
	** function, in this case, "BuildList".
	*/

	seqnum = BioseqCount(sep);
	seqlist = MemNew((size_t)(seqnum * sizeof(BioseqPtr)));
	BioseqExplore(sep, (Pointer) seqlist, BuildList);

	/*
	** For each Bioseq in the SeqEntry write out it's title
	** len, number of gaps, and number of segments. Write out
	** the length of each segment, up to 10.
	*/

	for(i = 0; i < seqnum; i++) {
		numseg = BioseqCountSegs(seqlist[i]);
		title = BioseqGetTitle(seqlist[i]);
		FilePuts((VoidPtr)title, fp);
		FilePuts("\n", fp);
		fprintf(fp, "len=%ld gaps=%ld segs=%ld\n", BioseqGetLen(seqlist[i]),
		BioseqGetGaps(seqlist[i]), numseg);
		if ((numseg > 1) && (numseg <= 10)) {
			BioseqGetSegLens (seqlist[i], lens);
			for (j = 0; j < numseg; j++)
				fprintf(fp, "  len = %ld\n", lens[j]);
		}
		FilePuts("\n", fp);
	}

	spp = SeqPortNew(seqlist[0], 0, -1, 0, Seq_code_iupacna);
	if (spp == NULL)
		Message(MSG_ERROR, "fail on SeqPortNew");

	fprintf(fp, "SeqPort: plus strand with SeqPortGetResidue\n\n");

	i = 0;
	while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
		if (! IS_residue(residue)) {
			buffer[i] = '\0';
			fprintf(fp, "%s\n", buffer);
			i = 0;
			switch (residue)
			{
				case SEQPORT_VIRT:
					fprintf(fp, "[Gap]\n");
					break;
				case SEQPORT_EOS:
					fprintf(fp, "[EOS]\n");
					break;
				default:
					fprintf(fp, "[Invalid Residue]\n");
					break;
			}

		}
		else {
			buffer[i] = residue;
			i++;
			if (i == 60) {
				buffer[i] = '\0';
				fprintf(fp, "%s\n", buffer);
				i = 0;
			}
		}
	}

	if (i) {
		buffer[i] = '\0';
		fprintf(fp, "%s\n", buffer);
	}

	fprintf(fp, "[EOF]\n");
	SeqPortFree(spp);

	fprintf(fp, "\nSeqPort on minus with SeqPortRead\n\n");
	spp = SeqPortNew(seqlist[0], 0, -1, Seq_strand_minus, Seq_code_iupacna);

	if (spp == NULL)
		Message(MSG_ERROR, "fail on SeqPortNew");

	do {
		ctr = SeqPortRead(spp, (Uint1Ptr)buffer, 60);
		if (ctr > 0) {
			buffer[ctr] = '\0';
			fprintf(fp, "%s\n", buffer);
		} else {
			ctr *= -1;
			switch (ctr)
			{
				case SEQPORT_VIRT:
					fprintf(fp, "[Gap]\n");
					break;
				case SEQPORT_EOS:
					fprintf(fp, "[EOS]\n");
					break;
				case SEQPORT_EOF:
					fprintf(fp, "[EOF]\n");
					break;
				default:
					fprintf(fp, "[Invalid Residue]\n");
					break;
			}
		}
	} while (ctr != SEQPORT_EOF);

	SeqPortFree(spp);

	/*
	** Write out the nucleic acid sequences in this SeqEntry
	*/

	fprintf(fp, "\nNucleic Acids in FASTA format:\n\n");
	SeqEntryToFasta(sep, fp, TRUE);

	/*
	** Write out the protein sequences in this SeqEntry.
	*/

	fprintf(fp, "\nProteins in FASTA format:\n\n");
	SeqEntryToFasta(sep, fp, FALSE);

	/*
	** Close the output file and free up allocated space.
	*/

	fclose(fp);
	MemFree(seqlist);
	SeqEntryFree(sep);

	return 0;
}