Exemple #1
0
/*
 * wo
 *	Loads one chunk of sequence as triplets and scores every suffix of it.
 *
 *	len          maximum number of triplets to load for this chunk
 *	spp          sequence port to read from
 *	iseg         position in spp at which the chunk starts
 *	cloc         receives the result; curlevel, curstart and curend are
 *	             reset to 0 here and then updated by wo1()
 *	invrescount  running count of invalid residues, passed on to
 *	             dusttripfind()
 *	seq          caller-supplied triplet buffer; len + 2 bytes are zeroed
 *
 *	Returns the number of triplets reported by dusttripfind().
 */
static Int4 wo (Int4 len, SeqPortPtr spp, Int4 iseg, DCURLOC PNTR cloc,
			Int4 PNTR invrescount, UcharPtr seq)
{
	Int4 ntriplets;
	Int4 offset;
	UcharPtr window;

	/* start from an empty "current location" */
	cloc->curlevel = 0;
	cloc->curstart = 0;
	cloc->curend = 0;

	/* read the chunk into a freshly zeroed triplet buffer */
	SeqPortSeek (spp, iseg, SEEK_SET);
	MemSet (seq, 0, len + 2);
	ntriplets = dusttripfind (spp, seq, iseg, len, invrescount);

	/* hand every suffix of the chunk, with its remaining length and its
	   offset, to wo1() */
	window = seq;
	offset = 0;
	while (offset < ntriplets)
	{
		wo1 (ntriplets - offset, window, offset, cloc);
		window++;
		offset++;
	}

	/* curend is accumulated relative to curstart; make it absolute
	   within the chunk */
	cloc->curend += cloc->curstart;

	return ntriplets;
}
Exemple #2
0
/*  WARNING not called and not tested... */
/*
 * FillCSANWithSeq
 *	Allocates pcsanThis->pcSeqAln (iLen + 1 bytes) and fills it with the
 *	first iLen ncbieaa residues of the protein Bioseq pbsq.  When the
 *	sequence is shorter than iLen (end of port or a '\0' residue), the
 *	remainder is padded with '-'.  The string is always NUL-terminated.
 *
 *	pcsanThis  node that receives the newly allocated string; any previous
 *	           value of pcSeqAln is overwritten, not freed
 *	pbsq       protein Bioseq to read
 *	iLen       number of alignment columns to produce
 *
 *	Returns iLen on success, or 0 when an argument is unusable, pbsq is
 *	not an amino-acid sequence, or the port / buffer cannot be created.
 */
static Int4 FillCSANWithSeq(PCSAN pcsanThis, BioseqPtr pbsq, Int4 iLen)
{
     SeqPortPtr spp = NULL;
     Uint1 code = Seq_code_ncbieaa;
     Uint1 residue;
     Int4 iCount = 0;
     CharPtr pcA;

     if (!pcsanThis) return 0;
     if (!pbsq) return 0;
     if (!ISA_aa(pbsq->mol)) return 0;
     if (iLen <= 0) return 0;      /* negative lengths would under-allocate */

     spp = SeqPortNew(pbsq, 0, -1, 0, code);
     if (!spp) return 0;
     SeqPortSeek(spp, 0, SEEK_SET);

     pcsanThis->pcSeqAln = (CharPtr)MemNew((size_t) iLen + 1);
     if (!pcsanThis->pcSeqAln)
       {
            /* do not leak the port when the buffer cannot be allocated */
            SeqPortFree(spp);
            return 0;
       }
     pcA = pcsanThis->pcSeqAln;

     /* copy residues while there is room; the guard was previously written
        as (iLen < iCount), which is never true on entry, so nothing was
        ever copied */
     residue = SeqPortGetResidue(spp);
     while ((residue != SEQPORT_EOF) && (residue != '\0') && (iCount < iLen))
       {
            *pcA = (char) residue;
            pcA++;
            iCount++;
            residue = SeqPortGetResidue(spp);
       }

     /* pad a short sequence out to the requested width */
     while (iCount < iLen)
       {
            *pcA = '-';
            pcA++;
            iCount++;
       }
     pcsanThis->pcSeqAln[iLen] = '\0';
     SeqPortFree(spp);
     return iCount;
}
Exemple #3
0
/******************************************************************
*
*	print_protein_for_cds(sfp, buf, loc, reverse_minus)
*	Writes into buf, one character per base of loc, the protein that
*	belongs to the coding-region feature sfp:
*	  - the residue letter on the middle base of each codon
*	  - ' ' on the other two bases of the codon
*	  - '^' where the computed amino-acid index falls outside the
*	    protein (stop / partial codon)
*	  - '?' where the residue is not a letter, '*' or '-'
*	sfp: the Seq-feat; its data.choice must be 3 (presumably the
*	     CdRegion choice -- confirm against the feature definitions)
*	buf: caller-supplied output buffer. If buf[0] == '\0' on entry the
*	     function also NUL-terminates it, and fills it with '~' when
*	     the write position never moved from 0.
*	     NOTE(review): buf apparently has to hold at least
*	     SeqLocLen(loc) + 1 characters -- confirm with callers.
*	loc: the location whose coordinates index buf
*	reverse_minus: when TRUE and ck_reverse() reports the feature
*	     strand as reversed relative to loc, buf is filled from its
*	     last position backwards
*	returns -1 for unusable arguments or when no protein data could
*	be obtained; otherwise the index of the first amino acid that was
*	labelled. If none was labelled, the last computed amino-acid
*	index is returned when the write position ended non-zero, else -1.
*
******************************************************************/
NLM_EXTERN Int4 print_protein_for_cds(SeqFeatPtr sfp, CharPtr buf, SeqLocPtr loc, Boolean reverse_minus)
{
	CdRegionPtr crp;
	Int4 frame_offset, start_offset;
	Uint1 f_strand;
	Boolean reverse;
	Int4 cd_len;
	GatherRange gr;
	Int2 p_pos, buf_len;
	Int4 a_left, a_right;
	Int4 aa, val;
	SeqLocPtr slp;
	SeqPortPtr spp;
	ByteStorePtr p_data;
	Int4 end_pos, start_pos = -1;
	Uint1 residue;
	Boolean seal_ends = FALSE;
	Boolean reverse_order;

	/* argument validation: every failure returns -1 */
	if(sfp == NULL || sfp->data.choice != 3)
		return -1;
	if(buf == NULL || loc == NULL)
		return -1;
	crp = sfp->data.value.ptrvalue;
	if(crp == NULL)
		return -1;

	/* an empty buffer on entry means this call must terminate it */
	if(buf[0] == '\0')
		seal_ends = TRUE;
	spp = NULL;
	p_data = NULL;
	/* first choice: read the protein from the feature's product */
	if(sfp->product !=NULL && !IS_BOGO_Product(sfp->ext))
	{
		spp = SeqPortNewByLoc(sfp->product, Seq_code_ncbieaa);
		if(spp !=NULL)
		{
    			SeqPortSeek(spp, 0, SEEK_SET);
			end_pos = spp->totlen-1;	/* last valid amino-acid index */
		}
	}
	/* fallback: translate the coding region on the fly */
	if(spp == NULL)
	{
		p_data = ProteinFromCdRegion(sfp, TRUE);
		/* p_data = ProteinFromCdRegion(sfp, FALSE); */
		if(p_data !=NULL)
		{
			BSSeek(p_data, 0, SEEK_SET);
			end_pos = BSLen(p_data)-1;	/* last valid amino-acid index */
		}
	}

	if(spp == NULL && p_data == NULL)
		return -1;

	/* crp->frame of 0 is treated like frame 1: no bases are skipped */
	if(crp->frame == 0)
		frame_offset = 0;
	else
		frame_offset = (Int4)crp->frame-1;
	start_offset = frame_offset;

	f_strand = SeqLocStrand(sfp->location);
	reverse = ck_reverse(f_strand, SeqLocStrand(loc));
	/*if reverse == TRUE, the translated protein is written backwards*/
	if(reverse && reverse_minus)
		reverse_order = TRUE;
	else
		reverse_order = FALSE;
	
	slp = NULL;
	cd_len = 0;
	aa = 0;

	/* NOTE(review): SeqLocLen() is stored in an Int2 here, so a loc longer
	   than the Int2 range would be truncated -- confirm the expected sizes */
        buf_len = SeqLocLen(loc);

	/* pick the first write position; in reverse order the terminator is
	   written up front and seal_ends is cleared */
	if(reverse_order)
	{
		p_pos = buf_len -1;
		if(seal_ends)
		{
			buf[p_pos+1] = '\0';
			seal_ends = FALSE;
		}
	}
	else
		p_pos = 0;

	/* walk every interval of the coding region */
	while((slp = SeqLocFindNext(sfp->location, slp))!=NULL)
	{
	   /* only intervals for which SeqLocOffset() succeeds against loc are
	      drawn; gr then holds the interval's range (presumably in loc
	      coordinates -- confirm against SeqLocOffset) */
	   if(SeqLocOffset(loc, slp, &gr, 0))
	   {
		/* move the write position to where this interval begins in buf */
		if(reverse_order)
		{
			if(gr.right < p_pos)
				p_pos = (Int2)(gr.right);
		}
		else
		{
			if(p_pos < gr.left)
				p_pos = (Int2)(gr.left);
		}
		/* second call with the arguments swapped; gr is then combined with
		   cd_len to give positions along the concatenated coding sequence */
		SeqLocOffset(slp, loc, &gr, 0);
		
		a_left = gr.left + cd_len;
		a_right = gr.right + cd_len;
		/* if(reverse_order)
		{
			temp = a_right;
			a_right = -a_left;
			a_left = -temp;
		} */
		for(; a_left<=a_right; ++a_left)
		{
			/* val: base position counted from the first full codon;
			   aa: index of the codon that base belongs to */
			val = ABS(a_left) - start_offset;
			aa = val/3;
			if(aa < 0 || aa > end_pos)/*stop & partial codon*/
			{
				buf[p_pos] = '^';
			}
			else
			{
				if(val%3==1)/*label aa in the middle of 3-bp codon*/
				{
					/* remember the first amino acid that was labelled */
					if(start_pos == -1)
						start_pos = aa;
					if(spp !=NULL)
					{
						SeqPortSeek(spp, aa, SEEK_SET);
						residue = SeqPortGetResidue(spp); 
					}
					else
					{
						BSSeek(p_data, aa, SEEK_SET);
						residue = (Uint1)BSGetByte(p_data);
					}
					if(IS_ALPHA(residue) || residue == '*' || residue == '-')
					
						buf[p_pos] = residue;
					else
						buf[p_pos] = '?';
				}
				else
					buf[p_pos] = ' ';
			}
			/* NOTE(review): the reverse path decrements p_pos without a
			   lower-bound check, and the forward path only stops once
			   p_pos exceeds buf_len -- confirm neither can leave buf */
			if(reverse_order)
				-- p_pos;
			else {
				++p_pos;
                                if (p_pos > buf_len)
                                   break;
                        }
		}
	     }
	     /* cd_len accumulates the coding length of all intervals seen so far,
	        whether or not they were drawn */
	     cd_len += SeqLocLen(slp);
	     /*frame_offset = (cd_len - start_offset)%3;
	     if(frame_offset > 0)
		--frame_offset;*/
		
	}

	/* release whichever protein source was opened */
	if(spp != NULL)
		SeqPortFree(spp);
	if(p_data != NULL)
		BSFree(p_data);

	if(p_pos  == 0)	/*all the residues are introns*/
	{
		if(seal_ends)
		{
			/* nothing was written: fill the whole line with '~' */
			end_pos = buf_len;
			MemSet((Pointer)buf, '~', (size_t)(end_pos) * sizeof(Char));
			buf[end_pos] = '\0';
		}
	}
	else
	{
		if(seal_ends)
		{
			/* terminate right after the last character written */
			buf[p_pos] = '\0';
		}
		/* no middle base was labelled: report the last codon index seen */
		if(start_pos == -1)
			start_pos = aa;
	}

	return start_pos;
}
Exemple #4
0
/******************************************************************
*
*	aa_to_codon(sfp, aa_start, aa_stop)
*	generate a list of CodonVector to show the codons of an 
*	amino acid sequence
*	sfp: the Seq-feat for cds (data.choice must be 3)
*	aa_start: the start position of protein sequence
*	aa_stop the stop position of protein sequence
*
*	returns a ValNode list with one CodonVector per interval of
*	sfp->location that contributes to [aa_start, aa_stop], or NULL
*	when sfp is not a usable cds feature. Each CodonVector carries
*	three text rows (buf[0..2]), one per codon position, holding
*	the lower-cased bases.
*
*	NOTE(review): sfp itself and sfp->product are dereferenced /
*	passed on without a NULL check -- confirm callers guarantee both.
*
******************************************************************/
NLM_EXTERN ValNodePtr aa_to_codon(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
  BioseqPtr bsp;

  Int4 frame_offset, start_offset;
  SeqLocPtr slp = NULL;
  SeqLocPtr cdloc;
  CdRegionPtr crp;
  Uint1 frame;

  Boolean is_end;			/**is the end for process reached?**/
  Int4 p_start=0, p_stop=0;		/**protein start & stop in defined
					corresponding CdRegion Seq-loc**/

  Int4 line_len;
  Int4 cur_pos;			/**current protein position in process**/
  Int4 cd_len;		/**length of the cDNA for the coding region**/

  Int2 i, j;
  Int2 k, n;
  CharPtr PNTR buf;

  Boolean is_new;		/**Is cur_pos at the begin of new Seq-loc?**/
  CharPtr temp;

  SeqPortPtr spp;
  Uint1 residue;

  Boolean end_partial;
  Int4 d_start, seq_pos;
  Int2 pos;

  ValNodePtr head= NULL;
  CodonVectorPtr cvp;
  Boolean prt_stop_codon;
  Uint2 exon;




   /* only features with data.choice 3 that carry a CdRegion and a
      location are handled */
   if(sfp->data.choice !=3)
	return NULL;

   crp = sfp->data.value.ptrvalue;
   if(!crp)
	return NULL;
   frame = crp->frame;
   cdloc = sfp->location;
   if(cdloc == NULL )
	return NULL;

   /* number of leading bases to skip before the first full codon;
      frame 0 is treated like frame 1 */
   if(frame>0)
	frame_offset = frame-1;
   else
	frame_offset = 0;
   start_offset = frame_offset;

   /* the stop codon is shown only when the request runs to the end of
      the product */
   prt_stop_codon = (aa_stop == SeqLocStop(sfp->product));
   line_len = (aa_stop - aa_start + 1) + 1;
					/* +1 for the possible partial start codon*/
   if(prt_stop_codon)/*can be either as a stop codon or partial stop*/
	++line_len;
   /* three scratch rows, one per codon position, reused for every exon.
      NOTE(review): the MemNew results are not checked */
   buf = MemNew((size_t)3 * sizeof(CharPtr));
   for(i =0; i<3; ++i)
	buf[i] = MemNew((size_t)(line_len + 1) * sizeof (Char));
		

   cur_pos= aa_start;
   cd_len = 0;
   is_end = FALSE;
   p_start = 0;
   slp = NULL;
   exon = 0;
   /* walk the intervals (exons) of the coding region until aa_stop is
      covered */
   while(!is_end && ((slp = SeqLocFindNext(cdloc, slp))!=NULL))
   {
	++exon;
	cd_len += SeqLocLen(slp);
	/* does this interval end in the middle of a codon? */
	end_partial = ((cd_len - start_offset)%3 != 0);
	/* index of the last amino acid touched by this interval; a trailing
	   partial codon counts as one more */
	p_stop = (cd_len - start_offset)/3 -1;
	if(end_partial)
	   ++p_stop;
	if(p_stop > aa_stop || (p_stop == aa_stop && !end_partial))
	{
	   p_stop = aa_stop;		/**check if the end is reached**/
	   is_end = TRUE;
	}

	if(p_stop >= cur_pos)	/*get the exon*/
	{
	   bsp = BioseqLockById(SeqLocId(slp));
	   if(bsp)
	   {
		is_new = (p_start == cur_pos);	/*start a new exon?*/
		/* describe this exon: id, strand, ordinal and reading frame */
		cvp = MemNew(sizeof(CodonVector));
		cvp->sip = SeqIdDup(find_sip(bsp->id));
		cvp->strand = SeqLocStrand(slp);
		cvp->exonCount = exon;
		if(is_new)
		{
			if(frame_offset == 0)
				cvp->frame = 0;
			else
				cvp->frame = 3- (Uint1)frame_offset;
		}
		else
			cvp->frame = 0;
		/* aa_index is the column of the first full codon: 0 only when a
		   partial start codon is being shown, otherwise 1 */
		if(cur_pos==0 && frame_offset > 0)	/*partial start codon*/
			cvp->aa_index = 0;
		else
			cvp->aa_index = 1;
		if(is_new)	/**special case of the first partial**/
		   d_start = SeqLocStart(slp);
		else
		{
		   if(frame_offset && p_start >0)
			++p_start;
		   d_start = SeqLocStart(slp) + 3*(cur_pos - p_start) + frame_offset;
		}
	    /**p_start is the start position of aa in the current Seq-loc
	       cur_pos is the current aa that is in process. The offset will
	       help to located the position on the DNA Seq-loc for translation
	       d_start is the position of the starting DNA in the coordinates
	       of DNA segment, used for mark the sequence
	       **/

		seq_pos = d_start - SeqLocStart(slp);	/**the pos in spp**/
		/* on the minus strand the same offset is counted back from the
		   stop of the interval */
		if(SeqLocStrand(slp)== Seq_strand_minus)
		   d_start = SeqLocStop(slp) - seq_pos;
		cvp->dna_pos = d_start;

		n = (Int2)cur_pos - (Int2)aa_start + cvp->aa_index;	/*position in buffer*/
		for(i =0; i<3; ++i)
			make_empty(buf[i], (Int2)line_len);
		/* NOTE(review): spp is used without a NULL check */
		spp = SeqPortNewByLoc(slp, Seq_code_iupacna);
		SeqPortSeek(spp, seq_pos, SEEK_SET);
		/**store the partial codons**/
		if(is_new && frame_offset > 0)
		{
		   /* the frame_offset leftover bases go into the last rows of
		      column n, so they line up as the tail of a codon */
		   k = (Int2)frame_offset;
		   while(k > 0)
		   {
			residue = SeqPortGetResidue(spp);
			temp = buf[3-k];	/**the position**/
			pos = n;
			temp[pos] = TO_LOWER(residue);
			--k;
		   }
		   ++n;
		   if(cur_pos!=0)
			++cur_pos;
		}


	     	/**load  the codons**/
		k =0;
		while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF && cur_pos <= p_stop)
		{
		   /* base k goes into row k%3 of the current column */
		   j= (Uint1)k%3;
		   temp = buf[j];
		   temp[n] = TO_LOWER(residue);
		   if(j ==2)
		   {		/**the last base**/
			++n;
		 	if(!prt_stop_codon|| !is_end) /*for the last codon*/
			/**prt_end controls to print the whole loc**/
		   	   ++cur_pos;
		   }
		   ++k;
		}	/**end of while**/

		SeqPortFree(spp);

		/* the CodonVector keeps its own copies of the three rows */
		for(i =0; i<3; ++i)
		   cvp->buf[i] = StringSave(buf[i]);
		ValNodeAddPointer(&head, 0, (Pointer)cvp);

		BioseqUnlock(bsp);
	   }/*end of if(bsp)*/
	}/**end of if for matched intervals**/

	/* first amino acid of the next interval: a split codon is shared
	   between both intervals */
	if(end_partial)
	    p_start = p_stop;
	else
	    p_start = p_stop +1;

	/* bases the next interval needs to complete the split codon */
	frame_offset = (cd_len - start_offset)%3;
	 if(frame_offset >0)
	    frame_offset = 3-frame_offset;

   }/**end of while(slp && !is_end) **/

   /* release the scratch rows; the list owns only the saved copies */
   for(i=0; i<3; ++i)
	MemFree(buf[i]);
   MemFree(buf);

   return head;
}
Exemple #5
0
/*
 * dusttrip_flag_invalid
 *	Counts one invalid residue in *invrescount and, for the first three
 *	only, posts an informational message with the current port position.
 */
static void dusttrip_flag_invalid (SeqPortPtr spp, Int4 PNTR invrescount)
{
	Int4 where;

	if (*invrescount < 3)
	{
		where = SeqPortTell (spp);
		ErrPostEx (SEV_INFO, 5, 1,
		 "Invalid residue converted to 'A': %ld", (long) where);
		ErrShow ();
	}
	(*invrescount)++;
}

/*
 * dusttripfind
 *	Reads residues from spp starting at icur and packs them into
 *	overlapping triplets: byte k of s1 ends up holding residues k, k+1
 *	and k+2, two bits each (the caller supplies a zeroed buffer).
 *
 *	spp          sequence port to read
 *	s1           triplet buffer, written up to two bytes past the last
 *	             complete triplet
 *	icur         position in spp to start reading from
 *	max          maximum number of triplets to produce
 *	invrescount  running count of invalid residues; each one is stored
 *	             as 0 ('A') and only the first three are reported
 *
 *	Returns the number of complete triplets stored.  Reading stops at end
 *	of port, after max triplets, or at the first non-residue value other
 *	than SEQPORT_EOS (which is skipped).  Returns 0 if either of the first
 *	two reads yields EOF, EOS or VIRT.
 */
static Int4 dusttripfind (SeqPortPtr spp, UcharPtr s1, Int4 icur, Int4 max,
				Int4 PNTR invrescount)
{
	Int4 ntrip = 0;
	Int2 c;

	SeqPortSeek (spp, icur, SEEK_SET);

	/* first base: goes into the high bits of triplet 0 */
	c = SeqPortGetResidue (spp);
	if (c == SEQPORT_EOF || c == SEQPORT_EOS || c == SEQPORT_VIRT)
		return ntrip;
	if (!IS_residue (c))
	{
		c = 0;				/* 255 it's 'A' */
		dusttrip_flag_invalid (spp, invrescount);
	}
	s1[0] |= c;
	s1[0] <<= 2;

	/* second base: middle of triplet 0, start of triplet 1 */
	c = SeqPortGetResidue (spp);
	if (c == SEQPORT_EOF || c == SEQPORT_EOS || c == SEQPORT_VIRT)
		return ntrip;
	if (!IS_residue (c))
	{
		c = 0;				/* 255 it's 'A' */
		dusttrip_flag_invalid (spp, invrescount);
	}
	s1[0] |= c;
	s1[1] |= c;

	/* every further base completes one triplet and feeds the next two */
	for (;;)
	{
		c = SeqPortGetResidue (spp);
		if (c == SEQPORT_EOF || ntrip >= max)
			break;

		if (c == INVALID_RESIDUE)
		{
			c = 0;				/* 255 it's 'A' */
			dusttrip_flag_invalid (spp, invrescount);
		}

		if (!IS_residue (c))
		{
			/* a segment end (252, rare) is skipped; a virtual
			   segment (251) or anything else ends the chunk, so
			   dusting does not run across it */
			if (c == SEQPORT_EOS)
				continue;
			break;
		}

		s1[0] <<= 2;
		s1[1] <<= 2;
		s1[0] |= c;
		s1[1] |= c;
		s1[2] |= c;
		s1++;
		ntrip++;
	}

	return ntrip;
}
Exemple #6
0
/*
 * BioseqRawToRaw
 *   Appends the id line and (unless idonly) the residues of one raw or
 *   const Bioseq to caller-owned, malloc'd strings.
 *
 *   bsp       Bioseq to export; ignored unless its repr is raw or const
 *   idonly    TRUE: only build a numbered id line in *seqid
 *   whichSeq  ordinal of the sequence wanted, or 0 for every sequence
 *   seqnum    running sequence counter, incremented for each raw/const bsp
 *   seq       in/out: NUL-terminated residue string (may be NULL on entry);
 *             new residues are appended
 *   seqid     in/out: id/title string (may be NULL on entry); ids of
 *             joined parts are separated by "; "
 *   seqlen    out: length of *seq after appending
 *
 *   May be called several times for one logical sequence, so *seq and
 *   *seqid are grown rather than replaced.  If memory runs out the strings
 *   built so far are kept intact and NUL-terminated (a failed realloc no
 *   longer drops or invalidates them), and the sequence port is always
 *   released.
 */
void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly,
              short whichSeq, short *seqnum,
              char **seq, char **seqid, long *seqlen)
{
  SeqPortPtr spp;
  SeqIdPtr bestid;
  Uint1 repr, code, residue;
  CharPtr tmp, title;
  long  outlen, outmax;
  char  localid[256], *sp, *grown;
  size_t used, room;

  /* !!! this may be called several times for a single sequence
    because SeqEntryExplore looks for parts and joins them...
    assume seq, seqid, seqlen may contain data (or NULL)
  */
  if (bsp == NULL) return;
  repr = Bioseq_repr(bsp);
  if (!(repr == Seq_repr_raw || repr == Seq_repr_const)) return;

  (*seqnum)++;
  if (!(whichSeq == *seqnum || whichSeq == 0)) return;

  /* build "<id> <title>" (prefixed by " N)  " for id listings) in localid */
  bestid = SeqIdFindBest(bsp->id, (Uint1) 0);
  title = BioseqGetTitle(bsp);
  if (idonly) {
    sprintf(localid, " %d)  ", *seqnum);
    tmp= localid + strlen(localid)-1;
    }
  else {
    strcpy(localid," ");
    tmp= localid;
    }
  /* NOTE(review): SeqIdPrint takes no buffer size; presumably it limits
     itself to fewer than 256 characters -- confirm */
  tmp = SeqIdPrint(bestid, tmp, PRINTID_FASTA_SHORT);
  tmp = StringMove(tmp, " ");

  /* Copy at most 200 title characters, but never past the end of localid.
     A strncpy-style copy always writes the full count (zero padding), so an
     unconditional count of 200 overran the buffer after a long id, and a
     title of 200+ characters left the string unterminated. */
  used = (size_t) (tmp - localid);
  if (used < sizeof(localid) - 1) {
    room = sizeof(localid) - 1 - used;
    if (room > 200) room = 200;
    StringNCpy(tmp, title, room);
    tmp[room] = '\0';
    }

  /* seqid is variable sized: +3 leaves room for the NUL and a trailing
     "\n", +2 for the "; " separator */
  outmax= strlen(localid) + 3;
  if (*seqid==NULL) {
    *seqid= (char*) malloc(outmax);
    if (*seqid==NULL) return;
    strcpy(*seqid, localid);
    }
  else {
    outmax += strlen(*seqid) + 2;
    grown= (char*) realloc( *seqid, outmax);
    if (grown==NULL) return;      /* *seqid is still the caller's valid string */
    *seqid= grown;
    if (!idonly) strcat(*seqid, "; ");
    strcat(*seqid, localid);
    }

  if (idonly) {
    strcat(*seqid,"\n");
    return;
    }

  if (ISA_na(bsp->mol)) code = Seq_code_iupacna;
  else code = Seq_code_iupacaa;
  spp = SeqPortNew(bsp, 0, -1, 0, code);
  if (spp == NULL) return;
  SeqPortSeek(spp, 0, SEEK_SET);

  /* get a buffer with 500 bytes of head room, keeping any residues that
     an earlier call already stored */
  sp= *seq;
  if (sp==NULL) {
    outlen= 0;
    outmax= 500;
    grown= (char*) malloc(outmax);
    }
  else {
    outlen= strlen(sp);
    outmax= outlen + 500;
    grown= (char*) realloc( sp, outmax);
    }
  if (grown==NULL) {
    SeqPortFree(spp);             /* *seq is untouched and still valid */
    return;
    }
  sp= grown;
  sp[outlen]= '\0';
  *seq= sp;                       /* the old block may have moved */

  while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
    /* always keep one spare byte for the terminator */
    if (outlen + 1 >= outmax) {
      grown= (char*) realloc(sp, outlen + 500);
      if (grown==NULL) break;     /* keep what has been read so far */
      sp= grown;
      outmax= outlen + 500;
      }
    sp[outlen++] = residue;
    }

  /* trim to size; if trimming fails the larger buffer is simply kept */
  grown= (char*) realloc(sp, outlen+1);
  if (grown!=NULL) sp= grown;
  sp[outlen]= '\0';
  *seq= sp;
  *seqlen= outlen;
  SeqPortFree(spp);
  return;
}
Exemple #7
0
/*
 * dustseqport_note_invalid
 *	Counts one invalid residue in *invrescount and, for the first three
 *	only, posts an informational message with the current port position.
 */
static void dustseqport_note_invalid (SeqPortPtr spp, Int4 PNTR invrescount)
{
    Int4 pos;

    pos = SeqPortTell (spp);
    if (*invrescount < 3)
    {
        ErrPostEx (SEV_INFO, 5, 1,
                   "Invalid residue converted to 'A': %ld", (long) pos);
        ErrShow ();
    }
    (*invrescount)++;
}

/*
 * DustSeqPort
 *	Runs Dust() over the residues start..stop of a sequence port, in
 *	chunks of at most MAXSEQCHUNK, and returns the list of regions found.
 *
 *	spp    sequence port to read (presumably opened with a 2-bit
 *	       nucleotide code, since residues are packed two bits each --
 *	       confirm with callers)
 *	start  first position to process
 *	stop   last position to process
 *	ddp    dust parameters, passed through to Dust()
 *
 *	For each chunk the residues are packed into overlapping triplets
 *	(byte k holds residues k, k+1 and k+2) and handed to Dust().  Invalid
 *	residues are stored as 0 ('A'); the first three are reported
 *	individually and the total is reported at the end.
 *
 *	Returns the head of the region list, or NULL when an argument is
 *	NULL, stop < start, memory cannot be allocated, or nothing was found.
 *	If a chunk cannot be started (EOF, segment end or virtual segment on
 *	its first two reads), the regions collected from earlier chunks are
 *	returned -- NULL for the first chunk -- and the chunk buffer is freed.
 */
extern DustRegionPtr DustSeqPort (SeqPortPtr spp,
                                  Int4 start, Int4 stop,
                                  DustDataPtr ddp)
{
    Int4          i, posn;
    Uint1         c;
    Uint1Ptr      shead, s1, s2, s3;
    Boolean       flagVS;
    Int4          this_start, this_stop;
    size_t        bufsize;
    DustRegionPtr drphead = NULL, drp = NULL;
    Int4          invrescount = 0;

    if (spp == NULL || ddp == NULL)
        return NULL;
    if (stop < start)       /* would give a zero or negative buffer size */
        return NULL;

    /* one byte per position; never fewer than 3 so that the unconditional
       writes through s1/s2 (and the initial s3) stay inside the buffer even
       for a one- or two-base range */
    bufsize = sizeof (Uint1) * (size_t) (stop - start + 1);
    if (bufsize < 3)
        bufsize = 3;

    if (stop-start+1 > MAXSEQCHUNK)
    {
        this_start = start;
        this_stop = this_start + MAXSEQCHUNK;
    }
    else
    {
        this_start = start;
        this_stop = stop;
    }

    while (this_stop <= stop)
    {
        shead = (Uint1Ptr) MemNew (bufsize);
        if (shead == NULL)
            return NULL;
        MemSet (shead, 0, bufsize);
        s1 = shead;
        s2 = s1 + 1;
        s3 = s2 + 1;

        posn = start-1;

        SeqPortSeek (spp, this_start, SEEK_SET);

        /* set up 1: first base goes into triplet 0 */
        c = SeqPortGetResidue (spp);
        if (c == SEQPORT_EOF || c == SEQPORT_EOS || c == SEQPORT_VIRT)
        {
            /* these exits used to leak shead and to drop the regions
               already collected from earlier chunks */
            MemFree (shead);
            return drphead;
        }
        if (!IS_residue (c))
        {
            c = 0;        /* 255 it's 'A' */
            dustseqport_note_invalid (spp, &invrescount);
        }
        *s1 |= c;

        /* set up 2: second base goes into triplets 0 and 1 */
        c = SeqPortGetResidue (spp);
        if (c == SEQPORT_EOF || c == SEQPORT_EOS || c == SEQPORT_VIRT)
        {
            MemFree (shead);
            return drphead;
        }
        if (!IS_residue (c))
        {
            c = 0;        /* 255 it's 'A' */
            dustseqport_note_invalid (spp, &invrescount);
        }
        *s1 <<= 2;
        *s1 |= c;
        *s2 |= c;

        /* triplet fill loop: each further base completes one triplet and
           feeds the next two */
        flagVS = FALSE;
        for (i = this_start+2; i <= this_stop; i++)
        {
            if ((c = SeqPortGetResidue (spp)) != SEQPORT_EOF)
            {
                if (c == INVALID_RESIDUE)  /* 255 */
                {
                    c = 0;                   /* ping! -- it's 'A' */
                    dustseqport_note_invalid (spp, &invrescount);
                }
                if (IS_residue (c))
                {
                    *s1 <<= 2;
                    *s2 <<= 2;
                    *s1 |= c;
                    *s2 |= c;
                    *s3 |= c;
                    s1++;
                    s2++;
                    s3++;
                    posn++;
                }
                else
                {
                    switch (c)
                    {
                    /* this should be okay unless segments are being used weirdly */
                    case SEQPORT_EOS:   /* 252 */
                        break;
                    /* start again at virtual sequence boundaries */
                    case SEQPORT_VIRT:  /* 251 */
                        flagVS = TRUE;
                        break;
                    /* and/or just ignore anything odd at this point */
                    default:
                        break;
                    }
                    if (flagVS)
                        break;
                }
            }
        }
        /* NOTE(review): posn starts from start-1 rather than from 0 or
           this_start-1, so this consistency check and the next chunk's
           this_start below look suspicious; left unchanged pending
           confirmation of the intended arithmetic */
        if (this_stop != posn+2+this_start)
        {
            ErrPostEx (SEV_WARNING, 1, 10,
                       "Possible seqport read error: Expected: %ld Actual: %ld",
                       (long) this_stop, (long) posn);
            ErrShow ();
        }
        drp = Dust (shead, this_start, this_stop, ddp, drp);
        if (drphead == NULL)
        {
            drphead = drp;
        }
        MemFree (shead);
        if (this_stop == stop)
        {
            break;
        }
        else
        {
            this_start = posn+3;
            this_stop = this_start + MAXSEQCHUNK;
            if (this_stop > stop)
                this_stop = stop;
        }
    }
    if (invrescount > 0)
    {
        ErrPostEx (SEV_INFO, 3, 2,
                   "Total invalid residues found: %ld", (long) invrescount);
        ErrShow ();
    }
    return drphead;
}