/*
 * FillCSANWithSeq
 *
 * Stores in pcsanThis->pcSeqAln a newly allocated, NUL-terminated string of
 * exactly iLen characters: the leading residues of the protein Bioseq pbsq
 * (ncbieaa alphabet), padded on the right with '-' when the sequence is
 * shorter than iLen.
 *
 * Returns iLen on success.  Returns 0 (and stores nothing) when pcsanThis or
 * pbsq is NULL, pbsq is not a protein, iLen is not positive, or the SeqPort
 * or the buffer cannot be created.
 *
 * Any previous value of pcsanThis->pcSeqAln is overwritten, not freed.
 *
 * WARNING not called and not tested...
 */
static Int4 FillCSANWithSeq(PCSAN pcsanThis, BioseqPtr pbsq, Int4 iLen)
{
    SeqPortPtr spp = NULL;
    Uint1 code = Seq_code_ncbieaa;
    Uint1 residue;
    Int4 iCount = 0;
    CharPtr pcA;

    if (!pcsanThis) return 0;
    if (!pbsq) return 0;
    if (!ISA_aa(pbsq->mol)) return 0;
    if (iLen <= 0) return 0;

    spp = SeqPortNew(pbsq, 0, -1, 0, code);
    if (!spp) return 0;
    SeqPortSeek(spp, 0, SEEK_SET);

    pcsanThis->pcSeqAln = (CharPtr)MemNew((size_t) (1 + sizeof(char) * iLen));
    if (!pcsanThis->pcSeqAln) {
        /* do not leak the port when the buffer cannot be allocated */
        SeqPortFree(spp);
        return 0;
    }
    pcA = pcsanThis->pcSeqAln;

    /*
     * Copy residues until the sequence ends or the buffer is full.
     * The bound must be iCount < iLen: the previous test (iLen < iCount)
     * was false on entry, so nothing was ever copied.
     */
    residue = SeqPortGetResidue(spp);
    while ((residue != SEQPORT_EOF) && (residue != '\0') && (iCount < iLen)) {
        *pcA = (char) residue;
        pcA++;
        iCount++;
        residue = SeqPortGetResidue(spp);
    }

    /* pad the remainder of the row with gap characters */
    while (iCount < iLen) {
        *pcA = '-';
        pcA++;
        iCount++;
    }
    pcsanThis->pcSeqAln[iLen] = '\0';

    SeqPortFree(spp);
    return iCount;
}
/*
 * load_data
 *
 * Reads SeqLocLen(slp) residues of the location slp into a newly allocated
 * array (two spare bytes are allocated past the data) and returns it.
 * Proteins are read as ncbistdaa, everything else as ncbi4na.
 * The caller owns the returned array.
 */
static Uint1Ptr load_data (SeqLocPtr slp,Boolean is_prot)
{
	Uint1 alphabet;
	Int4 total;
	Int4 remaining;
	Uint1Ptr residues = NULL;
	Uint1Ptr cursor;
	SeqPortPtr port;

	alphabet = is_prot ? Seq_code_ncbistdaa : Seq_code_ncbi4na;

	total = SeqLocLen(slp);
	residues = (Uint1Ptr) MemNew(((total)+2)*sizeof(Uint1));
	port = SeqPortNewByLoc(slp, alphabet);

	/* one residue per position, in port order */
	cursor = residues;
	for (remaining = total; remaining > 0; remaining--)
	{
		*cursor = SeqPortGetResidue(port);
		cursor++;
	}

	SeqPortFree (port);
	return residues;
}
/*
 * PrintSequence
 *
 * Writes one FASTA-style record for bsp to fp when bsp is a raw or const
 * Bioseq whose molecule class matches is_na (TRUE = nucleic acid).
 *
 *   bsp    sequence to print; nothing is done when NULL
 *   sfp    if not NULL, only sfp->location is printed instead of all of bsp
 *   fp     output stream; nothing is done when NULL
 *   is_na  selects nucleic acid (iupacna) or protein (iupacaa) records
 *
 * The definition line is ">" + FASTA long id + " " + title.  Residues are
 * written CHARSPERLINE per line; non-residue values flush the current line
 * and are printed as "[Gap]", "[EOS]" or "[Invalid Residue]".
 * If the file-level monitor pmon is set, the definition line is shown on it.
 */
static void PrintSequence (BioseqPtr bsp, SeqFeatPtr sfp, FILE *fp, Boolean is_na)
{
  Char        buffer [255];
  Uint1       code;
  Int2        count;
  Uint1       repr;
  Uint1       residue;
  size_t      room;
  SeqPortPtr  spp;
  CharPtr     title;
  CharPtr     tmp;
  size_t      used;

  if (bsp == NULL || fp == NULL) return;
  if ((Boolean) ISA_na (bsp->mol) != is_na) return;
  repr = Bioseq_repr (bsp);
  if (repr != Seq_repr_raw && repr != Seq_repr_const) return;

  title = BioseqGetTitle (bsp);
  tmp = StringMove (buffer, ">");
  tmp = SeqIdPrint (bsp->id, tmp, PRINTID_FASTA_LONG);
  tmp = StringMove (tmp, " ");

  /*
   * Append at most 200 title characters, but never more than what is left
   * in buffer, and always terminate: a bounded copy does not write a NUL
   * when the title fills the whole limit.
   */
  used = (size_t) (tmp - buffer);
  if (used < sizeof (buffer)) {
    room = sizeof (buffer) - 1 - used;
    if (room > 200) room = 200;
    StringNCpy (tmp, title, room);
    tmp [room] = '\0';
  }
  fprintf (fp, "%s\n", buffer);
  if (pmon != NULL) MonitorStrValue(pmon, buffer);

  code = is_na ? Seq_code_iupacna : Seq_code_iupacaa;
  if (sfp != NULL) {
    spp = SeqPortNewByLoc (sfp->location, code);
  } else {
    spp = SeqPortNew (bsp, 0, -1, 0, code);
  }
  if (spp == NULL) return;

  /* buffer is reused from here on as the current output line */
  count = 0;
  while ((residue = SeqPortGetResidue (spp)) != SEQPORT_EOF) {
    if (! IS_residue (residue)) {
      /* flush what has been collected, then name the special value */
      buffer [count] = '\0';
      fprintf (fp, "%s\n", buffer);
      count = 0;
      switch (residue) {
        case SEQPORT_VIRT :
          fprintf (fp, "[Gap]\n");
          break;
        case SEQPORT_EOS :
          fprintf (fp, "[EOS]\n");
          break;
        default :
          fprintf (fp, "[Invalid Residue]\n");
          break;
      }
    } else {
      buffer [count] = residue;
      count++;
      if (count >= CHARSPERLINE) {
        buffer [count] = '\0';
        fprintf (fp, "%s\n", buffer);
        count = 0;
      }
    }
  }
  if (count != 0) {
    buffer [count] = '\0';
    fprintf (fp, "%s\n", buffer);
  }
  SeqPortFree (spp);
}
/*
 * print_protein_for_cds
 *
 * Writes into buf, one character per position of loc, the translation of
 * the coding-region feature sfp:
 *   - the residue letter at the middle base of each codon (val%3 == 1),
 *     or '?' when the residue is not a letter, '*' or '-';
 *   - ' ' at the other two bases of a codon;
 *   - '^' where the computed amino acid index falls outside 0..end_pos.
 *
 *   sfp            feature with data.choice == 3 (coding region)
 *   buf            caller's output buffer; if buf[0] is '\0' on entry the
 *                  function also writes terminators ("seal_ends")
 *   loc            location whose coordinates index buf
 *   reverse_minus  when TRUE and the feature strand is reversed relative to
 *                  loc (per ck_reverse), buf is filled from its end
 *
 * The protein is read from sfp->product when present (and not flagged by
 * IS_BOGO_Product), otherwise it is produced by ProteinFromCdRegion.
 *
 * Returns -1 for invalid arguments or when no protein is available;
 * otherwise start_pos, the amino acid index of the first labelled residue.
 * When no residue was labelled, the result is -1 if the final p_pos is 0,
 * else the last computed amino acid index.
 *
 * NOTE(review): p_pos and buf_len are Int2 while they receive SeqLocLen()
 * and GatherRange values; very long locations may not fit - confirm.
 */
NLM_EXTERN Int4 print_protein_for_cds(SeqFeatPtr sfp, CharPtr buf, SeqLocPtr loc, Boolean reverse_minus)
{
	CdRegionPtr crp;
	Int4 frame_offset, start_offset;
	Uint1 f_strand;
	Boolean reverse;
	Int4 cd_len;
	GatherRange gr;
	Int2 p_pos, buf_len;
	Int4 a_left, a_right;
	Int4 aa, val;
	SeqLocPtr slp;
	SeqPortPtr spp;
	ByteStorePtr p_data;
	Int4 end_pos, start_pos = -1;
	Uint1 residue;
	Boolean seal_ends = FALSE;
	Boolean reverse_order;

	if(sfp == NULL || sfp->data.choice != 3) return -1;
	if(buf == NULL || loc == NULL) return -1;
	crp = sfp->data.value.ptrvalue;
	if(crp == NULL) return -1;
	/* an empty buffer on entry asks this function to terminate the string */
	if(buf[0] == '\0') seal_ends = TRUE;

	/* protein source 1: the product sequence, read through a SeqPort */
	spp = NULL;
	p_data = NULL;
	if(sfp->product !=NULL && !IS_BOGO_Product(sfp->ext))
	{
		spp = SeqPortNewByLoc(sfp->product, Seq_code_ncbieaa);
		if(spp !=NULL)
		{
			SeqPortSeek(spp, 0, SEEK_SET);
			end_pos = spp->totlen-1;
		}
	}
	/* protein source 2: translate the coding region into a ByteStore */
	if(spp == NULL)
	{
		p_data = ProteinFromCdRegion(sfp, TRUE);
		/* p_data = ProteinFromCdRegion(sfp, FALSE); */
		if(p_data !=NULL)
		{
			BSSeek(p_data, 0, SEEK_SET);
			end_pos = BSLen(p_data)-1;
		}
	}
	if(spp == NULL && p_data == NULL) return -1;

	/* crp->frame of 0 is treated like frame 1: no bases skipped */
	if(crp->frame == 0) frame_offset = 0;
	else frame_offset = (Int4)crp->frame-1;
	start_offset = frame_offset;
	f_strand = SeqLocStrand(sfp->location);
	reverse = ck_reverse(f_strand, SeqLocStrand(loc));
	/*if reverse == TRUE, the translated protein is written backwards*/
	if(reverse && reverse_minus) reverse_order = TRUE;
	else reverse_order = FALSE;

	slp = NULL;
	cd_len = 0;	/* coding bases in the intervals already visited */
	aa = 0;
	buf_len = SeqLocLen(loc);
	if(reverse_order)
	{
		/* fill from the last position backwards; terminate up front */
		p_pos = buf_len -1;
		if(seal_ends)
		{
			buf[p_pos+1] = '\0';
			seal_ends = FALSE;
		}
	}
	else p_pos = 0;

	/* visit every interval of the feature location in turn */
	while((slp = SeqLocFindNext(sfp->location, slp))!=NULL)
	{
		/* gr: where this interval falls within loc (buffer coordinates) */
		if(SeqLocOffset(loc, slp, &gr, 0))
		{
			if(reverse_order)
			{
				if(gr.right < p_pos) p_pos = (Int2)(gr.right);
			}
			else
			{
				if(p_pos < gr.left) p_pos = (Int2)(gr.left);
			}
			/* gr: where loc falls within this interval; adding cd_len
			   turns it into an offset within the whole coding sequence */
			SeqLocOffset(slp, loc, &gr, 0);
			a_left = gr.left + cd_len;
			a_right = gr.right + cd_len;
			/* if(reverse_order) { temp = a_right; a_right = -a_left; a_left = -temp; } */
			for(; a_left<=a_right; ++a_left)
			{
				val = ABS(a_left) - start_offset;
				aa = val/3;
				if(aa < 0 || aa > end_pos)/*stop & partial codon*/
				{
					buf[p_pos] = '^';
				}
				else
				{
					if(val%3==1)/*label aa in the middle of 3-bp codon*/
					{
						/* remember the first amino acid that gets a label */
						if(start_pos == -1) start_pos = aa;
						if(spp !=NULL)
						{
							SeqPortSeek(spp, aa, SEEK_SET);
							residue = SeqPortGetResidue(spp);
						}
						else
						{
							BSSeek(p_data, aa, SEEK_SET);
							residue = (Uint1)BSGetByte(p_data);
						}
						if(IS_ALPHA(residue) || residue == '*' || residue == '-') buf[p_pos] = residue;
						else buf[p_pos] = '?';
					}
					else buf[p_pos] = ' ';
				}
				if(reverse_order) -- p_pos;
				else
				{
					++p_pos;
					if (p_pos > buf_len) break;
				}
			}
		}
		cd_len += SeqLocLen(slp);
		/*frame_offset = (cd_len - start_offset)%3; if(frame_offset > 0) --frame_offset;*/
	}

	if(spp != NULL) SeqPortFree(spp);
	if(p_data != NULL) BSFree(p_data);

	if(p_pos == 0) /*all the residues are introns*/
	{
		if(seal_ends)
		{
			/* nothing was written: fill the whole row with '~' */
			end_pos = buf_len;
			MemSet((Pointer)buf, '~', (size_t)(end_pos) * sizeof(Char));
			buf[end_pos] = '\0';
		}
	}
	else
	{
		if(seal_ends)
		{
			buf[p_pos] = '\0';
		}
		if(start_pos == -1) start_pos = aa;
	}
	return start_pos;
}
/*********************************************************************
*
*	make_cds_paragraph(sfp, aa_start, aa_stop)
*	return a buffer for the display of 3-codon under one amino
*	acid format. It also includes the new line characters.
*	This is what Jonathan K. desires to have for the sequin
*	doc object
*	sfp: coding region feature (data.choice == 3) with a product
*	aa_start, aa_stop: start and stop in the amino acid sequence
*
*	The text is one labelled line of protein residues followed, for
*	every CodonVector returned by aa_to_codon(), by three lines
*	(one per codon position), and a final empty line.
*
*	Returns a newly allocated string owned by the caller, or NULL
*	when sfp is not a coding region, has no product, the product
*	Bioseq cannot be locked, or memory cannot be allocated.
*
*********************************************************************/
NLM_EXTERN CharPtr make_cds_paragraph(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
	BioseqPtr pbsp;
	SeqPortPtr spp;
	ValNodePtr cvp_node, curr;
	CodonVectorPtr cvp;
	CharPtr docbuf = NULL;
	Int4 num, buf_size;
	Uint1 residue;
	Char p_name[30];
	Int4 i;
	CharPtr buf;
	Int4 pos;
	Int4 max_len = 150;	/* room kept for the label part of each line */
	Int4 aa_len;
	Boolean extra_space;

	if(sfp == NULL || sfp->data.choice !=3)
		return NULL;
	if(sfp->product == NULL)
		return NULL;
	pbsp = BioseqLockById(SeqLocId(sfp->product));
	if(pbsp == NULL)
		return NULL;

	cvp_node = aa_to_codon(sfp, aa_start, aa_stop);

	/* one protein line plus three codon lines per CodonVector */
	num = 1;
	for(curr = cvp_node; curr !=NULL; curr = curr->next)
		num +=3;

	/*
	 * Every line carries up to one character per requested residue in
	 * addition to its label, so the per-line budget has to grow with the
	 * range; a fixed 150 bytes per line overflowed for long ranges.
	 * The +3 covers the partial-start and stop columns and the newline.
	 */
	aa_len = aa_stop - aa_start + 1;
	if(aa_len < 0)
		aa_len = 0;
	buf_size = num * (max_len + aa_len + 3);

	docbuf = MemNew((size_t)(buf_size) * sizeof(Char));
	if(docbuf == NULL)
	{
		free_cvp_list(cvp_node);
		BioseqUnlock(pbsp);
		return NULL;
	}

	MuskSeqIdWrite(pbsp->id, p_name, B_SPACE, PRINTID_TEXTID_ACCESSION, TRUE, FALSE);
	pos = 0;
	pos+= print_label_to_buffer(docbuf+pos, p_name, (aa_start+1), 0, FALSE, FALSE, B_SPACE, POS_SPACE);

	/*print the amino acid sequence into buffer*/
	spp = SeqPortNew(pbsp, aa_start, aa_stop, Seq_strand_plus, Seq_code_ncbieaa);
	while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF )
		docbuf[pos++] = residue;
	docbuf[pos++] = '\n';
	SeqPortFree(spp);

	for(curr = cvp_node; curr !=NULL; curr = curr->next)
	{
		cvp = curr->data.ptrvalue;
		SeqIdWrite (cvp->sip, p_name, PRINTID_FASTA_SHORT, 10);
		extra_space = (cvp->aa_index == 0);
		for(i=0; i<3; ++i)
		{
			/* skip the leading aa_index columns of the row */
			buf = cvp->buf[i] + cvp->aa_index;
			/* only the row matching cvp->frame carries id and position */
			if(i == cvp->frame)
			{
				pos+= print_label_to_buffer(docbuf+pos, p_name, cvp->dna_pos, cvp->strand, extra_space, FALSE, B_SPACE, POS_SPACE);
			}
			else
				pos+= print_label_to_buffer(docbuf+pos, NULL, -1, 0, extra_space, FALSE, B_SPACE, POS_SPACE);
			sprintf(docbuf+pos, "%s\n", buf);
			pos += (StringLen(buf) +1);
		}
	}
	docbuf[pos++] = '\n';
	docbuf[pos] = '\0';

	free_cvp_list(cvp_node);
	BioseqUnlock(pbsp);
	return docbuf;
}
/******************************************************************
*
*	aa_to_codon(sfp, aa_start, aa_stop)
*	generate a list of CodonVector to show the codons of an
*	amino acid sequence
*	sfp: the Seq-feat for cds (data.choice must be 3; sfp itself
*	     is dereferenced without a NULL check)
*	aa_start: the start position of protein sequence
*	aa_stop the stop position of protein sequence
*
*	Returns a ValNode list with one CodonVector per interval of
*	sfp->location that overlaps the requested range and whose
*	Bioseq could be locked, or NULL when sfp is not a coding
*	region, has no CdRegion data or has no location.
*	Each CodonVector holds three strings (buf[0..2]), one per
*	codon position, with the bases written in lower case.
*
******************************************************************/
NLM_EXTERN ValNodePtr aa_to_codon(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
	BioseqPtr bsp;
	Int4 frame_offset, start_offset;
	SeqLocPtr slp = NULL;
	SeqLocPtr cdloc;
	CdRegionPtr crp;
	Uint1 frame;
	Boolean is_end; /**is the end for process reached?**/
	Int4 p_start=0, p_stop=0; /**protein start & stop in defined corresponding CdRegion Seq-loc**/
	Int4 line_len;
	Int4 cur_pos; /**current protein position in process**/
	Int4 cd_len; /**length of the cDNA for the coding region**/
	Int2 i, j;
	Int2 k, n;
	CharPtr PNTR buf;
	Boolean is_new; /**Is cur_pos at the begin of new Seq-loc?**/
	CharPtr temp;
	SeqPortPtr spp;
	Uint1 residue;
	Boolean end_partial;
	Int4 d_start, seq_pos;
	Int2 pos;
	ValNodePtr head= NULL;
	CodonVectorPtr cvp;
	Boolean prt_stop_codon;
	Uint2 exon;

	if(sfp->data.choice !=3) return NULL;
	crp = sfp->data.value.ptrvalue;
	if(!crp) return NULL;
	frame = crp->frame;
	cdloc = sfp->location;
	if(cdloc == NULL ) return NULL;
	/* frame 0 is treated like frame 1: no leading bases to skip */
	if(frame>0) frame_offset = frame-1;
	else frame_offset = 0;
	start_offset = frame_offset;
	/* the caller asked for the range up to the end of the product */
	prt_stop_codon = (aa_stop == SeqLocStop(sfp->product));
	line_len = (aa_stop - aa_start + 1) + 1; /* +1 for the possible partial start codon*/
	if(prt_stop_codon)/*can be either as a stop codon or partial stop*/ ++line_len;

	/* three scratch rows, one per codon position, reused for every exon */
	buf = MemNew((size_t)3 * sizeof(CharPtr));
	for(i =0; i<3; ++i) buf[i] = MemNew((size_t)(line_len + 1) * sizeof (Char));

	cur_pos= aa_start;
	cd_len = 0;
	is_end = FALSE;
	p_start = 0;
	slp = NULL;
	exon = 0;
	while(!is_end && ((slp = SeqLocFindNext(cdloc, slp))!=NULL))
	{
		++exon;
		cd_len += SeqLocLen(slp);
		/* does this interval end in the middle of a codon? */
		end_partial = ((cd_len - start_offset)%3 != 0);
		/* index of the last amino acid (whole or partial) in this interval */
		p_stop = (cd_len - start_offset)/3 -1;
		if(end_partial) ++p_stop;
		if(p_stop > aa_stop || (p_stop == aa_stop && !end_partial))
		{
			p_stop = aa_stop; /**check if the end is reached**/
			is_end = TRUE;
		}
		if(p_stop >= cur_pos) /*get the exon*/
		{
			bsp = BioseqLockById(SeqLocId(slp));
			if(bsp)
			{
				is_new = (p_start == cur_pos); /*start a new exon?*/
				cvp = MemNew(sizeof(CodonVector));
				cvp->sip = SeqIdDup(find_sip(bsp->id));
				cvp->strand = SeqLocStrand(slp);
				cvp->exonCount = exon;
				if(is_new)
				{
					if(frame_offset == 0) cvp->frame = 0;
					else cvp->frame = 3- (Uint1)frame_offset;
				}
				else cvp->frame = 0;
				if(cur_pos==0 && frame_offset > 0) /*partial start codon*/ cvp->aa_index = 0;
				else cvp->aa_index = 1;
				if(is_new) /**special case of the first partial**/ d_start = SeqLocStart(slp);
				else
				{
					if(frame_offset && p_start >0) ++p_start;
					d_start = SeqLocStart(slp) + 3*(cur_pos - p_start) + frame_offset;
				}
				/**p_start is the start position of aa in the current Seq-loc cur_pos is the current aa that is in process. The offset will help to located the position on the DNA Seq-loc for translation d_start is the position of the starting DNA in the coordinates of DNA segment, used for mark the sequence **/
				seq_pos = d_start - SeqLocStart(slp); /**the pos in spp**/
				/* on the minus strand the labelled position counts from the stop */
				if(SeqLocStrand(slp)== Seq_strand_minus) d_start = SeqLocStop(slp) - seq_pos;
				cvp->dna_pos = d_start;
				n = (Int2)cur_pos - (Int2)aa_start + cvp->aa_index; /*position in buffer*/
				/* reset the scratch rows (make_empty is defined elsewhere) */
				for(i =0; i<3; ++i) make_empty(buf[i], (Int2)line_len);
				spp = SeqPortNewByLoc(slp, Seq_code_iupacna);
				SeqPortSeek(spp, seq_pos, SEEK_SET);
				/**store the partial codons**/
				if(is_new && frame_offset > 0)
				{
					/* the leftover bases go into the last rows of column n */
					k = (Int2)frame_offset;
					while(k > 0)
					{
						residue = SeqPortGetResidue(spp);
						temp = buf[3-k]; /**the position**/
						pos = n;
						temp[pos] = TO_LOWER(residue);
						--k;
					}
					++n;
					if(cur_pos!=0) ++cur_pos;
				}
				/**load the codons**/
				k =0;
				while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF && cur_pos <= p_stop)
				{
					/* j is the codon position (0..2) and selects the row */
					j= (Uint1)k%3;
					temp = buf[j];
					temp[n] = TO_LOWER(residue);
					if(j ==2)
					{
						/**the last base**/
						++n;
						if(!prt_stop_codon|| !is_end) /*for the last codon*/ /**prt_end controls to print the whole loc**/ ++cur_pos;
					}
					++k;
				} /**end of while**/
				SeqPortFree(spp);
				/* the CodonVector keeps its own copies of the three rows */
				for(i =0; i<3; ++i) cvp->buf[i] = StringSave(buf[i]);
				ValNodeAddPointer(&head, 0, (Pointer)cvp);
				BioseqUnlock(bsp);
			}/*end of if(bsp)*/
		}/**end of if for matched intervals**/
		/* a partial last codon is shared with the next interval */
		if(end_partial) p_start = p_stop;
		else p_start = p_stop +1;
		/* bases the next interval must supply to complete that codon */
		frame_offset = (cd_len - start_offset)%3;
		if(frame_offset >0) frame_offset = 3-frame_offset;
	}/**end of while(slp && !is_end) **/

	for(i=0; i<3; ++i) MemFree(buf[i]);
	MemFree(buf);
	return head;
}
/*
 * ConsignProc
 *
 * Button callback.  Using the XOS record attached to button b it:
 *   1. reads the pattern file xosp->pattern_file;
 *   2. builds the ORF list for xosp->bsp, translates each ORF that is at
 *      least xosp->minimumseed bases long and collects in xosp->slps the
 *      ORFs matched by any pattern;
 *   3. copies every ORF into xosp->slpa and frees the ORF list;
 *   4. calls FindSimilarBiasOrfs to obtain xosp->slpb and builds a codon
 *      table from it;
 *   5. for each of six frames (three per strand) computes bias scores,
 *      stores them in a SeqGraph chained on a new XIS record and adds the
 *      graph to a picture;
 *   6. adds the ORF classes (slpa, slpk, slpb, slps) and a colour legend;
 *   7. shows the picture in a new "Consign" window.
 *
 * Errors (no record, no Bioseq, unreadable pattern file) end the procedure
 * early; the latter two are reported through ErrPostEx/ErrShow.
 */
static void ConsignProc (ButtoN b)
{
  XOSPtr xosp;
  XISPtr xisp;
  ComPatPtr cpp, cpph;
  ValNodePtr orflist;
  SeqLocPtr slp, slpn;
  Int4 start, stop;
  Uint1 strand;
  SeqPortPtr spp;
  Uint1Ptr aaseq;
  Int4 ntpos, aapos;
  Uint1 cdn[3];
  SeqAlignPtr sap, sapn;
  FloatHi probcut;
  Int4 clustmin, findmin;
  Int4 i, n, endpos, XLength, XScale, shift;
  Int4 iframe, frame, top, orftop[6];
  FloatHiPtr score, expandscore;
  FloatHi maxscore;
  Int4Ptr tableGlobal;
  SeqGraphPtr sgp, sgpn;
  WindoW w;
  VieweR v;
  GrouP g;
  SegmenT seg;
  GraphSentPtr gsp;
  Char numberbuffer[32];

  if ((xosp = (XOSPtr) GetObjectExtra (b)) == NULL) return;
  if (xosp->bsp == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "No Bioseq");
    ErrShow ();
    return;
  }
  WatchCursor ();
  /* cpph keeps the head of the pattern list; cpp is the iterator */
  cpph = cpp = ReadPrositePattern (xosp->pattern_file, TRUE, -1, NULL, NULL);
  if (cpph == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "read failed %s", xosp->pattern_file);
    ErrShow ();
    ArrowCursor ();
    return;
  }

  xosp->orflist = GetOrfList (xosp->bsp, (Int2) (xosp->orfcut));
  xosp->orflist = ClearNonMetOrfs (xosp->orflist);

  /* pass 1: find the ORFs whose translation matches a pattern (seeds) */
  orflist = xosp->orflist;
  while (orflist != NULL)
  {
    slp = (SeqLocPtr) orflist->data.ptrvalue;
    /* entries with choice 0 are skipped */
    if (slp->choice == 0)
    {
      orflist = orflist->next;
      continue;
    }
    /* for a mix, work on the location it points to */
    if (slp->choice == SEQLOC_MIX) slp = (SeqLocPtr) slp->data.ptrvalue;
    start = SeqLocStart (slp);
    stop = SeqLocStop (slp);
    strand = SeqLocStrand (slp);
    /* the strand is always forced to Seq_strand_both */
    if (strand != Seq_strand_both) strand = Seq_strand_both;
    if (stop - start + 1 >= xosp->minimumseed)
    {
      spp = SeqPortNew (xosp->bsp, start, stop, strand, Seq_code_ncbi4na);
      /* one slot per codon plus the terminating 0 */
      aaseq = (Uint1Ptr) MemNew ((size_t) (sizeof (Uint1) * (((stop-start)/3)+2)));
      ntpos = start;
      aapos = 0;
      /* the first codon is translated with xosp->gcdi ... */
      while (ntpos < start+3)
      {
        cdn[0] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[1] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[2] = SeqPortGetResidue (spp);
        ntpos++;
        aaseq[aapos] = AAForCodon (cdn, xosp->gcdi);
        aapos++;
      }
      /* ... and all following codons with xosp->gcd */
      while (ntpos <= stop)
      {
        cdn[0] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[1] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[2] = SeqPortGetResidue (spp);
        ntpos++;
        aaseq[aapos] = AAForCodon (cdn, xosp->gcd);
        aapos++;
      }
      SeqPortFree (spp);
      aaseq[aapos] = 0;
      /* stop at the first pattern that produces an alignment */
      cpp = cpph;
      while (cpp != NULL)
      {
        sap = PatternMatch (aaseq, 0, Seq_strand_plus, SeqLocId (slp), cpp, 0, Seq_strand_unknown, FALSE);
        if (sap != NULL) break;
        cpp = cpp->nextpattern;
      }
      MemFree (aaseq);
      if (sap != NULL)
      {
        SeqLocLink (&(xosp->slps), SeqLocDup (slp));
      }
      /* only the fact of a match is used; the alignments are freed */
      while (sap != NULL)
      {
        sapn = sap->next;
        SeqAlignFree (sap);
        sap = sapn;
      }
    }
    orflist = orflist->next;
  }
  ComPatFree (cpph);

  /* pass 2: copy every ORF into slpa, then free the originals */
  orflist = xosp->orflist;
  while (orflist != NULL)
  {
    slp = (SeqLocPtr) orflist->data.ptrvalue;
    if (slp->choice > 0) SeqLocLink (&(xosp->slpa), SeqLocDup (slp));
    while (slp != NULL)
    {
      slpn = slp->next;
      SeqLocFree (slp);
      slp = slpn;
    }
    orflist->data.ptrvalue = NULL;
    orflist = orflist->next;
  }
  xosp->orflist = ValNodeFree (xosp->orflist);

  probcut = xosp->probcut;
  clustmin = xosp->clustmin;
  findmin = xosp->findmin;
  xosp->slpb = FindSimilarBiasOrfs (xosp->sep, probcut, clustmin, findmin, xosp->slps, xosp->slpa);
  tableGlobal = CodonTableFromSeqLoc (xosp->bsp, xosp->slpb);

  seg = NULL;
  top = 0;
  xisp = (XISPtr) MemNew (sizeof (XIS));
  /* iframe counts 0..5; frame cycles 0..2 within each strand */
  frame = 0;
  for (iframe = 0; iframe < 6; iframe++)
  {
    endpos = (xosp->bsp->length + 3 - frame - xosp->window) / 3;
    if (iframe < 3) score = BiasScoreBioseq (xosp->bsp, tableGlobal, xosp->window, frame, Seq_strand_plus);
    else score = BiasScoreBioseq (xosp->bsp, tableGlobal, xosp->window, frame, Seq_strand_minus);
    maxscore = 0.0;
    for (i = 0; i < endpos; i++) if (score[i] > maxscore) maxscore = score[i];
    /* spread the scores over one value per base; positions with no
       score (first window/2 bases and the tail) get maxscore */
    expandscore = (FloatHiPtr) MemNew (sizeof (FloatHi) * xosp->bsp->length);
    for (i = 0; i < xosp->window/2; i++) expandscore[i] = maxscore;
    n = 0;
    while (i < xosp->bsp->length)
    {
      if (n < endpos) expandscore[i] = score[n];
      else expandscore[i] = maxscore;
      i++;
      if (i%3 == 0) n++;
    }
    MemFree (score);
    score = expandscore;
    /* append the new graph to the chain held by xisp */
    sgp = SeqGraphNew ();
    if (xisp->sgp == NULL)
    {
      xisp->sgp = sgp;
    }
    else
    {
      sgpn = xisp->sgp;
      while (sgpn->next != NULL) sgpn = sgpn->next;
      sgpn->next = sgp;
    }
    /* XScale is length/XLength rounded up, with XLength capped at 1200 */
    XLength = xosp->bsp->length;
    if (XLength > 1200) XLength = 1200;
    XScale = xosp->bsp->length / XLength;
    if (xosp->bsp->length % XLength != 0) XScale++;
    sgp->loc = SeqLocIntNew (0, xosp->bsp->length-1, xosp->bsp->strand, xosp->bsp->id);
    sgp->flags[2] = 1;
    sgp->numval = xosp->bsp->length;
    sgp->values = (Pointer) score;
    sgp->max.realvalue = maxscore;
    sgp->min.realvalue = 0.0;
    sgp->flags[1] = 1;
    sgp->a = 4.0;
    sgp->b = 0.0;
    if (seg == NULL) seg = CreatePicture ();
    if ((gsp = AddGraphSentinelToPicture (sgp, xosp->bsp, seg, 0, top, 0, NULL)) != NULL)
    {
      /* label both ends of the graph with the sequence coordinates */
      sprintf (numberbuffer, "%ld", 1L);
      AddLabel (seg, gsp->box.left, gsp->bottom-20, numberbuffer, SMALL_TEXT, 0, MIDDLE_CENTER, 0);
      sprintf (numberbuffer, "%ld", (long) xosp->bsp->length);
      AddLabel (seg, gsp->box.left+xosp->bsp->length, gsp->bottom-20, numberbuffer, SMALL_TEXT, 0, MIDDLE_CENTER, 0);
    }
    /* remember where this frame's ORF lines go and move down */
    shift = (Int4) (maxscore*sgp->a);
    orftop[iframe] = top - shift - 38;
    top -= (shift+56);
    frame++;
    if (frame == 3)
    {
      /* extra gap between the two strands */
      top -= 24;
      frame = 0;
    }
  }

  /* draw each available ORF class under every frame, 4 units apart */
  frame = 0;
  for (iframe = 0; iframe < 6; iframe++)
  {
    if (iframe < 3) strand = Seq_strand_plus;
    else strand = Seq_strand_minus;
    shift = 0;
    if (xosp->slpa != NULL)
    {
      AddOrfClass (xosp->slpa, seg, orftop, iframe, frame, shift, strand, YELLOW_COLOR, 5);
      shift += 4;
    }
    if (xosp->slpk != NULL)
    {
      AddOrfClass (xosp->slpk, seg, orftop, iframe, frame, shift, strand, GREEN_COLOR, 5);
      shift += 4;
    }
    if (xosp->slpb != NULL)
    {
      AddOrfClass (xosp->slpb, seg, orftop, iframe, frame, shift, strand, BLUE_COLOR, 5);
      shift += 4;
    }
    if (xosp->slps != NULL)
    {
      AddOrfClass (xosp->slps, seg, orftop, iframe, frame, shift, strand, RED_COLOR, 5);
    }
    frame++;
    if (frame == 3) frame = 0;
  }
  MemFree (tableGlobal);

  /* legend: one coloured line and a caption per ORF class present */
  start = 20;
  stop = 20 + (50*XScale);
  top = orftop[5] - 40;
  if (xosp->slpa != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), YELLOW_COLOR, SOLID_LINE, SOLID_SHADING, 5, 0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH, 0);
    AddLabel (seg, stop+(20*XScale), top, "All Met-init'd ORFs equal to or greater than 50 codons", SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slpk != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), GREEN_COLOR, SOLID_LINE, SOLID_SHADING, 5, 0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH, 0);
    AddLabel (seg, stop+(20*XScale), top, "Annotated (reported) ORFs", SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slpb != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), BLUE_COLOR, SOLID_LINE, SOLID_SHADING, 5, 0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH, 0);
    AddLabel (seg, stop+(20*XScale), top, "Similar codon usage bias ORFs to seed ORFs", SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slps != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), RED_COLOR, SOLID_LINE, SOLID_SHADING, 5, 0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH, 0);
    AddLabel (seg, stop+(20*XScale), top, "Pattern match seed ORFs", SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }

  /* hand picture and graphs to the window; CleanUpGraphWindow is
     registered as the cleanup callback for xisp */
  xisp->picture = seg;
  w = FixedWindow (10, 10, 640, 720, "Consign", CloseGraphWindowProc);
  SetObjectExtra (w, xisp, CleanUpGraphWindow);
  g = HiddenGroup (w, -1, 0, NULL);
  v = CreateViewer (g, 560, 640, TRUE, TRUE);
  AttachPicture (v, seg, INT4_MIN, INT4_MAX, UPPER_LEFT, XScale, 1, NULL);
  PushButton (g, "Close", CloseGraphWindowButton);
  RealizeWindow (w);
  ArrowCursor ();
  Show (w);
  return;
}
/*
 * Count one invalid residue.  The first three are also reported, together
 * with the current SeqPort position.
 */
static void dusttrip_note_invalid (SeqPortPtr spp, Int4 PNTR invrescount)
{
	Int4 where;

	if (*invrescount < 3)
	{
		where = SeqPortTell (spp);
		ErrPostEx (SEV_INFO, 5, 1, "Invalid residue converted to 'A': %ld", (long) where);
		ErrShow ();
	}
	(*invrescount)++;
}

/*
 * dusttripfind
 *
 * Reads residues from spp starting at icur and ORs them, two bits apart,
 * into the bytes at s1 so that each byte collects three consecutive
 * residues.  Invalid residues are replaced by 0 and counted in
 * *invrescount.
 *
 * Returns the number of bytes completed (at most max).  Returns 0 when the
 * first or second residue is end-of-file, end-of-segment or virtual.
 * Filling stops at end-of-file, after max bytes, or at a non-residue value
 * other than SEQPORT_EOS (which is skipped).
 */
static Int4 dusttripfind (SeqPortPtr spp, UcharPtr s1, Int4 icur, Int4 max, Int4 PNTR invrescount)
{
	UcharPtr next1 = s1 + 1;
	UcharPtr next2 = s1 + 2;
	Int4 filled = 0;
	Int2 code;

	SeqPortSeek (spp, icur, SEEK_SET);

	/* first residue of the first triplet */
	code = SeqPortGetResidue (spp);
	if (code == SEQPORT_EOF || code == SEQPORT_EOS || code == SEQPORT_VIRT)
		return filled;
	if (!IS_residue (code))
	{
		code = 0;		/* 255 it's 'A' */
		dusttrip_note_invalid (spp, invrescount);
	}
	*s1 |= code;
	*s1 <<= 2;

	/* second residue: belongs to the first and the second triplet */
	code = SeqPortGetResidue (spp);
	if (code == SEQPORT_EOF || code == SEQPORT_EOS || code == SEQPORT_VIRT)
		return filled;
	if (!IS_residue (code))
	{
		code = 0;		/* 255 it's 'A' */
		dusttrip_note_invalid (spp, invrescount);
	}
	*s1 |= code;
	*next1 |= code;

	/* every further residue completes one triplet and starts two more */
	for (;;)
	{
		code = SeqPortGetResidue (spp);
		if (code == SEQPORT_EOF || filled >= max)
			break;

		if (code == INVALID_RESIDUE)
		{
			code = 0;	/* 255 it's 'A' */
			dusttrip_note_invalid (spp, invrescount);
		}

		if (IS_residue (code))
		{
			*s1 <<= 2;
			*next1 <<= 2;
			*s1 |= code;
			*next1 |= code;
			*next2 |= code;
			s1++;
			next1++;
			next2++;
			filled++;
			continue;
		}

		/* end-of-segment (252, rare) is skipped; a virtual segment or
		   anything else ends the run - don't dust across */
		if (code != SEQPORT_EOS)
			break;
	}

	return filled;
}
/*
 * BioseqRawToRaw
 *
 * Appends the id/title and, unless idonly is set, the residues of a raw or
 * const Bioseq to caller-owned, malloc'ed strings.
 *
 *   bsp       sequence to convert; ignored when NULL or not raw/const
 *   idonly    TRUE: only add " N) id title\n" to *seqid and return
 *   whichSeq  1-based number of the wanted sequence, 0 for every sequence
 *   seqnum    running sequence counter, incremented for each raw/const bsp
 *   seq       in/out: residue string (may be NULL on entry); grown here
 *   seqid     in/out: id string (may be NULL on entry); grown here
 *   seqlen    out: length of *seq after appending
 *
 * !!! this may be called several times for a single sequence because
 * SeqEntryExplore looks for parts and joins them... so *seq, *seqid and
 * *seqlen may already contain data (or NULL).
 *
 * On allocation failure the strings already held in *seq and *seqid stay
 * valid and owned by the caller; a sequence may then be truncated, with
 * *seqlen matching what was stored.
 */
void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly, short whichSeq, short *seqnum, char **seq, char **seqid, long *seqlen)
{
    SeqPortPtr spp;
    SeqIdPtr bestid;
    Uint1 repr, code, residue;
    CharPtr tmp, title;
    long outlen, outmax;
    char localid[256], *sp, *grown;
    size_t used, room;

    if (bsp == NULL) return;
    repr = Bioseq_repr(bsp);
    if (!(repr == Seq_repr_raw || repr == Seq_repr_const)) return;

    (*seqnum)++;
    if (!(whichSeq == *seqnum || whichSeq == 0)) return;

    bestid = SeqIdFindBest(bsp->id, (Uint1) 0);
    title = BioseqGetTitle(bsp);
    if (idonly) {
        sprintf(localid, " %d) ", *seqnum);
        tmp = localid + strlen(localid) - 1;
    } else {
        strcpy(localid, " ");
        tmp = localid;
    }
    tmp = SeqIdPrint(bestid, tmp, PRINTID_FASTA_SHORT);
    tmp = StringMove(tmp, " ");

    /* bounded title copy: stay inside localid and always terminate */
    used = (size_t) (tmp - localid);
    if (used < sizeof(localid)) {
        room = sizeof(localid) - 1 - used;
        if (room > 200) room = 200;
        StringNCpy(tmp, title, room);
        tmp[room] = '\0';
    }

    /* seqid is variable sized */
    outmax = strlen(localid) + 3;
    if (*seqid == NULL) {
        *seqid = (char*) malloc(outmax);
        if (*seqid == NULL) return;
        strcpy(*seqid, localid);
    } else {
        outmax += strlen(*seqid) + 2;
        /* grow through a temporary so a failure does not lose *seqid */
        grown = (char*) realloc(*seqid, outmax);
        if (grown == NULL) return;
        *seqid = grown;
        if (!idonly) strcat(*seqid, "; ");
        strcat(*seqid, localid);
    }

    if (idonly) {
        strcat(*seqid, "\n");
        return;
    }

    if (ISA_na(bsp->mol)) code = Seq_code_iupacna;
    else code = Seq_code_iupacaa;
    spp = SeqPortNew(bsp, 0, -1, 0, code);
    SeqPortSeek(spp, 0, SEEK_SET);

    sp = *seq;
    if (sp == NULL) {
        outlen = 0;
        outmax = 500;
        sp = (char*) malloc(outmax);
        if (sp == NULL) {
            SeqPortFree(spp);
            return;
        }
    } else {
        outlen = strlen(sp);
        outmax = outlen + 500;
        grown = (char*) realloc(sp, outmax);
        if (grown == NULL) {
            /* *seq is untouched and still valid */
            SeqPortFree(spp);
            return;
        }
        sp = grown;
        /* the old block may have moved: never leave *seq dangling */
        *seq = sp;
    }

    /* invariant: outlen < outmax, so the terminator always has room */
    while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
        if (outlen + 1 >= outmax) {
            grown = (char*) realloc(sp, (size_t) (outlen + 500));
            if (grown == NULL) break;   /* keep what was read so far */
            sp = grown;
            outmax = outlen + 500;
        }
        sp[outlen++] = residue;
    }

    /* trim to size; if trimming fails the larger block is kept */
    grown = (char*) realloc(sp, (size_t) (outlen + 1));
    if (grown != NULL) sp = grown;
    sp[outlen] = '\0';

    *seq = sp;
    *seqlen = outlen;
    SeqPortFree(spp);
    return;
}
static FloatHiPtr NTComposition (SeqPortPtr spp, Int4 length, Int4 window, Uint1 filtertype) { Int4 i, n, iring; Boolean flagMatch; FloatHi NTcount; FloatHiPtr ringptr; Char res[FILTS], chres; FloatHi scr[FILTS]; Boolean flagFilter; FloatHiPtr fhead, fptr; Char buf[PATH_MAX], filename[32]; if (length < window) return NULL; if (!FindPath ("ncbi", "ncbi", "data", buf, sizeof (buf))) return NULL; fptr = (FloatHiPtr) MemNew ((sizeof (FloatHi)) * length); fhead = fptr; for (i = 0; i < length; i++) *fptr++ = 0.0; fptr = fhead; switch (filtertype) { case NA_FILTER_GC: StringCpy (filename, "KSgc.flt"); FileBuildPath (buf, NULL, "KSgc.flt"); break; case NA_FILTER_AT: StringCpy (filename, "KSat.flt"); FileBuildPath (buf, NULL, "KSat.flt"); break; case NA_FILTER_PUR: StringCpy (filename, "KSpur.flt"); FileBuildPath (buf, NULL, "KSpur.flt"); break; case NA_FILTER_PYR: StringCpy (filename, "KSpyr.flt"); FileBuildPath (buf, NULL, "KSpyr.flt"); break; default: MemFree (fptr); return NULL; } flagFilter = ReadFilter (res, scr, buf, filename); if (!flagFilter) { MemFree (fptr); return NULL; } /* set up ring buffer */ ringptr = (FloatHiPtr) MemNew (sizeof (FloatHi) * window); iring = 0; NTcount = 0; for (i = 0; i < window; i++) { chres = SeqPortGetResidue (spp); flagMatch = 0; n = 0; while (res[n] != '\0') { if (chres == res[n]) { flagMatch = 1; break; } n++; } switch (flagMatch) { case 1: NTcount += scr[n]; ringptr[iring] = scr[n]; break; default: ringptr[iring] = 0; break; } iring++; if (iring == window) iring = 0; } for (i = 0; i < window/2; i++) fptr++; *fptr = NTcount/window*100; fptr++; /* calculate average */ for (i = window; i < length; i++) { NTcount = NTcount - ringptr[iring]; chres = SeqPortGetResidue (spp); flagMatch = FALSE; n = 0; while (res[n] != '\0') { if (chres == res[n]) { flagMatch = TRUE; break; } n++; } switch (flagMatch) { case 1: NTcount += scr[n]; ringptr[iring] = scr[n]; break; default: ringptr[iring] = 0; break; } iring++; if (iring == window) iring = 0; 
*fptr = NTcount/window*100; fptr++; } MemFree (ringptr); return fhead; }
/*
 * DustSeqPort
 *
 * Runs Dust() over the residues start..stop of spp, working in chunks of at
 * most MAXSEQCHUNK residues.  For each chunk the residues are packed into a
 * scratch array in which every byte collects three consecutive residues
 * (two bits apart), which is then passed to Dust().
 *
 *   spp    open sequence port; NULL returns NULL
 *   start  first position to examine
 *   stop   last position to examine
 *   ddp    dust parameters; NULL returns NULL
 *
 * Returns the head of the region list produced by Dust().  Invalid residues
 * are replaced by 0 and counted; the first three are reported individually.
 *
 * If a chunk cannot be set up (allocation failure, or end-of-file,
 * end-of-segment or a virtual segment within its first two residues) the
 * scratch array is released and the regions found in earlier chunks are
 * returned; for the first chunk that is NULL.
 *
 * NOTE(review): posn starts at start-1 for every chunk and the read check
 * compares this_stop with posn+2+this_start; this looks inconsistent for
 * start != 0 and for sequences of more than two chunks.  The arithmetic is
 * unchanged here - confirm the intended bookkeeping.
 */
extern DustRegionPtr DustSeqPort (SeqPortPtr spp, Int4 start, Int4 stop, DustDataPtr ddp)
{
	Int4 i, posn, pos;
	Uint1 c;
	Uint1Ptr shead, s1, s2, s3;
	Boolean flagVS;
	Int4 this_start, this_stop;
	DustRegionPtr drphead = NULL, drp = NULL;
	Int4 invrescount = 0;

	if (spp == NULL || ddp == NULL)
		return NULL;

	this_start = start;
	if (stop-start+1 > MAXSEQCHUNK)
		this_stop = this_start + MAXSEQCHUNK;
	else
		this_stop = stop;

	while (this_stop <= stop)
	{
		shead = (Uint1Ptr) MemNew ((size_t)(sizeof (Uchar)*(stop-start+1)));
		if (shead == NULL)
			return drphead;
		MemSet (shead, 0, (size_t) (sizeof (Uint1) * (stop-start+1)));
		s1 = shead;
		s2 = s1 + 1;
		s3 = s2 + 1;
		posn = start-1;
		SeqPortSeek (spp, this_start, SEEK_SET);

		/* set up 1 */
		c = SeqPortGetResidue (spp);
		if (c == SEQPORT_EOF || c == SEQPORT_EOS || c == SEQPORT_VIRT)
		{
			/* release the scratch array instead of leaking it */
			MemFree (shead);
			return drphead;
		}
		if (!IS_residue (c))
		{
			c = 0;			/* 255 it's 'A' */
			pos = SeqPortTell (spp);
			if (invrescount < 3)
			{
				ErrPostEx (SEV_INFO, 5, 1, "Invalid residue converted to 'A': %ld", (long) pos);
				ErrShow ();
			}
			invrescount++;
		}
		*s1 |= c;

		/* set up 2 */
		c = SeqPortGetResidue (spp);
		if (c == SEQPORT_EOF || c == SEQPORT_EOS || c == SEQPORT_VIRT)
		{
			MemFree (shead);
			return drphead;
		}
		if (!IS_residue (c))
		{
			c = 0;			/* 255 it's 'A' */
			pos = SeqPortTell (spp);
			if (invrescount < 3)
			{
				ErrPostEx (SEV_INFO, 5, 1, "Invalid residue converted to 'A': %ld", (long) pos);
				ErrShow ();
			}
			invrescount++;
		}
		*s1 <<= 2;
		*s1 |= c;
		*s2 |= c;

		/* triplet fill loop */
		flagVS = FALSE;
		for (i = this_start+2; i <= this_stop; i++)
		{
			if ((c = SeqPortGetResidue (spp)) != SEQPORT_EOF)
			{
				if (c == INVALID_RESIDUE)	/* 255 */
				{
					c = 0;		/* ping! -- it's 'A' */
					pos = SeqPortTell (spp);
					if (invrescount < 3)
					{
						ErrPostEx (SEV_INFO, 5, 1, "Invalid residue converted to 'A': %ld", (long) pos);
						ErrShow ();
					}
					invrescount++;
				}
				if (IS_residue (c))
				{
					*s1 <<= 2;
					*s2 <<= 2;
					*s1 |= c;
					*s2 |= c;
					*s3 |= c;
					s1++;
					s2++;
					s3++;
					posn++;
				}
				else
				{
					switch (c)
					{
					 /* this should be okay unless segments are being used weirdly */
					 case SEQPORT_EOS:	/* 252 */
						break;
					 /* start again at virtual sequence boundaries */
					 case SEQPORT_VIRT:	/* 251 */
						flagVS = TRUE;
						break;
					 /* and/or just ignore anything odd at this point */
					 default:
						break;
					}
					if (flagVS)
						break;
				}
			}
		}

		if (this_stop != posn+2+this_start)
		{
			ErrPostEx (SEV_WARNING, 1, 10, "Possible seqport read error: Expected: %ld Actual: %ld", (long) this_stop, (long) posn);
			ErrShow ();
		}

		drp = Dust (shead, this_start, this_stop, ddp, drp);
		if (drphead == NULL)
		{
			drphead = drp;
		}
		MemFree (shead);

		if (this_stop == stop)
		{
			break;
		}
		else
		{
			this_start = posn+3;
			this_stop = this_start + MAXSEQCHUNK;
			if (this_stop > stop)
				this_stop = stop;
		}
	}

	if (invrescount > 0)
	{
		ErrPostEx (SEV_INFO, 3, 2, "Total invalid residues found: %ld", (long) invrescount);
		ErrShow ();
	}
	return drphead;
}
Int2 Main(void) { AsnIoPtr aip; SeqEntryPtr sep; BioseqPtr PNTR seqlist; Int4 seqnum, i, numseg, lens[10], j; Int2 ctr; SeqPortPtr spp; Uint1 residue; FILE* fp; CharPtr title; Char buffer[101]; MonitorPtr mon; /* check command line arguments */ if ( ! GetArgs("SeqTest",NUMARG, myargs)) return 1; mon = MonitorStrNew("SeqTest", 40); SetProgMon(StdProgMon, (Pointer)mon); /* ** Load SeqEntry object loader and sequence alphabets */ if (! SeqEntryLoad()) { Message(MSG_ERROR, "SeqEntryLoad failed"); return 1; } /* ** Use the file "example.prt" as the ASN I/O stream. This file ** can be found in the ncbi/demo. It is in ASN.1 Print Value format. */ if ((aip = AsnIoOpen(myargs[0].strvalue, "r")) == NULL) return 1; /* ** Write the output to "seqtest.out". */ fp = FileOpen(myargs[1].strvalue, "w"); fprintf(fp, "Sequence summary:\n\n"); /* ** Read in the whole entry into the Sequence Entry Pointer, sep. ** Close the ASN stream, which in turn closes the input file. */ sep = SeqEntryAsnRead(aip, NULL); aip = AsnIoClose(aip); mon = MonitorFree(mon); SetProgMon(NULL, NULL); /* ** Determine how many Bioseqs are in this SeqEntry. Allocate ** enough memory to hold a list of pointer to all of these ** Bioseqs. Invoke an Explore function to "visit"each Bioseq. ** We are allowed to pass one pointer for use by the exploring ** function, in this case, "BuildList". */ seqnum = BioseqCount(sep); seqlist = MemNew((size_t)(seqnum * sizeof(BioseqPtr))); BioseqExplore(sep, (Pointer) seqlist, BuildList); /* ** For each Bioseq in the SeqEntry write out it's title ** len, number of gaps, and number of segments. Write out ** the length of each segment, up to 10. 
*/ for(i = 0; i < seqnum; i++) { numseg = BioseqCountSegs(seqlist[i]); title = BioseqGetTitle(seqlist[i]); FilePuts((VoidPtr)title, fp); FilePuts("\n", fp); fprintf(fp, "len=%ld gaps=%ld segs=%ld\n", BioseqGetLen(seqlist[i]), BioseqGetGaps(seqlist[i]), numseg); if ((numseg > 1) && (numseg <= 10)) { BioseqGetSegLens (seqlist[i], lens); for (j = 0; j < numseg; j++) fprintf(fp, " len = %ld\n", lens[j]); } FilePuts("\n", fp); } spp = SeqPortNew(seqlist[0], 0, -1, 0, Seq_code_iupacna); if (spp == NULL) Message(MSG_ERROR, "fail on SeqPortNew"); fprintf(fp, "SeqPort: plus strand with SeqPortGetResidue\n\n"); i = 0; while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) { if (! IS_residue(residue)) { buffer[i] = '\0'; fprintf(fp, "%s\n", buffer); i = 0; switch (residue) { case SEQPORT_VIRT: fprintf(fp, "[Gap]\n"); break; case SEQPORT_EOS: fprintf(fp, "[EOS]\n"); break; default: fprintf(fp, "[Invalid Residue]\n"); break; } } else { buffer[i] = residue; i++; if (i == 60) { buffer[i] = '\0'; fprintf(fp, "%s\n", buffer); i = 0; } } } if (i) { buffer[i] = '\0'; fprintf(fp, "%s\n", buffer); } fprintf(fp, "[EOF]\n"); SeqPortFree(spp); fprintf(fp, "\nSeqPort on minus with SeqPortRead\n\n"); spp = SeqPortNew(seqlist[0], 0, -1, Seq_strand_minus, Seq_code_iupacna); if (spp == NULL) Message(MSG_ERROR, "fail on SeqPortNew"); do { ctr = SeqPortRead(spp, (Uint1Ptr)buffer, 60); if (ctr > 0) { buffer[ctr] = '\0'; fprintf(fp, "%s\n", buffer); } else { ctr *= -1; switch (ctr) { case SEQPORT_VIRT: fprintf(fp, "[Gap]\n"); break; case SEQPORT_EOS: fprintf(fp, "[EOS]\n"); break; case SEQPORT_EOF: fprintf(fp, "[EOF]\n"); break; default: fprintf(fp, "[Invalid Residue]\n"); break; } } } while (ctr != SEQPORT_EOF); SeqPortFree(spp); /* ** Write out the nucleic acid sequences in this SeqEntry */ fprintf(fp, "\nNucleic Acids in FASTA format:\n\n"); SeqEntryToFasta(sep, fp, TRUE); /* ** Write out the protein sequences in this SeqEntry. 
*/ fprintf(fp, "\nProteins in FASTA format:\n\n"); SeqEntryToFasta(sep, fp, FALSE); /* ** Close the output file and free up allocated space. */ fclose(fp); MemFree(seqlist); SeqEntryFree(sep); return 0; }