/*
 * DustBioseq
 *
 *   Opens an ncbi2na SeqPort on the interval [start, stop] of bsp and hands
 *   it to DustSeqPort together with the caller's dust parameters.
 *
 *   bsp    Bioseq to examine; must be a nucleic acid.
 *   start  first position handed to SeqPortNew / DustSeqPort.
 *   stop   last position handed to SeqPortNew / DustSeqPort.
 *   ddp    dust parameters, passed through unchanged.
 *
 *   Returns the value produced by DustSeqPort, or NULL when an argument is
 *   missing, the Bioseq is not nucleic acid, or the SeqPort cannot be
 *   opened (the latter two cases also post and show an error).
 */
extern DustRegionPtr DustBioseq (BioseqPtr bsp, Int4 start, Int4 stop,
                                 DustDataPtr ddp)
{
  SeqPortPtr    port;
  DustRegionPtr regions;

  if (bsp == NULL || ddp == NULL)
  {
    return NULL;
  }

  if (! ISA_na (bsp->mol))
  {
    ErrPostEx (SEV_WARNING, 1, 2, "Not nucleic acid");
    ErrShow ();
    return NULL;
  }

  /* error msg stuff */
  ErrSetOptFlags (EO_MSG_CODES);

  port = SeqPortNew (bsp, start, stop, 0, Seq_code_ncbi2na);
  if (port == NULL)
  {
    ErrPostEx (SEV_ERROR, 1, 4, "SeqPort open failed");
    ErrShow ();
    return NULL;
  }

  /* the port is only needed for the duration of the dust pass */
  regions = DustSeqPort (port, start, stop, ddp);
  SeqPortFree (port);

  return regions;
}
/* WARNING not called and not tested... */
/*
 * FillCSANWithSeq
 *
 *   Allocates pcsanThis->pcSeqAln (iLen characters plus a terminator) and
 *   fills it with residues read from pbsq through an ncbieaa SeqPort.
 *   If the sequence yields fewer than iLen residues, the remainder of the
 *   buffer is padded with '-'.
 *
 *   pcsanThis  node whose pcSeqAln member receives the new buffer.
 *   pbsq       protein Bioseq to read from.
 *   iLen       number of characters to produce; must be positive.
 *
 *   Returns iLen on success and 0 when an argument is unusable, the
 *   Bioseq is not a protein, the SeqPort cannot be opened or the buffer
 *   cannot be allocated.
 *
 *   NOTE(review): any buffer previously stored in pcsanThis->pcSeqAln is
 *   overwritten without being freed - confirm callers pass a fresh node.
 */
static Int4 FillCSANWithSeq(PCSAN pcsanThis, BioseqPtr pbsq, Int4 iLen)
{
    SeqPortPtr spp = NULL;
    Uint1 code = Seq_code_ncbieaa;
    Uint1 residue;
    Int4 iCount = 0;
    CharPtr pcA;

    if (!pcsanThis || !pbsq) return 0;
    if (!ISA_aa(pbsq->mol)) return 0;
    if (iLen <= 0) return 0;

    spp = SeqPortNew(pbsq, 0, -1, 0, code);
    if (!spp) return 0;
    SeqPortSeek(spp, 0, SEEK_SET);

    pcsanThis->pcSeqAln = (CharPtr)MemNew((size_t) (1 + sizeof(char) * iLen));
    if (pcsanThis->pcSeqAln == NULL) {
        SeqPortFree(spp);
        return 0;
    }
    pcA = pcsanThis->pcSeqAln;

    /*
     * Copy residues while there is still room.  The original condition
     * was (iLen < iCount), which is never true on entry, so the loop
     * body never ran and the whole buffer ended up as '-'.
     */
    while (iCount < iLen) {
        residue = SeqPortGetResidue(spp);
        if (residue == SEQPORT_EOF || residue == '\0') break;
        *pcA = (char) residue;
        pcA++;
        iCount++;
    }

    /* pad whatever the sequence did not cover */
    while (iCount < iLen) {
        *pcA = '-';
        pcA++;
        iCount++;
    }
    pcsanThis->pcSeqAln[iLen] = '\0';

    SeqPortFree(spp);
    return iCount;
}
/*
 * HydrophobicFunc
 *
 *   Object manager callback that opens a "Kyte-Doolittle-phobicity" graph
 *   window for a protein.
 *
 *   data  an OMProcControlPtr.  input_itemtype selects the input:
 *           OBJ_BIOSEQ   input_data is the Bioseq itself (must be protein);
 *           OBJ_SEQFEAT  input_data is a coding region feature whose
 *                        product Bioseq is looked up with BioseqFind.
 *
 *   Returns OM_MSG_RET_DONE once the window has been shown, otherwise
 *   OM_MSG_RET_ERROR.
 *
 *   The Bioseq and feature paths used to be two verbatim copies of the
 *   same code; they now share one path once the Bioseq has been resolved.
 *   Both inputs are also checked for NULL before being dereferenced
 *   (previously bsp->mol / bsp->length could be read through NULL).
 *
 *   NOTE(review): on failure after CreateGraphViewForm the window is left
 *   as is - the original had "w = Remove (w)" commented out on every such
 *   path, and that choice is kept here.
 */
Int2 LIBCALLBACK HydrophobicFunc (Pointer data)
{
  OMProcControlPtr ompcp;
  BioseqPtr        bsp = NULL;
  SeqFeatPtr       sfp;
  WindoW           w;
  GraphViewFormPtr gvp;
  SeqPortPtr       spp;
  FloatHi          scr[24];
  Char             res[24];

  ompcp = (OMProcControlPtr) data;
  if (ompcp == NULL || ompcp->input_itemtype == 0)
    return OM_MSG_RET_ERROR;

  /* resolve the protein Bioseq from whichever item was handed in */
  switch (ompcp->input_itemtype)
  {
   case OBJ_BIOSEQ:
    bsp = (BioseqPtr) ompcp->input_data;
    if (bsp == NULL || !ISA_aa (bsp->mol))
      return OM_MSG_RET_ERROR;
    break;
   case OBJ_SEQFEAT:
    sfp = (SeqFeatPtr) ompcp->input_data;
    if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION)
      return OM_MSG_RET_ERROR;
    bsp = BioseqFind (SeqLocId (sfp->product));
    if (bsp == NULL)
      return OM_MSG_RET_ERROR;
    break;
   default:
    return OM_MSG_RET_ERROR;
  }

  w = (WindoW) CreateGraphViewForm (-50, -33, "Kyte-Doolittle-phobicity",
                                    bsp, GRAPH_FILTER);
  if ((gvp = (GraphViewFormPtr) GetObjectExtra (w)) == NULL)
    return OM_MSG_RET_ERROR;

  gvp->graphtype = GRAPH_FILTER;

  /* the filter file must supply all 24 scores */
  if (ReadAAC ("KSkyte.flt", scr, res) != 24)
    return OM_MSG_RET_ERROR;

  gvp->window = 19;
  gvp->type = AA_FILTER_COMP_KYTE;
  gvp->entityID = ompcp->input_entityID;
  gvp->itemID = ompcp->input_itemID;

  spp = SeqPortNew (bsp, 0, bsp->length-1, 0, Seq_code_iupacaa);
  gvp->sgp = FilterSeq (spp, 0, bsp->length-1, scr, res,
                        &(gvp->window), gvp->type);
  SeqPortFree (spp);
  if (gvp->sgp == NULL)
    return OM_MSG_RET_ERROR;

  BioseqPtrToGraphViewForm (gvp->form, gvp->sgp);

  /* put the screen up */
  Show (w);
  Select (w);
  return OM_MSG_RET_DONE;
}
/*
 * FilterDustFunc
 *
 *   Object manager callback that opens a "Dust Filter" graph window for a
 *   nucleic acid Bioseq.
 *
 *   data  an OMProcControlPtr whose input_itemtype must be OBJ_BIOSEQ and
 *         whose input_data is the Bioseq to filter.
 *
 *   Returns OM_MSG_RET_DONE once the window has been shown, otherwise
 *   OM_MSG_RET_ERROR.
 *
 *   The busy cursor set by WatchCursor is now restored on every path that
 *   follows it; previously only the failure path called ArrowCursor.  A
 *   SeqPort that cannot be opened is reported as an error instead of being
 *   handed to FilterSeq.
 *
 *   NOTE(review): on failure after CreateGraphViewForm the window is left
 *   as is - the original had "w = Remove (w)" commented out.
 */
Int2 LIBCALLBACK FilterDustFunc (Pointer data)
{
  OMProcControlPtr ompcp;
  BioseqPtr        bsp;
  GraphViewFormPtr gvp;
  WindoW           w;
  OMUserDataPtr    omudp;
  SeqPortPtr       spp;
  Int4             exval[4];

  ompcp = (OMProcControlPtr) data;
  if (ompcp == NULL || ompcp->input_itemtype == 0)
    return OM_MSG_RET_ERROR;

  switch (ompcp->input_itemtype)
  {
   case OBJ_BIOSEQ:
    bsp = (BioseqPtr) ompcp->input_data;
    break;
   default:
    return OM_MSG_RET_ERROR;
  }

  if (bsp == NULL)
    return OM_MSG_RET_ERROR;
  if (!ISA_na (bsp->mol))
    return OM_MSG_RET_ERROR;

  w = (WindoW) CreateGraphViewForm (-50, -33, "Dust Filter",
                                    bsp, GRAPH_FILTER);
  if ((gvp = (GraphViewFormPtr) GetObjectExtra (w)) == NULL)
    return OM_MSG_RET_ERROR;

  SetObjectExtra (w, gvp, CleanupGCFilterForm);
  gvp->graphtype = GRAPH_FILTER;
  gvp->window = 64;

  /* parameter block handed to FilterSeq for the dust filter */
  exval[0] = 64;
  exval[1] = 20;
  exval[2] = 4;
  exval[3] = 1;

  gvp->type = NA_FILTER_DUST;
  gvp->entityID = ompcp->input_entityID;
  gvp->itemID = ompcp->input_itemID;
  gvp->procID = ompcp->proc->procid;

  /* msg stuff: register this form so FilterMsgFunc receives messages */
  gvp->userKEY = OMGetNextUserKey ();
  if ((omudp = ObjMgrAddUserData (gvp->entityID, gvp->procID,
                                  OMPROC_FILTER, gvp->userKEY)) != NULL)
  {
    omudp->userdata.ptrvalue = (Pointer) gvp;
    omudp->messagefunc = FilterMsgFunc;
  }

  WatchCursor ();
  spp = SeqPortNew (bsp, 0, bsp->length-1, 0, Seq_code_ncbi2na);
  if (spp == NULL)
  {
    ArrowCursor ();
    return OM_MSG_RET_ERROR;
  }
  gvp->sgp = FilterSeq (spp, 0, bsp->length-1, NULL, NULL,
                        exval, gvp->type);
  SeqPortFree (spp);
  if (gvp->sgp == NULL)
  {
    ArrowCursor ();
    return OM_MSG_RET_ERROR;
  }

  BioseqPtrToGraphViewForm (gvp->form, gvp->sgp);

  /* the computation is over: give the normal cursor back */
  ArrowCursor ();

  /* put the screen up */
  Show (w);
  Select (w);
  return OM_MSG_RET_DONE;
}
/*
 * PrintSequence
 *
 *   Writes one Bioseq to fp as a ">" header line followed by the residues
 *   in lines of CHARSPERLINE characters.
 *
 *   bsp    Bioseq to print; skipped unless it is raw or const and its
 *          molecule class (nucleic acid or not) matches is_na.
 *   sfp    when not NULL, the residues are read from sfp->location
 *          instead of from the whole Bioseq.
 *   fp     output stream.
 *   is_na  TRUE to print nucleic acids (iupacna), FALSE for the others
 *          (iupacaa).
 *
 *   Non-residue values returned by the SeqPort flush the current line and
 *   are reported as "[Gap]", "[EOS]" or "[Invalid Residue]".
 *
 *   The header is ">" + id + " " + at most 200 characters of title.  The
 *   title copy is clipped to the space left in the buffer and explicitly
 *   terminated: a bounded copy does not terminate the string when the
 *   title fills the whole bound.
 *
 *   NOTE(review): SeqIdPrint itself is not given the buffer size, so an
 *   extremely long id could still overrun buffer before the title is
 *   reached; CHARSPERLINE must also stay below sizeof (buffer).
 */
static void PrintSequence (BioseqPtr bsp, SeqFeatPtr sfp, FILE *fp, Boolean is_na)
{
  Char        buffer [255];
  Uint1       code;
  Int2        count;
  Uint1       repr;
  Uint1       residue;
  size_t      room;
  size_t      used;
  SeqPortPtr  spp;
  CharPtr     title;
  CharPtr     tmp;

  if (bsp == NULL || fp == NULL) return;
  if ((Boolean) ISA_na (bsp->mol) != is_na) return;

  repr = Bioseq_repr (bsp);
  if (repr != Seq_repr_raw && repr != Seq_repr_const) return;

  /* ---- header line ---- */
  title = BioseqGetTitle (bsp);
  tmp = StringMove (buffer, ">");
  tmp = SeqIdPrint (bsp->id, tmp, PRINTID_FASTA_LONG);
  tmp = StringMove (tmp, " ");
  used = (size_t) (tmp - buffer);
  if (used < sizeof (buffer) - 1) {
    room = sizeof (buffer) - 1 - used;   /* keep one byte for '\0' */
    if (room > 200) room = 200;
    StringNCpy (tmp, title, room);
    tmp [room] = '\0';
  }
  fprintf (fp, "%s\n", buffer);
  if (pmon != NULL) MonitorStrValue(pmon, buffer);

  /* ---- residues ---- */
  if (is_na) {
    code = Seq_code_iupacna;
  } else {
    code = Seq_code_iupacaa;
  }
  if (sfp != NULL) {
    spp = SeqPortNewByLoc (sfp->location, code);
  } else {
    spp = SeqPortNew (bsp, 0, -1, 0, code);
  }
  if (spp == NULL) return;

  count = 0;
  while ((residue = SeqPortGetResidue (spp)) != SEQPORT_EOF) {
    if (! IS_residue (residue)) {
      /* flush what has been collected, then name the special value */
      buffer [count] = '\0';
      fprintf (fp, "%s\n", buffer);
      count = 0;
      switch (residue) {
        case SEQPORT_VIRT :
          fprintf (fp, "[Gap]\n");
          break;
        case SEQPORT_EOS :
          fprintf (fp, "[EOS]\n");
          break;
        default :
          fprintf (fp, "[Invalid Residue]\n");
          break;
      }
    } else {
      buffer [count] = residue;
      count++;
      if (count >= CHARSPERLINE) {
        buffer [count] = '\0';
        fprintf (fp, "%s\n", buffer);
        count = 0;
      }
    }
  }
  if (count != 0) {
    buffer [count] = '\0';
    fprintf (fp, "%s\n", buffer);
  }
  SeqPortFree (spp);
}
/*********************************************************************
*
*	make_cds_paragraph(sfp, aa_start, aa_stop)
*	return a buffer for the display of 3-codon under one amino
*	acid format. It also includes the new line characters.
*	This is what Jonathan K. desires to have for the sequin
*	doc object.
*
*	sfp:               the feature to display; its data.choice must be 3
*	                   (presumably the coding region choice - confirm
*	                   against the feature choice constants) and it must
*	                   have a product.
*	aa_start, aa_stop: start and stop in the amino acid sequence.
*
*	Returns a buffer obtained from MemNew, or NULL when the feature is
*	unusable, the product Bioseq cannot be locked, or the buffer cannot
*	be allocated. In the last case the codon list and the Bioseq lock
*	are released before returning (the allocation result used to be
*	dereferenced without a check).
*
*	NOTE(review): the buffer is sized as (1 + 3 * codon vectors) * 150
*	characters and none of the writes below are bounds checked - confirm
*	that callers keep aa_stop - aa_start within one display line.
*
*********************************************************************/
NLM_EXTERN CharPtr make_cds_paragraph(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
	BioseqPtr pbsp;
	SeqPortPtr spp;
	ValNodePtr cvp_node, curr;
	CodonVectorPtr cvp;
	CharPtr docbuf = NULL;
	Int4 num, buf_size;
	Uint1 residue;
	Char p_name[30];
	Int4 i;
	CharPtr buf;
	Int4 pos;
	Int4 max_len = 150;
	Boolean extra_space;

	if(sfp == NULL || sfp->data.choice !=3)
		return NULL;
	if(sfp->product == NULL)
		return NULL;
	pbsp = BioseqLockById(SeqLocId(sfp->product));
	if(pbsp == NULL)
		return NULL;

	/* one line for the protein plus three lines per codon vector */
	cvp_node = aa_to_codon(sfp, aa_start, aa_stop);
	num = 1;
	for(curr = cvp_node; curr !=NULL; curr = curr->next)
		num +=3;
	buf_size = num * max_len;

	docbuf = MemNew((size_t)(buf_size) * sizeof(Char));
	if(docbuf == NULL)
	{
		free_cvp_list(cvp_node);
		BioseqUnlock(pbsp);
		return NULL;
	}

	MuskSeqIdWrite(pbsp->id, p_name, B_SPACE, PRINTID_TEXTID_ACCESSION, TRUE, FALSE);
	pos = 0;
	pos+= print_label_to_buffer(docbuf+pos, p_name, (aa_start+1), 0, FALSE, FALSE, B_SPACE, POS_SPACE);

	/*print the amino acid sequence into buffer*/
	spp = SeqPortNew(pbsp, aa_start, aa_stop, Seq_strand_plus, Seq_code_ncbieaa);
	while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF )
		docbuf[pos++] = residue;
	docbuf[pos++] = '\n';
	SeqPortFree(spp);

	/*print the three frame lines of every codon vector*/
	for(curr = cvp_node; curr !=NULL; curr = curr->next)
	{
		cvp = curr->data.ptrvalue;
		SeqIdWrite (cvp->sip, p_name, PRINTID_FASTA_SHORT, 10);
		extra_space = (cvp->aa_index == 0);
		for(i=0; i<3; ++i)
		{
			buf = cvp->buf[i] + cvp->aa_index;
			/* only the line matching the vector's frame carries the label */
			if(i == cvp->frame)
			{
				pos+= print_label_to_buffer(docbuf+pos, p_name, cvp->dna_pos, cvp->strand, extra_space, FALSE, B_SPACE, POS_SPACE);
			}
			else
				pos+= print_label_to_buffer(docbuf+pos, NULL, -1, 0, extra_space, FALSE, B_SPACE, POS_SPACE);
			sprintf(docbuf+pos, "%s\n", buf);
			pos += (StringLen(buf) +1);
		}
	}
	docbuf[pos++] = '\n';
	docbuf[pos] = '\0';

	free_cvp_list(cvp_node);
	BioseqUnlock(pbsp);
	return docbuf;
}
/*
 * ConsignProc
 *
 *   Button callback.  Using the XOS state attached to the button it
 *     1. reads a pattern file and collects the ORFs of xosp->bsp,
 *     2. translates every ORF of at least xosp->minimumseed bases and
 *        records those matched by a pattern in xosp->slps,
 *     3. copies all ORFs to xosp->slpa and derives xosp->slpb with
 *        FindSimilarBiasOrfs,
 *     4. builds one score graph per reading frame (six in total) plus
 *        coloured ORF markers and a legend in a picture,
 *     5. shows the picture in a new "Consign" window.
 *
 *   b  the button; its object extra must be an XOSPtr.
 *
 *   NOTE(review): none of the MemNew / BiasScoreBioseq / SeqPortNew results
 *   are checked, XScale divides by XLength which is 0 for an empty Bioseq,
 *   and expandscore is indexed up to window/2 regardless of the sequence
 *   length - confirm these inputs cannot occur.
 */
static void ConsignProc (ButtoN b)
{
  XOSPtr xosp;
  XISPtr xisp;
  ComPatPtr cpp, cpph;
  ValNodePtr orflist;
  SeqLocPtr slp, slpn;
  Int4 start, stop;
  Uint1 strand;
  SeqPortPtr spp;
  Uint1Ptr aaseq;
  Int4 ntpos, aapos;
  Uint1 cdn[3];
  SeqAlignPtr sap, sapn;
  FloatHi probcut;
  Int4 clustmin, findmin;
  Int4 i, n, endpos, XLength, XScale, shift;
  Int4 iframe, frame, top, orftop[6];
  FloatHiPtr score, expandscore;
  FloatHi maxscore;
  Int4Ptr tableGlobal;
  SeqGraphPtr sgp, sgpn;
  WindoW w;
  VieweR v;
  GrouP g;
  SegmenT seg;
  GraphSentPtr gsp;
  Char numberbuffer[32];

  if ((xosp = (XOSPtr) GetObjectExtra (b)) == NULL)
    return;

  if (xosp->bsp == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "No Bioseq");
    ErrShow ();
    return;
  }

  WatchCursor ();
  /* cpph keeps the head of the pattern list; cpp is the cursor */
  cpph = cpp = ReadPrositePattern (xosp->pattern_file, TRUE, -1, NULL, NULL);
  if (cpph == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "read failed %s", xosp->pattern_file);
    ErrShow ();
    ArrowCursor ();
    return;
  }

  xosp->orflist = GetOrfList (xosp->bsp, (Int2) (xosp->orfcut));
  xosp->orflist = ClearNonMetOrfs (xosp->orflist);
  orflist = xosp->orflist;
  /* pass 1: translate each ORF and test it against the patterns */
  while (orflist != NULL)
  {
    slp = (SeqLocPtr) orflist->data.ptrvalue;
    /* entries with choice 0 are skipped */
    if (slp->choice == 0)
    {
      orflist = orflist->next;
      continue;
    }
    /* for a mix, work on the first contained location */
    if (slp->choice == SEQLOC_MIX)
      slp = (SeqLocPtr) slp->data.ptrvalue;
    start = SeqLocStart (slp);
    stop = SeqLocStop (slp);
    strand = SeqLocStrand (slp);
    /* whatever SeqLocStrand returned, strand ends up Seq_strand_both */
    if (strand != Seq_strand_both)
      strand = Seq_strand_both;
    if (stop - start + 1 >= xosp->minimumseed)
    {
      spp = SeqPortNew (xosp->bsp, start, stop, strand, Seq_code_ncbi4na);
      /* one slot per codon plus the terminating 0 */
      aaseq = (Uint1Ptr) MemNew ((size_t) (sizeof (Uint1) * (((stop-start)/3)+2)));
      ntpos = start;
      aapos = 0;
      /* the first codon is translated with xosp->gcdi ... */
      while (ntpos < start+3)
      {
        cdn[0] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[1] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[2] = SeqPortGetResidue (spp);
        ntpos++;
        aaseq[aapos] = AAForCodon (cdn, xosp->gcdi);
        aapos++;
      }
      /* ... and every following codon with xosp->gcd
         (presumably initiator vs. internal codon tables - confirm) */
      while (ntpos <= stop)
      {
        cdn[0] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[1] = SeqPortGetResidue (spp);
        ntpos++;
        cdn[2] = SeqPortGetResidue (spp);
        ntpos++;
        aaseq[aapos] = AAForCodon (cdn, xosp->gcd);
        aapos++;
      }
      SeqPortFree (spp);
      aaseq[aapos] = 0;
      /* stop at the first pattern that produces an alignment */
      cpp = cpph;
      while (cpp != NULL)
      {
        sap = PatternMatch (aaseq, 0, Seq_strand_plus, SeqLocId (slp), cpp, 0, Seq_strand_unknown, FALSE);
        if (sap != NULL)
          break;
        cpp = cpp->nextpattern;
      }
      MemFree (aaseq);
      /* a matched ORF is added to the slps list */
      if (sap != NULL)
      {
        SeqLocLink (&(xosp->slps), SeqLocDup (slp));
      }
      /* the alignments themselves are not kept */
      while (sap != NULL)
      {
        sapn = sap->next;
        SeqAlignFree (sap);
        sap = sapn;
      }
    }
    orflist = orflist->next;
  }
  ComPatFree (cpph);

  /* pass 2: copy every ORF with choice > 0 to slpa, then free the list contents */
  orflist = xosp->orflist;
  while (orflist != NULL)
  {
    slp = (SeqLocPtr) orflist->data.ptrvalue;
    if (slp->choice > 0)
      SeqLocLink (&(xosp->slpa), SeqLocDup (slp));
    while (slp != NULL)
    {
      slpn = slp->next;
      SeqLocFree (slp);
      slp = slpn;
    }
    orflist->data.ptrvalue = NULL;
    orflist = orflist->next;
  }
  xosp->orflist = ValNodeFree (xosp->orflist);

  probcut = xosp->probcut;
  clustmin = xosp->clustmin;
  findmin = xosp->findmin;
  xosp->slpb = FindSimilarBiasOrfs (xosp->sep, probcut, clustmin, findmin, xosp->slps, xosp->slpa);

  tableGlobal = CodonTableFromSeqLoc (xosp->bsp, xosp->slpb);
  seg = NULL;
  top = 0;
  xisp = (XISPtr) MemNew (sizeof (XIS));
  /* one graph per iframe: 0-2 on the plus strand, 3-5 on the minus strand;
     frame cycles 0,1,2 within each strand */
  frame = 0;
  for (iframe = 0; iframe < 6; iframe++)
  {
    /* number of entries of score that are inspected below */
    endpos = (xosp->bsp->length + 3 - frame - xosp->window) / 3;
    if (iframe < 3)
      score = BiasScoreBioseq (xosp->bsp, tableGlobal, xosp->window, frame, Seq_strand_plus);
    else
      score = BiasScoreBioseq (xosp->bsp, tableGlobal, xosp->window, frame, Seq_strand_minus);
    maxscore = 0.0;
    for (i = 0; i < endpos; i++)
      if (score[i] > maxscore)
        maxscore = score[i];
    /* stretch the scores to one value per base: the first window/2
       positions and everything past endpos get maxscore, in between n
       advances each time i reaches a multiple of 3 */
    expandscore = (FloatHiPtr) MemNew (sizeof (FloatHi) * xosp->bsp->length);
    for (i = 0; i < xosp->window/2; i++)
      expandscore[i] = maxscore;
    n = 0;
    while (i < xosp->bsp->length)
    {
      if (n < endpos)
        expandscore[i] = score[n];
      else
        expandscore[i] = maxscore;
      i++;
      if (i%3 == 0)
        n++;
    }
    MemFree (score);
    score = expandscore;

    /* append the new graph to the end of xisp->sgp */
    sgp = SeqGraphNew ();
    if (xisp->sgp == NULL)
    {
      xisp->sgp = sgp;
    }
    else
    {
      sgpn = xisp->sgp;
      while (sgpn->next != NULL)
        sgpn = sgpn->next;
      sgpn->next = sgp;
    }
    /* XScale = length / min (length, 1200), rounded up */
    XLength = xosp->bsp->length;
    if (XLength > 1200)
      XLength = 1200;
    XScale = xosp->bsp->length / XLength;
    if (xosp->bsp->length % XLength != 0)
      XScale++;
    sgp->loc = SeqLocIntNew (0, xosp->bsp->length-1, xosp->bsp->strand, xosp->bsp->id);
    sgp->flags[2] = 1;
    sgp->numval = xosp->bsp->length;
    sgp->values = (Pointer) score;   /* the graph now owns expandscore */
    sgp->max.realvalue = maxscore;
    sgp->min.realvalue = 0.0;
    sgp->flags[1] = 1;
    sgp->a = 4.0;
    sgp->b = 0.0;
    if (seg == NULL)
      seg = CreatePicture ();
    /* label both ends of the graph with the first and last position */
    if ((gsp = AddGraphSentinelToPicture (sgp, xosp->bsp, seg, 0, top, 0, NULL)) != NULL)
    {
      sprintf (numberbuffer, "%ld", 1L);
      AddLabel (seg, gsp->box.left, gsp->bottom-20, numberbuffer, SMALL_TEXT, 0, MIDDLE_CENTER, 0);
      sprintf (numberbuffer, "%ld", (long) xosp->bsp->length);
      AddLabel (seg, gsp->box.left+xosp->bsp->length, gsp->bottom-20, numberbuffer, SMALL_TEXT, 0, MIDDLE_CENTER, 0);
    }
    /* remember where the ORF markers of this frame go, then move down;
       an extra 24 units separate the two strands */
    shift = (Int4) (maxscore*sgp->a);
    orftop[iframe] = top - shift - 38;
    top -= (shift+56);
    frame++;
    if (frame == 3)
    {
      top -= 24;
      frame = 0;
    }
  }

  /* ORF markers: each non-empty class is drawn 4 units below the previous one */
  frame = 0;
  for (iframe = 0; iframe < 6; iframe++)
  {
    if (iframe < 3)
      strand = Seq_strand_plus;
    else
      strand = Seq_strand_minus;
    shift = 0;
    if (xosp->slpa != NULL)
    {
      AddOrfClass (xosp->slpa, seg, orftop, iframe, frame, shift, strand, YELLOW_COLOR, 5);
      shift += 4;
    }
    if (xosp->slpk != NULL)
    {
      AddOrfClass (xosp->slpk, seg, orftop, iframe, frame, shift, strand, GREEN_COLOR, 5);
      shift += 4;
    }
    if (xosp->slpb != NULL)
    {
      AddOrfClass (xosp->slpb, seg, orftop, iframe, frame, shift, strand, BLUE_COLOR, 5);
      shift += 4;
    }
    if (xosp->slps != NULL)
    {
      AddOrfClass (xosp->slps, seg, orftop, iframe, frame, shift, strand, RED_COLOR, 5);
    }
    frame++;
    if (frame == 3)
      frame = 0;
  }
  MemFree (tableGlobal);

  /* legend: one coloured line plus caption per non-empty ORF class,
     placed below the last frame */
  start = 20;
  stop = 20 + (50*XScale);
  top = orftop[5] - 40;
  if (xosp->slpa != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), YELLOW_COLOR, SOLID_LINE, SOLID_SHADING, 5, 0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH, 0);
    AddLabel (seg, stop+(20*XScale), top, "All Met-init'd ORFs equal to or greater than 50 codons", SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slpk != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), GREEN_COLOR, SOLID_LINE, SOLID_SHADING, 5, 0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH, 0);
    AddLabel (seg, stop+(20*XScale), top, "Annotated (reported) ORFs", SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slpb != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), BLUE_COLOR, SOLID_LINE, SOLID_SHADING, 5, 0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH, 0);
    AddLabel (seg, stop+(20*XScale), top, "Similar codon usage bias ORFs to seed ORFs", SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }
  if (xosp->slps != NULL)
  {
    top -= 12;
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), RED_COLOR, SOLID_LINE, SOLID_SHADING, 5, 0);
    AddLine (seg, start, top, stop, top, FALSE, 0);
    AddAttribute (seg, (COLOR_ATT|STYLE_ATT|SHADING_ATT|WIDTH_ATT), BLACK_COLOR, SOLID_LINE, SOLID_SHADING, STD_PEN_WIDTH, 0);
    AddLabel (seg, stop+(20*XScale), top, "Pattern match seed ORFs", SMALL_TEXT, 0, MIDDLE_RIGHT, 0);
  }

  /* hand the picture (and the graphs in xisp) to a new window */
  xisp->picture = seg;
  w = FixedWindow (10, 10, 640, 720, "Consign", CloseGraphWindowProc);
  SetObjectExtra (w, xisp, CleanUpGraphWindow);
  g = HiddenGroup (w, -1, 0, NULL);
  v = CreateViewer (g, 560, 640, TRUE, TRUE);
  AttachPicture (v, seg, INT4_MIN, INT4_MAX, UPPER_LEFT, XScale, 1, NULL);
  PushButton (g, "Close", CloseGraphWindowButton);
  RealizeWindow (w);
  ArrowCursor ();
  Show (w);
  return;
}
/*
 * SeqLocDust
 *
 *   Runs dust_segs over every location found in this_slp by
 *   SeqLocFindNext and accumulates the outcome through slpDust.
 *
 *   this_slp                       location(s) to process.
 *   level, window, minwin, linker  dust parameters, widened to Int4 and
 *                                  passed to dust_segs unchanged.
 *
 *   Returns the SeqLoc built up by slpDust, or NULL when nothing was
 *   produced.  Locations without an id, without a Bioseq, on a Bioseq
 *   that is not nucleic acid, or for which no SeqPort can be opened are
 *   reported and skipped.
 */
SeqLocPtr SeqLocDust (SeqLocPtr this_slp, Int2 level, Int2 window, Int2 minwin, Int2 linker)
{
	SeqLocPtr	cur_loc;
	SeqLocPtr	result = NULL;
	ValNodePtr	vnp = NULL;
	SeqIdPtr	sip;
	BioseqPtr	bsp;
	SeqPortPtr	port;
	DREGION	PNTR head;
	DREGION	PNTR tail;
	DREGION	PNTR doomed;
	Int4	region_count;
	Int4	from, to, span;
	Int2	loc_total = 0;

	/* error msg stuff */
	ErrSetOptFlags (EO_MSG_CODES);

	if (this_slp == NULL)
	{
		ErrPostEx (SEV_ERROR, 2, 1, "no sequence location given for dusting");
		ErrShow ();
		return result;
	}

	/* place for dusted regions: head stays put, tail follows the list end */
	head = MemNew (sizeof (DREGION));
	tail = head;
	if (head == NULL)
	{
		ErrPostEx (SEV_FATAL, 2, 2, "memory allocation error");
		ErrShow ();
		return result;
	}
	head->from = 0;
	head->to = 0;
	head->next = NULL;

	/* count seqlocs */
	for (cur_loc = SeqLocFindNext (this_slp, NULL);
	     cur_loc != NULL;
	     cur_loc = SeqLocFindNext (this_slp, cur_loc))
	{
		loc_total++;
	}
	if (loc_total == 0)
	{
		ErrPostEx (SEV_ERROR, 2, 3, "can not find next seq loc");
		ErrShow ();
	}

	/* loop for dusting as needed */
	for (cur_loc = SeqLocFindNext (this_slp, NULL);
	     cur_loc != NULL;
	     cur_loc = SeqLocFindNext (this_slp, cur_loc))
	{
		/* offsets into actual sequence */
		from = SeqLocStart (cur_loc);
		to = SeqLocStop (cur_loc);

		/* if all goes okay should get a seqport pointer */
		sip = SeqLocId (cur_loc);
		if (sip == NULL)
		{
			ErrPostEx (SEV_ERROR, 2, 4, "no bioseq id");
			ErrShow ();
			continue;
		}

		bsp = BioseqLockById (sip);
		if (bsp == NULL)
		{
			ErrPostEx (SEV_ERROR, 2, 5, "no bioseq");
			ErrShow ();
			continue;
		}

		if (! ISA_na (bsp->mol))
		{
			ErrPostEx (SEV_WARNING, 2, 6, "not nucleic acid");
			ErrShow ();
			BioseqUnlock (bsp);
			continue;
		}

		port = SeqPortNew (bsp, from, to, 0, Seq_code_ncbi2na);
		BioseqUnlock (bsp);
		if (port == NULL)
		{
			ErrPostEx (SEV_ERROR, 2, 7, "sequence port open failed");
			ErrShow ();
			continue;
		}

		span = port->totlen;
		region_count = dust_segs (span, port, from, tail,
		                          (Int4)level, (Int4)window,
		                          (Int4)minwin, (Int4)linker);
		result = slpDust (port, result, sip, &vnp,
		                  tail, region_count, loc_total);

		/* find tail - this way avoids referencing the pointer */
		while (tail->next != NULL)
		{
			tail = tail->next;
		}

		SeqPortFree (port);
	}

	/* clean up memory */
	while (head != NULL)
	{
		doomed = head;
		head = head->next;
		MemFree (doomed);
	}

	return result;
}
/*
 * BioseqDust
 *
 *   Runs dust_segs over the interval [start, end] of a nucleic acid
 *   Bioseq and converts the outcome to a SeqLoc through slpDust.
 *
 *   bsp                            Bioseq to process.
 *   start, end                     interval handed to SeqPortNew.
 *   level, window, minwin, linker  dust parameters, widened to Int4 and
 *                                  passed to dust_segs unchanged.
 *
 *   Returns the SeqLoc produced by slpDust, or NULL when bsp is missing or
 *   not nucleic acid, the region record cannot be allocated, or the
 *   SeqPort cannot be opened (each of these is reported through
 *   ErrPostEx / ErrShow).
 */
SeqLocPtr BioseqDust (BioseqPtr bsp, Int4 start, Int4 end, Int2 level, Int2 window, Int2 minwin, Int2 linker)
{
	SeqLocPtr	result = NULL;
	ValNodePtr	vnp = NULL;
	SeqPortPtr	port;
	DREGION	PNTR head;
	DREGION	PNTR doomed;
	Int4	region_count;
	Int4	span;
	Int4	loopDustMax = 1;

	/* error msg stuff */
	ErrSetOptFlags (EO_MSG_CODES);

	/* make sure bioseq is there */
	if (bsp == NULL)
	{
		ErrPostEx (SEV_ERROR, 1, 1, "no bioseq");
		ErrShow ();
		return result;
	}
	if (! ISA_na (bsp->mol))
	{
		ErrPostEx (SEV_WARNING, 1, 2, "not nucleic acid");
		ErrShow ();
		return result;
	}

	/* place for dusted regions */
	head = MemNew (sizeof (DREGION));
	if (head == NULL)
	{
		ErrPostEx (SEV_FATAL, 1, 3, "memory allocation error");
		ErrShow ();
		return result;
	}
	head->from = 0;
	head->to = 0;
	head->next = NULL;

	/* do it */
	port = SeqPortNew (bsp, start, end, 0, Seq_code_ncbi2na);
	if (port == NULL)
	{
		ErrPostEx (SEV_ERROR, 1, 4, "sequence port open failed");
		ErrShow ();
		MemFree (head);
		return result;
	}

	span = port->totlen;
	region_count = dust_segs (span, port, start, head,
	                          (Int4)level, (Int4)window,
	                          (Int4)minwin, (Int4)linker);
	result = slpDust (port, NULL, bsp->id, &vnp,
	                  head, region_count, loopDustMax);

	/* clean up memory */
	SeqPortFree (port);
	while (head != NULL)
	{
		doomed = head;
		head = head->next;
		MemFree (doomed);
	}

	return result;
}
/*
 * BioseqRawToRaw
 *
 *   Appends the id line and, unless idonly is set, the residues of one raw
 *   or const Bioseq to caller-owned, heap-allocated strings.
 *
 *   !!! this may be called several times for a single sequence because
 *   SeqEntryExplore looks for parts and joins them... assume seq, seqid,
 *   seqlen may contain data (or NULL)
 *
 *   bsp       Bioseq to convert; anything but raw/const is ignored.
 *   idonly    TRUE: only extend *seqid with " <n>) <id> <title>\n".
 *   whichSeq  number of the sequence wanted, or 0 for every sequence.
 *   seqnum    running sequence counter, incremented for each raw/const
 *             Bioseq seen.
 *   seq       in/out: malloc'ed residue string (may be NULL on entry).
 *   seqid     in/out: malloc'ed id string (may be NULL on entry).
 *   seqlen    out: length of *seq after the call.
 *
 *   Memory handling differs from the previous version as follows:
 *     - realloc results go through a temporary, so a failed realloc no
 *       longer loses (or leaves the caller pointing at) the old block;
 *     - *seq is kept valid and terminated at all times; if the buffer
 *       cannot grow, the residues read so far are returned;
 *     - the SeqPort is always released;
 *     - the title copy is clipped to the id buffer and terminated.
 *
 *   NOTE(review): SeqIdPrint is not given the size of localid, so an
 *   extremely long id could still overrun it.
 */
void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly,
                    short whichSeq, short *seqnum,
                    char **seq, char **seqid, long *seqlen)
{
  SeqPortPtr spp;
  SeqIdPtr   bestid;
  Uint1      repr, code, residue;
  CharPtr    tmp, title;
  long       outlen, outmax;
  char       localid[256], *sp, *grown;
  size_t     used, room;

  if (bsp == NULL) return;
  repr = Bioseq_repr(bsp);
  if (!(repr == Seq_repr_raw || repr == Seq_repr_const)) return;

  (*seqnum)++;
  if (!(whichSeq == *seqnum || whichSeq == 0)) return;

  /* ---- build "<prefix><id> <title>" in localid ---- */
  bestid = SeqIdFindBest(bsp->id, (Uint1) 0);
  title = BioseqGetTitle(bsp);
  if (idonly) {
    sprintf(localid, " %d) ", *seqnum);
    tmp = localid + strlen(localid) - 1;
  } else {
    strcpy(localid, " ");
    tmp = localid;
  }
  tmp = SeqIdPrint(bestid, tmp, PRINTID_FASTA_SHORT);
  tmp = StringMove(tmp, " ");
  used = (size_t) (tmp - localid);
  if (used < sizeof(localid) - 1) {
    room = sizeof(localid) - 1 - used;   /* keep one byte for '\0' */
    if (room > 200) room = 200;
    StringNCpy(tmp, title, room);
    tmp[room] = '\0';
  }

  /* ---- append it to *seqid (variable sized) ---- */
  outmax = (long) strlen(localid) + 3;
  if (*seqid == NULL) {
    *seqid = (char*) malloc((size_t) outmax);
    if (*seqid == NULL) return;
    strcpy(*seqid, localid);
  } else {
    outmax += (long) strlen(*seqid) + 2;
    grown = (char*) realloc(*seqid, (size_t) outmax);
    if (grown == NULL) return;           /* *seqid is still the old block */
    *seqid = grown;
    if (!idonly) strcat(*seqid, "; ");
    strcat(*seqid, localid);
  }

  if (idonly) {
    strcat(*seqid, "\n");
    return;
  }

  /* ---- make room for the residues before opening the port ---- */
  sp = *seq;
  if (sp == NULL) {
    outlen = 0;
    outmax = 500;
    sp = (char*) malloc((size_t) outmax);
    if (sp == NULL) return;
  } else {
    outlen = (long) strlen(sp);
    outmax = outlen + 500;
    grown = (char*) realloc(sp, (size_t) outmax);
    if (grown == NULL) return;           /* *seq is still the old block */
    sp = grown;
  }
  sp[outlen] = '\0';
  *seq = sp;

  /* ---- read the residues ---- */
  if (ISA_na(bsp->mol)) code = Seq_code_iupacna;
  else code = Seq_code_iupacaa;
  spp = SeqPortNew(bsp, 0, -1, 0, code);
  if (spp != NULL) {
    SeqPortSeek(spp, 0, SEEK_SET);
    while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
      /* always leave one byte free for the terminator */
      if (outlen + 1 >= outmax) {
        grown = (char*) realloc(sp, (size_t) (outlen + 500));
        if (grown == NULL) break;        /* keep what has been read */
        sp = grown;
        outmax = outlen + 500;
      }
      sp[outlen++] = residue;
    }
    SeqPortFree(spp);
  }
  sp[outlen] = '\0';

  /* give back the slack; if that fails the larger block is still fine */
  grown = (char*) realloc(sp, (size_t) (outlen + 1));
  if (grown != NULL) sp = grown;

  *seq = sp;
  *seqlen = outlen;
  return;
}
Int2 Main(void) { AsnIoPtr aip; SeqEntryPtr sep; BioseqPtr PNTR seqlist; Int4 seqnum, i, numseg, lens[10], j; Int2 ctr; SeqPortPtr spp; Uint1 residue; FILE* fp; CharPtr title; Char buffer[101]; MonitorPtr mon; /* check command line arguments */ if ( ! GetArgs("SeqTest",NUMARG, myargs)) return 1; mon = MonitorStrNew("SeqTest", 40); SetProgMon(StdProgMon, (Pointer)mon); /* ** Load SeqEntry object loader and sequence alphabets */ if (! SeqEntryLoad()) { Message(MSG_ERROR, "SeqEntryLoad failed"); return 1; } /* ** Use the file "example.prt" as the ASN I/O stream. This file ** can be found in the ncbi/demo. It is in ASN.1 Print Value format. */ if ((aip = AsnIoOpen(myargs[0].strvalue, "r")) == NULL) return 1; /* ** Write the output to "seqtest.out". */ fp = FileOpen(myargs[1].strvalue, "w"); fprintf(fp, "Sequence summary:\n\n"); /* ** Read in the whole entry into the Sequence Entry Pointer, sep. ** Close the ASN stream, which in turn closes the input file. */ sep = SeqEntryAsnRead(aip, NULL); aip = AsnIoClose(aip); mon = MonitorFree(mon); SetProgMon(NULL, NULL); /* ** Determine how many Bioseqs are in this SeqEntry. Allocate ** enough memory to hold a list of pointer to all of these ** Bioseqs. Invoke an Explore function to "visit"each Bioseq. ** We are allowed to pass one pointer for use by the exploring ** function, in this case, "BuildList". */ seqnum = BioseqCount(sep); seqlist = MemNew((size_t)(seqnum * sizeof(BioseqPtr))); BioseqExplore(sep, (Pointer) seqlist, BuildList); /* ** For each Bioseq in the SeqEntry write out it's title ** len, number of gaps, and number of segments. Write out ** the length of each segment, up to 10. 
*/ for(i = 0; i < seqnum; i++) { numseg = BioseqCountSegs(seqlist[i]); title = BioseqGetTitle(seqlist[i]); FilePuts((VoidPtr)title, fp); FilePuts("\n", fp); fprintf(fp, "len=%ld gaps=%ld segs=%ld\n", BioseqGetLen(seqlist[i]), BioseqGetGaps(seqlist[i]), numseg); if ((numseg > 1) && (numseg <= 10)) { BioseqGetSegLens (seqlist[i], lens); for (j = 0; j < numseg; j++) fprintf(fp, " len = %ld\n", lens[j]); } FilePuts("\n", fp); } spp = SeqPortNew(seqlist[0], 0, -1, 0, Seq_code_iupacna); if (spp == NULL) Message(MSG_ERROR, "fail on SeqPortNew"); fprintf(fp, "SeqPort: plus strand with SeqPortGetResidue\n\n"); i = 0; while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) { if (! IS_residue(residue)) { buffer[i] = '\0'; fprintf(fp, "%s\n", buffer); i = 0; switch (residue) { case SEQPORT_VIRT: fprintf(fp, "[Gap]\n"); break; case SEQPORT_EOS: fprintf(fp, "[EOS]\n"); break; default: fprintf(fp, "[Invalid Residue]\n"); break; } } else { buffer[i] = residue; i++; if (i == 60) { buffer[i] = '\0'; fprintf(fp, "%s\n", buffer); i = 0; } } } if (i) { buffer[i] = '\0'; fprintf(fp, "%s\n", buffer); } fprintf(fp, "[EOF]\n"); SeqPortFree(spp); fprintf(fp, "\nSeqPort on minus with SeqPortRead\n\n"); spp = SeqPortNew(seqlist[0], 0, -1, Seq_strand_minus, Seq_code_iupacna); if (spp == NULL) Message(MSG_ERROR, "fail on SeqPortNew"); do { ctr = SeqPortRead(spp, (Uint1Ptr)buffer, 60); if (ctr > 0) { buffer[ctr] = '\0'; fprintf(fp, "%s\n", buffer); } else { ctr *= -1; switch (ctr) { case SEQPORT_VIRT: fprintf(fp, "[Gap]\n"); break; case SEQPORT_EOS: fprintf(fp, "[EOS]\n"); break; case SEQPORT_EOF: fprintf(fp, "[EOF]\n"); break; default: fprintf(fp, "[Invalid Residue]\n"); break; } } } while (ctr != SEQPORT_EOF); SeqPortFree(spp); /* ** Write out the nucleic acid sequences in this SeqEntry */ fprintf(fp, "\nNucleic Acids in FASTA format:\n\n"); SeqEntryToFasta(sep, fp, TRUE); /* ** Write out the protein sequences in this SeqEntry. 
*/ fprintf(fp, "\nProteins in FASTA format:\n\n"); SeqEntryToFasta(sep, fp, FALSE); /* ** Close the output file and free up allocated space. */ fclose(fp); MemFree(seqlist); SeqEntryFree(sep); return 0; }