/*
 * Bioseq callback: for every protein Bioseq, append an AlphaProtData record
 * (the Bioseq plus a saved copy of a protein name) to the ValNode list whose
 * address is passed in userdata.
 *
 * The name comes from the first SEQFEAT_PROT feature returned by
 * SeqMgrGetNextFeature: the first entry of the ProtRef name list when there
 * is one, otherwise the feature context label.  When no such feature (or no
 * ProtRef) is found the record is still added, with prot_name left NULL.
 */
static void GetProtListCallback (BioseqPtr bsp, Pointer userdata)
{
  ValNodePtr PNTR    list_head;
  AlphaProtPtr       entry;
  SeqFeatPtr         prot_feat;
  SeqMgrFeatContext  context;
  ProtRefPtr         prot_ref;

  if (bsp == NULL || userdata == NULL) return;
  if (! ISA_aa (bsp->mol)) return;

  list_head = (ValNodePtr PNTR) userdata;

  entry = (AlphaProtPtr) MemNew (sizeof (AlphaProtData));
  if (entry == NULL) return;
  entry->bsp = bsp;
  entry->prot_name = NULL;

  prot_feat = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PROT, 0, &context);
  if (prot_feat != NULL) {
    prot_ref = (ProtRefPtr) prot_feat->data.value.ptrvalue;
    if (prot_ref != NULL) {
      if (prot_ref->name == NULL) {
        /* no explicit name: fall back to the feature label */
        entry->prot_name = StringSave (context.label);
      } else {
        entry->prot_name = StringSave (prot_ref->name->data.ptrvalue);
      }
    }
  }

  ValNodeAddPointer (list_head, 0, entry);
}
/*
 * Bioseq callback: for a protein Bioseq, ask SuggestCodingRegion for a coding
 * region on cfp->nucbsp and write the result to cfp->ofp.
 *
 * Output is the protein's short FASTA id on one line, followed by one line
 * per interval of the suggested location: 1-based start and stop separated
 * by a tab, prefixed with "<" / ">" when the interval is 5' / 3' partial.
 * Nothing is written unless the returned annotation has type 1 and its data
 * is a coding-region feature with a location.
 */
static void DoSuggestIntervals (
  BioseqPtr bsp,
  Pointer userdata
)

{
  CSpeedFlagPtr  cfp;
  Char           id [64];
  SeqLocPtr      loc = NULL;
  SeqLocPtr      piece;
  Boolean        partial5, partial3;
  SeqAnnotPtr    sap;
  SeqFeatPtr     sfp;
  SeqIdPtr       sip;
  Int4           start, stop;

  if (bsp == NULL || ! ISA_aa (bsp->mol)) return;

  cfp = (CSpeedFlagPtr) userdata;
  if (cfp == NULL) return;
  if (cfp->ofp == NULL || cfp->nucbsp == NULL) return;

  sip = SeqIdFindBest (bsp->id, 0);
  if (sip == NULL) return;
  SeqIdWrite (sip, id, PRINTID_FASTA_SHORT, sizeof (id) - 1);

  sap = SuggestCodingRegion (cfp->nucbsp, bsp, cfp->genCode);
  if (sap == NULL) return;

  /* pick out the coding-region location, if the annotation carries one */
  if (sap->type == 1) {
    sfp = (SeqFeatPtr) sap->data;
    if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) {
      loc = sfp->location;
    }
  }

  if (loc != NULL) {
    fprintf (cfp->ofp, "%s\n", id);
    for (piece = SeqLocFindNext (loc, NULL);
         piece != NULL;
         piece = SeqLocFindNext (loc, piece)) {
      start = GetOffsetInBioseq (piece, cfp->nucbsp, SEQLOC_START) + 1;
      stop = GetOffsetInBioseq (piece, cfp->nucbsp, SEQLOC_STOP) + 1;
      CheckSeqLocForPartial (piece, &partial5, &partial3);
      fprintf (cfp->ofp, "%s%ld\t%s%ld\n",
               partial5 ? "<" : "", (long) start,
               partial3 ? ">" : "", (long) stop);
    }
  }

  SeqAnnotFree (sap);
}
/*
 * Bioseq callback: append bsp to the ValNode list addressed by userdata when
 * it is a raw-representation Bioseq that is not a protein.  All other
 * Bioseqs, and calls without a list, are ignored.
 */
static void CollectBioseqsForConversion (BioseqPtr bsp, Pointer userdata)
{
  if (bsp != NULL
      && bsp->repr == Seq_repr_raw
      && ! ISA_aa (bsp->mol)
      && userdata != NULL) {
    ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, bsp);
  }
}
/*
 * Bioseq callback: record in the LookForIDs structure passed as userdata
 * which kinds of sequence and identifier were seen.
 *
 *   isNuc / isProt  - molecule is nucleic acid / amino acid
 *   isGED           - a GenBank, EMBL or DDBJ id is present
 *   isTPA           - a TPG, TPE or TPD id is present
 *   isNC            - a SEQID_OTHER accession starts with "NC_"
 *   isNTorNW        - a SEQID_OTHER accession starts with "NT_" or "NW_"
 *
 * Flags are only ever set, never cleared, so the structure accumulates
 * results across calls.  A NULL bsp or userdata is ignored, matching the
 * guards used by the other Bioseq callbacks in this file.
 */
static void LookForSeqIDs (BioseqPtr bsp, Pointer userdata)
{
  LookForIDsPtr  lfip;
  SeqIdPtr       sip;
  TextSeqIdPtr   tsip;

  if (bsp == NULL || userdata == NULL) return;
  lfip = (LookForIDsPtr) userdata;

  if (ISA_na (bsp->mol)) {
    lfip->isNuc = TRUE;
  }
  if (ISA_aa (bsp->mol)) {
    lfip->isProt = TRUE;
  }

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    switch (sip->choice) {
      case SEQID_GENBANK :
      case SEQID_EMBL :
      case SEQID_DDBJ :
        lfip->isGED = TRUE;
        break;
      case SEQID_TPG :
      case SEQID_TPE :
      case SEQID_TPD :
        lfip->isTPA = TRUE;
        break;
      case SEQID_OTHER :
        tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        if (tsip != NULL) {
          if (StringNCmp (tsip->accession, "NC_", 3) == 0) {
            lfip->isNC = TRUE;
          } else if (StringNCmp (tsip->accession, "NT_", 3) == 0
                     || StringNCmp (tsip->accession, "NW_", 3) == 0) {
            lfip->isNTorNW = TRUE;
          }
        }
        break;
      default :
        break;
    }
  }
}
/*
 * Bioseq callback: find the fetch-index entry that corresponds to one of the
 * Bioseq's ids and, if it has not been filled in yet (index_pos < 0), store
 * the values collected by CollectBioseqLineValues in it and set index_pos
 * to 0.
 *
 * Ids are tried in list order until one is found in the index: GI ids are
 * looked up by their decimal text form, text ids (GenBank, EMBL, DDBJ, TPG,
 * TPE, TPD, OTHER) by accession.  Protein Bioseqs and NULL arguments are
 * ignored.
 */
static void PopulateFetchItemCallback (BioseqPtr bsp, Pointer data)
{
  PopulatePtr   pp;
  SeqIdPtr      sip;
  TextSeqIdPtr  tsip;
  Char          buffer[15];
  FetchItemPtr  fetch_item = NULL;

  if (bsp == NULL || ISA_aa(bsp->mol) || (pp = (PopulatePtr)data) == NULL) {
    return;
  }

  for (sip = bsp->id; sip != NULL && fetch_item == NULL; sip = sip->next) {
    switch (sip->choice) {
      case SEQID_GI:
        /* Format the GI into buffer before the lookup.  The previous code
           called printf with buffer as the format string, which read an
           uninitialised array and never produced the key.  A 32-bit value
           needs at most 11 characters plus the terminator. */
        sprintf (buffer, "%ld", (long) sip->data.intvalue);
        fetch_item = FindInFetchIndex(buffer);
        break;
      case SEQID_GENBANK :
      case SEQID_EMBL :
      case SEQID_DDBJ :
      case SEQID_TPG :
      case SEQID_TPE :
      case SEQID_TPD :
      case SEQID_OTHER :
        tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        if (tsip != NULL) {
          fetch_item = FindInFetchIndex(tsip->accession);
        }
        break;
      default :
        break;
    }
  }

  if (fetch_item != NULL && fetch_item->index_pos < 0) {
    /* collect field values */
    fetch_item->field_values = CollectBioseqLineValues (bsp, pp->field_list, pp->want_gi);
    fetch_item->index_pos = 0;
  }
}
/*
 * Bioseq callback: write one protein Bioseq in FASTA form to fe->fp.
 *
 * When a FEATDEF_PROT feature is found, a custom definition line
 * ">id [prot=label]" is printed and the sequence is streamed with the first
 * of the three trailing Boolean arguments FALSE; otherwise the Bioseq is
 * streamed with that argument TRUE.  Non-protein Bioseqs and NULL arguments
 * are ignored.
 */
static void WriteOneProteinWithProduct (BioseqPtr bsp, Pointer data)
{
  FastaExportOptionsPtr  fe;
  SeqFeatPtr             prot;
  SeqMgrFeatContext      fcontext;
  Char                   id [128];

  if (bsp == NULL || ! ISA_aa (bsp->mol)) return;
  fe = (FastaExportOptionsPtr) data;
  if (fe == NULL) return;

  prot = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_PROT, &fcontext);
  if (prot != NULL) {
    /* emit our own definition line carrying the protein label */
    SeqIdWrite (bsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
    fprintf (fe->fp, ">%s [prot=%s]\n", id, fcontext.label);
    BioseqFastaStreamEx (bsp, fe->fp, fe->flags, fe->linelen, fe->blocklen,
                         fe->grouplen, FALSE, FALSE, FALSE);
    return;
  }

  BioseqFastaStreamEx (bsp, fe->fp, fe->flags, fe->linelen, fe->blocklen,
                       fe->grouplen, TRUE, FALSE, FALSE);
}
/*
 * SeqEntry callback: print quality scores for one non-protein Bioseq to the
 * FILE passed in data.  Entries that are not Bioseqs are skipped.  The
 * index and indent arguments are not used.
 */
static void PrintFarQualScores (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr  contig;
  Boolean    contig_flag;

  if (! IS_Bioseq (sep)) return;
  contig = (BioseqPtr) sep->data.ptrvalue;

  /* WARNING: we're assuming here that asn2fast's quality-score
     output is DNA-centric, thus protein bioseqs can be ignored
     in the PrintQualScores callback. --MLC, 5/2000 */
  if (ISA_aa (contig->mol)) return;

  /* the second argument is the inverse of command-line argument 17 */
  contig_flag = (Boolean) (myargs [17].intvalue ? FALSE : TRUE);
  PrintQualityScoresForContig (contig, contig_flag, (FILE*) data);
}
/* WARNING not called and not tested... */
/*
 * Allocate pcsanThis->pcSeqAln (iLen characters plus a terminator) and fill
 * it with the ncbieaa residues of the protein Bioseq pbsq, padding with '-'
 * up to iLen when the sequence is shorter.
 *
 * Returns the number of characters written (iLen) on success, or 0 when an
 * argument is NULL, the Bioseq is not a protein, iLen is not positive, the
 * SeqPort cannot be opened, or the buffer cannot be allocated.
 */
static Int4 FillCSANWithSeq(PCSAN pcsanThis, BioseqPtr pbsq, Int4 iLen)
{
  SeqPortPtr spp = NULL;
  Uint1 code = Seq_code_ncbieaa;
  Uint1 residue;
  Int4 iCount = 0;
  CharPtr pcA;

  if (!pcsanThis || !pbsq) return 0;
  if (!ISA_aa(pbsq->mol)) return 0;
  /* a negative length would under-allocate and index before the buffer */
  if (iLen <= 0) return 0;

  spp = SeqPortNew(pbsq, 0, -1, 0, code);
  if (!spp) return 0;
  SeqPortSeek(spp, 0, SEEK_SET);

  pcsanThis->pcSeqAln = (CharPtr)MemNew((size_t) (1 + sizeof(char) * iLen));
  if (!pcsanThis->pcSeqAln) {
    SeqPortFree(spp);
    return 0;
  }
  pcA = pcsanThis->pcSeqAln;

  /* Copy residues until the sequence ends or the buffer is full.  The
     bound was previously written as (iLen < iCount), which is false on
     entry, so no residue was ever copied and the result was all '-'. */
  residue = SeqPortGetResidue(spp);
  while ((residue != SEQPORT_EOF) && (residue != '\0') && (iCount < iLen)) {
    *pcA = (char) residue;
    pcA++;
    iCount++;
    residue = SeqPortGetResidue(spp);
  }

  /* pad the remainder with gap characters */
  while (iCount < iLen) {
    *pcA = '-';
    pcA++;
    iCount++;
  }
  pcsanThis->pcSeqAln[iLen] = '\0';

  SeqPortFree(spp);
  return iCount;
}
/*
 * Object-manager procedure: open a graph window titled
 * "Kyte-Doolittle-phobicity" for a protein sequence.
 *
 * The input is either a protein Bioseq, or a coding-region feature whose
 * product Bioseq is looked up with BioseqFind.  The sequence is run through
 * FilterSeq with the table read from "KSkyte.flt" and an initial window of
 * 19, and the resulting graph is handed to the graph view form.
 *
 * Returns OM_MSG_RET_DONE when the window was shown, OM_MSG_RET_ERROR
 * otherwise.  Both input kinds share a single code path after the Bioseq
 * has been resolved; a missing Bioseq is reported as an error before any
 * window is created instead of being dereferenced.
 *
 * NOTE(review): as before, the window is not removed when a later step
 * fails.
 */
Int2 LIBCALLBACK HydrophobicFunc (Pointer data)
{
  OMProcControlPtr  ompcp;
  BioseqPtr         bsp = NULL;
  SeqFeatPtr        sfp;
  WindoW            w;
  GraphViewFormPtr  gvp;
  SeqPortPtr        spp;
  FloatHi           scr[24];
  Char              res[24];

  ompcp = (OMProcControlPtr) data;
  if (ompcp == NULL || ompcp->input_itemtype == 0)
    return OM_MSG_RET_ERROR;

  /* resolve the protein Bioseq to analyse */
  switch (ompcp->input_itemtype)
  {
   case OBJ_BIOSEQ:
    bsp = (BioseqPtr) ompcp->input_data;
    if (bsp == NULL || !ISA_aa (bsp->mol))
      return OM_MSG_RET_ERROR;
    break;
   case OBJ_SEQFEAT:
    sfp = (SeqFeatPtr) ompcp->input_data;
    if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION)
      return OM_MSG_RET_ERROR;
    bsp = BioseqFind (SeqLocId (sfp->product));
    if (bsp == NULL)
      return OM_MSG_RET_ERROR;
    break;
   default:
    return OM_MSG_RET_ERROR;
  }

  w = (WindoW) CreateGraphViewForm (-50, -33, "Kyte-Doolittle-phobicity",
                                    bsp, GRAPH_FILTER);
  gvp = (GraphViewFormPtr) GetObjectExtra (w);
  if (gvp == NULL)
    return OM_MSG_RET_ERROR;

  gvp->graphtype = GRAPH_FILTER;
  if (ReadAAC ("KSkyte.flt", scr, res) != 24)
    return OM_MSG_RET_ERROR;
  gvp->window = 19;
  gvp->type = AA_FILTER_COMP_KYTE;
  gvp->entityID = ompcp->input_entityID;
  gvp->itemID = ompcp->input_itemID;

  spp = SeqPortNew (bsp, 0, bsp->length-1, 0, Seq_code_iupacaa);
  gvp->sgp = FilterSeq (spp, 0, bsp->length-1, scr, res,
                        &(gvp->window), gvp->type);
  SeqPortFree (spp);
  if (gvp->sgp == NULL)
    return OM_MSG_RET_ERROR;

  BioseqPtrToGraphViewForm (gvp->form, gvp->sgp);
  Show (w);
  Select (w);
  return OM_MSG_RET_DONE;
}
/*
 * Object-manager procedure: open a graph window titled
 * "Predict coiled-coil" for a protein sequence.
 *
 * The input is either a protein Bioseq, or a coding-region feature whose
 * product Bioseq is looked up with BioseqFind.  The graph is produced by
 * PCCProc with a window of 22 and handed to the graph view form.
 *
 * Returns OM_MSG_RET_DONE when the window was shown, OM_MSG_RET_ERROR
 * otherwise.  Both input kinds share a single code path after the Bioseq
 * has been resolved; a missing Bioseq is reported as an error before any
 * window is created instead of being passed on.
 *
 * NOTE(review): as before, the window is not removed when a later step
 * fails.
 */
Int2 LIBCALLBACK PCCPredictFunc (Pointer data)
{
  OMProcControlPtr  ompcp;
  BioseqPtr         bsp = NULL;
  SeqFeatPtr        sfp;
  WindoW            w;
  GraphViewFormPtr  gvp;

  ompcp = (OMProcControlPtr) data;
  if (ompcp == NULL || ompcp->input_itemtype == 0)
    return OM_MSG_RET_ERROR;

  /* resolve the protein Bioseq to analyse */
  switch (ompcp->input_itemtype)
  {
   case OBJ_BIOSEQ:
    bsp = (BioseqPtr) ompcp->input_data;
    if (bsp == NULL || !ISA_aa (bsp->mol))
      return OM_MSG_RET_ERROR;
    break;
   case OBJ_SEQFEAT:
    sfp = (SeqFeatPtr) ompcp->input_data;
    if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION)
      return OM_MSG_RET_ERROR;
    bsp = BioseqFind (SeqLocId (sfp->product));
    if (bsp == NULL)
      return OM_MSG_RET_ERROR;
    break;
   default:
    return OM_MSG_RET_ERROR;
  }

  w = (WindoW) CreateGraphViewForm (-50, -33, "Predict coiled-coil",
                                    bsp, GRAPH_FILTER);
  gvp = (GraphViewFormPtr) GetObjectExtra (w);
  if (gvp == NULL)
    return OM_MSG_RET_ERROR;

  gvp->graphtype = GRAPH_FILTER;
  gvp->window = 22;
  gvp->type = AA_PCC;
  gvp->entityID = ompcp->input_entityID;
  gvp->itemID = ompcp->input_itemID;

  gvp->sgp = PCCProc (bsp, NULL, gvp->window);
  if (gvp->sgp == NULL)
    return OM_MSG_RET_ERROR;

  BioseqPtrToGraphViewForm (gvp->form, gvp->sgp);
  Show (w);
  Select (w);
  return OM_MSG_RET_DONE;
}
/*
 * Bioseq callback: scan the peptide features (mature, signal and transit
 * peptides) of a protein Bioseq and report those that start at or before
 * the right end of the first peptide feature encountered.
 *
 * The first peptide feature only establishes the reference right end and
 * whether it is a signal peptide.  Each later peptide feature whose left
 * end is <= that right end is reported through PrintFeatureMessage:
 *   "SP" - the first feature is a signal peptide, this feature is not one,
 *          and it starts exactly at the reference right end; the message
 *          carries the four residues ending at that position (empty when
 *          the position is < 4 or the residues cannot be read)
 *   "OV" - every other case
 */
static void DoProteins (BioseqPtr bsp, Pointer userdata)
{
  Char               buf [6];
  SeqMgrFeatContext  fcontext;
  Boolean            have_first = FALSE;
  Boolean            firstIsSig = FALSE;
  Int4               right = 0;
  ScanDataPtr        sdp;
  SeqFeatPtr         sfp;
  SeqInt             sint;
  SeqPortPtr         spp;
  ValNode            vn;

  if (bsp == NULL || ! ISA_aa (bsp->mol)) return;
  sdp = (ScanDataPtr) userdata;

  for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
       sfp != NULL;
       sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext)) {

    /* only peptide features take part in the comparison */
    if (fcontext.featdeftype != FEATDEF_mat_peptide_aa &&
        fcontext.featdeftype != FEATDEF_sig_peptide_aa &&
        fcontext.featdeftype != FEATDEF_transit_peptide_aa) continue;

    if (! have_first) {
      /* first peptide: remember its right end and kind, report nothing */
      have_first = TRUE;
      right = fcontext.right;
      if (fcontext.featdeftype == FEATDEF_sig_peptide_aa) {
        firstIsSig = TRUE;
      }
      continue;
    }

    if (fcontext.left > right) continue;

    if (! firstIsSig ||
        fcontext.left != right ||
        fcontext.featdeftype == FEATDEF_sig_peptide_aa) {
      PrintFeatureMessage (sfp, sdp, "OV", NULL);
      continue;
    }

    /* fetch the four residues ending at the reference right end */
    buf [0] = '\0';
    if (right >= 4) {
      MemSet ((Pointer) &vn, 0, sizeof (ValNode));
      vn.choice = SEQLOC_INT;
      vn.data.ptrvalue = &sint;
      MemSet ((Pointer) &sint, 0, sizeof (SeqInt));
      sint.id = SeqLocId (sfp->location);
      sint.from = right - 3;
      sint.to = right;
      sint.strand = Seq_strand_plus;
      spp = SeqPortNewByLoc (&vn, Seq_code_ncbieaa);
      if (spp != NULL) {
        SeqPortRead (spp, (BytePtr) buf, 4);
        SeqPortFree (spp);
      }
      buf [4] = '\0';
    }
    PrintFeatureMessage (sfp, sdp, "SP", buf);
  }
}
/*
 * Program entry point for the "SPIDEY" command-line tool (the name passed to
 * GetArgs): aligns each mRNA sequence to a single genomic sequence.
 *
 * Outline of what the code below does, in order:
 *   1. Enables ID1 and local sequence fetching, configures error posting and
 *      loads the object, submit, feature-definition, sequence-code and
 *      genetic-code tables; returns 1 if any load fails.
 *   2. Parses the command line with GetArgs; returns 0 if that fails.
 *   3. Genomic input: tries to open myargs[MYARGGENFILE] as a file.  If it
 *      cannot be opened the string is passed to SPI_GetBspFromGIOrAcc
 *      instead.  Otherwise records are read with ReadAsnFastaOrFlatFile;
 *      Bioseqs are appended to the list and SeqEntries are explored with
 *      SPI_FindAllNuc.  The run is aborted (-1) when nothing was read or the
 *      first sequence is a protein; when more than one genomic sequence was
 *      read only the first is kept.
 *   4. mRNA input: same open-or-fetch logic on myargs[MYARGMRNAFILE], with
 *      three reading modes - lowercase-masked FASTA (FastaToSeqEntryForDb,
 *      storing the masking location on the matching entry), a list of
 *      identifiers read one per line, or generic ASN.1/FASTA.
 *   5. Optional feature table (myargs[MYARGTABLE]): read with
 *      SPI_ReadFeatureTable, after which every non-NULL lcaseloc is wrapped
 *      in a SEQLOC_MIX node.
 *   6. Builds the SPI_Options structure from the remaining arguments, opens
 *      the output file(s), maps the organism letter (d, p, c, m; anything
 *      else is vertebrate) and the strand argument, and loads the optional
 *      donor and acceptor splice files.
 *   7. Aligns every mRNA (SPI_AlnSinglemRNAToGen, or
 *      SPI_AlnSinglemRNAToPieces when spot->draftfile is set), chaining the
 *      returned regions; optionally builds and prints a multiple alignment.
 *   8. Optionally writes the collected alignments as a Seq-annot to the
 *      ASN.1 output file, then closes files, frees lists and disables
 *      fetching.  Returns 0.
 *
 * NOTE(review): the feature-table file opened in step 5 does not appear to
 *   be closed anywhere in this function - confirm and add a FileClose.
 * NOTE(review): the results of FileOpen for the two splice files in step 6
 *   are passed on without a NULL check - confirm SPI_GetSpliceInfo and
 *   FileClose tolerate NULL.
 * NOTE(review): in the generic read loops, SeqEntryExplore receives the list
 *   head only, so spig_prev / spim_prev look as if they can still be NULL
 *   (or stale) when a later OBJ_BIOSEQ record is appended - verify against
 *   SPI_FindAllNuc.
 * NOTE(review): the early "return -1" paths do not release the files and
 *   lists acquired so far.
 */
Int2 Main() { AsnIoPtr aip; BioseqPtr bsp; Pointer dataptr; Uint2 datatype; Boolean found; SPI_mRNAToHerdPtr h_head; SPI_mRNAToHerdPtr h_prev; SPI_mRNAToHerdPtr hptr; FILE *ifp; Boolean isGIlist; Char line[60]; Boolean lowercase; SeqLocPtr lcaseloc; FILE *ofp; FILE *ofp2; SeqAlignPtr sap; SeqAnnotPtr sanp; SeqEntryPtr sep; FILE *sfp; SeqIdPtr sip; SeqLocPtr slp; SPI_bsinfoPtr spig; SPI_bsinfoPtr spig_head; SPI_bsinfoPtr spig_prev; SPI_bsinfoPtr spim; SPI_bsinfoPtr spim_head; SPI_bsinfoPtr spim_prev; SPI_OptionsPtr spot; SPI_RegionInfoPtr srip = NULL; SPI_RegionInfoPtr srip_head; SPI_RegionInfoPtr srip_prev; CharPtr str; CharPtr txt; ID1BioseqFetchEnable("spidey", FALSE); LocalSeqFetchInit(FALSE); /* standard setup */ ErrSetFatalLevel (SEV_MAX); ErrClearOptFlags (EO_SHOW_USERSTR); UseLocalAsnloadDataAndErrMsg (); ErrPathReset (); if (! AllObjLoad ()) { Message (MSG_FATAL, "AllObjLoad failed"); return 1; } if (! SubmitAsnLoad ()) { Message (MSG_FATAL, "SubmitAsnLoad failed"); return 1; } if (! FeatDefSetLoad ()) { Message (MSG_FATAL, "FeatDefSetLoad failed"); return 1; } if (! SeqCodeSetLoad ()) { Message (MSG_FATAL, "SeqCodeSetLoad failed"); return 1; } if (! 
GeneticCodeTableLoad ()) { Message (MSG_FATAL, "GeneticCodeTableLoad failed"); return 1; } if (!GetArgs("SPIDEY", NUMARGS, myargs)) return 0; /* set the error message level high to suppress warnings from BLAST */ isGIlist = (Boolean)myargs[MYARGGILIST].intvalue; txt = myargs[MYARGGENFILE].strvalue; ifp = FileOpen(txt, "r"); spig_head = NULL; if (ifp == NULL) { bsp = SPI_GetBspFromGIOrAcc(txt); if (bsp == NULL) { ErrPostEx(SEV_ERROR, 0, 0, "Can't open genomic input file\n"); return -1; } else { spig_head = (SPI_bsinfoPtr)MemNew(sizeof(SPI_bsinfo)); spig_head->bsp = bsp; } } if (spig_head == NULL) { spig_prev = NULL; /* read in the genomic sequence(s) first and put them into bsinfo structures */ while ((dataptr = ReadAsnFastaOrFlatFile (ifp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE)) != NULL) { if (datatype == OBJ_BIOSEQ) { spig = (SPI_bsinfoPtr)MemNew(sizeof(SPI_bsinfo)); spig->bsp = (BioseqPtr)dataptr; if (spig_head == NULL) spig_head = spig_prev = spig; else { spig_prev->next = spig; spig_prev = spig; } } else if (datatype == OBJ_SEQENTRY) { sep = (SeqEntryPtr)dataptr; SeqEntryExplore(sep, &spig_head, SPI_FindAllNuc); } } FileClose(ifp); } if (spig_head == NULL) { ErrPostEx(SEV_ERROR, 0, 0, "No valid bioseqs in genomic file\n"); return -1; } else if (ISA_aa(spig_head->bsp->mol)) { ErrPostEx(SEV_ERROR, 0, 0, "At least one of the genomic sequences appears to be a protein.\n"); return -1; } if (spig_head->next != NULL) { ErrPostEx(SEV_ERROR, 0, 0, "This version can only process one genomic sequence at a time. 
Only the first sequence in this file will be used.\n"); spig_head->next = NULL; } spim_head = spim_prev = NULL; txt = myargs[MYARGMRNAFILE].strvalue; ifp = FileOpen(txt, "r"); if (ifp == NULL) { bsp = SPI_GetBspFromGIOrAcc(txt); if (bsp == NULL) { ErrPostEx(SEV_ERROR, 0, 0, "Can't open mRNA input file\n"); return -1; } else { spim_head = (SPI_bsinfoPtr)MemNew(sizeof(SPI_bsinfo)); spim_head->bsp = bsp; } } if (spim_head == NULL) { lowercase = (Boolean)myargs[MYARGMASKED].intvalue; lcaseloc = NULL; /* if the mRNA has lowercase masking, read it in carefully to record the masking */ if (lowercase == TRUE) { while ((sep = FastaToSeqEntryForDb(ifp, TRUE, NULL, TRUE, NULL, NULL, &lcaseloc)) != NULL) { SeqEntryExplore(sep, &spim_head, SPI_FindAllNuc); if (lcaseloc != NULL) /* put masking info into the bsinfo structure */ { spim = spim_head; sip = SeqLocId(lcaseloc); found = FALSE; while (spim != NULL && !found) { if (SeqIdComp(sip, spim->bsp->id) == SIC_YES) { found = TRUE; spim->lcaseloc = lcaseloc; } spim = spim->next; } lcaseloc = NULL; } } } else if (isGIlist) /* mRNA file is a list of GIs, must fetch the bioseqs */ { str = ReadALine(line, sizeof(line), ifp); while (str != NULL) { bsp = SPI_GetBspFromGIOrAcc(str); if (bsp != NULL) { spim = (SPI_bsinfoPtr)MemNew(sizeof(SPI_bsinfo)); spim->bsp = bsp; if (spim_head == NULL) spim_head = spim_prev = spim; else { spim_prev->next = spim; spim_prev = spim; } } str = ReadALine(line, sizeof(line), ifp); } } else /* mRNAs are FASTA or ASN.1, read them all in */ { while ((dataptr = ReadAsnFastaOrFlatFile (ifp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE)) != NULL) { if (datatype == OBJ_BIOSEQ) { spim = (SPI_bsinfoPtr)MemNew(sizeof(SPI_bsinfo)); spim->bsp = (BioseqPtr)dataptr; if (spim_head == NULL) spim_head = spim_prev = spim; else { spim_prev->next = spim; spim_prev = spim; } } else if (datatype == OBJ_SEQENTRY) { sep = (SeqEntryPtr)dataptr; SeqEntryExplore(sep, &spim_head, SPI_FindAllNuc); } } } FileClose(ifp); } if (spim_head 
== NULL) { ErrPostEx(SEV_ERROR, 0, 0, "No valid bioseqs in mRNA file\n"); return -1; } else if (ISA_aa(spim_head->bsp->mol)) { ErrPostEx(SEV_ERROR, 0, 0, "At least one of the mRNA sequences appears to be a protein\n"); return -1; } txt = myargs[MYARGTABLE].strvalue; if (txt != NULL) { ifp = FileOpen(txt, "r"); if (ifp == NULL) { ErrPostEx(SEV_ERROR, 0, 0, "Unable to open table file\n"); return -1; } SPI_ReadFeatureTable(ifp, spim_head); spim = spim_head; while (spim != NULL) { if (spim->lcaseloc != NULL) { slp = (SeqLocPtr)ValNodeNew(NULL); slp->choice = SEQLOC_MIX; slp->data.ptrvalue = (Pointer)spim->lcaseloc; spim->lcaseloc = slp; } spim = spim->next; } } spim = spim_head; spot = (SPI_OptionsPtr)MemNew(sizeof(SPI_Options)); spot->printaln = myargs[MYARGPRALIGN].intvalue; txt = myargs[MYARGOUTFILE].strvalue; ofp = FileOpen(txt, "w"); if (ofp == NULL && spot->printaln != 3) { ErrPostEx(SEV_ERROR, 0, 0, "Unable to open output file\n"); return -1; } if (spot->printaln >= 2) { txt = myargs[MYARGALNFILE].strvalue; ofp2 = FileOpen(txt, "a"); if (ofp2 == NULL) { ErrPostEx(SEV_ERROR, 0, 0, "Unable to open output file 2\n"); return -1; } } else ofp2 = NULL; /** ErrSetMessageLevel(SEV_MAX); **/ spot->firstpasseval = myargs[MYARG1STEVAL].floatvalue; spot->secpasseval = myargs[MYARG2NDEVAL].floatvalue; spot->thirdpasseval = myargs[MYARG3RDEVAL].floatvalue; spot->numreturns = myargs[MYARGNUMMOD].intvalue; spot->idcutoff = myargs[MYARGIDCUT].intvalue; spot->lencutoff = myargs[MYARGLENCUT].intvalue; spot->interspecies = (Boolean)myargs[MYARGSPEC].intvalue; spot->printasn = (Boolean)myargs[MYARGASN].intvalue; spot->fetchcds = (Boolean)myargs[MYARGGETCDS].intvalue; /*spot->ace = (Boolean)myargs[MYARGACEDB].intvalue;*/ spot->from = myargs[MYARGFROM].intvalue; spot->to = myargs[MYARGTO].intvalue; spot->makemult = (Boolean)myargs[MYARGMULT].intvalue; /*KSK*/ spot->bigintron = (Boolean)myargs[MYARGXL].intvalue; spot->bigintron_size = myargs[MYARGXL_SIZE].intvalue; spot->repeat_db_file 
= myargs[MYARGREPDB].strvalue; txt = myargs[MYARGORG].strvalue; if (!StringICmp(txt, "d") || !StringICmp(txt, "D")){ spot->organism = SPI_FLY; } else if (!StringICmp(txt, "p") || !StringICmp(txt, "P")){ spot->organism = SPI_PLANT; } else if (!StringICmp(txt, "c") || !StringICmp(txt, "C")){ spot->organism = SPI_CELEGANS; } else if (!StringICmp(txt, "m") || !StringICmp(txt, "M")){ spot->organism = SPI_DICTY; } else { spot->organism = SPI_VERTEBRATE; } sap = NULL; if (spot->printasn) spot->sap_head = &sap; txt = myargs[MYARGSTRAND].strvalue; if (txt != NULL) { if (StrChr(txt, 'p') || StrChr(txt, 'P')) spot->strand = Seq_strand_plus; else spot->strand = Seq_strand_minus; } else spot->strand = Seq_strand_both; /*txt = myargs[MYARGDRAFTFILE].strvalue; if (txt != NULL) spot->draftfile = StringSave(txt);*/ txt = myargs[MYARGDSPLICE].strvalue; if (txt != NULL) { sfp = FileOpen(txt, "r"); SPI_GetSpliceInfo(spot, sfp, TRUE); FileClose(sfp); } txt = myargs[MYARGASPLICE].strvalue; if (txt != NULL) { sfp = FileOpen(txt, "r"); SPI_GetSpliceInfo(spot, sfp, FALSE); FileClose(sfp); } h_head = h_prev = NULL; srip_head = srip_prev = NULL; while (spim != NULL) { spot->lcaseloc = spim->lcaseloc; if (spot->draftfile == NULL) srip = SPI_AlnSinglemRNAToGen(spig_head, spim, ofp, ofp2, spot); else { hptr = SPI_AlnSinglemRNAToPieces(spig_head, spim, ofp, ofp2, spot); if (h_head != NULL) { h_prev->next = hptr; h_prev = hptr; } else h_head = h_prev = hptr; } if (srip != NULL) { if (srip_head != NULL) { srip_prev->next = srip; srip_prev = srip; } else srip_head = srip_prev = srip; } spim = spim->next; } if (spot->makemult) { SPI_MakeMultipleAlignment(srip_head); SPI_PrintMultipleAlignment(srip_head, FALSE, spig_head->bsp, ofp); SPI_RegionListFree(srip_head); } else SPI_RegionListFree(srip_head); /* create the ASN.1 output, if requested; need to use the continuous alignment */ /* that was generated */ if (spot->printasn && *(spot->sap_head) != NULL && spot->draftfile == NULL) { sanp = 
SeqAnnotForSeqAlign(*(spot->sap_head)); txt = myargs[MYARGASNFILE].strvalue; aip = AsnIoOpen(txt, "w"); SeqAnnotAsnWrite(sanp, aip, NULL); AsnIoClose(aip); SeqAlignSetFree(*(spot->sap_head)); } FileClose(ofp); FileClose(ofp2); SPI_OptionsFree(spot); SPI_bsinfoFreeList(spim_head); SPI_bsinfoFreeList(spig_head); LocalSeqFetchDisable(); ID1BioseqFetchDisable(); return 0; }
/*
 * Button callback: run the coiled-coil prediction on the sequence selected
 * in the XOS structure attached to the button and show the result in a new
 * "Ccp" graph window.
 *
 * The sequence comes from Entrez when xosp->gi > 0, otherwise from the FASTA
 * file named in xosp->filename.  The first Bioseq found by GatherSeqEntry
 * must be a protein; its per-residue scores from PredictCCBioseq are scaled
 * by 100 and stored in a new SeqGraph, which then owns the score array and
 * the title.
 *
 * All exits after the PCC data has been read go through one cleanup path
 * that frees xosp->pccp->res and the SeqEntry, closes the FASTA file and
 * shuts down Entrez.  Previously several error returns skipped parts of
 * that cleanup and the file pointer could be used uninitialised.
 */
static void CcpProc (ButtoN b)
{
  Boolean         flagHaveNet;
  Boolean         entrez_open = FALSE;
  SeqEntryPtr     sep = NULL;
  Int4            i, gi;
  CharPtr         fastafile;
  FILE            *fiop = NULL;
  CharPtr         title;
  GatherScopePtr  gsp;
  XOSPtr          xosp;
  XISPtr          xisp;
  FloatHiPtr      pccscr;
  SeqGraphPtr     sgp;
  WindoW          w;
  PaneL           p;

  if ((xosp = (XOSPtr) GetObjectExtra (b)) == NULL)
    return;

  if (ReadPccData (xosp->pccp) == 0)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101,
               "Could not open or read %s data file",
               xosp->pccp->pccdatafile);
    ErrShow ();
    return;
  }

  gsp = xosp->gsp;
  gi = xosp->gi;
  fastafile = xosp->filename;

  /* obtain the SeqEntry from Entrez or from the FASTA file */
  if (gi > 0)
  {
    if (!EntrezInit ("ccpv", FALSE, &flagHaveNet))
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 102, "Entrez init failed");
      ErrShow ();
      goto cleanup;
    }
    entrez_open = TRUE;
    sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_BIOSEQ);
  }
  else if (fastafile != NULL)
  {
    if ((fiop = FileOpen (fastafile, "r")) == NULL)
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 103, "Failed to open FastA file");
      ErrShow ();
      goto cleanup;
    }
    sep = FastaToSeqEntry (fiop, FALSE);
  }

  if (sep == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 104, "No seqentry");
    ErrShow ();
    goto cleanup;
  }

  xosp->sep = sep;
  xosp->bsp = NULL;
  xosp->gi = gi;
  GatherSeqEntry (sep, (Pointer) xosp, GetBioseq, (Pointer) gsp);

  if (xosp->bsp == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "No Bioseq in SeqEntry");
    ErrShow ();
    goto cleanup;
  }
  if (!ISA_aa (xosp->bsp->mol))
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "Not an amino acid Bioseq");
    ErrShow ();
    goto cleanup;
  }

  pccscr = PredictCCBioseq (xosp->bsp, 0, xosp->bsp->length-1, xosp->pccp);
  if (pccscr == NULL)
    goto cleanup;

  /* scale the scores to the 0..100 range used for the graph axis */
  for (i = 0; i < xosp->bsp->length; i++)
    pccscr[i] *= 100.0;

  title = FastaTitle (xosp->bsp, "CCP: >", NULL);

  /* allocate the window data first so a failure needs no graph teardown */
  xisp = MemNew (sizeof (XIS));
  if (xisp == NULL)
  {
    MemFree (pccscr);
    MemFree (title);
    goto cleanup;
  }
  if ((sgp = SeqGraphNew ()) == NULL)
  {
    MemFree (xisp);
    MemFree (pccscr);
    MemFree (title);
    goto cleanup;
  }

  xisp->sgp = sgp;
  xisp->Xscale = 1;
  xisp->Yscale = 1;
  xisp->Xaxislen = 250;
  xisp->Yaxislen = 100;
  xisp->Xprelen = 0;
  xisp->Yprelen = 50;
  xisp->Xpostlen = 50;
  xisp->Ypostlen = 50;

  sgp->loc = SeqLocIntNew (0, xosp->bsp->length-1, xosp->bsp->strand,
                           xosp->bsp->id);
  sgp->title = title;
  sgp->flags[2] = 1;
  sgp->numval = xosp->bsp->length;
  sgp->max.realvalue = 100.0;
  sgp->min.realvalue = 0.0;
  sgp->values = (Pointer) pccscr;

  w = FixedWindow (-50, -50, -10, -10, "Ccp", CloseGraphWindowProc);
  SetObjectExtra (w, xisp, NULL);
  p = SimplePanel (w,
                   (Int2) (xisp->Xprelen+xisp->Xaxislen+xisp->Xpostlen),
                   (Int2) (xisp->Yprelen+xisp->Yaxislen+xisp->Ypostlen),
                   DrawGraph);
  SetPanelClick (p, NULL, NULL, NULL, CloseGraphPanelProc);
  RealizeWindow (w);
  Show (w);

cleanup:
  xosp->pccp->res = (CharPtr) MemFree (xosp->pccp->res);
  if (sep != NULL)
    xosp->sep = SeqEntryFree (sep);
  if (entrez_open)
    EntrezFini ();
  if (fiop != NULL)
    FileClose (fiop);
  return;
}
Int2 Main (void) { Int2 argcount; Boolean flagHaveNet; Int4 gi; SeqEntryPtr sep; ComPatPtr cpp, cpph = NULL; SeqAlignPtr sap, sapn; StdSegPtr ssp; SeqLocPtr slp, slpn; Int4 start, stop; FILE *fiop; Char fastafile[256], namesfile[256]; CharPtr title; CharPtr taxon; FloatHi mw; ValNodePtr namelist = NULL; static CharPtr pattern_file = "ncbipros.dat"; static CharPtr protease_file = "ncbiendo.dat"; static CharPtr names_file = "ncbipnam.dat"; static GatherScope gs; GatherScopePtr gsp; static Gather_PBS gpbs; Gather_PBSPtr gpbsp; #ifndef NO_TAX_NET Int4 i; static Char taxdata[8]; static Gather_TaxId gti; Gather_TaxIdPtr gtip; #endif #ifndef NO_TAX_NET Int2 ia=4, ib=5, ic=6, id=7, ie=8, ig=9, ih=10, ii=11; #else Int2 ib=4, ic=5, id=6, ie=7, ig=8, ih=9, ii=10; #endif argcount = sizeof (myargs) / sizeof (Args); if (!GetArgs ("ProSiteSearch", argcount, myargs)) return 1; if (myargs[0].intvalue == 0 && myargs[1].strvalue == NULL) { ErrPostEx (SEV_ERROR, TOP_ERROR, 100, "No gi or FastA file given :: for help : srchaa -"); ErrShow (); exit (1); } gsp = &gs; #ifndef NO_TAX_NET gtip = >i; #endif gpbsp = &gpbs; MemSet ((Pointer) gsp, 0, sizeof (GatherScope)); MemSet ((Pointer) gsp->ignore, (int) (TRUE), (size_t) (OBJ_MAX * sizeof (Boolean))); gsp->ignore[OBJ_SEQDESC] = TRUE; gsp->ignore[OBJ_BIOSEQ] = FALSE; gpbsp->bsp = NULL; gi = myargs[0].intvalue; if (myargs[1].strvalue != NULL) StrCpy (fastafile, myargs[1].strvalue); else fastafile[0] = '\0'; if (gi > 0) { if (!EntrezInit ("srchaa", FALSE, &flagHaveNet)) { ErrPostEx (SEV_ERROR, TOP_ERROR, 102, "Entrez init failed"); ErrShow (); exit (1); } } #ifndef NO_TAX_NET if (myargs[ia].intvalue) { if (!TaxArchInit ()) { ErrPostEx (SEV_ERROR, TOP_ERROR, 103, "Taxonomy init failed"); ErrShow (); exit (1); } } #endif fiop = NULL; if (gi > 0) { sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_BIOSEQ); } else { if ((fiop = FileOpen (fastafile, "r")) == NULL) { ErrPostEx (SEV_ERROR, TOP_ERROR, 103, "Failed to open FastA file: %s", fastafile); 
ErrShow (); exit (1); } sep = FastaToSeqEntry (fiop, FALSE); } if (sep == NULL) { ErrPostEx (SEV_ERROR, TOP_ERROR, 104, "No seqentry found"); ErrShow (); exit (1); } while (sep != NULL) { gsp->ignore[OBJ_SEQDESC] = TRUE; gsp->ignore[OBJ_BIOSEQ] = FALSE; gpbsp->bsp = NULL; gpbsp->gi = gi; GatherSeqEntry (sep, (Pointer) gpbsp, GetBioseq, (Pointer) gsp); taxon = NULL; #ifndef NO_TAX_NET if (myargs[ia].intvalue) { for (i = 0; i < 8; i++) taxdata[i] = '-'; taxon = taxdata; gsp->ignore[OBJ_SEQDESC] = FALSE; gsp->ignore[OBJ_BIOSEQ] = TRUE; gtip->taxid = 0; GatherSeqEntry (sep, (Pointer) gtip, GetTaxId, (Pointer) gsp); if (gtip->taxid != 0) WhatOrg (gtip->taxid, taxon); else taxon = NULL; } #endif if (gpbsp->bsp != NULL) { if (ISA_aa (gpbsp->bsp->mol)) { if (cpph == NULL) { namesfile[0] = '\0'; if (myargs[id].intvalue) StrCpy (namesfile, names_file); if (myargs[ie].strvalue != NULL) StrCpy (namesfile, myargs[ie].strvalue); if (myargs[ig].strvalue != NULL) { if ((cpph = CompilePattern (myargs[ig].strvalue, 1)) != NULL) StrCpy (cpph->name, "User Pattern"); } else { namelist = ReadPatternNames (namesfile); if (myargs[ib].intvalue) cpph = ReadPrositePattern (protease_file, (Boolean) myargs[2].intvalue, myargs[3].intvalue, taxon, NULL); else cpph = ReadPrositePattern (pattern_file, (Boolean) myargs[2].intvalue, myargs[3].intvalue, taxon, namelist); } } if (!(Boolean) myargs[ih].intvalue) { title = FastaTitle (gpbsp->bsp, ">", NULL); printf ("%s\n", title); MemFree (title); } cpp = cpph; while (cpp != NULL) { sap = PatternMatchBioseq (gpbsp->bsp, cpp, (Int4)myargs[ii].intvalue); if (myargs[ib].intvalue) { printf (">%s\n", cpp->name); if (sap != NULL) printf (" Start Stop M.W.\n"); } if (myargs[ib].intvalue) { EmbedMolecularWeightInfo (sap, gpbsp->bsp); if (myargs[ic].intvalue) URK_SeqAlignSortByMolWt (&sap); while (sap != NULL) { ssp = (StdSegPtr) sap->segs; slp = ssp->loc; start = SeqLocStart (slp); stop = SeqLocStop (slp); mw = ssp->scores->value.realvalue; printf ("%8ld %8ld 
%9.2f\n", (long) start+1, (long) stop+1, mw); sapn = sap->next; SeqAlignFree (sap); sap = sapn; } } else { slp = MatchSa2Sl (&sap); if (myargs[ih].intvalue && slp != NULL) { title = FastaTitle (gpbsp->bsp, ">", NULL); printf ("%s\n", title); MemFree (title); } while (slp != NULL) { start = SeqLocStart (slp); stop = SeqLocStop (slp); printf ("%8ld %8ld %s\n", (long) start+1, (long) stop+1, cpp->name); slpn = slp->next; SeqLocFree (slp); slp = slpn; } } cpp = cpp->nextpattern; } } else { ErrPostEx (SEV_ERROR, TOP_ERROR, 106, "Not a protein bioseq"); ErrShow (); exit (1); } } else { ErrPostEx (SEV_ERROR, TOP_ERROR, 105, "No bioseq found"); ErrShow (); exit (1); } SeqEntryFree (sep); sep = NULL; if (fiop != NULL) sep = FastaToSeqEntry (fiop, FALSE); } ComPatFree (cpph); ValNodeFreeData (namelist); FileClose (fiop); if (gi > 0) EntrezFini (); #ifndef NO_TAX_NET if (myargs[ia].intvalue) TaxArchFini (); #endif return 0; }
/*
 * Bioseq callback: write the feature table of one Bioseq to ftp->fp.
 *
 * The FeatureTable options passed as userdata control the output:
 *   show_nucs / show_prots  - skip nucleotide / protein Bioseqs when FALSE
 *   hide_sources            - suppress source features
 *   export_only_selected    - write only selected features, and nothing at
 *                             all when the Bioseq has none
 *   suppress_protein_ids    - run ExciseProteinIDLine on each feature block
 *
 * Paragraphs that are neither feature nor source-feature blocks are always
 * written.  The formatter job is checked for NULL before it is used, and
 * NULL paragraphs or NULL formatted strings are skipped instead of being
 * dereferenced or printed.
 */
static void VSMExportFeatureTableBioseqCallback (BioseqPtr bsp, Pointer userdata)
{
  FeatureTablePtr  ftp;
  CstType          custom_flags = 0;
  Asn2gbJobPtr     ajp;
  BaseBlockPtr     bbp;
  XtraBlock        extra;
  Int4             index;
  CharPtr          string;
  Boolean          write_block;

  if (bsp == NULL || userdata == NULL) return;
  ftp = (FeatureTablePtr) userdata;
  if (ftp->fp == NULL) return;

  if (!ftp->show_nucs && ISA_na (bsp->mol)) {
    return;
  }
  if (!ftp->show_prots && ISA_aa (bsp->mol)) {
    return;
  }

  if (ftp->hide_sources) {
    custom_flags |= HIDE_SOURCE_FEATS;
  }

  MemSet ((Pointer) &extra, 0, sizeof (XtraBlock));
  ajp = asn2gnbk_setup (bsp, NULL, NULL, FTABLE_FMT, DUMP_MODE, NORMAL_STYLE,
                        0, 0, custom_flags, &extra);
  if (ajp == NULL) return;

  if (ftp->export_only_selected
      && ! BioseqHasSelectedFeatures (ajp, ftp->hide_sources)) {
    /* nothing to export */
    asn2gnbk_cleanup (ajp);
    return;
  }

  for (index = 0; index < ajp->numParagraphs; index++) {
    bbp = ajp->paragraphArray [index];
    if (bbp == NULL) continue;

    /* decide whether this paragraph passes the selection filters */
    if (bbp->blocktype == FEATURE_BLOCK) {
      write_block = (Boolean) (!ftp->export_only_selected
                               || IsBaseBlockFeatureSelected (bbp));
    } else if (bbp->blocktype == SOURCEFEAT_BLOCK) {
      write_block = (Boolean) (!ftp->hide_sources
                               && (!ftp->export_only_selected
                                   || IsBaseBlockFeatureSelected (bbp)));
    } else {
      write_block = TRUE;
    }
    if (! write_block) continue;

    string = asn2gnbk_format (ajp, (Int4) index);
    if (string == NULL) continue;
    if (bbp->blocktype == FEATURE_BLOCK && ftp->suppress_protein_ids) {
      ExciseProteinIDLine (string);
    }
    fprintf (ftp->fp, "%s", string);
    MemFree (string);
  }

  asn2gnbk_cleanup (ajp);
}