Example #1
0
/* Bioseq callback that records one AlphaProtData entry per protein Bioseq.
 *
 * bsp      - Bioseq being visited; ignored unless it is an amino-acid sequence.
 * userdata - address of the ValNode list head that receives the new entry.
 *
 * The entry stores the Bioseq pointer and a saved copy of a protein name.
 * The name comes from the SEQFEAT_PROT feature that SeqMgrGetNextFeature
 * returns when started from NULL: the first string of the ProtRef name list
 * if there is one, otherwise the feature context label.  prot_name stays NULL
 * when no such feature (or no ProtRef payload) is found.
 */
static void GetProtListCallback (BioseqPtr bsp, Pointer userdata)
{
  ValNodePtr PNTR   list_head;
  AlphaProtPtr      entry;
  SeqFeatPtr        prot_feat;
  ProtRefPtr        prot_ref;
  SeqMgrFeatContext context;

  if (bsp == NULL) return;
  if (userdata == NULL) return;
  if (! ISA_aa (bsp->mol)) return;

  list_head = (ValNodePtr PNTR) userdata;

  entry = (AlphaProtPtr) MemNew (sizeof (AlphaProtData));
  if (entry == NULL) return;
  entry->bsp = bsp;
  entry->prot_name = NULL;

  prot_ref = NULL;
  prot_feat = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PROT, 0, &context);
  if (prot_feat != NULL)
  {
    prot_ref = (ProtRefPtr) prot_feat->data.value.ptrvalue;
  }

  if (prot_ref != NULL)
  {
    if (prot_ref->name == NULL)
    {
      /* no explicit name: fall back to the label from the feature context */
      entry->prot_name = StringSave (context.label);
    }
    else
    {
      entry->prot_name = StringSave (prot_ref->name->data.ptrvalue);
    }
  }

  /* the entry is added even when no name could be determined */
  ValNodeAddPointer (list_head, 0, entry);
}
Example #2
0
/* Bioseq callback that prints suggested coding-region intervals for a protein.
 *
 * bsp      - Bioseq being visited; ignored unless it is an amino-acid sequence.
 * userdata - CSpeedFlagPtr supplying the output file (ofp), the nucleotide
 *            Bioseq (nucbsp) and the genetic code (genCode).
 *
 * Calls SuggestCodingRegion for the nucleotide/protein pair.  When the
 * returned annotation has type 1 and its data is a coding-region feature with
 * a location, writes the protein's FASTA-short ID on one line followed by one
 * "start<TAB>stop" line per location piece.  Coordinates are the offsets in
 * nucbsp plus one; "<" / ">" prefixes mark 5' / 3' partial pieces.  The
 * annotation is freed before returning.
 */
static void DoSuggestIntervals (
  BioseqPtr bsp,
  Pointer userdata
)

{
  CSpeedFlagPtr  flags;
  Char           idbuf [64];
  SeqAnnotPtr    annot;
  SeqFeatPtr     cds;
  SeqIdPtr       best;
  SeqLocPtr      whole, piece;
  Boolean        open5, open3;
  Int4           from, to;

  if (bsp == NULL || ! ISA_aa (bsp->mol)) return;

  flags = (CSpeedFlagPtr) userdata;
  if (flags == NULL) return;
  if (flags->ofp == NULL || flags->nucbsp == NULL) return;

  best = SeqIdFindBest (bsp->id, 0);
  if (best == NULL) return;
  SeqIdWrite (best, idbuf, PRINTID_FASTA_SHORT, sizeof (idbuf) - 1);

  annot = SuggestCodingRegion (flags->nucbsp, bsp, flags->genCode);
  if (annot == NULL) return;

  /* narrow step by step down to the CDS location; NULL means nothing to print */
  cds = NULL;
  if (annot->type == 1) {
    cds = (SeqFeatPtr) annot->data;
  }
  whole = NULL;
  if (cds != NULL && cds->data.choice == SEQFEAT_CDREGION) {
    whole = cds->location;
  }

  if (whole != NULL) {
    fprintf (flags->ofp, "%s\n", idbuf);
    for (piece = SeqLocFindNext (whole, NULL);
         piece != NULL;
         piece = SeqLocFindNext (whole, piece)) {
      from = GetOffsetInBioseq (piece, flags->nucbsp, SEQLOC_START) + 1;
      to = GetOffsetInBioseq (piece, flags->nucbsp, SEQLOC_STOP) + 1;
      CheckSeqLocForPartial (piece, &open5, &open3);
      fprintf (flags->ofp, "%s%ld\t%s%ld\n",
               open5 ? "<" : "", (long) from,
               open3 ? ">" : "", (long) to);
    }
  }

  SeqAnnotFree (annot);
}
Example #3
0
/* Bioseq callback that appends every raw, non-protein Bioseq to a list.
 *
 * bsp      - Bioseq being visited.
 * userdata - address of the ValNode list head to append to; nothing is done
 *            when it is NULL.
 */
static void CollectBioseqsForConversion (BioseqPtr bsp, Pointer userdata)
{
  if (bsp != NULL
      && bsp->repr == Seq_repr_raw
      && ! ISA_aa (bsp->mol)
      && userdata != NULL)
  {
    ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, bsp);
  }
}
Example #4
0
/* Bioseq callback that records which kinds of sequence and Seq-id were seen.
 *
 * bsp      - Bioseq being visited.
 * userdata - LookForIDsPtr whose flags are set (never cleared) here:
 *              isNuc / isProt - nucleotide / amino-acid molecule
 *              isGED          - GenBank, EMBL or DDBJ id present
 *              isTPA          - TPG, TPE or TPD id present
 *              isNC           - SEQID_OTHER accession starts with "NC_"
 *              isNTorNW       - SEQID_OTHER accession starts with "NT_" or "NW_"
 *
 * Returns without doing anything when either argument is NULL.
 */
static void LookForSeqIDs (BioseqPtr bsp, Pointer userdata)

{
  LookForIDsPtr  lfip;
  SeqIdPtr       sip;
  TextSeqIdPtr   tsip;

  lfip = (LookForIDsPtr) userdata;
  /* guard both pointers, as the other Bioseq callbacks in this file do */
  if (bsp == NULL || lfip == NULL) return;

  if (ISA_na (bsp->mol)) {
    lfip->isNuc = TRUE;
  }
  if (ISA_aa (bsp->mol)) {
    lfip->isProt = TRUE;
  }
  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    switch (sip->choice) {
      case SEQID_GENBANK :
      case SEQID_EMBL :
      case SEQID_DDBJ :
        lfip->isGED = TRUE;
        break;
      case SEQID_TPG :
      case SEQID_TPE :
      case SEQID_TPD :
        lfip->isTPA = TRUE;
        break;
      case SEQID_OTHER :
        tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        if (tsip != NULL) {
          if (StringNCmp (tsip->accession, "NC_", 3) == 0) {
            lfip->isNC = TRUE;
          } else if (StringNCmp (tsip->accession, "NT_", 3) == 0 ||
                     StringNCmp (tsip->accession, "NW_", 3) == 0) {
            lfip->isNTorNW = TRUE;
          }
        }
        break;
      default :
        break;
    }
  }
}
Example #5
0
/* Bioseq callback that fills in field values for a Bioseq found in the fetch index.
 *
 * bsp  - Bioseq being visited; amino-acid Bioseqs are skipped.
 * data - PopulatePtr supplying field_list and want_gi for the value collection.
 *
 * Each Seq-id of the Bioseq is looked up with FindInFetchIndex until one
 * matches: GI ids are looked up by their decimal text form, text ids
 * (GenBank/EMBL/DDBJ/TPG/TPE/TPD/other) by accession.  If the matching item
 * still has a negative index_pos, its field_values are collected and
 * index_pos is set to 0.
 */
static void PopulateFetchItemCallback (BioseqPtr bsp, Pointer data)
{
  PopulatePtr pp;
  SeqIdPtr    sip;
  TextSeqIdPtr tsip;
  Char         buffer[15];
  FetchItemPtr fetch_item = NULL;

  if (bsp == NULL || ISA_aa(bsp->mol) || (pp = (PopulatePtr)data) == NULL) {
    return;
  }

  for (sip = bsp->id; sip != NULL && fetch_item == NULL; sip = sip->next) {
    switch (sip->choice) {
      case SEQID_GI:
        /* Format the GI into buffer.  The previous printf call used the
           uninitialised buffer as a format string and never filled it.
           A 32-bit value needs at most 11 characters plus the terminator. */
        sprintf (buffer, "%ld", (long) sip->data.intvalue);
        fetch_item = FindInFetchIndex(buffer);
        break;
      case SEQID_GENBANK :
      case SEQID_EMBL :
      case SEQID_DDBJ :
      case SEQID_TPG :
      case SEQID_TPE :
      case SEQID_TPD :
      case SEQID_OTHER :
        tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        if (tsip != NULL) {
          fetch_item = FindInFetchIndex(tsip->accession);
        }
        break;
      default :
        break;
    }
  }

  if (fetch_item != NULL && fetch_item->index_pos < 0) {
    /* collect field values */
    fetch_item->field_values = CollectBioseqLineValues (bsp, pp->field_list, pp->want_gi);
    fetch_item->index_pos = 0;
  }
}
Example #6
0
/* Bioseq callback that writes one protein Bioseq in FASTA form.
 *
 * bsp  - Bioseq being visited; ignored unless it is an amino-acid sequence.
 * data - FastaExportOptionsPtr supplying the output file and stream settings.
 *
 * When a FEATDEF_PROT feature is found, a ">id [prot=label]" line is written
 * first and the sequence is streamed with the seventh BioseqFastaStreamEx
 * argument FALSE; when none is found the sequence is streamed with that
 * argument TRUE and no extra line is written.
 */
static void WriteOneProteinWithProduct (BioseqPtr bsp, Pointer data)
{
    FastaExportOptionsPtr opts;
    SeqMgrFeatContext     context;
    Char                  idtext [128];
    Boolean               no_prot_feature;

    if (bsp == NULL || ! ISA_aa (bsp->mol)) return;
    opts = (FastaExportOptionsPtr) data;
    if (opts == NULL) return;

    no_prot_feature = TRUE;
    if (SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_PROT, &context) != NULL) {
        no_prot_feature = FALSE;
        SeqIdWrite (bsp->id, idtext, PRINTID_FASTA_LONG, sizeof (idtext) - 1);
        fprintf (opts->fp, ">%s [prot=%s]\n", idtext, context.label);
    }

    BioseqFastaStreamEx (bsp, opts->fp, opts->flags, opts->linelen, opts->blocklen,
                         opts->grouplen, no_prot_feature, FALSE, FALSE);
}
Example #7
0
/* SeqEntry callback that prints quality scores for one non-protein Bioseq.
 *
 * sep    - entry being visited; only Bioseq entries are processed.
 * data   - FILE* to write to.
 * index, indent - unused.
 *
 * The second argument of PrintQualityScoresForContig is FALSE when
 * myargs[17] is non-zero and TRUE otherwise.
 */
static void PrintFarQualScores (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)

{
	BioseqPtr  contig;
	Boolean    flag;

	if (! IS_Bioseq (sep)) return;
	contig = (BioseqPtr) sep->data.ptrvalue;

	/* WARNING: we're assuming here that asn2fast's quality-score
	   output is DNA-centric, thus protein bioseqs can be ignored
	   in the PrintQualScores callback. --MLC, 5/2000 */
	if (ISA_aa (contig->mol)) return;

	flag = myargs [17].intvalue ? FALSE : TRUE;
	PrintQualityScoresForContig (contig, flag, (FILE*) data);
}
Example #8
0
/*  WARNING not called and not tested... */
/* Fills pcsanThis->pcSeqAln with a newly allocated, NUL-terminated string of
 * exactly iLen characters: the residues of protein pbsq (ncbieaa code),
 * padded on the right with '-' when the sequence is shorter than iLen.
 *
 * pcsanThis - node whose pcSeqAln pointer is overwritten (the old value is not freed).
 * pbsq      - source Bioseq; must be an amino-acid sequence.
 * iLen      - number of characters to produce; must be positive.
 *
 * Returns iLen on success, or 0 when an argument is unusable, the SeqPort
 * cannot be opened, or the buffer cannot be allocated.
 */
static Int4 FillCSANWithSeq(PCSAN pcsanThis, BioseqPtr pbsq, Int4 iLen)
{
     SeqPortPtr spp = NULL;
     Uint1 code = Seq_code_ncbieaa;
     Uint1 residue;
     Int4 iCount = 0;
     CharPtr pcA;

     if (!pcsanThis || !pbsq) return 0;
     if (!ISA_aa(pbsq->mol)) return 0;
     if (iLen <= 0) return 0;

     spp = SeqPortNew(pbsq, 0, -1, 0, code);
     if (!spp) return 0;
     SeqPortSeek(spp, 0, SEEK_SET);

     pcA = (CharPtr)MemNew((size_t) iLen + 1);
     if (pcA == NULL)
       {
            SeqPortFree(spp);
            return 0;
       }
     pcsanThis->pcSeqAln = pcA;

     /* Copy at most iLen residues.  The previous loop condition tested
        (iLen < iCount), which is never true on entry, so no residue was ever
        copied and the whole buffer was filled with gap characters. */
     while (iCount < iLen)
       {
            residue = SeqPortGetResidue(spp);
            if ((residue == SEQPORT_EOF) || (residue == '\0')) break;
            *pcA = (char) residue;
            pcA++;
            iCount++;
       }
     /* pad the remainder with gap characters */
     while (iCount < iLen)
       {
            *pcA = '-';
            pcA++;
            iCount++;
       }
     pcsanThis->pcSeqAln[iLen] = '\0';
     SeqPortFree(spp);
     return iCount;
}
Example #9
0
/* Object-manager procedure that opens a graph window showing the result of
 * FilterSeq with the "KSkyte.flt" table (window 19, AA_FILTER_COMP_KYTE).
 *
 * data - OMProcControlPtr.  The input item must be either
 *          OBJ_BIOSEQ  : an amino-acid Bioseq, or
 *          OBJ_SEQFEAT : a coding-region feature, whose product Bioseq
 *                        (located with BioseqFind) is used.
 *
 * Returns OM_MSG_RET_DONE after showing and selecting the window, or
 * OM_MSG_RET_ERROR when the input is unusable, the product Bioseq cannot be
 * found, the form has no GraphViewForm data, the filter table does not yield
 * 24 entries, or no graph is produced.
 *
 * The Bioseq and feature cases previously ran two identical copies of the
 * graph code; they now share one path.  NULL input data and a NULL
 * BioseqFind result are rejected instead of being dereferenced.
 */
Int2 LIBCALLBACK HydrophobicFunc (Pointer data)
{
  OMProcControlPtr      ompcp;
  BioseqPtr             bsp = NULL;
  SeqFeatPtr            sfp;
  WindoW                w;
  GraphViewFormPtr      gvp;
  SeqPortPtr            spp;
  FloatHi               scr[24];
  Char                  res[24];

  ompcp = (OMProcControlPtr) data;
  if (ompcp == NULL || ompcp->input_itemtype == 0)
    return OM_MSG_RET_ERROR;

  switch (ompcp->input_itemtype)
  {
    case OBJ_BIOSEQ:
      bsp = (BioseqPtr) ompcp->input_data;
      if (bsp == NULL || !ISA_aa (bsp->mol))
        return OM_MSG_RET_ERROR;
      break;
    case OBJ_SEQFEAT:
      sfp = (SeqFeatPtr) ompcp->input_data;
      if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION)
        return OM_MSG_RET_ERROR;
      bsp = BioseqFind (SeqLocId (sfp->product));
      /* the product may not be loaded; bsp->length is read below */
      if (bsp == NULL)
        return OM_MSG_RET_ERROR;
      break;
    default:
      return OM_MSG_RET_ERROR;
  }

  w = (WindoW) CreateGraphViewForm (-50, -33, "Kyte-Doolittle-phobicity",
                                    bsp, GRAPH_FILTER);

  /* NOTE(review): on the error returns below the created window is not
     removed; the original code had the Remove (w) calls commented out. */
  if ((gvp = (GraphViewFormPtr) GetObjectExtra (w)) == NULL)
    return OM_MSG_RET_ERROR;

  gvp->graphtype = GRAPH_FILTER;
  if (ReadAAC ("KSkyte.flt", scr, res) != 24)
    return OM_MSG_RET_ERROR;
  gvp->window = 19;
  gvp->type = AA_FILTER_COMP_KYTE;
  gvp->entityID = ompcp->input_entityID;
  gvp->itemID = ompcp->input_itemID;

  spp = SeqPortNew (bsp, 0, bsp->length-1, 0, Seq_code_iupacaa);
  gvp->sgp = FilterSeq (spp, 0, bsp->length-1, scr, res,
                        &(gvp->window), gvp->type);
  SeqPortFree (spp);
  if (gvp->sgp == NULL)
    return OM_MSG_RET_ERROR;

  BioseqPtrToGraphViewForm (gvp->form, gvp->sgp);

  Show (w);
  Select (w);
  return OM_MSG_RET_DONE;
}
Example #10
0
/* Object-manager procedure that opens a graph window showing the result of
 * PCCProc (window 22, type AA_PCC) for a protein.
 *
 * data - OMProcControlPtr.  The input item must be either
 *          OBJ_BIOSEQ  : an amino-acid Bioseq, or
 *          OBJ_SEQFEAT : a coding-region feature, whose product Bioseq
 *                        (located with BioseqFind) is used.
 *
 * Returns OM_MSG_RET_DONE after showing and selecting the window, or
 * OM_MSG_RET_ERROR when the input is unusable, the product Bioseq cannot be
 * found, the form has no GraphViewForm data, or PCCProc returns no graph.
 *
 * The Bioseq and feature cases previously ran two identical copies of the
 * graph code; they now share one path.  NULL input data and a NULL
 * BioseqFind result are rejected up front.
 */
Int2 LIBCALLBACK PCCPredictFunc (Pointer data)
{
  OMProcControlPtr      ompcp;
  BioseqPtr             bsp = NULL;
  SeqFeatPtr            sfp;
  WindoW                w;
  GraphViewFormPtr      gvp;

  ompcp = (OMProcControlPtr) data;
  if (ompcp == NULL || ompcp->input_itemtype == 0)
    return OM_MSG_RET_ERROR;

  switch (ompcp->input_itemtype)
  {
    case OBJ_BIOSEQ:
      bsp = (BioseqPtr) ompcp->input_data;
      if (bsp == NULL || !ISA_aa (bsp->mol))
        return OM_MSG_RET_ERROR;
      break;
    case OBJ_SEQFEAT:
      sfp = (SeqFeatPtr) ompcp->input_data;
      if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION)
        return OM_MSG_RET_ERROR;
      bsp = BioseqFind (SeqLocId (sfp->product));
      /* the product may not be loaded; do not open a window for nothing */
      if (bsp == NULL)
        return OM_MSG_RET_ERROR;
      break;
    default:
      return OM_MSG_RET_ERROR;
  }

  w = (WindoW) CreateGraphViewForm (-50, -33, "Predict coiled-coil",
                                    bsp, GRAPH_FILTER);

  /* NOTE(review): on the error returns below the created window is not
     removed; the original code had the Remove (w) calls commented out. */
  if ((gvp = (GraphViewFormPtr) GetObjectExtra (w)) == NULL)
    return OM_MSG_RET_ERROR;

  gvp->graphtype = GRAPH_FILTER;
  gvp->window = 22;
  gvp->type = AA_PCC;
  gvp->entityID = ompcp->input_entityID;
  gvp->itemID = ompcp->input_itemID;
  if ((gvp->sgp = PCCProc (bsp, NULL, gvp->window)) == NULL)
    return OM_MSG_RET_ERROR;

  BioseqPtrToGraphViewForm (gvp->form, gvp->sgp);

  Show (w);
  Select (w);
  return OM_MSG_RET_DONE;
}
Example #11
0
/* Bioseq callback that reports peptide features overlapping the first peptide.
 *
 * bsp      - Bioseq being visited; ignored unless it is an amino-acid sequence.
 * userdata - ScanDataPtr passed through to PrintFeatureMessage.
 *
 * Only mat_peptide, sig_peptide and transit_peptide features are considered.
 * The first one seen becomes the reference: its right end is remembered, along
 * with whether it is a signal peptide.  Every later peptide whose left end is
 * at or before that right end is reported:
 *   "SP" - the reference is a signal peptide, the later peptide is not, and it
 *          starts exactly at the reference's right end.  The message carries
 *          the four residues ending at that position (empty when the position
 *          is below 4 or the SeqPort cannot be opened).
 *   "OV" - any other such overlap.
 * The reference is never replaced by later features.
 */
static void DoProteins (BioseqPtr bsp, Pointer userdata)

{
  Char               tail [6];
  SeqMgrFeatContext  ctx;
  Boolean            haveRef = FALSE;
  Boolean            refIsSig = FALSE;
  Int4               refRight = 0;
  ScanDataPtr        sdp;
  SeqFeatPtr         feat;
  SeqInt             interval;
  SeqPortPtr         port;
  ValNode            loc;

  if (bsp == NULL || ! ISA_aa (bsp->mol)) return;

  sdp = (ScanDataPtr) userdata;

  for (feat = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &ctx);
       feat != NULL;
       feat = SeqMgrGetNextFeature (bsp, feat, 0, 0, &ctx)) {

    if (ctx.featdeftype != FEATDEF_mat_peptide_aa &&
        ctx.featdeftype != FEATDEF_sig_peptide_aa &&
        ctx.featdeftype != FEATDEF_transit_peptide_aa) continue;

    if (! haveRef) {
      /* first peptide encountered: remember it as the reference */
      haveRef = TRUE;
      refRight = ctx.right;
      if (ctx.featdeftype == FEATDEF_sig_peptide_aa) {
        refIsSig = TRUE;
      }
      continue;
    }

    if (ctx.left > refRight) continue;

    if (! refIsSig || ctx.left != refRight ||
        ctx.featdeftype == FEATDEF_sig_peptide_aa) {
      PrintFeatureMessage (feat, sdp, "OV", NULL);
      continue;
    }

    tail [0] = '\0';

    if (refRight >= 4) {
      /* temporary stack location covering [refRight - 3, refRight] */
      MemSet ((Pointer) &interval, 0, sizeof (SeqInt));
      interval.id = SeqLocId (feat->location);
      interval.from = refRight - 3;
      interval.to = refRight;
      interval.strand = Seq_strand_plus;

      MemSet ((Pointer) &loc, 0, sizeof (ValNode));
      loc.choice = SEQLOC_INT;
      loc.data.ptrvalue = &interval;

      port = SeqPortNewByLoc (&loc, Seq_code_ncbieaa);
      if (port != NULL) {
        SeqPortRead (port, (BytePtr) tail, 4);
        SeqPortFree (port);
      }
      tail [4] = '\0';
    }

    PrintFeatureMessage (feat, sdp, "SP", tail);
  }
}
Example #12
0
/* Appends a new SPI_bsinfo node holding bsp to the list at *head and stores
 * the new node in *tail.
 *
 * *tail is only a hint for where the end of the list is: the real end is
 * found by walking ->next from it (or from *head when it is NULL).  This keeps
 * the append correct when other code, such as a SeqEntryExplore callback, has
 * added nodes to the same list in the meantime.  If the node cannot be
 * allocated the list is left unchanged.
 */
static void SpideyAppendBioseq (SPI_bsinfoPtr PNTR head, SPI_bsinfoPtr PNTR tail, BioseqPtr bsp)
{
   SPI_bsinfoPtr  last;
   SPI_bsinfoPtr  node;

   node = (SPI_bsinfoPtr)MemNew(sizeof(SPI_bsinfo));
   if (node == NULL)
      return;
   node->bsp = bsp;
   if (*head == NULL)
      *head = node;
   else
   {
      last = (*tail != NULL) ? *tail : *head;
      while (last->next != NULL)
         last = last->next;
      last->next = node;
   }
   *tail = node;
}

/* Program entry point.
 *
 * Steps, in order:
 *   1. Enable sequence fetching, load the object loaders and parse the
 *      command-line arguments (myargs).
 *   2. Read the genomic sequence from a file or, if the file cannot be
 *      opened, fetch it by GI/accession.  Only the first genomic sequence is
 *      used.
 *   3. Read the mRNA sequences: a lowercase-masked FASTA file, a list of
 *      GIs/accessions, or a generic FASTA/ASN.1 file; again falling back to a
 *      GI/accession fetch when the file cannot be opened.
 *   4. Optionally read a feature table, fill in the SPI_Options structure
 *      from the arguments and open the output files.
 *   5. Align every mRNA to the genomic sequence, optionally print a multiple
 *      alignment and/or write the alignments as an ASN.1 Seq-annot.
 *
 * Returns 0 on success (and when GetArgs fails), 1 when an object loader
 * fails, and -1 on input/output errors.
 *
 * NOTE(review): the error returns do not release lists, options or open files
 * acquired earlier; the table file opened for SPI_ReadFeatureTable is not
 * closed here (confirm whether the callee closes it).
 */
Int2 Main()
{
   AsnIoPtr           aip;
   BioseqPtr          bsp;
   Pointer            dataptr;
   Uint2              datatype;
   Boolean            found;
   SPI_mRNAToHerdPtr  h_head;
   SPI_mRNAToHerdPtr  h_prev;
   SPI_mRNAToHerdPtr  hptr;
   FILE               *ifp;
   Boolean            isGIlist;
   Char               line[60];
   Boolean            lowercase;
   SeqLocPtr          lcaseloc;
   FILE               *ofp;
   FILE               *ofp2;
   SeqAlignPtr        sap;
   SeqAnnotPtr        sanp;
   SeqEntryPtr        sep;
   FILE               *sfp;
   SeqIdPtr           sip;
   SeqLocPtr          slp;
   SPI_bsinfoPtr      spig_head;
   SPI_bsinfoPtr      spig_prev;
   SPI_bsinfoPtr      spim;
   SPI_bsinfoPtr      spim_head;
   SPI_bsinfoPtr      spim_prev;
   SPI_OptionsPtr     spot;
   SPI_RegionInfoPtr  srip = NULL;
   SPI_RegionInfoPtr  srip_head;
   SPI_RegionInfoPtr  srip_prev;
   CharPtr            str;
   CharPtr            txt;

   ID1BioseqFetchEnable("spidey", FALSE);
   LocalSeqFetchInit(FALSE);
   /* standard setup */
   ErrSetFatalLevel (SEV_MAX);
   ErrClearOptFlags (EO_SHOW_USERSTR);
   UseLocalAsnloadDataAndErrMsg ();
   ErrPathReset ();
   if (! AllObjLoad ())
   {
      Message (MSG_FATAL, "AllObjLoad failed");
      return 1;
   }
   if (! SubmitAsnLoad ())
   {
      Message (MSG_FATAL, "SubmitAsnLoad failed");
      return 1;
   }
   if (! FeatDefSetLoad ())
   {
      Message (MSG_FATAL, "FeatDefSetLoad failed");
      return 1;
   }
   if (! SeqCodeSetLoad ())
   {
      Message (MSG_FATAL, "SeqCodeSetLoad failed");
      return 1;
   }
   if (! GeneticCodeTableLoad ())
   {
      Message (MSG_FATAL, "GeneticCodeTableLoad failed");
      return 1;
   }
   if (!GetArgs("SPIDEY", NUMARGS, myargs))
      return 0;
   isGIlist = (Boolean)myargs[MYARGGILIST].intvalue;

   /* ---- genomic sequence ---- */
   txt = myargs[MYARGGENFILE].strvalue;
   ifp = FileOpen(txt, "r");
   spig_head = NULL;
   if (ifp == NULL)
   {
      /* not a readable file: treat the argument as a GI or accession */
      bsp = SPI_GetBspFromGIOrAcc(txt);
      if (bsp == NULL)
      {
         ErrPostEx(SEV_ERROR, 0, 0, "Can't open genomic input file\n");
         return -1;
      } else
      {
         spig_head = (SPI_bsinfoPtr)MemNew(sizeof(SPI_bsinfo));
         spig_head->bsp = bsp;
      }
   }
   if (spig_head == NULL)
   {
      spig_prev = NULL;
      /* read in the genomic sequence(s) first and put them into bsinfo structures */
      while ((dataptr = ReadAsnFastaOrFlatFile (ifp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE)) != NULL)
      {
         if (datatype == OBJ_BIOSEQ)
         {
            /* the helper finds the real list end, so nodes added by
               SPI_FindAllNuc below are neither overwritten nor cause a NULL
               tail dereference */
            SpideyAppendBioseq(&spig_head, &spig_prev, (BioseqPtr)dataptr);
         } else if (datatype == OBJ_SEQENTRY)
         {
            sep = (SeqEntryPtr)dataptr;
            SeqEntryExplore(sep, &spig_head, SPI_FindAllNuc);
         }
      }
      FileClose(ifp);
   }
   if (spig_head == NULL)
   {
      ErrPostEx(SEV_ERROR, 0, 0, "No valid bioseqs in genomic file\n");
      return -1;
   } else if (ISA_aa(spig_head->bsp->mol))
   {
      ErrPostEx(SEV_ERROR, 0, 0, "At least one of the genomic sequences appears to be a protein.\n");
      return -1;
   }
   if (spig_head->next != NULL)
   {
      ErrPostEx(SEV_ERROR, 0, 0, "This version can only process one genomic sequence at a time.  Only the first sequence in this file will be used.\n");
      spig_head->next = NULL;
   }

   /* ---- mRNA sequences ---- */
   spim_head = spim_prev = NULL;
   txt = myargs[MYARGMRNAFILE].strvalue;
   ifp = FileOpen(txt, "r");
   if (ifp == NULL)
   {
      bsp = SPI_GetBspFromGIOrAcc(txt);
      if (bsp == NULL)
      {
         ErrPostEx(SEV_ERROR, 0, 0, "Can't open mRNA input file\n");
         return -1;
      } else
      {
         spim_head = (SPI_bsinfoPtr)MemNew(sizeof(SPI_bsinfo));
         spim_head->bsp = bsp;
      }
   }
   if (spim_head == NULL)
   {
      lowercase = (Boolean)myargs[MYARGMASKED].intvalue;
      lcaseloc = NULL;
      /* if the mRNA has lowercase masking, read it in carefully to record the masking */
      if (lowercase == TRUE)
      {
         while ((sep = FastaToSeqEntryForDb(ifp, TRUE, NULL, TRUE, NULL, NULL, &lcaseloc)) != NULL)
         {
            SeqEntryExplore(sep, &spim_head, SPI_FindAllNuc);
            if (lcaseloc != NULL)  /* put masking info into the bsinfo structure */
            {
               spim = spim_head;
               sip = SeqLocId(lcaseloc);
               found = FALSE;
               while (spim != NULL && !found)
               {
                  if (SeqIdComp(sip, spim->bsp->id) == SIC_YES)
                  {
                     found = TRUE;
                     spim->lcaseloc = lcaseloc;
                  }
                  spim = spim->next;
               }
               lcaseloc = NULL;
            }
         }
      } else if (isGIlist) /* mRNA file is a list of GIs, must fetch the bioseqs */
      {
         str = ReadALine(line, sizeof(line), ifp);
         while (str != NULL)
         {
            bsp = SPI_GetBspFromGIOrAcc(str);
            if (bsp != NULL)
               SpideyAppendBioseq(&spim_head, &spim_prev, bsp);
            str = ReadALine(line, sizeof(line), ifp);
         }
      } else /* mRNAs are FASTA or ASN.1, read them all in */
      {
         while ((dataptr = ReadAsnFastaOrFlatFile (ifp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE)) != NULL)
         {
            if (datatype == OBJ_BIOSEQ)
            {
               SpideyAppendBioseq(&spim_head, &spim_prev, (BioseqPtr)dataptr);
            } else if (datatype == OBJ_SEQENTRY)
            {
               sep = (SeqEntryPtr)dataptr;
               SeqEntryExplore(sep, &spim_head, SPI_FindAllNuc);
            }
         }
      }
      FileClose(ifp);
   }
   if (spim_head == NULL)
   {
      ErrPostEx(SEV_ERROR, 0, 0, "No valid bioseqs in mRNA file\n");
      return -1;
   } else if (ISA_aa(spim_head->bsp->mol))
   {
      ErrPostEx(SEV_ERROR, 0, 0, "At least one of the mRNA sequences appears to be a protein\n");
      return -1;
   }

   /* ---- optional feature table ---- */
   txt = myargs[MYARGTABLE].strvalue;
   if (txt != NULL)
   {
      ifp = FileOpen(txt, "r");
      if (ifp == NULL)
      {
         ErrPostEx(SEV_ERROR, 0, 0, "Unable to open table file\n");
         return -1;
      }
      SPI_ReadFeatureTable(ifp, spim_head);
      spim = spim_head;
      while (spim != NULL)
      {
         if (spim->lcaseloc != NULL)
         {
            /* wrap the existing location in a SEQLOC_MIX node */
            slp = (SeqLocPtr)ValNodeNew(NULL);
            slp->choice = SEQLOC_MIX;
            slp->data.ptrvalue = (Pointer)spim->lcaseloc;
            spim->lcaseloc = slp;
         }
         spim = spim->next;
      }
   }

   /* ---- options and output files ---- */
   spim = spim_head;
   spot = (SPI_OptionsPtr)MemNew(sizeof(SPI_Options));
   spot->printaln = myargs[MYARGPRALIGN].intvalue;
   txt = myargs[MYARGOUTFILE].strvalue;
   ofp = FileOpen(txt, "w");
   if (ofp == NULL && spot->printaln != 3)
   {
      ErrPostEx(SEV_ERROR, 0, 0, "Unable to open output file\n");
      return -1;
   }
   if (spot->printaln >= 2)
   {
      txt = myargs[MYARGALNFILE].strvalue;
      ofp2 = FileOpen(txt, "a");
      if (ofp2 == NULL)
      {
         ErrPostEx(SEV_ERROR, 0, 0, "Unable to open output file 2\n");
         return -1;
      }
   } else
      ofp2 = NULL;
   /** ErrSetMessageLevel(SEV_MAX); **/
   spot->firstpasseval = myargs[MYARG1STEVAL].floatvalue;
   spot->secpasseval = myargs[MYARG2NDEVAL].floatvalue;
   spot->thirdpasseval = myargs[MYARG3RDEVAL].floatvalue;
   spot->numreturns = myargs[MYARGNUMMOD].intvalue;
   spot->idcutoff = myargs[MYARGIDCUT].intvalue;
   spot->lencutoff = myargs[MYARGLENCUT].intvalue;
   spot->interspecies = (Boolean)myargs[MYARGSPEC].intvalue;
   spot->printasn = (Boolean)myargs[MYARGASN].intvalue;
   spot->fetchcds = (Boolean)myargs[MYARGGETCDS].intvalue;
   /*spot->ace = (Boolean)myargs[MYARGACEDB].intvalue;*/
   spot->from = myargs[MYARGFROM].intvalue;
   spot->to = myargs[MYARGTO].intvalue;
   spot->makemult = (Boolean)myargs[MYARGMULT].intvalue;
   /*KSK*/
   spot->bigintron = (Boolean)myargs[MYARGXL].intvalue; 
   spot->bigintron_size = myargs[MYARGXL_SIZE].intvalue;
   spot->repeat_db_file = myargs[MYARGREPDB].strvalue;
   /* StringICmp ignores case, so one comparison per letter is enough */
   txt = myargs[MYARGORG].strvalue;
   if (!StringICmp(txt, "d")){
       spot->organism = SPI_FLY;
   }
   else if (!StringICmp(txt, "p")){
       spot->organism = SPI_PLANT;
   }
   else if (!StringICmp(txt, "c")){
       spot->organism = SPI_CELEGANS;
   }
   else if (!StringICmp(txt, "m")){
       spot->organism = SPI_DICTY;
   }
   else {
       spot->organism = SPI_VERTEBRATE;
   }
   sap = NULL;
   if (spot->printasn)
      spot->sap_head = &sap;
   txt = myargs[MYARGSTRAND].strvalue;
   if (txt != NULL)
   {
      if (StrChr(txt, 'p') || StrChr(txt, 'P'))
         spot->strand = Seq_strand_plus;
      else
         spot->strand = Seq_strand_minus;
   } else
      spot->strand = Seq_strand_both;
   /*txt = myargs[MYARGDRAFTFILE].strvalue;
   if (txt != NULL)
      spot->draftfile = StringSave(txt);*/
   txt = myargs[MYARGDSPLICE].strvalue;
   if (txt != NULL)
   {
      sfp = FileOpen(txt, "r");
      if (sfp == NULL)
      {
         /* do not hand a NULL stream to the reader; report and carry on */
         ErrPostEx(SEV_ERROR, 0, 0, "Unable to open splice info file %s\n", txt);
      } else
      {
         SPI_GetSpliceInfo(spot, sfp, TRUE);
         FileClose(sfp);
      }
   }
   txt = myargs[MYARGASPLICE].strvalue;
   if (txt != NULL)
   {
      sfp = FileOpen(txt, "r");
      if (sfp == NULL)
      {
         ErrPostEx(SEV_ERROR, 0, 0, "Unable to open splice info file %s\n", txt);
      } else
      {
         SPI_GetSpliceInfo(spot, sfp, FALSE);
         FileClose(sfp);
      }
   }

   /* ---- align each mRNA to the genomic sequence ---- */
   h_head = h_prev = NULL;
   srip_head = srip_prev = NULL;
   while (spim != NULL)
   {
      spot->lcaseloc = spim->lcaseloc;
      if (spot->draftfile == NULL)
         srip = SPI_AlnSinglemRNAToGen(spig_head, spim, ofp, ofp2, spot);
      else
      {
         hptr = SPI_AlnSinglemRNAToPieces(spig_head, spim, ofp, ofp2, spot);
         /* skip empty results so h_prev never becomes NULL while h_head is set */
         if (hptr != NULL)
         {
            if (h_head != NULL)
            {
               h_prev->next = hptr;
               h_prev = hptr;
            } else
               h_head = h_prev = hptr;
         }
      }
      if (srip != NULL)
      {
         if (srip_head != NULL)
         {
            srip_prev->next = srip;
            srip_prev = srip;
         } else
            srip_head = srip_prev = srip;
      }
      spim = spim->next;
   }
   if (spot->makemult)
   {
      SPI_MakeMultipleAlignment(srip_head);
      SPI_PrintMultipleAlignment(srip_head, FALSE, spig_head->bsp, ofp);
      SPI_RegionListFree(srip_head);
   } else
      SPI_RegionListFree(srip_head);
   /* create the ASN.1 output, if requested; need to use the continuous alignment */
   /* that was generated */
   if (spot->printasn && *(spot->sap_head) != NULL && spot->draftfile == NULL)
   {
      sanp = SeqAnnotForSeqAlign(*(spot->sap_head));
      txt = myargs[MYARGASNFILE].strvalue;
      aip = AsnIoOpen(txt, "w");
      SeqAnnotAsnWrite(sanp, aip, NULL);
      AsnIoClose(aip);
      SeqAlignSetFree(*(spot->sap_head));
   }
   FileClose(ofp);
   FileClose(ofp2);
   SPI_OptionsFree(spot);
   SPI_bsinfoFreeList(spim_head);
   SPI_bsinfoFreeList(spig_head);
   LocalSeqFetchDisable();
   ID1BioseqFetchDisable();
   return 0;
}
Example #13
0
/* Button callback that computes scores with PredictCCBioseq for one protein
 * and shows them as a graph in a new window.
 *
 * b - button whose object-extra is the XOSPtr holding the gi, the FASTA file
 *     name, the gather scope and the pcc parameter block (pccp).
 *
 * The sequence entry is fetched from Entrez when gi > 0, otherwise read from
 * the FASTA file.  The Bioseq picked by the GetBioseq gather callback must be
 * an amino-acid sequence.  Scores are multiplied by 100 and attached to a new
 * SeqGraph that is drawn in a fixed window.
 *
 * Every path that gets past opening the input now runs the same cleanup:
 * the SeqEntry is freed, EntrezFini is called when Entrez was initialised,
 * and the FASTA file is closed.  Previously the early returns skipped all of
 * that.  pccp->res is released on every exit after ReadPccData succeeded.
 */
static void CcpProc (ButtoN b)
{
  Boolean     flagHaveNet;

  SeqEntryPtr sep = NULL;
  Int4        i, gi;
  CharPtr     fastafile;
  FILE        *fiop = NULL;
  CharPtr     title;

  GatherScopePtr      gsp;
  XOSPtr              xosp;
  XISPtr              xisp;

  FloatHi        minscore, maxscore;
  FloatHiPtr     pcs, pccscr;
  SeqGraphPtr    sgp;

  WindoW     w;
  PaneL      p;

  if ((xosp = (XOSPtr) GetObjectExtra (b)) == NULL)
    return;

  if (ReadPccData (xosp->pccp) == 0)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101,
               "Could not open or read %s data file", xosp->pccp->pccdatafile);
    ErrShow ();
    return;
  }

  gsp = xosp->gsp;
  gi = xosp->gi;
  fastafile = xosp->filename;

  /* acquire the sequence entry; nothing needs cleanup if opening fails */
  if (gi > 0)
  {
    if (!EntrezInit ("ccpv", FALSE, &flagHaveNet))
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
                 "Entrez init failed");
      ErrShow ();
      xosp->pccp->res = (CharPtr) MemFree (xosp->pccp->res);
      return;
    }
    sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_BIOSEQ);
  }
  else if (fastafile != NULL)
  {
    if ((fiop = FileOpen (fastafile, "r")) == NULL)
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                 "Failed to open FastA file");
      ErrShow ();
      xosp->pccp->res = (CharPtr) MemFree (xosp->pccp->res);
      return;
    }
    sep = FastaToSeqEntry (fiop, FALSE);
  }

  if (sep == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
               "No seqentry");
    ErrShow ();
    xosp->pccp->res = (CharPtr) MemFree (xosp->pccp->res);
    goto close_input;
  }

  xosp->sep = sep;
  xosp->bsp = NULL;
  xosp->gi = gi;
  GatherSeqEntry (sep, (Pointer) xosp, GetBioseq,
                  (Pointer) gsp);

  if (xosp->bsp == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "No Bioseq in SeqEntry");
    ErrShow ();
    xosp->pccp->res = (CharPtr) MemFree (xosp->pccp->res);
    goto free_entry;
  }
  if (!ISA_aa (xosp->bsp->mol))
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "Not an amino acid Bioseq");
    ErrShow ();
    xosp->pccp->res = (CharPtr) MemFree (xosp->pccp->res);
    goto free_entry;
  }

  pcs = pccscr = PredictCCBioseq (xosp->bsp, 0, xosp->bsp->length-1,
                                  xosp->pccp);
  xosp->pccp->res = (CharPtr) MemFree (xosp->pccp->res);
  if (pccscr == NULL)
    goto free_entry;

  /* scale every score by 100 */
  for (i = 0; i < xosp->bsp->length; i++)
  {
    *pcs *= 100.0;
    pcs++;
  }
  minscore = 0.0;
  maxscore = 100.0;

  title = FastaTitle (xosp->bsp, "CCP: >", NULL);

  /* allocate the window data before the graph so that a failure of either
     can be undone with MemFree alone */
  xisp = MemNew (sizeof (XIS));
  if (xisp == NULL)
  {
    MemFree (pccscr);
    MemFree (title);
    goto free_entry;
  }
  if ((sgp = SeqGraphNew ()) == NULL)
  {
    MemFree (xisp);
    MemFree (pccscr);
    MemFree (title);
    goto free_entry;
  }

  xisp->sgp = sgp;
  xisp->Xscale = 1;
  xisp->Yscale = 1;
  xisp->Xaxislen = 250;
  xisp->Yaxislen = 100;
  xisp->Xprelen = 0;
  xisp->Yprelen = 50;
  xisp->Xpostlen = 50;
  xisp->Ypostlen = 50;
  sgp->loc = SeqLocIntNew (0, xosp->bsp->length-1, xosp->bsp->strand,
                           xosp->bsp->id);
  sgp->title = title;
  sgp->flags[2] = 1;
  sgp->numval = xosp->bsp->length;
  sgp->max.realvalue = maxscore;
  sgp->min.realvalue = minscore;
  sgp->values = (Pointer) pccscr;

  w = FixedWindow (-50, -50, -10, -10, "Ccp", CloseGraphWindowProc);
  SetObjectExtra (w, xisp, NULL);
  p = SimplePanel (w,
                   (Int2) (xisp->Xprelen+xisp->Xaxislen+xisp->Xpostlen),
                   (Int2) (xisp->Yprelen+xisp->Yaxislen+xisp->Ypostlen),
                   DrawGraph);
  SetPanelClick (p, NULL, NULL, NULL, CloseGraphPanelProc);
  RealizeWindow (w);
  Show (w);

free_entry:
  xosp->sep = sep = SeqEntryFree (sep);

close_input:
  if (gi > 0)
    EntrezFini ();
  if (fiop != NULL)
    FileClose (fiop);
  return;
}
Example #14
0
/*
 * Main -- entry point of the "srchaa" pattern search program.
 *
 * Reads one protein SeqEntry from Entrez (myargs[0], a gi) or a stream of
 * SeqEntries from a FastA file (myargs[1]), matches every compiled pattern
 * against each protein Bioseq, and prints the hits to stdout.
 *
 * Command-line slots used here (indices shift by one when NO_TAX_NET is
 * defined, because the taxonomy switch 'ia' does not exist then):
 *   myargs[0]        gi to fetch from Entrez (used when > 0)
 *   myargs[1]        FastA file name (used when gi is not > 0)
 *   myargs[2], [3]   forwarded unchanged to ReadPrositePattern
 *   ia               non-zero: initialise taxonomy and look up the taxon
 *   ib               non-zero: use protease_file and print Start/Stop/M.W.
 *   ic               non-zero: sort protease hits by molecular weight
 *   id               non-zero: use the default names file
 *   ie               names file given by the user (overrides 'id')
 *   ig               pattern given by the user (replaces the pattern files)
 *   ih               non-zero: print the sequence title only when a
 *                    pattern matched (otherwise it is printed up front)
 *   ii               forwarded unchanged to PatternMatchBioseq
 *
 * Returns 1 when argument parsing fails and 0 on success; every other
 * failure posts an error and terminates the process with exit (1).
 */
Int2 Main (void)
{
    Int2        argcount;
    Boolean     flagHaveNet;
    int         copied;

    Int4        gi;
    SeqEntryPtr sep;
    ComPatPtr   cpp, cpph = NULL;
    SeqAlignPtr sap, sapn;
    StdSegPtr   ssp;
    SeqLocPtr   slp, slpn;
    Int4        start, stop;

    FILE        *fiop;
    Char        fastafile[256], namesfile[256];
    CharPtr     title;
    CharPtr     taxon;

    FloatHi     mw;
    ValNodePtr  namelist = NULL;

    static CharPtr pattern_file = "ncbipros.dat";
    static CharPtr protease_file = "ncbiendo.dat";
    static CharPtr names_file = "ncbipnam.dat";

    static GatherScope  gs;
    GatherScopePtr      gsp;
    static Gather_PBS   gpbs;
    Gather_PBSPtr       gpbsp;

#ifndef NO_TAX_NET
    Int4   i;
    static Char taxdata[8];
    static Gather_TaxId gti;
    Gather_TaxIdPtr     gtip;
#endif

#ifndef NO_TAX_NET
    Int2   ia=4, ib=5, ic=6, id=7, ie=8, ig=9, ih=10, ii=11;
#else
    Int2         ib=4, ic=5, id=6, ie=7, ig=8, ih=9,  ii=10;
#endif

    argcount = sizeof (myargs) / sizeof (Args);
    if (!GetArgs ("ProSiteSearch", argcount, myargs))
        return 1;

    if (myargs[0].intvalue == 0 && myargs[1].strvalue == NULL)
    {
        ErrPostEx (SEV_ERROR, TOP_ERROR, 100,
                   "No gi or FastA file given :: for help :   srchaa -");
        ErrShow ();
        exit (1);
    }

    gsp = &gs;

#ifndef NO_TAX_NET
    gtip = &gti;
#endif
    gpbsp = &gpbs;

    /* Start with every object type ignored, then enable Bioseqs only. */
    MemSet ((Pointer) gsp, 0, sizeof (GatherScope));
    MemSet ((Pointer) gsp->ignore, (int) (TRUE),
            (size_t) (OBJ_MAX * sizeof (Boolean)));

    gsp->ignore[OBJ_SEQDESC] = TRUE;
    gsp->ignore[OBJ_BIOSEQ] = FALSE;

    gpbsp->bsp = NULL;

    gi = myargs[0].intvalue;
    if (myargs[1].strvalue != NULL)
    {
        /* The name comes straight from the command line: copy it with an
           explicit bound and refuse names that do not fit the buffer. */
        copied = snprintf (fastafile, sizeof (fastafile), "%s",
                           myargs[1].strvalue);
        if (copied < 0 || (size_t) copied >= sizeof (fastafile))
        {
            ErrPostEx (SEV_ERROR, TOP_ERROR, 107,
                       "FastA file name too long");
            ErrShow ();
            exit (1);
        }
    }
    else
    {
        fastafile[0] = '\0';
    }

    if (gi > 0)
    {
        if (!EntrezInit ("srchaa", FALSE, &flagHaveNet))
        {
            ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
                       "Entrez init failed");
            ErrShow ();
            exit (1);
        }
    }

#ifndef NO_TAX_NET
    if (myargs[ia].intvalue)
    {
        if (!TaxArchInit ())
        {
            ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                       "Taxonomy init failed");
            ErrShow ();
            exit (1);
        }
    }
#endif

    /* Fetch the first SeqEntry; fiop stays NULL on the Entrez path. */
    fiop = NULL;
    if (gi > 0)
    {
        sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_BIOSEQ);
    }
    else
    {
        if ((fiop = FileOpen (fastafile, "r")) == NULL)
        {
            ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                       "Failed to open FastA file: %s", fastafile);
            ErrShow ();
            exit (1);
        }
        sep = FastaToSeqEntry (fiop, FALSE);
    }

    if (sep == NULL)
    {
        ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
                   "No seqentry found");
        ErrShow ();
        exit (1);
    }

    while (sep != NULL)
    {
        /* Pass 1 over the entry: pick up the Bioseq. */
        gsp->ignore[OBJ_SEQDESC] = TRUE;
        gsp->ignore[OBJ_BIOSEQ] = FALSE;
        gpbsp->bsp = NULL;
        gpbsp->gi = gi;
        GatherSeqEntry (sep, (Pointer) gpbsp, GetBioseq, (Pointer) gsp);

        taxon = NULL;
#ifndef NO_TAX_NET
        if (myargs[ia].intvalue)
        {
            for (i = 0; i < (Int4) sizeof (taxdata); i++)
                taxdata[i] = '-';
            taxon = taxdata;

            /* Pass 2 over the entry: descriptors only, to find the taxid. */
            gsp->ignore[OBJ_SEQDESC] = FALSE;
            gsp->ignore[OBJ_BIOSEQ] = TRUE;

            gtip->taxid = 0;
            GatherSeqEntry (sep, (Pointer) gtip, GetTaxId, (Pointer) gsp);

            if (gtip->taxid != 0)
                WhatOrg (gtip->taxid, taxon);
            else
                taxon = NULL;
        }
#endif

        if (gpbsp->bsp != NULL)
        {
            if (ISA_aa (gpbsp->bsp->mol))
            {
                /* Patterns are compiled once, on the first protein seen,
                   and reused for every later entry. */
                if (cpph == NULL)
                {
                    namesfile[0] = '\0';
                    if (myargs[id].intvalue)
                        StrCpy (namesfile, names_file);
                    if (myargs[ie].strvalue != NULL)
                    {
                        /* User-supplied name: bounded copy, as for the
                           FastA file name above. */
                        copied = snprintf (namesfile, sizeof (namesfile),
                                           "%s", myargs[ie].strvalue);
                        if (copied < 0
                            || (size_t) copied >= sizeof (namesfile))
                        {
                            ErrPostEx (SEV_ERROR, TOP_ERROR, 107,
                                       "Names file name too long");
                            ErrShow ();
                            exit (1);
                        }
                    }

                    if (myargs[ig].strvalue != NULL)
                    {
                        if ((cpph = CompilePattern (myargs[ig].strvalue, 1)) != NULL)
                            StrCpy (cpph->name, "User Pattern");
                    }
                    else
                    {
                        namelist = ReadPatternNames (namesfile);
                        if (myargs[ib].intvalue)
                            cpph = ReadPrositePattern (protease_file,
                                                       (Boolean) myargs[2].intvalue,
                                                       myargs[3].intvalue,
                                                       taxon, NULL);
                        else
                            cpph = ReadPrositePattern (pattern_file,
                                                       (Boolean) myargs[2].intvalue,
                                                       myargs[3].intvalue,
                                                       taxon, namelist);
                    }
                }

                if (!(Boolean) myargs[ih].intvalue)
                {
                    title = FastaTitle (gpbsp->bsp, ">", NULL);
                    printf ("%s\n", title);
                    MemFree (title);
                }
                cpp = cpph;
                while (cpp != NULL)
                {
                    sap = PatternMatchBioseq (gpbsp->bsp, cpp,
                                              (Int4)myargs[ii].intvalue);
                    if (myargs[ib].intvalue)
                    {
                        /* Protease mode: one header per pattern, then one
                           Start/Stop/M.W. row per alignment. */
                        printf (">%s\n", cpp->name);
                        if (sap != NULL)
                            printf ("   Start     Stop       M.W.\n");

                        EmbedMolecularWeightInfo (sap, gpbsp->bsp);
                        if (myargs[ic].intvalue)
                            URK_SeqAlignSortByMolWt (&sap);
                        while (sap != NULL)
                        {
                            ssp = (StdSegPtr) sap->segs;
                            slp = ssp->loc;
                            start = SeqLocStart (slp);
                            stop = SeqLocStop (slp);
                            mw = ssp->scores->value.realvalue;
                            printf ("%8ld %8ld    %9.2f\n",
                                    (long) start+1, (long) stop+1, mw);
                            /* Save the link before the node is freed. */
                            sapn = sap->next;
                            SeqAlignFree (sap);
                            sap = sapn;
                        }
                    }
                    else
                    {
                        /* Pattern mode: one Start/Stop/name row per hit. */
                        slp = MatchSa2Sl (&sap);
                        if (myargs[ih].intvalue && slp != NULL)
                        {
                            title = FastaTitle (gpbsp->bsp, ">", NULL);
                            printf ("%s\n", title);
                            MemFree (title);
                        }
                        while (slp != NULL)
                        {
                            start = SeqLocStart (slp);
                            stop = SeqLocStop (slp);
                            printf ("%8ld %8ld    %s\n",
                                    (long) start+1, (long) stop+1, cpp->name);
                            /* Save the link before the node is freed. */
                            slpn = slp->next;
                            SeqLocFree (slp);
                            slp = slpn;
                        }
                    }
                    cpp = cpp->nextpattern;
                }
            }
            else
            {
                ErrPostEx (SEV_ERROR, TOP_ERROR, 106,
                           "Not a protein bioseq");
                ErrShow ();
                exit (1);
            }
        }
        else
        {
            ErrPostEx (SEV_ERROR, TOP_ERROR, 105,
                       "No bioseq found");
            ErrShow ();
            exit (1);
        }

        /* Only the FastA path can supply further entries. */
        SeqEntryFree (sep);
        sep = NULL;
        if (fiop != NULL)
            sep = FastaToSeqEntry (fiop, FALSE);
    }

    ComPatFree (cpph);
    ValNodeFreeData (namelist);
    FileClose (fiop);
    if (gi > 0)
        EntrezFini ();
#ifndef NO_TAX_NET
    if (myargs[ia].intvalue)
        TaxArchFini ();
#endif
    return 0;
}
Example #15
0
/*
 * VSMExportFeatureTableBioseqCallback -- per-Bioseq callback that writes
 * the feature table for one Bioseq to ftp->fp.
 *
 *   bsp       Bioseq to export; ignored when NULL.
 *   userdata  FeatureTablePtr carrying the output file and the export
 *             options (show_nucs, show_prots, hide_sources,
 *             export_only_selected, suppress_protein_ids); ignored when
 *             NULL or when its fp is NULL.
 *
 * Nucleotide / protein Bioseqs are skipped unless the matching show_*
 * option is set.  The Bioseq is formatted paragraph by paragraph in
 * FTABLE_FMT; feature and source-feature paragraphs are filtered by the
 * options, every other paragraph is always written.
 */
static void VSMExportFeatureTableBioseqCallback (BioseqPtr bsp, Pointer userdata)

{
  FeatureTablePtr ftp;
  CstType         custom_flags = 0;
  Asn2gbJobPtr    ajp;
  BaseBlockPtr    bbp;
  XtraBlock       extra;
  Int4            index;
  CharPtr         string;
  Boolean         is_feature;
  Boolean         write_block;
  
  if (bsp == NULL || userdata == NULL) return;
  
  ftp = (FeatureTablePtr) userdata;
  if (ftp->fp == NULL) return;
  if (!ftp->show_nucs && ISA_na (bsp->mol)) return;
  if (!ftp->show_prots && ISA_aa (bsp->mol)) return;

  if (ftp->hide_sources)
  {
    custom_flags |= HIDE_SOURCE_FEATS;
  }
  MemSet ((Pointer) &extra, 0, sizeof (XtraBlock));
  ajp = asn2gnbk_setup (bsp, NULL, NULL, FTABLE_FMT, DUMP_MODE, NORMAL_STYLE,
                           0, 0, custom_flags, &extra);
  /* Without a job there is nothing to format, query or clean up. */
  if (ajp == NULL) return;

  /* When exporting only selected features, a Bioseq with none selected
     produces no output at all. */
  if (!ftp->export_only_selected
      || BioseqHasSelectedFeatures (ajp, ftp->hide_sources))
  {
    for (index = 0; index < ajp->numParagraphs; index++) 
    {
      bbp = ajp->paragraphArray [index];
      if (bbp == NULL) continue;

      is_feature = (Boolean) (bbp->blocktype == FEATURE_BLOCK);
      if (is_feature)
      {
        write_block = (Boolean) (!ftp->export_only_selected
                                 || IsBaseBlockFeatureSelected (bbp));
      }
      else if (bbp->blocktype == SOURCEFEAT_BLOCK)
      {
        write_block = (Boolean) (!ftp->hide_sources
                                 && (!ftp->export_only_selected
                                     || IsBaseBlockFeatureSelected (bbp)));
      }
      else
      {
        /* Paragraphs that are not features are always written. */
        write_block = TRUE;
      }
      if (!write_block) continue;

      string = asn2gnbk_format (ajp, (Int4) index);
      /* Guard against a NULL result: handing NULL to "%s" is undefined. */
      if (string != NULL)
      {
        /* Protein ID lines are removed from feature paragraphs only. */
        if (is_feature && ftp->suppress_protein_ids)
        {
          ExciseProteinIDLine (string);
        }
        fprintf (ftp->fp, "%s", string);
      }
      MemFree (string);
    }
  }
  asn2gnbk_cleanup (ajp);
}