Esempio n. 1
0
/*
 * Round-trips a SeqEntry through an in-memory ASN.1 buffer.
 *
 * Reads the entry from the file named by myargs[0] (opened "rb" when
 * myargs[1] is set, "r" otherwise), packs it, writes it with mode "wb"
 * into a memory buffer, reads it back from that buffer and finally writes
 * it to the file named by myargs[2] (opened with mode "w").
 *
 * Returns 0 on success and 1 as soon as any step fails; everything
 * allocated so far is released before returning.
 */
Int2 Main(void)
{
	AsnIoPtr aip;
	SeqEntryPtr sep;
	BytePtr buf;
	Uint2 size = 32000, count;
	AsnIoMemPtr aimp;

					/* check command line arguments */

	if ( ! GetArgs("testmem",NUMARG, myargs))
		return 1;

					/* load the sequence alphabets  */
					/* (and sequence parse trees)   */
	if (! SeqEntryLoad())
	{
		ErrShow();
		return 1;
	}

					/* open the ASN.1 input file in the right mode */

	aip = AsnIoOpen (myargs[0].strvalue, myargs[1].intvalue?"rb":"r");
	if (aip == NULL)
	{
		ErrShow();
		return 1;
	}
	sep = SeqEntryAsnRead(aip, NULL);   	/* read the entry */
	AsnIoClose(aip);
	if (sep == NULL)
	{
		ErrShow();
		return 1;
	}

	SeqEntryPack(sep);						/* pack it */

	buf = MemNew(size);                     /* allocate a buffer */
	if (buf == NULL)
	{
		SeqEntryFree(sep);
		ErrShow();
		return 1;
	}
	aimp = AsnIoMemOpen("wb", buf, size);	/* open to write asn1 to it */
	if (aimp == NULL)
	{
		MemFree(buf);
		SeqEntryFree(sep);
		ErrShow();
		return 1;
	}
	SeqEntryAsnWrite(sep, aimp->aip, NULL);	/* write it */
	AsnIoFlush(aimp->aip);					/* flush it */
	count = aimp->count;					/* record how many bytes in it */
	AsnIoMemClose(aimp);					/* close it */

	SeqEntryFree(sep);						/* release the object */

	aimp = AsnIoMemOpen("rb", buf, count);	/* open to read from buffer */
	if (aimp == NULL)
	{
		MemFree(buf);
		ErrShow();
		return 1;
	}
	sep = SeqEntryAsnRead(aimp->aip, NULL);	/* read it */
	AsnIoMemClose(aimp);					/* close it */
	MemFree(buf);							/* free the buffer */
	if (sep == NULL)
	{
		ErrShow();
		return 1;
	}

				  				/* open the output file */

	aip = AsnIoOpen (myargs[2].strvalue, "w");
	if (aip == NULL)
	{
		SeqEntryFree(sep);
		ErrShow();
		return 1;
	}
	SeqEntryAsnWrite(sep, aip, NULL);	/* print it */
	AsnIoClose(aip);

	SeqEntryFree(sep);

	return(0);
}
Esempio n. 2
0
static void CleanUpXOS (XOSPtr xosp)
{
  SeqLocPtr slp, slpn;
  SeqIdPtr  id;

  if (xosp != NULL)
  {
    xosp->gsp  = NULL;          /* is static */
    xosp->filename = (CharPtr) MemFree (xosp->filename);
    xosp->sep = SeqEntryFree (xosp->sep);
    xosp->bsp = NULL;           /* should be in seqentry */
    xosp->gcd = NULL;
    xosp->gcdi = NULL;
    slp = xosp->slpa;
    while (slp != NULL)
    {
      slpn = slp->next;
      id = SeqLocId (slp);
      if (id != NULL)
        id->next = SeqIdSetFree (id->next);
      SeqLocFree (slp);
      slp = slpn;
    }
    xosp->slpa = slp;
    slp = xosp->slpb;
    while (slp != NULL)
    {
      slpn = slp->next;
      id = SeqLocId (slp);
      if (id != NULL)
        id->next = SeqIdSetFree (id->next);
      SeqLocFree (slp);
      slp = slpn;
    }
    xosp->slpb = slp;
    slp = xosp->slps;
    while (slp != NULL)
    {
      slpn = slp->next;
      id = SeqLocId (slp);
      if (id != NULL)
        id->next = SeqIdSetFree (id->next);
      SeqLocFree (slp);
      slp = slpn;
    }
    xosp->slps = slp;
    slp = xosp->slpk;
    while (slp != NULL)
    {
      slpn = slp->next;
      id = SeqLocId (slp);
      if (id != NULL)
        id->next = SeqIdSetFree (id->next);
      SeqLocFree (slp);
      slp = slpn;
    }
    xosp->slpk = slp;
  }
  return;
}
Esempio n. 3
0
/*
 * Wraps a bare Bioseq or BioseqSet in a newly allocated SeqEntry.
 *
 * Does nothing when entityID already has a top SeqEntry or when the
 * SeqEntry cannot be allocated.  For OBJ_BIOSEQ the wrapper gets choice 1,
 * for OBJ_BIOSEQSET choice 2, and in both cases the wrapper is handed to
 * SeqMgrSeqEntry together with the data pointer.  Any other datatype
 * releases the wrapper again.
 */
static void PromoteToSeqEntry (Uint2 entityID, Uint2 datatype, Pointer dataptr)

{
    SeqEntryPtr  wrapper;

    /* already promoted: a top-level SeqEntry exists for this entity */
    if (GetTopSeqEntryForEntityID (entityID) != NULL) return;

    wrapper = SeqEntryNew ();
    if (wrapper == NULL) return;

    switch (datatype) {
        case OBJ_BIOSEQ :
            wrapper->choice = 1;
            wrapper->data.ptrvalue = (BioseqPtr) dataptr;
            SeqMgrSeqEntry (SM_BIOSEQ, dataptr, wrapper);
            break;
        case OBJ_BIOSEQSET :
            wrapper->choice = 2;
            wrapper->data.ptrvalue = (BioseqSetPtr) dataptr;
            SeqMgrSeqEntry (SM_BIOSEQSET, dataptr, wrapper);
            break;
        default :
            /* unsupported data type: the empty wrapper is not needed */
            SeqEntryFree (wrapper);
            break;
    }
}
Esempio n. 4
0
/*
 * Fetches the records listed in lsp and writes each one to fp.
 *
 * With makeCDS set and db == TYP_NT, the coding regions of every record
 * are extracted through ExtractCodingRegions; otherwise the record is
 * written as a flat file (GenPept for TYP_AA, GenBank for TYP_NT) followed
 * by two newlines when the write succeeds.  All fetched entries are freed
 * before returning.  Nothing happens when lsp is NULL or empty.
 *
 * NOTE(review): for a db that is neither TYP_AA nor TYP_NT the flat-file
 * format keeps the value TYP_NT, which is a database constant rather than
 * a format constant - confirm this fallback is intended.
 */
static void ReadPubSeqRecords (LinkSetPtr lsp, Int2 db, Boolean makeCDS, FILE *fp)

{
  SeqEntryPtr PNTR  entries;  /* see <objsset.h> */
  SeqEntryPtr       entry;
  Uint2             entityID;
  Int2              fetched;
  Uint1             flatFormat;
  Int4              idx;
  Boolean           wantCDS;

  if (lsp == NULL || lsp->num == 0 || lsp->uids == NULL) return;

  entries = (SeqEntryPtr PNTR) MemNew (lsp->num * sizeof (SeqEntryPtr));
  if (entries == NULL) return;

  /* EntrezSeqEntryListGet get a maximum of 32767 records at once */
  fetched = EntrezSeqEntryListGet (entries, lsp->num, lsp->uids, 0, FALSE);

  switch (db) {
    case TYP_AA :
      flatFormat = GENPEPT_FMT;
      break;
    case TYP_NT :
      flatFormat = GENBANK_FMT;
      break;
    default :
      flatFormat = TYP_NT;
      break;
  }
  wantCDS = (Boolean) (makeCDS && db == TYP_NT);

  for (idx = 0; idx < fetched; idx++) {
    entry = entries [idx];
    if (entry == NULL) continue;

    /* indexing of features */
    entityID = SeqMgrIndexFeatures (0, entry);

    if (wantCDS) {
      /* uses new explore functions to extract coding regions */
      SeqMgrExploreBioseqs (entityID, NULL, (Pointer) fp, ExtractCodingRegions, TRUE, FALSE, FALSE);
      continue;
    }

    /* the following call saves the record in GenBank or GenPept format */
    if (SeqEntryToFlat (entry, fp, flatFormat, RELEASE_MODE)) {
      fprintf (fp, "\n\n");
    }
  }

  /* release every slot of the array, not only the ones that were fetched */
  for (idx = 0; idx < lsp->num; idx++) {
    entries [idx] = SeqEntryFree (entries [idx]);
  }
  MemFree (entries);
}
Esempio n. 5
0
/*****************************************************************************
*
*   Opens one compressed ASN.1 file and reads its SeqEntrys.
*   Calls ProcessSeqEntry to do the actual work on each entry; results are
*   appended to outputfile.
*
*   flp        - file to read (flp->fdir / flp->fname below root); a NULL
*                flp is ignored
*   root       - root directory the file path is built from
*   outputfile - file opened in append mode for the output
*
*   Progress is reported through the global monitor pmon when it is set.
*   Only files whose document type matches the global is_na flag are
*   processed.
*
*   NOTE(review): StringCpy of root into path is still unbounded; root is
*   assumed to fit in PATH_MAX - confirm against the callers.
*
*****************************************************************************/
static void ProcessFile (FileListPtr flp, CharPtr root, CharPtr outputfile)
{
  CASN_Handle  casnh;
  FILE         *fp;
  Char         path [PATH_MAX];
  SeqEntryPtr  sep;
  CASN_Type    type;
  /* room for "Processing " + name (<= PATH_MAX) + " Entry " + counter */
  Char         buf [PATH_MAX + 64];
  Int4         ctr = 0;

  if (flp == NULL) return;

  fp = FileOpen (outputfile, "a");
  if (fp == NULL) {
    Message (MSG_FATAL, "Unable to reopen output file [%s]", outputfile);
    return;
  }

  if (pmon != NULL) {
    /* precision keeps "Opening [" + name + "]" + NUL inside path */
    sprintf (path, "Opening [%.*s]", (int) (sizeof (path) - 11), flp->fname);
    MonitorStrValue (pmon, path);
  }

  StringCpy (path, root);
  FileBuildPath (path, flp->fdir, NULL);
  FileBuildPath (path, NULL, flp->fname);

  casnh = CASN_Open (path);
  if (casnh == NULL) {
    Message (MSG_ERROR, "Can't open [%s]", path);
    FileClose (fp);
    return;
  }

  type = is_na ? CASN_Type_nt : CASN_Type_aa;
  if (CASN_DocType (casnh) == type) {
    while ((sep = CASN_NextSeqEntry (casnh)) != NULL) {
      if (pmon != NULL) {
        ctr++;
        /* the original 40-byte buffer overflowed on longer file names */
        sprintf (buf, "Processing %.*s Entry %ld",
                 (int) PATH_MAX, flp->fname, (long) ctr);
        MonitorStrValue (pmon, buf);
      }
      ProcessSeqEntry (sep, fp);
      SeqEntryFree (sep);
    }
  }

  CASN_Close (casnh);
  FileClose (fp);
}
Esempio n. 6
0
/*
 * Close callback for the parent window.
 *
 * Releases the XOS data attached to the window (file name, SeqEntry and
 * the structure itself), removes the window and quits the program.
 */
static void CloseConsortParentProc (WindoW w)
{
  XOSPtr  attached;

  attached = (XOSPtr) GetObjectExtra (w);
  if (attached != NULL)
  {
    /* borrowed pointers are only cleared, never freed here */
    attached->gsp = NULL;       /* is static */
    MemFree (attached->filename);
    attached->sep = SeqEntryFree (attached->sep);
    attached->bsp = NULL;       /* should be in seqentry */
    MemFree (attached);
  }

  Remove (w);
  QuitProgram ();
}
Esempio n. 7
0
/*
 * Decides whether the id in pair->sip_replace may replace the local id.
 *
 * The replacement sequence is fetched with FetchRead and compared with
 * seq_str on the plus strand and then on the minus strand.  On a match
 * pair->ti is filled from the fetched Bioseq's id list, and on a
 * minus-strand match pair->is_complement is set as well.
 *
 * Returns TRUE on a match.  Returns FALSE when seq_str has no text, when
 * pair or pair->sip_replace is NULL, when the sequence cannot be fetched or
 * located, or when it does not match; the last three cases are reported
 * through ReportInvalidReplacement with has_errors.
 *
 * The empty-seq_str check used to set rval without stopping the function,
 * so the far fetch and the comparison ran anyway; it is now the first arm
 * of the if/else chain.
 */
static Boolean OkToReplaceId (SeqIdPairPtr pair, CharPtr seq_str, char *has_errors)
{
  Boolean rval = FALSE;
  SeqEntryPtr fetched_sep, old_scope;
  BioseqPtr   bsp_replace;

  if (StringHasNoText (seq_str)) {
    /* nothing to compare against: skip the fetch entirely */
    rval = FALSE;
  } else if (pair == NULL || pair->sip_replace == NULL) {
    rval = FALSE;
  } else if ((fetched_sep = FetchRead (pair->sip_replace)) == NULL) {
    rval = FALSE;
    ReportInvalidReplacement (pair->sip_replace, "Unable to fetch far sequence", has_errors);
  } else {
    /* look the Bioseq up inside the fetched entry only, then restore scope */
    old_scope = SeqEntrySetScope (fetched_sep);
    bsp_replace = BioseqFind (pair->sip_replace);
    SeqEntrySetScope (old_scope);
    if (bsp_replace == NULL) {
      rval = FALSE;
      ReportInvalidReplacement (pair->sip_replace, "Unable to locate far sequence after fetch", has_errors);
    } else if (DoesSeqStringMatchBsp (seq_str, bsp_replace, Seq_strand_plus)) {
      /* matches */
      rval = TRUE;
      pair->ti = GetTraceIDFromIdList (bsp_replace->id);
    } else if (DoesSeqStringMatchBsp (seq_str, bsp_replace, Seq_strand_minus)) {
      /* matches on complement */
      pair->is_complement = TRUE;
      rval = TRUE;
      pair->ti = GetTraceIDFromIdList (bsp_replace->id);
    } else {
      /* later, are we going to try to find trim lengths? */
      rval = FALSE;
      ReportInvalidReplacement (pair->sip_replace, "Replacement does not match local", has_errors);
    }
    SeqEntryFree (fetched_sep);
  }
  return rval;
}
Esempio n. 8
0
/*
 * (Re)loads the SeqEntry described by xosp and locates its Bioseq.
 *
 * The previously held SeqEntry is freed first.  The new entry comes from
 * Entrez when xosp->gi > 0, otherwise from the ASN.1 file xosp->filename.
 * GatherSeqEntry with the GetBioseq callback is then run over the entry
 * with xosp as user data, and the result is read back from xosp->bsp.
 *
 * When no Bioseq is found, or the one found is not a nucleic acid, the
 * entry and the file name are freed, gi is reset to 0 and an error is
 * posted.
 *
 * xosp->bsp is cleared as soon as the old entry is freed.  It used to keep
 * its old value, and a failed load then dereferenced it in the ISA_na test
 * (the other XOS code in this file notes that bsp "should be in seqentry").
 */
static void GetThisBioseq (XOSPtr xosp)
{
  GatherScopePtr  gsp;
  CharPtr         asnseqentfile;
  Int4            gi;
  AsnIoPtr        aiop = NULL;
  Boolean         flagHaveNet;
  SeqEntryPtr     sep = NULL;

  gsp = xosp->gsp;
  asnseqentfile = xosp->filename;
  gi = xosp->gi;

  xosp->sep = SeqEntryFree (xosp->sep);
  xosp->bsp = NULL;             /* do not keep a pointer into the freed entry */

  if (gi > 0)
  {
    if (!EntrezInit ("twopv", FALSE, &flagHaveNet))
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
                 "Entrez init failed");
      ErrShow ();
      return;
    }
    sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_BIOSEQ);
  }
  else if (asnseqentfile != NULL)
  {
    aiop = AsnIoOpen (asnseqentfile, "r");
    if (aiop == NULL)
    {
      Message (MSG_ERROR, "Failed to open: %s", asnseqentfile);
    }
    else
    {
      /* only read when the file could actually be opened */
      sep = SeqEntryAsnRead (aiop, NULL);
    }
  }

  if (sep == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
               "No SeqEntry");
    ErrShow ();
  }
  else
  {
    xosp->sep = sep;
    xosp->gi = gi;
    GatherSeqEntry (sep, (Pointer) xosp, GetBioseq, (Pointer) gsp);
  }

  if (gi > 0)
    EntrezFini ();
  else if (aiop != NULL)
    AsnIoClose (aiop);

  if (xosp->bsp != NULL)
  {
    if (!ISA_na (xosp->bsp->mol))
    {
      xosp->sep = SeqEntryFree (xosp->sep);
      xosp->bsp = NULL;
      xosp->filename = (CharPtr) MemFree (xosp->filename);
      xosp->gi = 0;
      ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "Not nucleic acid Bioseq");
      ErrShow ();
    }
  }
  else
  {
    xosp->sep = SeqEntryFree (xosp->sep);
    xosp->filename = (CharPtr) MemFree (xosp->filename);
    xosp->gi = 0;
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "No Bioseq");
    ErrShow ();
  }
  return;
}
Esempio n. 9
0
/*
 * Prints the records identified by ids[0..count-1] to stdout in the format
 * selected by pBdata->format.
 *
 * F_GILIST only prints the ids, one per line, and returns.  For every
 * other format each record is fetched with BESeqEntryGet.  Unless
 * pBdata->allset is set, only the Bioseq carrying the requested gi is
 * printed, through a temporary SeqEntry wrapper around it.  FASTA, defline
 * and flat-file output are retried once with the opposite molecule type
 * when the first attempt fails.
 *
 * Records that cannot be fetched or located are reported with ErrPostEx
 * and skipped.
 *
 * Fixes over the previous version: the id node and the fetched entry are
 * released when BioseqFind fails, the temporary wrapper is released after
 * printing, bgbp is initialised and checked, and sip is reset to NULL after
 * each record.
 */
void BEPrintIds(BEDataPtr pBdata, Uint4 *ids, int count)
{
    Int4 i;
    SeqEntryPtr sep, sep_all;
    Boolean retvalue = TRUE;
    SeqIdPtr sip = NULL;
    BioseqPtr bsp;
    BGenBankPtr bgbp = NULL;
    AsnIoPtr aip;
    Boolean is_na = FALSE;

    if(pBdata->format == F_GILIST) {
        for(i = 0; i < count; i++)
            fprintf(stdout, "%d\n", (int) ids[i]);
        return;
    }

    if(pBdata->database == 0)
        is_na = TRUE;

    if(pBdata->format == F_ASN1_GENB) {
        bgbp = BGenBankInit();
        if(bgbp == NULL) {
            ErrPostEx(SEV_ERROR, 88, 67, "Unable to initialize GenBank "
                      "ASN.1 output");
            return;
        }
    }

    for(i = 0; i < count; i++) {

        sep_all = BESeqEntryGet(ids[i]);

        if(sep_all == NULL) {
            ErrPostEx(SEV_ERROR, 88, 67, "Retrieving of blob for the "
                      "gi=%d failed", (int)ids[i]);
            continue;
        }

        if(!pBdata->allset) {
            ObjMgrRegister(OBJ_SEQENTRY, sep_all);
            sip = ValNodeNew(NULL);
            if(sip == NULL) {
                SeqEntryFree(sep_all);
                continue;
            }
            sip->choice = SEQID_GI;
            sip->data.intvalue = ids[i];

            if((bsp = BioseqFind(sip)) == NULL) {
                ErrPostEx(SEV_ERROR, 88, 67,
                          "Error finding bioseq for gi=%d\n", (int)ids[i]);
                /* release what this iteration acquired before skipping */
                sip = ValNodeFree(sip);
                SeqEntryFree(sep_all);
                continue;
            }

            sep = SeqEntryNew();
            if(sep == NULL) {
                sip = ValNodeFree(sip);
                SeqEntryFree(sep_all);
                continue;
            }
            sep->choice = 1; /* Bioseq */
            sep->data.ptrvalue = bsp;
        } else {
            sep = sep_all;
        }

        switch(pBdata->format) {
        case F_FASTA:               /* 1 */

            if(!SeqEntryToFasta(sep, stdout, is_na)) {
                if(!SeqEntryToFasta(sep, stdout, !is_na)) {
                    ErrPostEx(SEV_ERROR, 88, 67, "Printing of FASTA format "
                              "(gi=%d) failed\r\n", (int)ids[i]);
                }
            }

            break;
        case F_ASN1:                /* 2 */

            aip = AsnIoNew(ASNIO_TEXT_OUT, stdout, NULL, NULL, NULL);
            SeqEntryAsnWrite(sep, aip, NULL);
            AsnIoClose(aip);

            break;
        case F_GILIST:              /* 3 */
            /* already handled before the loop; never reached here */
            break;
        case F_DLIST:               /* 4 */
            if (IS_Bioseq(sep))
                retvalue = SeqEntrysToDefline(sep, stdout, is_na, 3);
            else
                retvalue = SeqEntrysToDefline(sep, stdout, is_na, 0);

            if(retvalue == FALSE) {
                if (IS_Bioseq(sep))
                    retvalue = SeqEntrysToDefline(sep, stdout, !is_na, 3);
                else
                    retvalue = SeqEntrysToDefline(sep, stdout, !is_na, 0);
            }
            break;
        case F_ASN1_GENB:           /* 5 */
            retvalue = SeqEntryAsnWrite(sep, bgbp->aip, bgbp->atp);
            break;
        default:
        case F_GEN:                 /* 0 */
            /* the second attempt swaps the format; its result is ignored */
            if(!SeqEntryToFlatEx(sep_all, stdout,
                                 is_na ? GENBANK_FMT : GENPEPT_FMT,
                                 RELEASE_MODE, sip, FF_REGULAR)) {

                SeqEntryToFlatEx(sep_all, stdout,
                                 is_na ? GENPEPT_FMT : GENBANK_FMT,
                                 RELEASE_MODE, sip, FF_REGULAR);
            }
            break;
        }

        /* free only the wrapper node; its Bioseq is owned by sep_all */
        if(sep != sep_all)
            ValNodeFree(sep);
        SeqEntryFree(sep_all);
        sip = ValNodeFree(sip);
    }

    if(pBdata->format == F_ASN1_GENB)
        BGenBankClose(bgbp);

    return;
}
Esempio n. 10
0
static void DoQuery (
  FILE *fp,
  FILE *dfp,
  XtraPtr extra,
  Boolean get_var,
  Boolean do_nuc,
  Boolean do_prot
)

{
  Entrez2BooleanReplyPtr  e2br;
  Entrez2IdListPtr        e2lp;
  Entrez2RequestPtr       e2rq;
  Entrez2ReplyPtr         e2ry;
  Int4                    flags = 0;
  Int4                    i;
  Char                    line [256];
  E2ReplyPtr              reply;
  SeqEntryPtr             sep;
  CharPtr                 str;
  Uint4                   uid;

  if (get_var) {
    flags = 1;
  }

  e2rq = EntrezCreateBooleanRequest (TRUE, FALSE, "Nucleotide", NULL, 0, 0, NULL, 0, 0);
  if (e2rq == NULL) return;

  EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_LEFT_PAREN, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
  EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_LEFT_PAREN, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);

  str = ReadALine (line, sizeof (line), fp);
  if (! StringHasNoText (str)) {
    EntrezAddToBooleanRequest (e2rq, NULL, 0, "ACCN", str, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
  }

  while (str != NULL) {
    if (! StringHasNoText (str)) {
      EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_OR, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
      EntrezAddToBooleanRequest (e2rq, NULL, 0, "ACCN", str, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
    }
    str = ReadALine (line, sizeof (line), fp);
  }

  EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_RIGHT_PAREN, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
  EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_AND, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
  EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_LEFT_PAREN, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);

  str = ReadALine (line, sizeof (line), dfp);
  if (! StringHasNoText (str)) {
    EntrezAddToBooleanRequest (e2rq, NULL, 0, "MDAT", str, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
  }

  while (str != NULL) {
    if (! StringHasNoText (str)) {
      EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_OR, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
      EntrezAddToBooleanRequest (e2rq, NULL, 0, "MDAT", str, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
    }
    str = ReadALine (line, sizeof (line), dfp);
  }

  EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_RIGHT_PAREN, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
  EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_RIGHT_PAREN, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);

  e2ry = EntrezSynchronousQuery (e2rq);
  e2rq = Entrez2RequestFree (e2rq);

  if (e2ry == NULL) return;
  reply = e2ry->reply;
  if (reply == NULL || reply->choice != E2Reply_eval_boolean) return;
  e2br = EntrezExtractBooleanReply (e2ry);
  if (e2br == NULL) return;

  e2lp = e2br->uids;
  if (e2lp != NULL) {
    BSSeek (e2lp->uids, 0, SEEK_SET);
    for (i = 0; i < e2lp->num; i++) {
      uid = Nlm_BSGetUint4 (e2lp->uids);
      if (uid < 1) continue;

      sep = PubSeqSynchronousQuery (uid, 0, flags);
      if (sep == NULL) continue;

      if (do_nuc) {
        DoSeqEntryToGnbk (sep, GENBANK_FMT, extra);
      }
      if (do_prot) {
        DoSeqEntryToGnbk (sep, GENPEPT_FMT, extra);
      }

      SeqEntryFree (sep);
    }
  }

  Entrez2BooleanReplyFree (e2br);
}
Esempio n. 11
0
/*
 * Entry point for a BLAST comparison of two sequences.
 *
 * Reads its options from the global myargs[] array, obtains the query and
 * subject sequences through BL2SEQ_GetSequences, builds SeqLocs for them,
 * runs BLAST_TwoSeqLocSets, optionally writes the first seqalign set as
 * ASN.1 to the file named by ARG_ASNOUT, then formats the results and
 * releases the objects it created.
 *
 * Returns 0 after formatting; 1 when the sequences, the SeqLocs or the
 * summary options cannot be set up; or the non-zero status of the repeat
 * filter (only when a message of severity SEV_ERROR or above was posted)
 * or of BLAST_TwoSeqLocSets.
 *
 * NOTE(review): the early-return paths do not release what was allocated
 * before them, fake_subject_bsp is not freed anywhere in this function,
 * the result of AsnIoOpen for the ASN.1 output is not checked, and the
 * status values of BLAST_FormatResults and Blast_PrintOutputFooter are
 * assigned but never returned - confirm whether each is intentional.
 */
Int2 Main_new(void)

{
        BioseqPtr query_bsp=NULL, subject_bsp=NULL;
        BioseqPtr bsp1=NULL, bsp2=NULL;
        BioseqPtr fake_bsp=NULL, fake_subject_bsp=NULL;
        BlastFormattingInfo* format_info = NULL;
        BLAST_SummaryOptions* options=NULL;
        Blast_SummaryReturn* extra_returns = Blast_SummaryReturnNew();
        Boolean believe_query= FALSE;
        Boolean seq1_is_na, seq2_is_na;  /* seq1/2 is DNA if TRUE. */
        Boolean seqannot_output;   /* SeqAlign will be output. */
        Boolean entrez_lookup;     /* QUery/subject fetched from Entrez. */
        Boolean mask_at_hash=FALSE;  /* masking only on lookup table if TRUE. */
        DbtagPtr        dbtagptr;
        EBlastProgramType program_number;
        Int2 status; /* return value */
        EAlignView align_view = eAlignViewPairwise; /* Used for formatting */
        SeqAlignPtr seqalign=NULL;
        SeqEntryPtr sep=NULL, sep1=NULL;
        SeqLocPtr slp1, slp2;   /* Used for actual search. */
        SeqLocPtr filter_loc=NULL;  /* Location of regions filtered (returned by engine) */
        SeqLocPtr lcase_mask=NULL;    /* For lower-case masking info from query FASTA. */
        SeqLoc* repeat_mask = NULL; /* Repeat mask locations */
        Uint1 strand_option = 0; /* FIXME */
        SBlastOptions* search_options = NULL; /* Needed for formatting. */
        SBlastSeqalignArray* seqalign_arr = NULL;
        GeneticCodeSingletonInit();
        
        strand_option = (Uint1) myargs[ARG_STRAND].intvalue;

        entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
        seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);
        /* the query ids are kept as given when ASN.1 output or an Entrez
           lookup was requested */
        believe_query = (seqannot_output || entrez_lookup); 
        /* Non-zero value for -m option means tabular output. */
        if (myargs[ARG_FORMAT].intvalue != 0)
           align_view = eAlignViewTabularWithComments; 

        BlastProgram2Number(myargs[ARG_PROGRAM].strvalue, &program_number);

        /* the first sequence is nucleotide for these program types */
        seq1_is_na = (program_number == eBlastTypeBlastn ||
                  program_number == eBlastTypeBlastx ||
                  program_number == eBlastTypeRpsTblastn ||
                  program_number == eBlastTypeTblastx);

        /* the second sequence is nucleotide for these program types */
        seq2_is_na = (program_number == eBlastTypeBlastn ||
               program_number == eBlastTypeTblastn ||
               program_number == eBlastTypeTblastx);

        if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp,
                                &sep, &sep1, &lcase_mask, believe_query) 
            == FALSE)
        {
                ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
                return (1);
        }

        if (!entrez_lookup) {
            if (!believe_query)
                fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
            
            /* build a stand-in subject Bioseq that shares the descriptors
               and sequence data pointers of subject_bsp (shallow copy) and
               carries a general "BL_ORD_ID" id holding the title */
            fake_subject_bsp = BioseqNew();
            fake_subject_bsp->descr = subject_bsp->descr;
            fake_subject_bsp->repr = subject_bsp->repr;
            fake_subject_bsp->mol = subject_bsp->mol;
            fake_subject_bsp->length = subject_bsp->length;
            fake_subject_bsp->seq_data = subject_bsp->seq_data;
            fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
            dbtagptr = DbtagNew();
            dbtagptr->db = StringSave("BL_ORD_ID");
            dbtagptr->tag = ObjectIdNew();

            if (BioseqGetTitle(subject_bsp) != NULL)
              dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
            else
              dbtagptr->tag->str = StringSave("No definition line found");

            ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
            bsp1 = (believe_query ? query_bsp : fake_bsp);
            bsp2 = fake_subject_bsp;
        } else { /* Query and subject Bioseqs are already "fake". */
            bsp1 = query_bsp;
            bsp2 = subject_bsp;
        }

        if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, strand_option) == FALSE)
                return 1;

        if (Bl2SEQ_SummaryOptionsSet(&options, program_number) == FALSE)
                return 1;

        /* Find repeat mask, if necessary */
        if ((status = Blast_FindRepeatFilterSeqLoc(slp1, myargs[ARG_FILTER].strvalue,
                                &repeat_mask, &extra_returns->error)) != 0)
        {
            /* a non-zero status aborts only when a message of severity
               SEV_ERROR or higher was posted */
            if (extra_returns && extra_returns->error)
            {
                   ErrSev max_sev = SBlastMessageErrPost(extra_returns->error);
                   if (max_sev >= SEV_ERROR)
                         return status;
            }
        }

        /* Combine repeat mask with lower case mask */
        if (repeat_mask)
            lcase_mask = ValNodeLink(&lcase_mask, repeat_mask);
        
        status = BLAST_TwoSeqLocSets(options, slp1, slp2, lcase_mask, &seqalign_arr, 
                                     &filter_loc, &mask_at_hash, 
                                     &extra_returns);

        /* Free the lower case mask in SeqLoc form. */
        lcase_mask = Blast_ValNodeMaskListFree(lcase_mask);

        /* Post warning or error messages, no matter what the search status 
           was. */
        SBlastMessageErrPost(extra_returns->error);

        if (status != 0)
        {
                ErrPostEx(SEV_FATAL, 1, 0, "BLAST_TwoSeqLocSets failed");
                return status;
        }

        /* optional ASN.1 output of the first seqalign set */
        if (myargs[ARG_ASNOUT].strvalue && seqalign_arr) {
            AsnIoPtr asnout =
               AsnIoOpen(myargs[ARG_ASNOUT].strvalue, (char*)"w");
            GenericSeqAlignSetAsnWrite(seqalign_arr->array[0], asnout);
            asnout = AsnIoClose(asnout);
        }

        /* Pass NULL for the database name, since there is no database. */
        BlastFormattingInfoNewBasic(align_view, options, slp1, 
                                    myargs[ARG_OUT].strvalue, &search_options,
                                    &format_info);
        
        /* Always show gis in the output, hence pass TRUE for respective 
           argument. */
        BlastFormattingInfoSetUpOptions(format_info, 0, 1,
                                        (Boolean) myargs[ARG_HTML].intvalue,
                                        (Boolean) myargs[ARG_USEMEGABLAST].intvalue,
                                        TRUE, believe_query);

        /* If masking was at hash only, free the masking locations,
         * to prevent them from being used for formatting.
         */
        if (SBlastOptionsGetMaskAtHash(search_options))
            filter_loc = Blast_ValNodeMaskListFree(filter_loc);

        /* Format the results */
        status = 
            BLAST_FormatResults(seqalign_arr, 1, slp1, filter_loc, format_info, 
                                extra_returns);
        
        /* overwrites the formatting status assigned just above */
        status = Blast_PrintOutputFooter(format_info, extra_returns);

        /* Free masking locations if they haven't been freed already. */
        filter_loc = Blast_ValNodeMaskListFree(filter_loc);

        format_info = BlastFormattingInfoFree(format_info);
        extra_returns = Blast_SummaryReturnFree(extra_returns);
        search_options = SBlastOptionsFree(search_options);

        /* Entrez lookups are released as Bioseqs, otherwise the SeqEntry
           objects filled in by BL2SEQ_GetSequences are released */
        if (entrez_lookup) {
           BioseqFree(query_bsp);
           BioseqFree(subject_bsp);
        } else {
           SeqEntryFree(sep);
           SeqEntryFree(sep1);
        }

        options = BLAST_SummaryOptionsFree(options);
        seqalign_arr = SBlastSeqalignArrayFree(seqalign_arr);
        slp1 = SeqLocSetFree(slp1);
        slp2 = SeqLocSetFree(slp2);

        fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
        GeneticCodeSingletonFini();

        return 0;

}
Esempio n. 12
0
/*
 * Entry point of the pattern search program ("ProSiteSearch").
 *
 * Takes a protein either from Entrez (gi in myargs[0]) or from a FASTA
 * file (myargs[1]), compiles a set of patterns once (a user pattern, the
 * protease file or the ProSite pattern file) and prints every match of
 * every pattern for each sequence read.  In protease mode the matches are
 * printed with a molecular weight column; otherwise with the pattern name.
 *
 * Returns 1 when the arguments cannot be read and 0 at the end of a
 * normal run; every other failure posts an error and calls exit (1).
 *
 * NOTE(review): StrCpy into fastafile[256] and namesfile[256] is not
 * bounded by the buffer size; taxdata[8] is filled with '-' without a
 * terminating NUL before being passed to WhatOrg - confirm both against
 * the argument definitions and WhatOrg's contract.
 */
Int2 Main (void)
{
    Int2        argcount;
    Boolean     flagHaveNet;

    Int4        gi;
    SeqEntryPtr sep;
    ComPatPtr   cpp, cpph = NULL;
    SeqAlignPtr sap, sapn;
    StdSegPtr   ssp;
    SeqLocPtr   slp, slpn;
    Int4        start, stop;

    FILE        *fiop;
    Char        fastafile[256], namesfile[256];
    CharPtr     title;
    CharPtr     taxon;

    FloatHi     mw;
    ValNodePtr  namelist = NULL;

    static CharPtr pattern_file = "ncbipros.dat";
    static CharPtr protease_file = "ncbiendo.dat";
    static CharPtr names_file = "ncbipnam.dat";

    static GatherScope  gs;
    GatherScopePtr      gsp;
    static Gather_PBS   gpbs;
    Gather_PBSPtr       gpbsp;

#ifndef NO_TAX_NET
    Int4   i;
    static Char taxdata[8];
    static Gather_TaxId gti;
    Gather_TaxIdPtr     gtip;
#endif

    /* indices into myargs[]; without taxonomy support the ia argument
       is absent, so the remaining indices are one lower */
#ifndef NO_TAX_NET
    Int2   ia=4, ib=5, ic=6, id=7, ie=8, ig=9, ih=10, ii=11;
#else
    Int2         ib=4, ic=5, id=6, ie=7, ig=8, ih=9,  ii=10;
#endif

    argcount = sizeof (myargs) / sizeof (Args);
    if (!GetArgs ("ProSiteSearch", argcount, myargs))
        return 1;

    /* at least one input source is required */
    if (myargs[0].intvalue == 0 && myargs[1].strvalue == NULL)
    {
        ErrPostEx (SEV_ERROR, TOP_ERROR, 100,
                   "No gi or FastA file given :: for help :   srchaa -");
        ErrShow ();
        exit (1);
    }

    gsp = &gs;

#ifndef NO_TAX_NET
    gtip = &gti;
#endif
    gpbsp = &gpbs;

    /* start with every object type ignored, then enable Bioseqs only */
    MemSet ((Pointer) gsp, 0, sizeof (GatherScope));
    MemSet ((Pointer) gsp->ignore, (int) (TRUE),
            (size_t) (OBJ_MAX * sizeof (Boolean)));

    gsp->ignore[OBJ_SEQDESC] = TRUE;
    gsp->ignore[OBJ_BIOSEQ] = FALSE;

    gpbsp->bsp = NULL;

    gi = myargs[0].intvalue;
    if (myargs[1].strvalue != NULL)
        StrCpy (fastafile, myargs[1].strvalue);
    else
        fastafile[0] = '\0';

    if (gi > 0)
    {
        if (!EntrezInit ("srchaa", FALSE, &flagHaveNet))
        {
            ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
                       "Entrez init failed");
            ErrShow ();
            exit (1);
        }
    }

#ifndef NO_TAX_NET
    if (myargs[ia].intvalue)
    {
        if (!TaxArchInit ())
        {
            ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                       "Taxonomy init failed");
            ErrShow ();
            exit (1);
        }
    }
#endif

    /* first entry: from Entrez when a gi was given, else from the file */
    fiop = NULL;
    if (gi > 0)
    {
        sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_BIOSEQ);
    }
    else
    {
        if ((fiop = FileOpen (fastafile, "r")) == NULL)
        {
            ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                       "Failed to open FastA file: %s", fastafile);
            ErrShow ();
            exit (1);
        }
        sep = FastaToSeqEntry (fiop, FALSE);
    }

    if (sep == NULL)
    {
        ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
                   "No seqentry found");
        ErrShow ();
        exit (1);
    }

    /* one pass per entry; further entries are read from the FASTA file
       at the bottom of the loop */
    while (sep != NULL)
    {
        /* locate the Bioseq of this entry */
        gsp->ignore[OBJ_SEQDESC] = TRUE;
        gsp->ignore[OBJ_BIOSEQ] = FALSE;
        gpbsp->bsp = NULL;
        gpbsp->gi = gi;
        GatherSeqEntry (sep, (Pointer) gpbsp, GetBioseq, (Pointer) gsp);

        taxon = NULL;
#ifndef NO_TAX_NET
        if (myargs[ia].intvalue)
        {
            for (i = 0; i < 8; i++)
                taxdata[i] = '-';
            taxon = taxdata;

            /* second gather pass: descriptors only, to find the tax id */
            gsp->ignore[OBJ_SEQDESC] = FALSE;
            gsp->ignore[OBJ_BIOSEQ] = TRUE;

            gtip->taxid = 0;
            GatherSeqEntry (sep, (Pointer) gtip, GetTaxId, (Pointer) gsp);

            if (gtip->taxid != 0)
                WhatOrg (gtip->taxid, taxon);
            else
                taxon = NULL;
        }
#endif

        if (gpbsp->bsp != NULL)
        {
            if (ISA_aa (gpbsp->bsp->mol))
            {
                /* the pattern list is compiled once, on the first protein */
                if (cpph == NULL)
                {
                    namesfile[0] = '\0';
                    if (myargs[id].intvalue)
                        StrCpy (namesfile, names_file);
                    if (myargs[ie].strvalue != NULL)
                        StrCpy (namesfile, myargs[ie].strvalue);

                    if (myargs[ig].strvalue != NULL)
                    {
                        if ((cpph = CompilePattern (myargs[ig].strvalue, 1)) != NULL)
                            StrCpy (cpph->name, "User Pattern");
                    }
                    else
                    {
                        namelist = ReadPatternNames (namesfile);
                        if (myargs[ib].intvalue)
                            cpph = ReadPrositePattern (protease_file,
                                                       (Boolean) myargs[2].intvalue,
                                                       myargs[3].intvalue,
                                                       taxon, NULL);
                        else
                            cpph = ReadPrositePattern (pattern_file,
                                                       (Boolean) myargs[2].intvalue,
                                                       myargs[3].intvalue,
                                                       taxon, namelist);
                    }
                }

                /* unless ih is set, the title is printed up front */
                if (!(Boolean) myargs[ih].intvalue)
                {
                    title = FastaTitle (gpbsp->bsp, ">", NULL);
                    printf ("%s\n", title);
                    MemFree (title);
                }
                cpp = cpph;
                while (cpp != NULL)
                {
                    sap = PatternMatchBioseq (gpbsp->bsp, cpp,
                                              (Int4)myargs[ii].intvalue);
                    if (myargs[ib].intvalue)
                    {
                        printf (">%s\n", cpp->name);
                        if (sap != NULL)
                            printf ("   Start     Stop       M.W.\n");
                    }
                    if (myargs[ib].intvalue)
                    {
                        /* protease mode: one line per alignment with its
                           molecular weight; each alignment is freed after
                           being printed */
                        EmbedMolecularWeightInfo (sap, gpbsp->bsp);
                        if (myargs[ic].intvalue)
                            URK_SeqAlignSortByMolWt (&sap);
                        while (sap != NULL)
                        {
                            ssp = (StdSegPtr) sap->segs;
                            slp = ssp->loc;
                            start = SeqLocStart (slp);
                            stop = SeqLocStop (slp);
                            mw = ssp->scores->value.realvalue;
                            printf ("%8ld %8ld    %9.2f\n",
                                    (long) start+1, (long) stop+1, mw);
                            sapn = sap->next;
                            SeqAlignFree (sap);
                            sap = sapn;
                        }
                    }
                    else
                    {
                        /* pattern mode: matches are converted to SeqLocs and
                           printed with the pattern name */
                        slp = MatchSa2Sl (&sap);
                        /* with ih set, the title is printed only when the
                           pattern matched */
                        if (myargs[ih].intvalue && slp != NULL)
                        {
                            title = FastaTitle (gpbsp->bsp, ">", NULL);
                            printf ("%s\n", title);
                            MemFree (title);
                        }
                        while (slp != NULL)
                        {
                            start = SeqLocStart (slp);
                            stop = SeqLocStop (slp);
                            printf ("%8ld %8ld    %s\n",
                                    (long) start+1, (long) stop+1, cpp->name);
                            slpn = slp->next;
                            SeqLocFree (slp);
                            slp = slpn;
                        }
                    }
                    cpp = cpp->nextpattern;
                }
            }
            else
            {
                ErrPostEx (SEV_ERROR, TOP_ERROR, 106,
                           "Not a protein bioseq");
                ErrShow ();
                exit (1);
            }
        }
        else
        {
            ErrPostEx (SEV_ERROR, TOP_ERROR, 105,
                       "No bioseq found");
            ErrShow ();
            exit (1);
        }
        SeqEntryFree (sep);
        sep = NULL;
        /* only FASTA input can supply further entries */
        if (fiop != NULL)
            sep = FastaToSeqEntry (fiop, FALSE);
    }

    ComPatFree (cpph);
    ValNodeFreeData (namelist);
    FileClose (fiop);
    if (gi > 0)
        EntrezFini ();
#ifndef NO_TAX_NET
    if (myargs[ia].intvalue)
        TaxArchFini ();
#endif
    return 0;
}
Esempio n. 13
0
/*
 * DustProc
 *
 * Button callback.  Reads the XOS record attached to button `b`, loads the
 * sequence it describes (from Entrez when xosp->gi > 0, otherwise from the
 * FASTA file named by xosp->filename), runs DustBioseq / DustForGraph on the
 * nucleic acid Bioseq located by GatherSeqEntry, and opens a "Dust" window
 * whose panel is drawn by DrawGraph.
 *
 * Ownership: the score array and title are stored in the SeqGraph, the
 * SeqGraph in the XIS record, and the XIS record is attached to the window
 * with SetObjectExtra.  NOTE(review): presumably CloseGraphWindowProc
 * releases them - confirm.
 *
 * The loaded SeqEntry is always released before returning and the input
 * source (Entrez session or FASTA file) is always closed, including on the
 * error paths.
 */
static void DustProc (ButtoN b)
{
  Boolean     flagHaveNet;

  SeqEntryPtr sep;
  Int4        gi;
  CharPtr     fastafile;
  FILE        *fiop = NULL;
  CharPtr     title;

  GatherScopePtr      gsp;
  XOSPtr              xosp;
  XISPtr              xisp;

  Int4           i;
  DustRegionPtr  drp;
  FloatHiPtr     fhi, fhit;
  FloatHi        minscore, maxscore, deltascore;
  Int4           Yscale, range;

  SeqGraphPtr    sgp;

  WindoW     w;
  PaneL      p;

  if ((xosp = (XOSPtr) GetObjectExtra (b)) == NULL)
    return;

  gsp = xosp->gsp;
  gi = xosp->gi;
  fastafile = xosp->filename;

  /* nothing has been acquired yet on these first failure paths */
  if (gi > 0)
  {
    if (!EntrezInit ("dustv", FALSE, &flagHaveNet))
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
                 "Entrez init failed");
      ErrShow ();
      return;
    }
    sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_BIOSEQ);
  }
  else if (fastafile != NULL)
  {
    if ((fiop = FileOpen (fastafile, "r")) == NULL)
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                 "Failed to open FastA file");
      ErrShow ();
      return;
    }
    sep = FastaToSeqEntry (fiop, TRUE);
  }
  else
  {
    sep = NULL;
  }

  if (sep == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
               "No seqentry");
    ErrShow ();
    goto cleanup;           /* still have to close Entrez / the file */
  }

  xosp->sep = sep;
  xosp->bsp = NULL;
  xosp->gi = gi;
  GatherSeqEntry (sep, (Pointer) xosp, GetBioseq,
                  (Pointer) gsp);

  if (xosp->bsp == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "No Bioseq in SeqEntry");
    ErrShow ();
    goto release_entry;
  }
  if (!ISA_na (xosp->bsp->mol))
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "Not a nucleic acid Bioseq");
    ErrShow ();
    goto release_entry;
  }

  drp = DustBioseq (xosp->bsp, 0, xosp->bsp->length-1, xosp->ddp);
  fhit = fhi = DustForGraph (drp, xosp->bsp->length, 0,
                             xosp->bsp->length-1);
  DustRegionFree (drp);
  if (fhi == NULL)
    goto release_entry;

  /* the graph baseline is fixed at 0; only the maximum is searched for */
  minscore = 0.0;
  maxscore = 0.0;
  for (i = 0; i < xosp->bsp->length; i++)
  {
    if (*fhit > maxscore)
      maxscore = *fhit;
    fhit++;
  }

  title = FastaTitle (xosp->bsp, "DUST: >", NULL);

  /* allocate the XIS record first so that a failure of either allocation
     can be undone with MemFree alone */
  xisp = (XISPtr) MemNew (sizeof (XIS));
  sgp = (xisp != NULL) ? SeqGraphNew () : NULL;
  if (sgp == NULL)
  {
    MemFree (xisp);
    MemFree (fhi);
    MemFree (title);
    goto release_entry;
  }

  xisp->sgp = sgp;
  xisp->Xscale = 1;
  xisp->Yscale = 1;
  xisp->Xaxislen = 250;
  xisp->Yaxislen = 100;
  xisp->Xprelen = 0;
  xisp->Yprelen = 50;
  xisp->Xpostlen = 50;
  xisp->Ypostlen = 50;
  sgp->loc = SeqLocIntNew (0, xosp->bsp->length-1, xosp->bsp->strand,
                           xosp->bsp->id);
  sgp->title = title;
  sgp->flags[2] = 1;
  sgp->numval = xosp->bsp->length;
  sgp->values = (Pointer) fhi;

  /* Stretch the scores towards the 100-unit Y axis by an integer factor.
     The truncated score range is 0 whenever the maximum score is below
     1.0, so it must be tested before it is used as a divisor. */
  deltascore = maxscore - minscore;
  range = (Int4) deltascore;
  if (range > 0 && (Yscale = 100 / range) > 0)
  {
    minscore *= Yscale;
    maxscore *= Yscale;
    fhit = fhi;
    for (i = 0; i < xosp->bsp->length; i++)
    {
      *fhit *= Yscale;
      fhit++;
    }
    xisp->Yscale = Yscale;
  }
  sgp->max.realvalue = maxscore;
  sgp->min.realvalue = minscore;

  w = FixedWindow (-50, -50, -10, -10, "Dust", CloseGraphWindowProc);
  SetObjectExtra (w, xisp, NULL);
  p = SimplePanel (w,
                   (Int2) (xisp->Xprelen+xisp->Xaxislen+xisp->Xpostlen),
                   (Int2) (xisp->Yprelen+xisp->Yaxislen+xisp->Ypostlen),
                   DrawGraph);
  SetPanelClick (p, NULL, NULL, NULL, CloseGraphPanelProc);
  RealizeWindow (w);
  Show (w);

release_entry:
  xosp->sep = sep = SeqEntryFree (sep);

cleanup:
  if (gi > 0)
    EntrezFini ();
  else if (fiop != NULL)
    FileClose (fiop);
  return;
}
Esempio n. 14
0
/*
 * ProcessSingleRecord
 *
 * Reads one record from `filename`, registers it with the object manager,
 * and times the stages selected by the flags in `cfp`, writing the elapsed
 * seconds of each stage to cfp->logfp when that stream is set.
 *
 *   cfp->type  1, 6 : read with ReadAsnFastaOrFlatFile
 *              2..5 : read as ASN.1 Seq-entry / Bioseq / Bioseq-set /
 *                     Seq-submit (binary when cfp->binary is set)
 *              7    : only read the file line by line through a FileCache,
 *                     then return
 *   cfp->io    'r'  : repeat the read cfp->maxcount times
 *              'w'  : write the entry cfp->maxcount times to cfp->ofp
 *                     ('b' selects binary ASN.1 output)
 *   cfp->lock       : lock far components before processing
 *
 * DoProcess is then called cfp->maxcount times on the top SeqEntry.
 *
 * The registered entity is released on every path that leaves the function
 * after a successful registration.
 */
static void ProcessSingleRecord (
  CharPtr filename,
  CSpeedFlagPtr cfp
)

{
  AsnIoPtr      aip;
  BioseqPtr     bsp;
  ValNodePtr    bsplist = NULL;
  BioseqSetPtr  bssp;
  Pointer       dataptr = NULL;
  Uint2         datatype = 0, entityID = 0;
  FileCache     fc;
  FILE          *fp;
  Int1          iotype;
  Char          line [512];
  Int4          maxio = 1;
  SeqEntryPtr   sep;
  time_t        starttime, stoptime, worsttime;
  CharPtr       str;
  Int4          x;

  if (cfp == NULL) return;

  if (StringHasNoText (filename)) return;

  if (StringChr (cfp->io, 'r') != NULL) {
    maxio = cfp->maxcount;
  }

  starttime = GetSecs ();

  for (x = 0; x < maxio; x++) {
    /* drop the result of the previous timing iteration before re-reading */
    if (entityID != 0) {
      ObjMgrFreeByEntityID (entityID);
      entityID = 0;
      dataptr = NULL;
    }

    if (cfp->type == 1 || cfp->type == 6) {

      fp = FileOpen (filename, "r");
      if (fp == NULL) {
        Message (MSG_POSTERR, "Failed to open '%s'", filename);
        return;
      }

      dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, FALSE, FALSE);

      FileClose (fp);

      entityID = ObjMgrRegister (datatype, dataptr);

    } else if (cfp->type >= 2 && cfp->type <= 5) {

      aip = AsnIoOpen (filename, cfp->binary? "rb" : "r");
      if (aip == NULL) {
        Message (MSG_POSTERR, "AsnIoOpen failed for input file '%s'", filename);
        return;
      }

      switch (cfp->type) {
        case 2 :
          dataptr = (Pointer) SeqEntryAsnRead (aip, NULL);
          datatype = OBJ_SEQENTRY;
          break;
        case 3 :
          dataptr = (Pointer) BioseqAsnRead (aip, NULL);
          datatype = OBJ_BIOSEQ;
          break;
        case 4 :
          dataptr = (Pointer) BioseqSetAsnRead (aip, NULL);
          datatype = OBJ_BIOSEQSET;
          break;
        case 5 :
          dataptr = (Pointer) SeqSubmitAsnRead (aip, NULL);
          datatype = OBJ_SEQSUB;
          break;
        default :
          break;
      }

      AsnIoClose (aip);

      entityID = ObjMgrRegister (datatype, dataptr);

    } else if (cfp->type == 7) {

      /* raw line-reading pass: nothing is registered or processed, and the
         function returns after the first pass */
      fp = FileOpen (filename, "r");
      if (fp == NULL) {
        Message (MSG_POSTERR, "Failed to open '%s'", filename);
        return;
      }

      FileCacheSetup (&fc, fp);

      str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
      while (str != NULL) {
        str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
      }

      FileClose (fp);

      return;

    } else {
      Message (MSG_POSTERR, "Input format type '%d' unrecognized", (int) cfp->type);
      return;
    }
  }

  if (entityID < 1 || dataptr == NULL) {
    Message (MSG_POSTERR, "Data read failed for input file '%s'", filename);
    if (entityID != 0) {
      ObjMgrFreeByEntityID (entityID);
    }
    return;
  }

  if (datatype != OBJ_SEQSUB && datatype != OBJ_SEQENTRY &&
        datatype != OBJ_BIOSEQ && datatype != OBJ_BIOSEQSET) {

    Message (MSG_POSTERR, "Datatype %d not recognized", (int) datatype);
    /* the record was registered above; do not leave it behind */
    ObjMgrFreeByEntityID (entityID);
    return;
  }

  stoptime = GetSecs ();
  worsttime = stoptime - starttime;
  if (cfp->logfp != NULL) {
    fprintf (cfp->logfp, "ASN reading time %ld seconds\n", (long) worsttime);
    fflush (cfp->logfp);
  }

  sep = GetTopSeqEntryForEntityID (entityID);

  if (sep == NULL) {
    /* a bare Bioseq or Bioseq-set has no enclosing SeqEntry: wrap it in a
       new one, hand that to SeqMgrSeqEntry, then look the top entry up again */
    sep = SeqEntryNew ();
    if (sep != NULL) {
      if (datatype == OBJ_BIOSEQ) {
        bsp = (BioseqPtr) dataptr;
        sep->choice = 1;
        sep->data.ptrvalue = bsp;
        SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
      } else if (datatype == OBJ_BIOSEQSET) {
        bssp = (BioseqSetPtr) dataptr;
        sep->choice = 2;
        sep->data.ptrvalue = bssp;
        SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
      } else {
        sep = SeqEntryFree (sep);
      }
    }
    sep = GetTopSeqEntryForEntityID (entityID);
  }

  if (sep == NULL) {
    /* nothing to process, but the entity is still registered */
    ObjMgrFreeByEntityID (entityID);
    return;
  }

  if (cfp->lock) {
    starttime = GetSecs ();

    bsplist = LockFarComponents (sep);

    stoptime = GetSecs ();
    worsttime = stoptime - starttime;
    if (cfp->logfp != NULL) {
      fprintf (cfp->logfp, "Far component locking time %ld seconds\n", (long) worsttime);
      fflush (cfp->logfp);
    }
  }

  if (StringChr (cfp->io, 'w') != NULL) {
    starttime = GetSecs ();

    iotype = ASNIO_TEXT_OUT;
    if (StringChr (cfp->io, 'b') != NULL) {
      iotype = ASNIO_BIN_OUT;
    }

    for (x = 0; x < cfp->maxcount; x++) {
      aip = AsnIoNew (iotype, cfp->ofp, NULL, NULL, NULL);
      if (aip != NULL) {
        SeqEntryAsnWrite (sep, aip, NULL);
        AsnIoFree (aip, FALSE);
      }
    }

    stoptime = GetSecs ();
    worsttime = stoptime - starttime;
    if (cfp->logfp != NULL) {
      fprintf (cfp->logfp, "ASN writing time %ld seconds\n", (long) worsttime);
      fflush (cfp->logfp);
    }
  }

  starttime = GetSecs ();

  for (x = 0; x < cfp->maxcount; x++) {
    DoProcess (sep, entityID, cfp);
  }

  stoptime = GetSecs ();
  worsttime = stoptime - starttime;
  if (cfp->logfp != NULL) {
    fprintf (cfp->logfp, "Internal processing time %ld seconds\n", (long) worsttime);
    fflush (cfp->logfp);
  }

  ObjMgrFreeByEntityID (entityID);

  bsplist = UnlockFarComponents (bsplist);
}
Esempio n. 15
0
/*
 * ProcessAccession
 *
 * Resolves `accn` to a GI, fetches the record with PubSeqSynchronousQuery
 * and formats it with DoSeqEntryToGnbk (GenBank format when do_nuc is set,
 * GenPept format when do_prot is set).
 *
 * `accn` is taken as a GI number when it consists of digits only, otherwise
 * as an accession (optionally with version).  With only_new set the record
 * is skipped when
 *   - a numeric GI is the one its own unversioned accession resolves to, or
 *   - a text accession is identical (ignoring case) to the accession.version
 *     written for the GI it resolves to.
 * get_var sets the query flags argument to 1.
 */
static void ProcessAccession (
  CharPtr accn,
  XtraPtr extra,
  Boolean only_new,
  Boolean get_var,
  Boolean do_nuc,
  Boolean do_prot
)

{
  Boolean      all_digits = TRUE;
  Char         current [41];
  CharPtr      dot;
  Int4         flags;
  Int4         gi = 0;
  Int4         latest;
  long         parsed;
  CharPtr      scan;
  SeqEntryPtr  sep;
  SeqIdPtr     sip;

  /* stop at the first character that is not a decimal digit */
  for (scan = accn; *scan != '\0'; scan++) {
    if (! IS_DIGIT (*scan)) {
      all_digits = FALSE;
      break;
    }
  }

  if (! all_digits) {

    /* text accession: look up its GI */
    sip = SeqIdFromAccessionDotVersion (accn);
    gi = GetGIForSeqId (sip);
    SeqIdFree (sip);

    if (only_new) {
      sip = GetSeqIdForGI (gi);
      if (sip != NULL) {
        SeqIdWrite (sip, current, PRINTID_TEXTID_ACC_VER, sizeof (current));
        SeqIdFree (sip);
        if (StringICmp (accn, current) == 0) return;
      }
    }

  } else if (sscanf (accn, "%ld", &parsed) == 1) {

    /* numeric GI */
    gi = (Int4) parsed;
    if (gi < 1) return;

    if (only_new) {
      sip = GetSeqIdForGI (gi);
      if (sip != NULL) {
        SeqIdWrite (sip, current, PRINTID_TEXTID_ACC_VER, sizeof (current));
        SeqIdFree (sip);

        /* cut the ".version" suffix and resolve the bare accession */
        dot = StringChr (current, '.');
        if (dot != NULL) {
          *dot = '\0';
          sip = SeqIdFromAccessionDotVersion (current);
          latest = GetGIForSeqId (sip);
          SeqIdFree (sip);
          if (latest == gi) return;
        }
      }
    }
  }

  if (gi < 1) return;

  flags = get_var ? 1 : 0;

  sep = PubSeqSynchronousQuery (gi, 0, flags);
  if (sep == NULL) return;

  if (do_nuc) {
    DoSeqEntryToGnbk (sep, GENBANK_FMT, extra);
  }
  if (do_prot) {
    DoSeqEntryToGnbk (sep, GENPEPT_FMT, extra);
  }

  SeqEntryFree (sep);
}
Esempio n. 16
0
/*
 * GetThisBioseq
 *
 * Reloads the sequence described by `xosp` (Entrez GI when xosp->gi > 0,
 * otherwise the FASTA file xosp->filename) and refreshes the fields derived
 * from it:
 *   - xosp->sep / xosp->bsp : the new SeqEntry and the Bioseq located in it
 *                             by GatherSeqEntry with the GetBioseq callback
 *   - xosp->gcd / xosp->gcdi: translation tables of the genetic code found
 *                             in a source descriptor (code 1 if none)
 *   - xosp->slpk            : emptied and re-gathered with GatherKnownOrfs
 *                             for Entrez records
 *
 * On failure (no SeqEntry, no Bioseq, no genetic code, or a Bioseq that is
 * not nucleic acid) an error is posted and sep, bsp, filename and gi in
 * `xosp` are cleared.
 */
static void GetThisBioseq (XOSPtr xosp)
{
  GatherScopePtr  gsp;
  CharPtr         filename, savedname;
  Int4            gi;
  FILE            *fiop;
  Boolean         flagHaveNet;
  SeqEntryPtr     sep;
  ValNodePtr      vnp;
  Int2            gcode;
  GeneticCodePtr  gcp;
  SeqLocPtr       slp, slpn;
  SeqIdPtr        id;

  gsp = xosp->gsp;

  fiop = NULL;
  filename = xosp->filename;
  gi = xosp->gi;
  xosp->sep = SeqEntryFree (xosp->sep);
  /* the Bioseq belonged to the entry just released; never leave the stale
     pointer behind for the checks further down or for the caller */
  xosp->bsp = NULL;

  if (gi > 0)
  {
    if (!EntrezInit ("cnsgnv", FALSE, &flagHaveNet))
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
                 "Entrez init failed");
      ErrShow ();
      return;
    }
    sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_NUC_PROT);
  }
  else if (filename != NULL)
  {
    if ((fiop = FileOpen (filename, "r")) == NULL)
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                 "Failed to open FastA file");
      ErrShow ();
      return;
    }
    sep = FastaToSeqEntry (fiop, TRUE);
    if (sep != NULL)
      AddBioSourceToSeqEntry (sep);
  }
  else
  {
    sep = NULL;
  }

  if (sep == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
               "No SeqEntry");
    ErrShow ();
  }
  else
  {
    /* CleanUpXOS releases xosp->filename, which `filename` aliases, so the
       copy has to be taken before the clean-up, not after it */
    savedname = StringSave (filename);
    CleanUpXOS (xosp);
    xosp->sep = sep;
    xosp->gi = gi;
    xosp->filename = savedname;
    GatherSeqEntry (sep, (Pointer) xosp, GetBioseq, (Pointer) gsp);
  }

  /* xosp->bsp can only be set here if a SeqEntry was gathered above */
  if (xosp->bsp != NULL)
  {
    /* genetic code: Bioseq descriptors first, then those of the enclosing
       set (SeqEntry choice 2), else the standard code */
    vnp = xosp->bsp->descr;
    gcode = 0;
    while (vnp != NULL)
    {
      if (vnp->choice == Seq_descr_source)
      {
        gcode = BioSourceToGeneticCode ((BioSourcePtr) vnp->data.ptrvalue);
        break;
      }
      vnp = vnp->next;
    }
    if (gcode == 0 && sep->choice == 2)
    {
      vnp = ((BioseqSetPtr) (sep->data.ptrvalue))->descr;
      while (vnp != NULL)
      {
        if (vnp->choice == Seq_descr_source)
        {
          gcode = BioSourceToGeneticCode ((BioSourcePtr) vnp->data.ptrvalue);
          break;
        }
        vnp = vnp->next;
      }
    }
    if (gcode == 0)
      gcode = 1; /* standard */
    gcp = GeneticCodeFind (gcode, NULL);
    if (gcp != NULL)
    {
      xosp->gcd = xosp->gcdi = NULL;
      vnp = (ValNodePtr) gcp->data.ptrvalue;
      while (vnp != NULL)
      {
        if (vnp->choice == 6)       /* sncbieaa */
          xosp->gcdi = (CharPtr) vnp->data.ptrvalue;
        else if (vnp->choice == 3)  /* ncbieaa */
          xosp->gcd = (CharPtr) vnp->data.ptrvalue;
        vnp = vnp->next;
      }
    }
    /* no separate start-codon table: fall back to the plain one */
    if (xosp->gcdi == NULL)
      xosp->gcdi = xosp->gcd;
    if (xosp->gcdi == NULL)
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 105,
                 "Could not get genetic code for translation");
      ErrShow ();
      xosp->bsp = NULL;
    }
  }

  if (gi > 0)
  {
    /* discard the previous known-ORF list, then rebuild it from the new
       entry when there is one */
    slp = xosp->slpk;
    while (slp != NULL)
    {
      slpn = slp->next;
      id = SeqLocId (slp);
      if (id != NULL)
        id->next = SeqIdSetFree (id->next);
      SeqLocFree (slp);
      slp = slpn;
    }
    xosp->slpk = slp;
    if (sep != NULL)
      GatherSeqEntry (sep, (Pointer) xosp, GatherKnownOrfs, (Pointer) gsp);
  }

  if (gi > 0)
    EntrezFini ();
  else if (fiop != NULL)
    FileClose (fiop);

  if (xosp->bsp != NULL)
  {
    if (!ISA_na (xosp->bsp->mol))
    {
      xosp->sep = SeqEntryFree (xosp->sep);
      xosp->bsp = NULL;
      xosp->filename = (CharPtr) MemFree (xosp->filename);
      xosp->gi = 0;
      ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "Not nucleic acid Bioseq");
      ErrShow ();
    }
  }
  else
  {
    xosp->sep = SeqEntryFree (xosp->sep);
    xosp->filename = (CharPtr) MemFree (xosp->filename);
    xosp->gi = 0;
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "No Bioseq");
    ErrShow ();
  }
  return;
}
Esempio n. 17
0
/*
 * CcpProc
 *
 * Button callback.  Reads the XOS record attached to button `b`, loads the
 * prediction data with ReadPccData, loads the sequence the record describes
 * (from Entrez when xosp->gi > 0, otherwise from the FASTA file named by
 * xosp->filename), runs PredictCCBioseq on the amino acid Bioseq located by
 * GatherSeqEntry, multiplies every score by 100 and opens a "Ccp" window
 * whose panel is drawn by DrawGraph on a fixed 0..100 range.
 *
 * Ownership: the score array and title are stored in the SeqGraph, the
 * SeqGraph in the XIS record, and the XIS record is attached to the window
 * with SetObjectExtra.  NOTE(review): presumably CloseGraphWindowProc
 * releases them - confirm.
 *
 * Once ReadPccData has succeeded, xosp->pccp->res is released on every exit
 * path; the loaded SeqEntry is released and the input source (Entrez session
 * or FASTA file) closed on every path that acquired them.
 */
static void CcpProc (ButtoN b)
{
  Boolean     flagHaveNet;

  SeqEntryPtr sep;
  Int4        i, gi;
  CharPtr     fastafile;
  FILE        *fiop = NULL;
  CharPtr     title;

  GatherScopePtr      gsp;
  XOSPtr              xosp;
  XISPtr              xisp;

  FloatHi        minscore, maxscore;
  FloatHiPtr     pcs, pccscr;
  SeqGraphPtr    sgp;

  WindoW     w;
  PaneL      p;

  if ((xosp = (XOSPtr) GetObjectExtra (b)) == NULL)
    return;

  if (ReadPccData (xosp->pccp) == 0)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101,
               "Could not open or read %s data file", xosp->pccp->pccdatafile);
    ErrShow ();
    return;
  }

  gsp = xosp->gsp;
  gi = xosp->gi;
  fastafile = xosp->filename;

  /* no input source has been opened yet on these first failure paths */
  if (gi > 0)
  {
    if (!EntrezInit ("ccpv", FALSE, &flagHaveNet))
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
                 "Entrez init failed");
      ErrShow ();
      xosp->pccp->res = (CharPtr) MemFree (xosp->pccp->res);
      return;
    }
    sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_BIOSEQ);
  }
  else if (fastafile != NULL)
  {
    if ((fiop = FileOpen (fastafile, "r")) == NULL)
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                 "Failed to open FastA file");
      ErrShow ();
      xosp->pccp->res = (CharPtr) MemFree (xosp->pccp->res);
      return;
    }
    sep = FastaToSeqEntry (fiop, FALSE);
  }
  else
  {
    sep = NULL;
  }

  if (sep == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
               "No seqentry");
    ErrShow ();
    goto cleanup;           /* still have to close Entrez / the file */
  }

  xosp->sep = sep;
  xosp->bsp = NULL;
  xosp->gi = gi;
  GatherSeqEntry (sep, (Pointer) xosp, GetBioseq,
                  (Pointer) gsp);

  if (xosp->bsp == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "No Bioseq in SeqEntry");
    ErrShow ();
    goto release_entry;
  }
  if (!ISA_aa (xosp->bsp->mol))
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 101, "Not an amino acid Bioseq");
    ErrShow ();
    goto release_entry;
  }

  pcs = pccscr = PredictCCBioseq (xosp->bsp, 0, xosp->bsp->length-1,
                                  xosp->pccp);
  xosp->pccp->res = (CharPtr) MemFree (xosp->pccp->res);
  if (pccscr == NULL)
    goto release_entry;

  /* scale every score by 100 to match the fixed 0..100 graph range */
  for (i = 0; i < xosp->bsp->length; i++)
  {
    *pcs *= 100.0;
    pcs++;
  }
  minscore = 0.0;
  maxscore = 100.0;

  title = FastaTitle (xosp->bsp, "CCP: >", NULL);

  /* allocate the XIS record first so that a failure of either allocation
     can be undone with MemFree alone */
  xisp = (XISPtr) MemNew (sizeof (XIS));
  sgp = (xisp != NULL) ? SeqGraphNew () : NULL;
  if (sgp == NULL)
  {
    MemFree (xisp);
    MemFree (pccscr);
    MemFree (title);
    goto release_entry;
  }

  xisp->sgp = sgp;
  xisp->Xscale = 1;
  xisp->Yscale = 1;
  xisp->Xaxislen = 250;
  xisp->Yaxislen = 100;
  xisp->Xprelen = 0;
  xisp->Yprelen = 50;
  xisp->Xpostlen = 50;
  xisp->Ypostlen = 50;
  sgp->loc = SeqLocIntNew (0, xosp->bsp->length-1, xosp->bsp->strand,
                           xosp->bsp->id);
  sgp->title = title;
  sgp->flags[2] = 1;
  sgp->numval = xosp->bsp->length;
  sgp->max.realvalue = maxscore;
  sgp->min.realvalue = minscore;
  sgp->values = (Pointer) pccscr;

  w = FixedWindow (-50, -50, -10, -10, "Ccp", CloseGraphWindowProc);
  SetObjectExtra (w, xisp, NULL);
  p = SimplePanel (w,
                   (Int2) (xisp->Xprelen+xisp->Xaxislen+xisp->Xpostlen),
                   (Int2) (xisp->Yprelen+xisp->Yaxislen+xisp->Ypostlen),
                   DrawGraph);
  SetPanelClick (p, NULL, NULL, NULL, CloseGraphPanelProc);
  RealizeWindow (w);
  Show (w);

release_entry:
  xosp->sep = sep = SeqEntryFree (sep);

cleanup:
  /* every other exit after ReadPccData releases res; this covers the paths
     that jump here without having done so (a no-op once it is NULL) */
  xosp->pccp->res = (CharPtr) MemFree (xosp->pccp->res);
  if (gi > 0)
    EntrezFini ();
  else if (fiop != NULL)
    FileClose (fiop);
  return;
}
Esempio n. 18
0
static Int2 Main_old (void)
 
{
   AsnIoPtr aip, xml_aip = NULL;
   BioseqPtr query_bsp, PNTR query_bsp_array;
   BioSourcePtr source;
   BLAST_MatrixPtr matrix;
   BLAST_OptionsBlkPtr options;
   BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
   BlastPruneSapStructPtr prune;
   Boolean db_is_na, query_is_na, show_gi, believe_query=FALSE;
   Boolean html=FALSE;
   CharPtr params_buffer=NULL;
   Int4 number_of_descriptions, number_of_alignments;
   SeqAlignPtr  seqalign, PNTR seqalign_array;
   SeqAnnotPtr seqannot;
   SeqEntryPtr PNTR sepp;
   TxDfDbInfoPtr dbinfo=NULL, dbinfo_head;
   Uint1 align_type, align_view, out_type;
   Uint4 align_options, print_options;
   ValNodePtr mask_loc, mask_loc_start, next_mask_loc;
   ValNodePtr vnp, other_returns, error_returns;
   
   CharPtr blast_program, blast_database, blast_inputfile, blast_outputfile;
   FILE *infp, *outfp, *mqfp=NULL;
   Int4 index, num_bsps, total_length, total_processed = 0;
   Int2 ctr = 1;
   Char prefix[2];
   SeqLocPtr last_mask, mask_slp;
   Boolean done, hits_found;
   Boolean lcase_masking;
   MBXmlPtr mbxp = NULL;
   Boolean traditional_formatting;

    blast_program = "blastn";
    blast_database = myargs [ARG_DB].strvalue;
    blast_inputfile = myargs [ARG_QUERY].strvalue;
    blast_outputfile = myargs [ARG_OUT].strvalue;
    if (myargs[ARG_HTML].intvalue)
        html = TRUE;

    if ((infp = FileOpen(blast_inputfile, "r")) == NULL) {
       ErrPostEx(SEV_FATAL, 1, 0, "mgblast: Unable to open input file %s\n", blast_inputfile);
       return (1);
    }

    align_view = (Int1) myargs[ARG_FORMAT].intvalue;
    /* Geo mod: 
      -- replaced myargs[ARG_OUTTYPE].intvalue with out_type from now on
    */
    out_type=(Int1) myargs[ARG_OUTTYPE].intvalue;
    if (out_type==MGBLAST_FLTHITS || out_type==MGBLAST_HITGAPS) {
      align_view = 12 + (out_type-MGBLAST_FLTHITS ); 
      out_type=MBLAST_ALIGNMENTS;
      //Attention: 12 MUST be the -m mgblast tab option for MGBLAST_FLTHITS format
      // and MGBLAST_HITGAPS = MGBLAST_FLTHITS+1
       if (align_view>12) { // this is MGBLAST_HITGAPS output
            gap_Info=TRUE;
            if (dbgaps_buf==NULL)
                  dbgaps_buf=(CharPtr) Malloc(dbgaps_bufsize + 1);
            if (qgaps_buf==NULL) 
                qgaps_buf=(CharPtr) Malloc(qgaps_bufsize + 1);
            }
      }

    outfp = NULL;

    traditional_formatting = 
        (out_type == MBLAST_ALIGNMENTS ||
         out_type == MBLAST_DELAYED_TRACEBACK);

    if ((!traditional_formatting ||
            (align_view != 7 && align_view != 10 && align_view != 11)) && 
            blast_outputfile != NULL) {
       if ((outfp = FileOpen(blast_outputfile, "w")) == NULL) {
          ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
          return (1);
       }
    }

    //align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na);
    align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na);
    /*
    if (!traditional_formatting)
        believe_query = TRUE;
    else
        believe_query = (Boolean) myargs[ARG_BELIEVEQUERY].intvalue;
    */
    //Geo mod: 
    believe_query=FALSE;
    //If ASN.1 output is requested and believe_query is not set to TRUE,
    //   exit with an error.    
    if (!believe_query && (myargs[ARG_ASNOUT].strvalue ||
                           align_view == 10 || align_view == 11)) {
        ErrPostEx(SEV_FATAL, 1, 0, 
                  "-J option must be TRUE to produce ASN.1 output; before "
                  "changing -J to TRUE please also ensure that all query "
                  "sequence identifiers are unique");
        return -1;
    }
        
    options = BLASTOptionNewEx(blast_program, TRUE, TRUE);
    if (options == NULL)
        return 3;

    options->do_sum_stats = FALSE;
    options->is_neighboring = FALSE;
        options->expect_value  = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;
    number_of_descriptions = myargs[ARG_DESCRIPTIONS].intvalue;    
    number_of_alignments = myargs[ARG_ALIGNMENTS].intvalue;    
    options->hitlist_size = MAX(number_of_descriptions, number_of_alignments);

    if (myargs[ARG_XDROP].intvalue != 0)
           options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
    if (myargs[ARG_XDROP_UNGAPPED].intvalue != 0)
           options->dropoff_2nd_pass = myargs[ARG_XDROP_UNGAPPED].intvalue;
        if (myargs[ARG_XDROP_FINAL].intvalue != 0)
           options->gap_x_dropoff_final = myargs[ARG_XDROP_FINAL].intvalue;

    if (StringICmp(myargs[ARG_FILTER].strvalue, "T") == 0)
       options->filter_string = StringSave("D");
    else
       options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
    
    show_gi = (Boolean) myargs[ARG_SHOWGIS].intvalue;
    options->penalty = myargs[ARG_MISMATCH].intvalue;
    options->reward = myargs[ARG_MATCH].intvalue;
        if (myargs[ARG_GAPOPEN].intvalue >= 0)
        options->gap_open = myargs[ARG_GAPOPEN].intvalue;
        if (myargs[ARG_GAPEXT].intvalue >= 0)
        options->gap_extend = myargs[ARG_GAPEXT].intvalue;

    if (options->gap_open == 0 && options->reward % 2 == 0 && 
        options->gap_extend == options->reward / 2 - options->penalty)
       /* This is the default value */
    options->gap_extend = 0;

    options->genetic_code = 1;
    options->db_genetic_code = 1; /* Default; it's not needed here anyway */
    options->number_of_cpus = myargs[ARG_THREADS].intvalue;
    if (myargs[ARG_WORDSIZE].intvalue != 0)
           options->wordsize = myargs[ARG_WORDSIZE].intvalue;
        if (myargs[ARG_MINSCORE].intvalue == 0)
           options->cutoff_s2 = options->wordsize*options->reward;
        else 
           options->cutoff_s2 = myargs[ARG_MINSCORE].intvalue;

        options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;
        options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;

    options->perform_culling = FALSE;
    /* Kludge */
    options->block_width  = myargs[ARG_MAXPOS].intvalue;

    options->strand_option = myargs[ARG_STRAND].intvalue;
        options->window_size = myargs[ARG_WINDOW].intvalue;
#ifdef DO_NOT_SUPPRESS_BLAST_OP        
        options->mb_template_length = myargs[ARG_TEMPL_LEN].intvalue;
        if (myargs[ARG_TEMPL_LEN].intvalue != 0)
            options->mb_one_base_step = (Boolean) myargs[ARG_EVERYBASE].intvalue;
        options->mb_disc_type = myargs[ARG_TEMPL_TYPE].intvalue;
#endif
        lcase_masking = (Boolean) myargs[ARG_LCASE].intvalue;
        /* Allow dynamic programming gapped extension only with affine 
           gap scores */
        if (options->gap_open != 0 || options->gap_extend != 0)
           options->mb_use_dyn_prog = (Boolean) myargs[ARG_DYNAMIC].intvalue;

        print_options = 0;
        align_options = 0;
        align_options += TXALIGN_COMPRESS;
        align_options += TXALIGN_END_NUM;
        if (show_gi) {
       align_options += TXALIGN_SHOW_GI;
       print_options += TXALIGN_SHOW_GI;
        }
            
        if (align_view) {
       align_options += TXALIGN_MASTER;
       if (align_view == 1 || align_view == 3)
          align_options += TXALIGN_MISMATCH;
       if (align_view == 3 || align_view == 4 || align_view == 6)
          align_options += TXALIGN_FLAT_INS;
       if (align_view == 5 || align_view == 6)
          align_options += TXALIGN_BLUNT_END;
        } else {
       align_options += TXALIGN_MATRIX_VAL;
       align_options += TXALIGN_SHOW_QS;
    }

    if (html) {
       align_options += TXALIGN_HTML;
       print_options += TXALIGN_HTML;
    }

    if (myargs[ARG_GILIST].strvalue)
       options->gifile = StringSave(myargs[ARG_GILIST].strvalue);
   
    if (out_type == MBLAST_ENDPOINTS)
      options->no_traceback = 1;
   else if (out_type == MBLAST_DELAYED_TRACEBACK)
       options->no_traceback = 2;
    else
       options->no_traceback = 0;

    options->megablast_full_deflines = (Boolean) myargs[ARG_FULLID].intvalue;
    options->perc_identity = (FloatLo) myargs[ARG_PERC_IDENT].floatvalue;
    options->hsp_num_max = myargs[ARG_MAXHSP].intvalue;

    if (!believe_query)
           options->megablast_full_deflines = TRUE;
        /*if (options->megablast_full_deflines)
          believe_query = FALSE;*/

    query_bsp_array = (BioseqPtr PNTR) MemNew((MAX_NUM_QUERIES+1)*sizeof(BioseqPtr));
    sepp = (SeqEntryPtr PNTR) MemNew(MAX_NUM_QUERIES*sizeof(SeqEntryPtr));

    StrCpy(prefix, "");

    global_fp = outfp;
        options->output = outfp;

    if (traditional_formatting) {
       if (align_view < 7) {
              if (html) {
                 fprintf(outfp, "<HTML>\n<TITLE>MEGABLAST Search Results</TITLE>\n");
                 fprintf(outfp, "<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" "
                         "VLINK=\"#660099\" ALINK=\"#660099\">\n");
                 fprintf(outfp, "<PRE>\n");
              }
              init_buff_ex(90);
              BlastPrintVersionInfo("mgblast", html, outfp);
              fprintf(outfp, "\n");
              MegaBlastPrintReference(html, 90, outfp);
              fprintf(outfp, "\n");
              
              if(!PrintDbInformation(blast_database, !db_is_na, 70, outfp, html))
                 return 1;
              
              free_buff();
    
#ifdef OS_UNIX
              fprintf(global_fp, "%s", "Searching");
#endif
           }
    }
    
        aip = NULL;
        if (myargs[ARG_ASNOUT].strvalue != NULL) {
           if ((aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w")) == NULL) {
              ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", myargs[ARG_ASNOUT].strvalue);
              return 1;
           }
        }
        else if (align_view == 10 || align_view == 11)
        {
            const char* mode = (align_view == 10) ? "w" : "wb";
            if ((aip = AsnIoOpen (blast_outputfile, (char*) mode)) == NULL) {
                    ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
                    return 1;
            }
        }


        if (align_view == 7) {
           xml_aip = AsnIoOpen(blast_outputfile, "wx");
        }

        if (myargs[ARG_QUERYLOC].strvalue) {       
            Int4 start, end;
            Megablast_GetLoc(myargs[ARG_QUERYLOC].strvalue, &start, &end);
            options->required_start = start - 1;
            options->required_end = end -1;
        }

    done = FALSE;
    while (!done) {
       num_bsps = 0;
       total_length = 0;
       done = TRUE;
       SeqMgrHoldIndexing(TRUE);
       mask_slp = last_mask = NULL;
   
       while ((sepp[num_bsps]=FastaToSeqEntryForDb(infp, query_is_na, NULL,
                               believe_query, prefix, &ctr, 
                               &mask_slp)) != NULL) {
              if (!lcase_masking) /* Lower case ignored */
                 mask_slp = SeqLocFree(mask_slp);
         if (mask_slp) {
           if (!last_mask)
              options->query_lcase_mask = last_mask = mask_slp;
           else {
              last_mask->next = mask_slp;
              last_mask = last_mask->next;
              }
           mask_slp = NULL;
           }
          query_bsp = NULL;
         SeqEntryExplore(sepp[num_bsps], &query_bsp, FindNuc);
         //debug:
         /*
         char query_buffer[255];
         SeqIdWrite(query_bsp->id, query_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
         fprintf(stderr, "===> query_buf=%s\n", query_buffer);
         */
         if (query_bsp == NULL) {
           ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
           return 2;
          }
          
          source = BioSourceNew();
          source->org = OrgRefNew();
          source->org->orgname = OrgNameNew();
          source->org->orgname->gcode = options->genetic_code;
          ValNodeAddPointer(&(query_bsp->descr), Seq_descr_source, source);
          
          query_bsp_array[num_bsps++] = query_bsp;
          
          total_length += query_bsp->length;
          if (total_length > myargs[ARG_MAXQUERY].intvalue || 
          num_bsps >= MAX_NUM_QUERIES) {
         done = FALSE;
         break;
          }
       }

           if (num_bsps == 0)
               break;

       SeqMgrHoldIndexing(FALSE);
       other_returns = NULL;
       error_returns = NULL;
       
       if (out_type==MBLAST_ENDPOINTS) 
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                             blast_database, options,
                             &other_returns, &error_returns,
                             dummy_callback, NULL, NULL, 0, 
                             MegaBlastPrintEndpoints);
       else if (out_type==MBLAST_SEGMENTS) 
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                             blast_database, options,
                             &other_returns, &error_returns,
                             dummy_callback, NULL, NULL, 0,
                             MegaBlastPrintSegments);
       else if (out_type==MBLAST_ALIGN_INFO) {
              /* -- Geo mod: do not print header
              PrintTabularOutputHeader(blast_database, 
                                       (num_bsps==1) ? query_bsp_array[0] : NULL,
                                       NULL, "megablast", 0, believe_query,
                                       global_fp);*/
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                             blast_database, options,
                             &other_returns, &error_returns,
                             dummy_callback, NULL, NULL, 0,
                                MegaBlastPrintAlignInfo);
       } else if (out_type==MBLAST_ALIGNMENTS) {
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                  blast_database, options, &other_returns, 
                                  &error_returns, align_view < 7 ? tick_callback : NULL,
                                  NULL, NULL, 0, NULL);
          }
       
#ifdef OS_UNIX
       fflush(global_fp);
#endif

       if (error_returns) {
             BlastErrorPrint(error_returns);
              for (vnp = error_returns; vnp; vnp = vnp->next) {
                 BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
              }
              ValNodeFree(error_returns);
           }
              
              
       if (traditional_formatting) {
          dbinfo = NULL;
          ka_params = NULL;
          ka_params_gap = NULL;
          params_buffer = NULL;
          mask_loc = NULL;
          matrix = NULL;
          for (vnp=other_returns; vnp; vnp = vnp->next) {
           switch (vnp->choice) {
           case TXDBINFO:
              dbinfo = vnp->data.ptrvalue;
              break;
           case TXKABLK_NOGAP:
              ka_params = vnp->data.ptrvalue;
              break;
           case TXKABLK_GAP:
              ka_params_gap = vnp->data.ptrvalue;
              break;
           case TXPARAMETERS:
              params_buffer = vnp->data.ptrvalue;
              break;
           case TXMATRIX:
              matrix = vnp->data.ptrvalue;
              break;
           case SEQLOC_MASKING_NOTSET:
           case SEQLOC_MASKING_PLUS1:
           case SEQLOC_MASKING_PLUS2:
           case SEQLOC_MASKING_PLUS3:
           case SEQLOC_MASKING_MINUS1:
           case SEQLOC_MASKING_MINUS2:
           case SEQLOC_MASKING_MINUS3:
              ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
              break;
           default:
              break;
           }
          }    
          
#ifdef OS_UNIX
        if(align_view < 7) {
                 fprintf(global_fp, "%s\n", " done");
              }
#endif
          
        if (myargs[ARG_MASKEDQUERY].strvalue) {
                 if ((mqfp = FileOpen(myargs[ARG_MASKEDQUERY].strvalue, "w")) == NULL)
                    ErrPostEx(SEV_WARNING, 1, 0, "Unable to open file %s for masked query\n",
                              myargs[ARG_MASKEDQUERY].strvalue);
              }

        hits_found = FALSE;

        mask_loc_start = next_mask_loc = mask_loc;
        mask_loc = NULL;

        if (align_view == 7) {
           mbxp = PSIXmlInit(xml_aip, "megablast", blast_database, 
                             options, query_bsp_array[0], 0);
           }

        if (seqalign_array) { //results returned back for processing
             ReadDBBioseqFetchEnable ("megablast", blast_database, db_is_na, TRUE);
             for (index=0; index<num_bsps; index++) {
                    seqalign = seqalign_array[index];
                    if (next_mask_loc && 
                        SeqIdComp(SeqLocId((SeqLocPtr)next_mask_loc->data.ptrvalue), 
                                  query_bsp_array[index]->id) == SIC_YES) {
                       mask_loc = (SeqLocPtr) 
                       MemDup(next_mask_loc, sizeof(SeqLoc));
                       next_mask_loc = next_mask_loc->next;
                       mask_loc->next = NULL;
                    }
                    if (mqfp) {
                       /* convert mask locations from all sources into
                          a single seqloc */
                       mask_slp = NULL;
                       if (mask_loc) 
                          mask_slp = blastMergeFilterLocs(mask_slp, 
                              (SeqLocPtr)mask_loc->data.ptrvalue,
                              FALSE, 0, 0);
                       PrintMaskedSequence(query_bsp_array[index], mask_slp,
                                           mqfp, 50, lcase_masking);
                       SeqLocSetFree(mask_slp);
                       }
                    if (seqalign==NULL) {
                       mask_loc = MemFree(mask_loc);
                       continue;
                    }
                    hits_found = TRUE;
                    if (align_view < 7) {
                       init_buff_ex(70);
                       AcknowledgeBlastQuery(query_bsp_array[index], 70, outfp, 
                                             believe_query, html);
                       free_buff();
                       }
                    if (align_view == 8 || align_view == 9) {
                       if (align_view == 9)
                          PrintTabularOutputHeader(blast_database, 
                             query_bsp_array[index], NULL, blast_program, 0,
                             believe_query, global_fp);
                       /* debug:
                       char qbuf[512];
                       strcpy(qbuf, BioseqGetTitle(query_bsp_array[index]));
                       fprintf(stderr, "---> Here: query title=%s\n", qbuf);
                       */
                       BlastPrintTabulatedResults(seqalign, 
                           query_bsp_array[index], NULL, number_of_alignments,
                            blast_program, !options->gapped_calculation, 
                            believe_query, 0, 0, 
                            global_fp, (align_view == 9));
                            

                       ObjMgrFreeCache(0);

                       SeqAlignSetFree(seqalign);
                       mask_loc = MemFree(mask_loc);
                       continue;
                    } 
                       //Geo mod:   
                   else if (align_view>=12)  {
                        MGBlastPrintTab(seqalign, 
                            query_bsp_array[index], number_of_alignments,
                            !options->gapped_calculation, 
                            global_fp);
                        ObjMgrFreeCache(0);

                        SeqAlignSetFree(seqalign);
                        mask_loc = MemFree(mask_loc);
                        continue;
                        }
                    else if(align_view == 7) {
                       IterationPtr iterp;

                       iterp = BXMLBuildOneQueryIteration(seqalign, 
                                  NULL, FALSE, 
                                  !options->gapped_calculation, index, 
                                  NULL, query_bsp_array[index], mask_loc);
                       IterationAsnWrite(iterp, mbxp->aip, mbxp->atp);
                       AsnIoFlush(mbxp->aip);
                       IterationFree(iterp);
                       SeqAlignSetFree(seqalign);
                       mask_loc = MemFree(mask_loc);
                       continue;
                    }
                    seqannot = SeqAnnotNew();
                    seqannot->type = 2;
                    AddAlignInfoToSeqAnnot(seqannot, align_type);
                    seqannot->data = seqalign;
                    if (aip) {
                       SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
                       AsnIoReset(aip);
                    }
                    if (outfp) { /* Uncacheing causes problems with ordinal nos. vs. gi's. */
                       prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_descriptions, NULL);
                       ObjMgrSetHold();
                       init_buff_ex(85);
                       PrintDefLinesFromSeqAlign(prune->sap, 80,
                                                 outfp, print_options, FIRST_PASS, NULL);
                       free_buff();
                       
                       prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_alignments, prune);
                       seqannot->data = prune->sap;
                       if (align_view != 0)
                          ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL,
                                                 NULL, align_options, NULL, 
                                                 mask_loc, NULL);
                       else
                          ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, NULL, mask_loc, FormatScoreFunc);
                       seqannot->data = seqalign;
                       prune = BlastPruneSapStructDestruct(prune);
                       ObjMgrClearHold();
                       ObjMgrFreeCache(0);
                    }
                    seqannot = SeqAnnotFree(seqannot);
                    mask_loc = MemFree(mask_loc);
                 } /* End loop on seqaligns for different queries */
                 ReadDBBioseqFetchDisable();
              } 

              if (mbxp != NULL) {
                 MBXmlClose(mbxp, other_returns, !options->gapped_calculation);
              }

              if (mqfp)
                 FileClose(mqfp);

              if (!hits_found && align_view < 7)
                 fprintf(outfp, "\n\n ***** No hits found ******\n\n");

              matrix = BLAST_MatrixDestruct(matrix);
          
              if(html) 
                 fprintf(outfp, "<PRE>\n");
              init_buff_ex(85);
              dbinfo_head = dbinfo;
              if(align_view < 7) {
                 while (dbinfo) {
                    PrintDbReport(dbinfo, 70, outfp);
                    dbinfo = dbinfo->next;
                 }
              }
              dbinfo_head = TxDfDbInfoDestruct(dbinfo_head);
              
              if (ka_params) {
                 if(align_view < 7)
                    PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
                 MemFree(ka_params);
              }
              if (ka_params_gap) {
                 if(align_view < 7)
                    PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
                 MemFree(ka_params_gap);
              }
              if(align_view < 7)
                 PrintTildeSepLines(params_buffer, 70, outfp);
              MemFree(params_buffer);
              free_buff();
              mask_loc = mask_loc_start;
              while (mask_loc) {
                 SeqLocSetFree(mask_loc->data.ptrvalue);
                 mask_loc = mask_loc->next;
              }
              ValNodeFree(mask_loc_start);
       } else { //not traditional formatting
          /* Just destruct all other_returns parts */
          for (vnp=other_returns; vnp; vnp = vnp->next) {
         switch (vnp->choice) {
         case TXDBINFO:
            TxDfDbInfoDestruct(vnp->data.ptrvalue);
            break;
         case TXKABLK_NOGAP:
         case TXKABLK_GAP:
         case TXPARAMETERS:
            MemFree(vnp->data.ptrvalue);
            break;
         case TXMATRIX:
            BLAST_MatrixDestruct(vnp->data.ptrvalue);
            break;
         case SEQLOC_MASKING_NOTSET:
         case SEQLOC_MASKING_PLUS1:
         case SEQLOC_MASKING_PLUS2:
         case SEQLOC_MASKING_PLUS3:
         case SEQLOC_MASKING_MINUS1:
         case SEQLOC_MASKING_MINUS2:
         case SEQLOC_MASKING_MINUS3:
                    mask_loc = vnp->data.ptrvalue;
                    SeqLocSetFree(mask_loc);
         default:
            break;
         }
          }
       }
       other_returns = ValNodeFree(other_returns);
       MemFree(seqalign_array);
           options->query_lcase_mask = 
              SeqLocSetFree(options->query_lcase_mask);

       /* Freeing SeqEntries can be very expensive, do this only if 
          this is not the last iteration of search */
       if (!done) { 
          for (index=0; index<num_bsps; index++) {
         sepp[index] = SeqEntryFree(sepp[index]);
         query_bsp_array[index] = NULL;
          }       
           }
           total_processed += num_bsps;
    } /* End of loop on complete searches */
        
        aip = AsnIoClose(aip);

        /*if (align_view == 7)
          xml_aip = AsnIoClose(xml_aip);*/

        if (align_view < 7 && html) 
           fprintf(outfp, "</PRE>\n</BODY>\n</HTML>\n");
        if (align_view < 7 && myargs[ARG_LOGINFO].intvalue)
           fprintf(outfp, "Mega BLAST run finished, processed %d queries\n",
                   total_processed);
    MemFree(query_bsp_array);
    MemFree(sepp);
    MemFree(qgaps_buf);
    MemFree(dbgaps_buf);
    options = BLASTOptionDelete(options);
    FileClose(infp);
        FileClose(outfp);
    
    return 0;
}
Esempio n. 19
0
/*
 * Main_old -- legacy two-sequence BLAST driver.
 *
 * Reads every setting from the global myargs[] table, obtains the query and
 * subject sequences through BL2SEQ_GetSequences(), runs
 * BlastTwoSequencesWithCallback() (or the ByLoc variant when a location
 * argument is given) and writes the result to the file named by ARG_OUT.
 *
 * Output modes:
 *   - ARG_FORMAT == 0 : traditional text alignment followed by the
 *                       Karlin-Altschul parameters and the parameter summary;
 *   - ARG_FORMAT != 0 : tabular output;
 *   - ARG_ASNOUT set  : additionally writes the SeqAnnot to that ASN.1 file.
 *
 * Returns 0 on success and 1 when the program name is invalid, the output
 * file cannot be opened, the sequences cannot be obtained or the SeqLocs
 * cannot be built.
 */
Int2 Main_old (void)
{
	AsnIoPtr aip = NULL;
	BioseqPtr fake_bsp = NULL, fake_subject_bsp = NULL, query_bsp = NULL,
		  subject_bsp = NULL;
	BioseqPtr bsp1, bsp2;
	BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
	BLAST_OptionsBlkPtr options=NULL;
	Boolean seq1_is_na, seq2_is_na;
	CharPtr params_buffer=NULL;
	DbtagPtr dbtagptr;
	Uint1 align_type;
	Uint4 align_options = 0;
	SeqAlignPtr seqalign;
	/* Only created when text or ASN.1 output is requested; must start NULL
	   so the tabular branch can tell whether it exists. */
	SeqAnnotPtr seqannot = NULL;
	SeqEntryPtr sep = NULL, sep1 = NULL;
	CharPtr program_name, blast_outputfile;
	FILE *outfp;
	ValNodePtr mask_loc, mask_loc_start, vnp, other_returns=NULL, error_returns=NULL;
	BLAST_MatrixPtr matrix;
	Int4Ptr PNTR txmatrix;
	int (LIBCALLBACK *handle_results)PROTO((VoidPtr search)) = NULL;
	Boolean entrez_lookup = FALSE;
	Boolean html, seqannot_output, believe_query;
	Uint1 tabular_output;
	Boolean gapped_calculation;

	entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
	html = (Boolean) myargs[ARG_HTML].intvalue;
	seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);

	blast_outputfile = myargs [ARG_OUT].strvalue;

	program_name = StringSave(myargs[ARG_PROGRAM].strvalue);
	if (StringCmp(program_name, "blastn") &&
	    StringCmp(program_name, "blastp") &&
	    StringCmp(program_name, "blastx") &&
	    StringCmp(program_name, "tblastn") &&
	    StringCmp(program_name, "tblastx")) {
		ErrPostEx(SEV_FATAL, 1, 0, "Program name must be blastn, blastp, blastx, tblastn or tblastx\n");
		MemFree(program_name);
		return (1);
	}

	align_type = BlastGetTypes(program_name, &seq1_is_na, &seq2_is_na);

	if ((outfp = FileOpen(blast_outputfile, "w")) == NULL)
	{
		ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
		MemFree(program_name);
		return (1);
	}

	gapped_calculation = (Boolean) myargs[ARG_GAPPED].intvalue;
	/* Real sequence ids are kept whenever they will be visible to the user:
	   in ASN.1 output or when the sequences came from an accession lookup. */
	believe_query = (seqannot_output || entrez_lookup);

	options = BLASTOptionNewEx(program_name, gapped_calculation,
				   (Boolean) myargs[ARG_USEMEGABLAST].intvalue);

	if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp,
				&sep, &sep1, &(options->query_lcase_mask),
				believe_query) == FALSE)
	{
		ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
		/* Release what has been acquired so far, mirroring the normal exit. */
		options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask);
		options = BLASTOptionDelete(options);
		MemFree(program_name);
		FileClose(outfp);
		return (1);
	}

	if (!entrez_lookup) {
		if (!believe_query)
			fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);

		/* Build a stand-in subject Bioseq that shares the real subject's
		   descriptors and sequence data but carries a BL_ORD_ID general id
		   whose tag string is the subject's title. */
		/* NOTE(review): fake_subject_bsp is never released; it aliases
		   subject_bsp's descr and seq_data, so freeing it would first
		   require detaching those pointers. */
		fake_subject_bsp = BioseqNew();
		fake_subject_bsp->descr = subject_bsp->descr;
		fake_subject_bsp->repr = subject_bsp->repr;
		fake_subject_bsp->mol = subject_bsp->mol;
		fake_subject_bsp->length = subject_bsp->length;
		fake_subject_bsp->seq_data = subject_bsp->seq_data;
		fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
		dbtagptr = DbtagNew();
		dbtagptr->db = StringSave("BL_ORD_ID");
		dbtagptr->tag = ObjectIdNew();

		if (BioseqGetTitle(subject_bsp) != NULL)
			dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
		else
			dbtagptr->tag->str = StringSave("No definition line found");

		ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
		bsp1 = (believe_query ? query_bsp : fake_bsp);
		bsp2 = fake_subject_bsp;
	} else {
		bsp1 = query_bsp;
		bsp2 = subject_bsp;
	}

	tabular_output = (Uint1) myargs[ARG_FORMAT].intvalue;

	/* Copy the command-line settings into the search options.  A value of
	   0 (or -1 for the gap costs) means "keep the default". */
	if (myargs[ARG_SEARCHSP].floatvalue)
		options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;

	options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
	options->expect_value  = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;

	if (StringICmp("blastn", program_name) == 0)
	{
		options->penalty = myargs[ARG_MISMATCH].intvalue;
		options->reward = myargs[ARG_MATCH].intvalue;
	}

	options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;

	options->discontinuous = FALSE;

	if (myargs[ARG_XDROP].intvalue != 0)
	{
		options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
	}
	if (myargs[ARG_WORDSIZE].intvalue != 0)
		options->wordsize = (Int2) myargs[ARG_WORDSIZE].intvalue;

	if (options->is_megablast_search) {
		options->cutoff_s2 = options->wordsize*options->reward;
	}
	options->matrix = MemFree(options->matrix);
	BLASTOptionSetGapParams(options, myargs[ARG_MATRIX].strvalue, 0, 0);

	if (myargs[ARG_GAPOPEN].intvalue != -1)
		options->gap_open = myargs[ARG_GAPOPEN].intvalue;
	if (myargs[ARG_GAPEXT].intvalue != -1)
		options->gap_extend = myargs[ARG_GAPEXT].intvalue;

	options->strand_option = myargs[ARG_STRAND].intvalue;

	/* Input longest intron length is in nucleotide scale; in the lower
	   level code it will be used in protein scale */
	if (myargs[ARG_INTRON].intvalue > 0)
		options->longest_intron = myargs[ARG_INTRON].intvalue;

	/* Run the search, on whole sequences or on the requested sub-locations. */
	if (!myargs[ARG_LOC1].strvalue && !myargs[ARG_LOC2].strvalue) {
		seqalign = BlastTwoSequencesWithCallback(bsp1, bsp2, program_name,
			options, &other_returns, &error_returns, handle_results);
	} else {
		SeqLocPtr slp1=NULL, slp2=NULL;
		if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, options->strand_option) == FALSE) {
			/* No search has run yet.  slp1/slp2 are left alone because
			   their state after a failed call is not visible here. */
			fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
			options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask);
			options = BLASTOptionDelete(options);
			MemFree(program_name);
			FileClose(outfp);
			return 1;
		}
		seqalign = BlastTwoSequencesByLocWithCallback(slp1, slp2, program_name, options, &other_returns, &error_returns, handle_results, NULL);
		SeqLocFree(slp1);
		SeqLocFree(slp2);
	}

	if (error_returns) {
		BlastErrorPrint(error_returns);
		for (vnp = error_returns; vnp; vnp = vnp->next) {
			BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
		}
		ValNodeFree(error_returns);
	}

	/* Pick the pieces this function reports or frees out of other_returns. */
	ka_params = NULL;
	ka_params_gap = NULL;
	params_buffer = NULL;
	mask_loc = NULL;
	matrix = NULL;
	txmatrix = NULL;
	for (vnp=other_returns; vnp; vnp = vnp->next) {
		switch (vnp->choice) {
		case TXKABLK_NOGAP:
			ka_params = vnp->data.ptrvalue;
			break;
		case TXKABLK_GAP:
			ka_params_gap = vnp->data.ptrvalue;
			break;
		case TXPARAMETERS:
			params_buffer = vnp->data.ptrvalue;
			break;
		case TXMATRIX:
			matrix = vnp->data.ptrvalue;
			if (matrix && !tabular_output)
				txmatrix = BlastMatrixToTxMatrix(matrix);
			break;
		case SEQLOC_MASKING_NOTSET:
		case SEQLOC_MASKING_PLUS1:
		case SEQLOC_MASKING_PLUS2:
		case SEQLOC_MASKING_PLUS3:
		case SEQLOC_MASKING_MINUS1:
		case SEQLOC_MASKING_MINUS2:
		case SEQLOC_MASKING_MINUS3:
			ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
			break;
		default:
			break;
		}
	}

	if (!tabular_output || seqannot_output) {
		align_options = 0;
		align_options += TXALIGN_MATRIX_VAL;
		align_options += TXALIGN_SHOW_QS;
		align_options += TXALIGN_COMPRESS;
		align_options += TXALIGN_END_NUM;
		if (StringICmp("blastx", program_name) == 0) {
			align_options += TXALIGN_BLASTX_SPECIAL;
		}

		if (html)
			align_options += TXALIGN_HTML;

		/* Wrap the alignment in a SeqAnnot for display and/or ASN.1 output. */
		seqannot = SeqAnnotNew();
		seqannot->type = 2;
		AddAlignInfoToSeqAnnot(seqannot, align_type);
		seqannot->data = seqalign;
		aip = NULL;
		if (seqannot_output)
			aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w");

		if (aip && seqannot) {
			SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
			AsnIoReset(aip);
			aip = AsnIoClose(aip);
		}
	}

	if (!tabular_output) {
		AcknowledgeBlastQuery(query_bsp, 70, outfp, believe_query, html);
		ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, txmatrix, mask_loc, FormatScoreFunc);

		/* The alignment is still attached as seqannot->data here and is
		   not freed separately on this path. */
		seqannot = SeqAnnotFree(seqannot);
		if (txmatrix)
			txmatrix = TxMatrixDestruct(txmatrix);
		init_buff_ex(85);

		if (ka_params) {
			PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
		}

		if (ka_params_gap) {
			PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
		}

		PrintTildeSepLines(params_buffer, 70, outfp);
		free_buff();
	} else {
		PrintTabularOutputHeader(NULL, query_bsp, NULL,
			program_name, 0, believe_query, outfp);

		BlastPrintTabulatedResults(seqalign, query_bsp, NULL,
			1, program_name, !gapped_calculation,
			believe_query, 0, 0, outfp, FALSE);
		SeqAlignSetFree(seqalign);
		if (seqannot != NULL) {
			/* Tabular + ASN.1 output: the wrapper was created above and
			   still points at the alignment just freed.  Detach it before
			   releasing the wrapper so nothing is freed twice or leaked. */
			seqannot->data = NULL;
			seqannot = SeqAnnotFree(seqannot);
		}
	}

	matrix = BLAST_MatrixDestruct(matrix);
	MemFree(ka_params);
	MemFree(ka_params_gap);
	MemFree(params_buffer);

	/* Free the SeqLocs held by the mask list, then the list nodes themselves. */
	mask_loc_start = mask_loc;
	while (mask_loc) {
		SeqLocSetFree(mask_loc->data.ptrvalue);
		mask_loc = mask_loc->next;
	}
	ValNodeFree(mask_loc_start);

	fake_bsp = BlastDeleteFakeBioseq(fake_bsp);

	other_returns = ValNodeFree(other_returns);
	options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask);
	options = BLASTOptionDelete(options);
	MemFree(program_name);
	FileClose(outfp);

	/* Accession lookups are released as bare Bioseqs, file input through
	   the SeqEntry pointers. */
	if (entrez_lookup) {
		BioseqFree(query_bsp);
		BioseqFree(subject_bsp);
	} else {
		SeqEntryFree(sep);
		SeqEntryFree(sep1);
	}
	return 0;
}
Esempio n. 20
0
/*
 * readASNSeq -- pull one raw sequence out of an ASN.1 file.
 *
 * whichEntry : index of the wanted sequence, passed through to SeqEntryToRaw
 * filename   : ASN.1 input file (opened in text mode)
 * skiplines  : number of leading text lines to discard before parsing
 * format     : kASNseqentry to read a single Seq-entry, anything else to
 *              walk the Seq-entry elements of a Bioseq-set
 * seqlen, nseq, seqid : outputs, filled in by SeqEntryToRaw
 * error      : set to 0 on the normal exit, eASNerr on the error exit
 *
 * Returns the sequence buffer produced by SeqEntryToRaw (may be NULL when
 * nothing was extracted), or NULL after a setup/open failure.
 */
char *readASNSeq(const short whichEntry, const char *filename,
                const long skiplines,
                const short format,     /* note: this is kASNseqentry or kASNseqset */
                long *seqlen, short *nseq,
                short *error, char **seqid )
{
  AsnIoPtr aip = NULL;
  SeqEntryPtr the_set;
  AsnTypePtr atp, atp2;
  AsnModulePtr amp;
  Boolean inIsBinary= FALSE; /* the asn routines offer no way to test this */
  char  *seq;
  long  i;   /* same width as skiplines */
  int   c;

  *seqlen= 0;
  *nseq= 0;
  *error= 0;
  seq= NULL;

    /* asn dictionary setups */
  if (! SeqEntryLoad()) goto errxit; /*  sequence alphabets (and sequence parse trees) */
  amp = AsnAllModPtr();   /* get pointer to all loaded ASN.1 modules */
  if (amp == NULL) goto errxit;
  atp = AsnFind("Bioseq-set");    /* get the initial type pointers */
  if (atp == NULL) goto errxit;
  atp2 = AsnFind("Bioseq-set.seq-set.E");
  if (atp2 == NULL) goto errxit;

      /* open the ASN.1 input file in the right mode */
  if ((aip = AsnIoOpen(filename, inIsBinary?"rb":"r")) == NULL) goto errxit;

  /* Discard the leading lines character by character, so a line of any
     length counts exactly once and no fixed-size buffer can be overrun.
     Reading the FILE behind the AsnIo directly may interfere with the
     asn routines. */
  for (i=0; i<skiplines; i++) {
    do {
      c = fgetc(aip->fp);
    } while (c != EOF && c != '\n');
    if (c == EOF) break;            /* fewer lines than requested */
  }

  if (! ErrSetLog ("stderr"))  goto errxit;
  else ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON);    /*??  log errors instead of die */

  if (format == kASNseqentry) {  /* read one Seq-entry */
    the_set = SeqEntryAsnRead(aip, NULL);
    SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen);
    SeqEntryFree(the_set);
    goto goodexit;
    }

  else   {                   /* read Seq-entry's from a Bioseq-set */
    while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
      if (atp == atp2)  {  /* top level Seq-entry */
        the_set = SeqEntryAsnRead(aip, atp);
        SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen);
        SeqEntryFree(the_set);
        /* stop as soon as the running sequence count reaches the request */
        if (*nseq >= whichEntry) goto goodexit;
        }
      else
        AsnReadVal(aip, atp, NULL);   /* not a Seq-entry: consume and ignore */
      }
    }

goodexit:
  AsnIoClose(aip);
  *error= 0;
  return seq;

errxit:
  /* aip is still NULL when a failure happened before the file was opened */
  if (aip != NULL) AsnIoClose(aip);
  *error= eASNerr;
  free(seq);
  return NULL;
}
Esempio n. 21
0
/*
 * Main -- asn2fast entry point.
 *
 * Reads ASN.1 input (a single Seq-entry, a stream of Seq-submits, or the
 * Seq-entry elements of a Bioseq-set, selected by myargs[1] and myargs[11])
 * and writes, as requested by the command line, protein FASTA, nucleotide
 * FASTA and/or quality scores to separate output files.
 *
 * Returns 0 on success, 1 when argument parsing, parse-tree loading, type
 * lookup or opening a file fails.
 */
Int2 Main(void)
{
	AsnIoPtr aip;
	FILE * aa = NULL, * na = NULL, * ql = NULL;
	SeqEntryPtr sep;
	SeqSubmitPtr ssp;
	AsnTypePtr atp, atp2;
	AsnModulePtr amp;
	Uint1 group_segs = 0;
	Boolean limit_to_genbank,
		make_dna,
		make_protein,
		make_quality,
		far_quality,
		do_it;


					/* check command line arguments */

	if ( ! GetArgs("asn2fast",NUMARG, myargs))
		return 1;

					/* load the sequence alphabets  */
					/* (and sequence parse trees)   */
	if (! SeqEntryLoad())
	{
		ErrShow();
		return 1;
	}
				    /* get pointer to all loaded ASN.1 modules */
	amp = AsnAllModPtr();
	if (amp == NULL)
	{
		ErrShow();
		return 1;
	}

	if (myargs[11].intvalue) {
		/* Seq-submit input: the element searched for and the starting
		   type are the same "Seq-submit" type.  Each failure returns so
		   that nothing below runs with a missing parse tree or type. */
		if (! SubmitAsnLoad())
		{
			Message(MSG_FATAL, "Unable to load parse trees.");
			return 1;
		}

		atp2 = AsnFind("Seq-submit");
		if (atp2 == NULL)
		{
			Message(MSG_FATAL, "Unable to find Seq-submit");
			return 1;
		}
		atp = AsnFind("Seq-submit");
		if (atp == NULL)
		{
			Message(MSG_FATAL, "Unable to find Seq-submit");
			return 1;
		}

	} else {
		atp = AsnFind("Bioseq-set"); /* get the initial type pointers */
		if (atp == NULL)
		{
			ErrShow();
			return 1;
		}

		atp2 = AsnFind("Bioseq-set.seq-set.E");
		if (atp2 == NULL)
		{
			ErrShow();
			return 1;
		}
	}

	make_protein = (Boolean)(myargs[7].intvalue);
	make_dna = (Boolean)(myargs[8].intvalue);
	make_quality = (Boolean)(myargs[12].intvalue);
	far_quality = (Boolean)(myargs[14].intvalue);

					/* open the ASN.1 input file in the right mode */

	if ((aip = AsnIoOpen (myargs[0].strvalue, myargs[2].intvalue?"rb":"r"))
          == NULL)
	{
		ErrShow();
		return 1;
	}

				  				/* open the output files */

	if ((myargs[3].strvalue != NULL) && (make_protein))
	{
		if ( (aa = FileOpen (myargs[3].strvalue, "w")) == NULL)
		{
			ErrShow();
			return 1;
		}
	}

	if ((myargs[4].strvalue != NULL) && (make_dna))
	{
		if ( (na = FileOpen (myargs[4].strvalue, "w")) == NULL)
		{
			ErrShow();
			return 1;
		}
	}

	if ((myargs[13].strvalue != NULL) && (make_quality))
	{
		if ( (ql = FileOpen (myargs[13].strvalue, "w")) == NULL)
		{
			ErrShow();
			return 1;
		}
	}

                                /* log errors instead of die */
	if (myargs[5].strvalue != NULL)
	{
		if (! ErrSetLog (myargs[5].strvalue))
			ErrShow();
		else
			ErrSetOpts (ERR_CONTINUE, ERR_LOG_ON);
	}

	if (myargs[6].intvalue)  /* combine segmented seqs */
	{
		group_segs = 1;
		if (myargs[10].intvalue)
			group_segs = 3;       /* and instantiate virtuals */
	}

	limit_to_genbank = (Boolean)(myargs[9].intvalue);

	if (myargs [15].intvalue) {
		ID1BioseqFetchEnable ("asn2fast", FALSE);
	}
	if (myargs [16].intvalue) {
		LocalSeqFetchInit (FALSE);
	}

	if ( myargs[1].intvalue)   /* read one Seq-entry */
	{

		sep = SeqEntryAsnRead(aip, NULL);
		do_it = TRUE;
		if (limit_to_genbank)
			do_it = CheckIsGenBank(sep);
		if (do_it)
		{
			if (make_protein)
				SeqEntrysToFasta(sep, aa, FALSE, group_segs);
			if (make_dna)
				SeqEntrysToFasta(sep, na, TRUE, group_segs);
			if (make_quality) {
				if (far_quality) {
					SeqEntryExplore (sep, (Pointer) ql, PrintFarQualScores);
				} else {
					SeqEntryExplore (sep, (Pointer) ql, PrintQualScores);
				}
			}
		}
		SeqEntryFree(sep);
	}
	else if ( myargs[11].intvalue)   /* read Seq-submit's */
	{
		while ((atp = AsnReadId(aip, amp, atp)) != NULL)
		{
			if (atp == atp2)    /* top level Seq-submit */
			{
				ssp = SeqSubmitAsnRead(aip, atp);
				if (ssp == NULL)
				{
					/* A failed read must not be dereferenced; report
					   it and stop consuming the stream. */
					ErrShow();
					break;
				}
				/* only datatype 1 is treated as carrying a Seq-entry */
				if (ssp->datatype == 1)
				{
					sep = (SeqEntryPtr) ssp->data;
					do_it = TRUE;
					if (limit_to_genbank)
						do_it = CheckIsGenBank(sep);
					if (do_it)
					{
						if (make_protein)
							SeqEntrysToFasta(sep, aa, FALSE, group_segs);
						if (make_dna)
							SeqEntrysToFasta(sep, na, TRUE, group_segs);
						if (make_quality) {
							if (far_quality) {
								SeqEntryExplore (sep, (Pointer) ql, PrintFarQualScores);
							} else {
								SeqEntryExplore (sep, (Pointer) ql, PrintQualScores);
							}
						}
					}
				}
				/* sep is part of ssp and is released with it */
				SeqSubmitFree(ssp);
			}
			else
			{
				AsnReadVal(aip, atp, NULL);
			}
		}
	}
	else                      /* read Seq-entry's from a Bioseq-set */
	{
		while ((atp = AsnReadId(aip, amp, atp)) != NULL)
		{
			if (atp == atp2)    /* top level Seq-entry */
			{
				sep = SeqEntryAsnRead(aip, atp);
				do_it = TRUE;
				if (limit_to_genbank)
					do_it = CheckIsGenBank(sep);
				if (do_it)
				{
					if (make_protein)
						SeqEntrysToFasta(sep, aa, FALSE, group_segs);
					if (make_dna)
						SeqEntrysToFasta(sep, na, TRUE, group_segs);
					if (make_quality) {
						if (far_quality) {
							SeqEntryExplore (sep, (Pointer) ql, PrintFarQualScores);
						} else {
							SeqEntryExplore (sep, (Pointer) ql, PrintQualScores);
						}
					}
				}
				SeqEntryFree(sep);
			}
			else
			{
				AsnReadVal(aip, atp, NULL);
			}
		}
	}

	AsnIoClose(aip);
	/* Close only what was actually opened: a make_* flag can be set while
	   the matching file name is absent, leaving the handle NULL. */
	if (aa != NULL)
		FileClose(aa);
	if (na != NULL)
		FileClose(na);
	if (ql != NULL)
		FileClose (ql);

	if (myargs [16].intvalue) {
		LocalSeqFetchDisable ();
	}
	if (myargs [15].intvalue) {
		ID1BioseqFetchDisable ();
	}

	return(0);
}
Esempio n. 22
0
/*
 * Main -- SeqTest demo program.
 *
 * Reads one Seq-entry from the text ASN.1 file named by myargs[0] and writes
 * a report to the file named by myargs[1]:
 *   1. a per-Bioseq summary (title, length, gaps, segments);
 *   2. the first Bioseq read residue by residue on the plus strand;
 *   3. the first Bioseq read in blocks on the minus strand;
 *   4. all nucleic acids, then all proteins, in FASTA format.
 *
 * Returns 0 on success, 1 when arguments, loading, file opening, reading or
 * allocation fail.
 */
Int2 Main(void)
{
	AsnIoPtr       aip;
	SeqEntryPtr    sep;
	BioseqPtr PNTR seqlist;
	Int4           seqnum, i, numseg, lens[10], j;
	Int2           ctr;
	SeqPortPtr     spp;
	Uint1          residue;
	FILE*          fp;
	CharPtr        title;
	Char           buffer[101];
	MonitorPtr     mon;

						/* check command line arguments */

	if ( ! GetArgs("SeqTest",NUMARG, myargs))
		return 1;

	mon = MonitorStrNew("SeqTest", 40);
	SetProgMon(StdProgMon, (Pointer)mon);

	/*
	** Load SeqEntry object loader and sequence alphabets
	*/

	if (! SeqEntryLoad()) {
		Message(MSG_ERROR, "SeqEntryLoad failed");
		return 1;
	}

	/*
	** Open the input file as the ASN I/O stream.  It is read in
	** ASN.1 Print Value (text) format.
	*/

	if ((aip = AsnIoOpen(myargs[0].strvalue, "r")) == NULL)
		return 1;

	/*
	** Open the output file; nothing can be reported without it.
	*/

	fp = FileOpen(myargs[1].strvalue, "w");
	if (fp == NULL) {
		Message(MSG_ERROR, "unable to open output file");
		AsnIoClose(aip);
		return 1;
	}
	fprintf(fp, "Sequence summary:\n\n");

	/*
	** Read in the whole entry into the Sequence Entry Pointer, sep.
	** Close the ASN stream, which in turn closes the input file.
	*/

	sep = SeqEntryAsnRead(aip, NULL);
	aip = AsnIoClose(aip);

	mon = MonitorFree(mon);
	SetProgMon(NULL, NULL);

	if (sep == NULL) {
		Message(MSG_ERROR, "SeqEntryAsnRead failed");
		FileClose(fp);
		return 1;
	}

	/*
	** Determine how many Bioseqs are in this SeqEntry. Allocate
	** enough memory to hold a list of pointers to all of these
	** Bioseqs.  Invoke an Explore function to "visit" each Bioseq.
	** We are allowed to pass one pointer for use by the exploring
	** function, in this case, "BuildList".
	*/

	seqnum = BioseqCount(sep);
	if (seqnum < 1) {
		/* everything below indexes seqlist[0] */
		Message(MSG_ERROR, "no Bioseqs found");
		FileClose(fp);
		SeqEntryFree(sep);
		return 1;
	}
	seqlist = MemNew((size_t)(seqnum * sizeof(BioseqPtr)));
	if (seqlist == NULL) {
		Message(MSG_ERROR, "out of memory");
		FileClose(fp);
		SeqEntryFree(sep);
		return 1;
	}
	BioseqExplore(sep, (Pointer) seqlist, BuildList);

	/*
	** For each Bioseq in the SeqEntry write out its title,
	** len, number of gaps, and number of segments. Write out
	** the length of each segment, up to 10.
	*/

	for(i = 0; i < seqnum; i++) {
		numseg = BioseqCountSegs(seqlist[i]);
		title = BioseqGetTitle(seqlist[i]);
		if (title != NULL)             /* a Bioseq may carry no title */
			FilePuts((VoidPtr)title, fp);
		FilePuts("\n", fp);
		/* %ld needs a long; Int4 is not guaranteed to be one */
		fprintf(fp, "len=%ld gaps=%ld segs=%ld\n",
			(long) BioseqGetLen(seqlist[i]),
			(long) BioseqGetGaps(seqlist[i]), (long) numseg);
		if ((numseg > 1) && (numseg <= 10)) {
			BioseqGetSegLens (seqlist[i], lens);
			for (j = 0; j < numseg; j++)
				fprintf(fp, "  len = %ld\n", (long) lens[j]);
		}
		FilePuts("\n", fp);
	}

	/*
	** Plus strand, one residue at a time, 60 residues per output line.
	*/

	spp = SeqPortNew(seqlist[0], 0, -1, 0, Seq_code_iupacna);
	if (spp == NULL)
		Message(MSG_ERROR, "fail on SeqPortNew");

	fprintf(fp, "SeqPort: plus strand with SeqPortGetResidue\n\n");

	if (spp != NULL) {
		i = 0;
		while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
			if (! IS_residue(residue)) {
				/* flush the pending line, then name the control value */
				buffer[i] = '\0';
				fprintf(fp, "%s\n", buffer);
				i = 0;
				switch (residue)
				{
					case SEQPORT_VIRT:
						fprintf(fp, "[Gap]\n");
						break;
					case SEQPORT_EOS:
						fprintf(fp, "[EOS]\n");
						break;
					default:
						fprintf(fp, "[Invalid Residue]\n");
						break;
				}

			}
			else {
				buffer[i] = residue;
				i++;
				if (i == 60) {
					buffer[i] = '\0';
					fprintf(fp, "%s\n", buffer);
					i = 0;
				}
			}
		}

		if (i) {
			buffer[i] = '\0';
			fprintf(fp, "%s\n", buffer);
		}

		SeqPortFree(spp);
	}

	fprintf(fp, "[EOF]\n");

	/*
	** Minus strand, read in blocks of up to 60 residues.  A non-positive
	** return from SeqPortRead is negated and reported as a control value.
	*/

	fprintf(fp, "\nSeqPort on minus with SeqPortRead\n\n");
	spp = SeqPortNew(seqlist[0], 0, -1, Seq_strand_minus, Seq_code_iupacna);

	if (spp == NULL)
		Message(MSG_ERROR, "fail on SeqPortNew");

	if (spp != NULL) {
		do {
			ctr = SeqPortRead(spp, (Uint1Ptr)buffer, 60);
			if (ctr > 0) {
				buffer[ctr] = '\0';
				fprintf(fp, "%s\n", buffer);
			} else {
				ctr *= -1;
				switch (ctr)
				{
					case SEQPORT_VIRT:
						fprintf(fp, "[Gap]\n");
						break;
					case SEQPORT_EOS:
						fprintf(fp, "[EOS]\n");
						break;
					case SEQPORT_EOF:
						fprintf(fp, "[EOF]\n");
						break;
					default:
						fprintf(fp, "[Invalid Residue]\n");
						break;
				}
			}
		} while (ctr != SEQPORT_EOF);

		SeqPortFree(spp);
	}

	/*
	** Write out the nucleic acid sequences in this SeqEntry
	*/

	fprintf(fp, "\nNucleic Acids in FASTA format:\n\n");
	SeqEntryToFasta(sep, fp, TRUE);

	/*
	** Write out the protein sequences in this SeqEntry.
	*/

	fprintf(fp, "\nProteins in FASTA format:\n\n");
	SeqEntryToFasta(sep, fp, FALSE);

	/*
	** Close the output file (with the call that pairs with FileOpen)
	** and free up allocated space.
	*/

	FileClose(fp);
	MemFree(seqlist);
	SeqEntryFree(sep);

	return 0;
}