Exemplo n.º 1
0
/** Produces an identifier string for a Seq-id and returns it via buffer_ptr.
 *
 * *buffer_ptr is reset to NULL on entry and stays NULL when sip is NULL or
 * when no identifier text could be produced. Otherwise it receives a heap
 * pointer. NOTE(review): ownership presumably passes to the caller - confirm.
 *
 * @param sip Seq-id to describe [in]
 * @param buffer_ptr Receives the resulting string, or NULL [out]
 * @param ncbi_gi Request the gi-based form of the id [in]
 * @param accession_only Request only the accession.version text [in]
 * @param search_for_id When FALSE no id text is generated from sip and only
 *                      the Bioseq-title fallback is attempted [in]
 */
void 
Blast_SeqIdGetDefLine(SeqId* sip, char** buffer_ptr, Boolean ncbi_gi, 
                      Boolean accession_only, Boolean search_for_id)
{
   char* seqid_buffer = NULL;
   Int4 gi = 0;
   Boolean numeric_id_type = FALSE;

   *buffer_ptr = NULL;

   if (sip == NULL)
      return;

   /* Check for ad hoc ID's generated by formatdb if the user does not provide 
      any; such ids (general, db "BL_ORD_ID") are not printed directly. */
   if (search_for_id && (sip->choice != SEQID_GENERAL ||
       StringCmp(((Dbtag*)sip->data.ptrvalue)->db, "BL_ORD_ID")))  
   {
      if ((!accession_only && !ncbi_gi) || sip->choice == SEQID_LOCAL) {
         seqid_buffer = (char*) malloc(BUFFER_LENGTH + 1);
         /* On allocation failure fall through to the title fallback. */
         if (seqid_buffer != NULL)
            SeqIdWrite(sip, seqid_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else if (accession_only) {
         seqid_buffer = (char*) malloc(BUFFER_LENGTH + 1);
         if (seqid_buffer != NULL)
            SeqIdWrite(SeqIdFindBestAccession(sip), seqid_buffer, 
                       PRINTID_TEXTID_ACC_VER, BUFFER_LENGTH);
      } else {
         /* Only reachable with ncbi_gi set: the first test already covers
            the case where neither flag is given. */
         numeric_id_type = 
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI), 
                                  &gi, &seqid_buffer);
      }
   }

   if (numeric_id_type && gi > 0) {
      /* NOTE(review): if GetAccessionFromSeqId also filled seqid_buffer for
         a numeric id, that pointer is overwritten here - confirm it leaves
         the buffer NULL in this case. */
      char* gi_buffer = (char*) malloc(16);
      if (gi_buffer != NULL) {
         sprintf(gi_buffer, "%ld", (long) gi);
         seqid_buffer = gi_buffer;
      }
   }   
   if (!seqid_buffer) {
      /* If it's still NULL make a last ditch effort to get info. */
      char* title = NULL;
      Bioseq* bsp = BioseqLockById(sip);
      if (bsp) {
         char* bsp_title = BioseqGetTitle(bsp);
         if (bsp_title != NULL)
            title = strdup(bsp_title);
         else
            title = strdup("No definition line found");
      }
      BioseqUnlock(bsp);
      
      if (title) /* Use first token as id. */
         seqid_buffer = StringTokMT(title, " \t\n\r", &title);  
   }
   *buffer_ptr = seqid_buffer;

}
Exemplo n.º 2
0
/* Builds one list of values per source descriptor found on bsp.
 * Each row starts with the accession-style id text, optionally followed by
 * the gi text (when want_gi is set), followed by whatever
 * CollectBioSourceValues returns for that descriptor and field_list.
 * Returns a ValNode list whose nodes point at the rows, or NULL when bsp is
 * NULL or no source descriptor is found.
 */
static ValNodePtr CollectBioseqLineValues (BioseqPtr bsp, ValNodePtr field_list, Boolean want_gi)
{
  SeqMgrDescContext desc_ctx;
  SeqDescrPtr       src_desc;
  SeqIdPtr          cur, gi_id = NULL, acc_id = NULL;
  ValNodePtr        rows = NULL, row;
  Char              acc_text[255], gi_text[255];

  if (bsp == NULL) {
    return NULL;
  }

  /* A GenBank id always wins; EMBL/SwissProt/DDBJ/PIR are taken only while
     no accession-style id has been chosen yet. The last gi seen is kept. */
  for (cur = bsp->id; cur != NULL; cur = cur->next) {
    if (cur->choice == SEQID_GENBANK) {
      acc_id = cur;
    } else if (acc_id == NULL
               && (cur->choice == SEQID_EMBL
                   || cur->choice == SEQID_SWISSPROT
                   || cur->choice == SEQID_DDBJ
                   || cur->choice == SEQID_PIR)) {
      acc_id = cur;
    } else if (cur->choice == SEQID_GI) {
      gi_id = cur;
    }
  }

  /* First column: chosen accession, else empty if only a gi exists, else
     the best available id. */
  if (acc_id != NULL) {
    SeqIdWrite (acc_id, acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
  } else if (gi_id != NULL) {
    acc_text[0] = 0;
  } else {
    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
  }

  /* Second column: gi text, or empty. */
  if (gi_id != NULL) {
    SeqIdWrite (gi_id, gi_text, PRINTID_REPORT, sizeof (gi_text) - 1);
  } else {
    gi_text[0] = 0;
  }

  src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &desc_ctx);
  while (src_desc != NULL) {
    row = NULL;
    ValNodeAddPointer (&row, 0, StringSave (acc_text));
    if (want_gi) {
      ValNodeAddPointer (&row, 0, StringSave (gi_text));
    }
    ValNodeLink (&row, CollectBioSourceValues (src_desc->data.ptrvalue, field_list));
    ValNodeAddPointer (&rows, 0, row);
    src_desc = SeqMgrGetNextDescriptor (bsp, src_desc, Seq_descr_source, &desc_ctx);
  }
  return rows;
}
Exemplo n.º 3
0
/* Writes one line to fp per source descriptor found on bsp.
 * Each line starts with the accession-style id text, then (when want_gi is
 * set) a tab and the gi text, then whatever PrintBioSourceLine emits for the
 * descriptor and field_list, then a newline.
 * Does nothing when fp or bsp is NULL.
 */
static void PrintBioseqLines (FILE *fp, BioseqPtr bsp, ValNodePtr field_list, Boolean want_gi)
{
  SeqMgrDescContext desc_ctx;
  SeqDescrPtr       src_desc;
  SeqIdPtr          cur, gi_id = NULL, acc_id = NULL;
  Char              acc_text[255], gi_text[255];

  if (fp == NULL || bsp == NULL) {
    return;
  }

  /* A GenBank id always wins; EMBL/SwissProt/DDBJ/PIR are taken only while
     no accession-style id has been chosen yet. The last gi seen is kept. */
  for (cur = bsp->id; cur != NULL; cur = cur->next) {
    if (cur->choice == SEQID_GENBANK) {
      acc_id = cur;
    } else if (acc_id == NULL
               && (cur->choice == SEQID_EMBL
                   || cur->choice == SEQID_SWISSPROT
                   || cur->choice == SEQID_DDBJ
                   || cur->choice == SEQID_PIR)) {
      acc_id = cur;
    } else if (cur->choice == SEQID_GI) {
      gi_id = cur;
    }
  }

  /* First column: chosen accession, else empty if only a gi exists, else
     the best available id. */
  if (acc_id != NULL) {
    SeqIdWrite (acc_id, acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
  } else if (gi_id != NULL) {
    acc_text[0] = 0;
  } else {
    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
  }

  /* Second column: gi text, or empty. */
  if (gi_id != NULL) {
    SeqIdWrite (gi_id, gi_text, PRINTID_REPORT, sizeof (gi_text) - 1);
  } else {
    gi_text[0] = 0;
  }

  src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &desc_ctx);
  while (src_desc != NULL) {
    if (!want_gi) {
      fprintf (fp, "%s", acc_text);
    } else {
      fprintf (fp, "%s\t%s", acc_text, gi_text);
    }
    PrintBioSourceLine (fp, src_desc->data.ptrvalue, field_list);
    fprintf (fp, "\n");
    src_desc = SeqMgrGetNextDescriptor (bsp, src_desc, Seq_descr_source, &desc_ctx);
  }
}
Exemplo n.º 4
0
/* Applies a Seq-id replacement from pair_list to one contig read.
 *
 * The read's current id is looked up in pair_list. When a pair is found and
 * either no_lookup is set or OkToReplaceId accepts it, the read's complement
 * flag is toggled if the pair is complemented, and then either the pair's ti
 * is copied (ti > 0) or the replacement id text is stored: in read->srr when
 * is_srr is set (read_id is then cleared), otherwise in read->read_id.
 * read->local is cleared after a successful replacement.
 *
 * Returns FALSE when read is NULL, has no id text, or the replacement string
 * could not be allocated (the read is then left unchanged); TRUE otherwise,
 * including when no replacement was applicable.
 */
static Boolean UpdateContigReadId (TContigReadPtr read, SeqIdReplaceListPtr pair_list, Boolean no_lookup, Boolean is_srr, char *has_errors)
{
  SeqIdPairPtr pair;
  SeqIdPtr     sip_find;
  Char         id_buf[255];
  char        *new_id;
  Boolean      rval = TRUE;

  if (read == NULL || StringHasNoText (read->read_id)) {
    return FALSE;
  }

  sip_find = MakeSeqID (read->read_id);
  pair = FindReplacementInSeqIdReplaceList (sip_find, pair_list);
  if (pair != NULL && (no_lookup || OkToReplaceId (pair, read->read_seq, has_errors))) {
    new_id = NULL;
    if (pair->ti <= 0) {
      /* Local ids are written in report form, everything else in long
         FASTA form. */
      if (pair->sip_replace != NULL && pair->sip_replace->choice == SEQID_LOCAL) {
        SeqIdWrite (pair->sip_replace, id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
      } else {
        SeqIdWrite (pair->sip_replace, id_buf, PRINTID_FASTA_LONG, sizeof (id_buf) - 1);
      }
      /* Allocate before touching the read so a failure loses nothing. */
      new_id = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
      if (new_id == NULL) {
        rval = FALSE;
      } else {
        sprintf (new_id, "%s", id_buf);
      }
    }

    if (rval) {
      if (pair->is_complement) {
        read->is_complement = read->is_complement ? FALSE : TRUE;
      }
      if (pair->ti > 0) {
        read->ti = pair->ti;
      } else if (is_srr) {
        free (read->srr);
        read->srr = new_id;
        free (read->read_id);
        read->read_id = NULL;
      } else {
        free (read->read_id);
        read->read_id = new_id;
      }
      read->local = FALSE;
    }
  }
  sip_find = SeqIdFree (sip_find);
  return rval;
}
Exemplo n.º 5
0
/* Binary-searches pair_list for the entry whose buf_find text matches sip.
 *
 * sip is rendered with PRINTID_REPORT and compared case-insensitively
 * against list[].buf_find.
 * NOTE(review): this assumes the list is sorted by that same text with a
 * compatible ordering - confirm against the code that builds the list.
 *
 * Returns a pointer to the matching entry inside pair_list->list, or NULL
 * when sip or pair_list is NULL, the list is empty, or nothing matches.
 */
static SeqIdPairPtr FindReplacementInSeqIdReplaceList (SeqIdPtr sip, SeqIdReplaceListPtr pair_list)
{
  Int4         l, r, m;
  Char         buf_find[100];
  int          cmp;

  if (sip == NULL || pair_list == NULL) return NULL;
  /* An empty list has no element 0 to probe. */
  if (pair_list->list == NULL || pair_list->num_ids < 1) return NULL;

  SeqIdWrite (sip, buf_find, PRINTID_REPORT, sizeof (buf_find) - 1);
  l = 0;
  r = pair_list->num_ids - 1;

  while (l <= r) {
    m = l + (r - l) / 2;
    cmp = StringICmp (buf_find, pair_list->list[m].buf_find);
    if (cmp == 0) {
      return pair_list->list + m;
    }
    if (cmp < 0) {
      r = m - 1;
    } else {
      l = m + 1;
    }
  }
  return NULL;
}
Exemplo n.º 6
0
/** Fills the Iteration ASN.1 structure, for part of the BLAST XML report
 * corresponding to one query.
 * @param seqalign Seq-align list with results; hits are left unset when
 *                 NULL [in]
 * @param sum_returns Search summary data [in]
 * @param is_ooframe Was out-of-frame gapping used in this search? [in]
 * @param ungapped Was this an ungapped search? [in]
 * @param iter_num Index of this "iteration" (query). [in]
 * @param message Error or warning message; copied when non-NULL [in]
 * @param query Query Bioseq; id, definition and length are left unset when
 *              NULL [in]
 * @param mask_loc List of masking locations [in]
 * @return Populated structure, or NULL if it could not be allocated.
 */
static Iteration* 
s_XMLBuildOneQueryIteration(SeqAlign* seqalign,
                            Blast_SummaryReturn* sum_returns,
                            Boolean is_ooframe, Boolean ungapped,
                            Int4 iter_num, char* message,
                            Bioseq* query, ValNode* mask_loc)
{
    Iteration* iterp = IterationNew();

    /* Nothing can be filled in without the structure itself. */
    if (iterp == NULL)
        return NULL;

    iterp->iter_num = iter_num;

    if (query) {
       char buffer[1024];
       char* title = BioseqGetTitle(query);

       SeqIdWrite(query->id, buffer, PRINTID_FASTA_LONG, sizeof(buffer));
       iterp->query_ID = strdup(buffer);

       if (title != NULL)
          iterp->query_def = strdup(title);
       else
          iterp->query_def = strdup("No definition line found");

       iterp->query_len = query->length;
    }

    if(seqalign != NULL) {
       iterp->hits =
          BXMLSeqAlignToHits(seqalign, ungapped, is_ooframe, mask_loc);
    }

    iterp->stat = s_XMLBuildStatistics(sum_returns, ungapped);

    if (message)
        iterp->message = strdup(message);

    return iterp;
}
Exemplo n.º 7
0
/* Emits one ACE-format error entry for a rejected replacement id.
 * The id is rendered in long FASTA form and passed, together with
 * has_errors, to PrintACEFormatErrorXMLStart; reason is then printed to
 * stdout, followed by the closing call.
 */
static void ReportInvalidReplacement (SeqIdPtr sip, CharPtr reason, char *has_errors)
{
  Char id_label[128];

  SeqIdWrite (sip, id_label, PRINTID_FASTA_LONG, sizeof (id_label) - 1);

  PrintACEFormatErrorXMLStart (id_label, has_errors);
  printf ("%s", reason);
  PrintACEFormatErrorXMLEnd ();
}
Exemplo n.º 8
0
/* Bioseq callback: for a protein Bioseq, asks SuggestCodingRegion for a
 * coding region on cfp->nucbsp and prints its intervals to cfp->ofp.
 *
 * Output is the protein's short FASTA id on one line, then one line per
 * location piece with 1-based start and stop separated by a tab; "<" and
 * ">" prefix the start/stop of pieces reported as 5'/3' partial.
 * Nothing is printed unless the returned annotation has type 1 and its data
 * is a coding-region feature with a location.
 * userdata must be a CSpeedFlagPtr with ofp and nucbsp set.
 */
static void DoSuggestIntervals (
  BioseqPtr bsp,
  Pointer userdata
)

{
  SeqAnnotPtr    annot;
  CSpeedFlagPtr  cfp;
  SeqLocPtr      cds_loc = NULL;
  SeqFeatPtr     feat;
  Int4           from, to;
  Boolean        is_partial5, is_partial3;
  Char           label [64];
  CharPtr        mark5, mark3;
  SeqLocPtr      piece;
  SeqIdPtr       sip;

  if (bsp == NULL) return;
  if (! ISA_aa (bsp->mol)) return;
  cfp = (CSpeedFlagPtr) userdata;
  if (cfp == NULL || cfp->ofp == NULL || cfp->nucbsp == NULL) return;

  sip = SeqIdFindBest (bsp->id, 0);
  if (sip == NULL) return;
  SeqIdWrite (sip, label, PRINTID_FASTA_SHORT, sizeof (label) - 1);

  annot = SuggestCodingRegion (cfp->nucbsp, bsp, cfp->genCode);
  if (annot == NULL) return;

  /* Pick out the location only if the annotation carries a coding region. */
  if (annot->type == 1) {
    feat = (SeqFeatPtr) annot->data;
    if (feat != NULL && feat->data.choice == SEQFEAT_CDREGION) {
      cds_loc = feat->location;
    }
  }

  if (cds_loc != NULL) {
    fprintf (cfp->ofp, "%s\n", label);
    for (piece = SeqLocFindNext (cds_loc, NULL);
         piece != NULL;
         piece = SeqLocFindNext (cds_loc, piece)) {
      from = GetOffsetInBioseq (piece, cfp->nucbsp, SEQLOC_START) + 1;
      to = GetOffsetInBioseq (piece, cfp->nucbsp, SEQLOC_STOP) + 1;
      CheckSeqLocForPartial (piece, &is_partial5, &is_partial3);
      mark5 = is_partial5 ? "<" : "";
      mark3 = is_partial3 ? ">" : "";
      fprintf (cfp->ofp, "%s%ld\t%s%ld\n", mark5, (long) from, mark3, (long) to);
    }
  }

  SeqAnnotFree (annot);
}
Exemplo n.º 9
0
/* Creates a queue element for bsp, appends it to the end of *queue, and
 * registers conn with QUERY_AddToQueue using FirstVecScreenCallback and the
 * new element as its user data.
 * Returns without doing anything when queue, resultproc, conn or bsp is
 * NULL, or when the element cannot be allocated.
 */
static void VecScreen_AddToQueue (
  VQUEUE * queue,
  VecScreenResultProc resultproc,
  VecScreenAnnounceProc announceproc,
  Nlm_VoidPtr userdata,
  CONN conn,
  BioseqPtr bsp
)

{
  VQueuePtr       entry;
  VQueuePtr PNTR  head;
  VQueuePtr       tail;

  if (queue == NULL || resultproc == NULL || conn == NULL || bsp == NULL) return;

  /* allocate and fill the queue element */

  entry = (VQueuePtr) MemNew (sizeof (VecScreenQueue));
  if (entry == NULL) return;

  entry->rid [0] = '\0';
  SeqIdWrite (bsp->id, entry->seqid, PRINTID_FASTA_LONG, 40);
  entry->estTime = 0;
  entry->initialTime = GetSecs ();
  entry->postedTime = entry->initialTime;
  entry->secondsToWait = 15;
  entry->resultproc = resultproc;
  entry->announceproc = announceproc;
  entry->userdata = userdata;
  entry->connqueue = NULL;
  entry->done = FALSE;

  /* append to the polling queue (queue was checked non-NULL above) */

  head = (VQueuePtr PNTR) queue;
  if (*head == NULL) {
    *head = entry;
  } else {
    for (tail = *head; tail->next != NULL; tail = tail->next) {
      continue;
    }
    tail->next = entry;
  }

  /* queue the request for a rID */

  QUERY_AddToQueue (&(entry->connqueue), conn, FirstVecScreenCallback, (Pointer) entry, TRUE);
}
Exemplo n.º 10
0
/* Returns a newly allocated string holding the long FASTA form of the best
 * id in sip (as chosen by SeqIdFindBest with SEQID_GI).
 * The buffer is 32 characters, so longer ids are presumably truncated by
 * SeqIdWrite - TODO confirm.
 * Returns NULL when the buffer cannot be allocated. The string comes from
 * MemNew; NOTE(review): the caller presumably releases it with MemFree.
 */
extern CharPtr ErrorDescString (SeqIdPtr sip)
{
  const size_t errbuf_len = 32;
  SeqIdPtr     bestid;
  CharPtr      errbuf;

  bestid = SeqIdFindBest(sip, SEQID_GI);

  errbuf = (CharPtr) MemNew ((size_t) (sizeof (Char) * errbuf_len));
  if (errbuf == NULL) {
    return NULL;
  }
  SeqIdWrite (bestid, errbuf, PRINTID_FASTA_LONG, errbuf_len - 1);

  return errbuf;
}
Exemplo n.º 11
0
/* Reads id-replacement pairs from fp and returns them as a replace list.
 *
 * Each line is split on spaces/tabs: the first token is the id to find and
 * the remainder of the line (after the separating whitespace) is the
 * replacement id. Lines without both parts are ignored. The collected pairs
 * are sorted with SortSeqIdPairList before SeqIdReplaceListNew is called.
 *
 * Returns NULL when fp is NULL; otherwise whatever SeqIdReplaceListNew
 * returns for the (possibly empty) pair list.
 */
NLM_EXTERN SeqIdReplaceListPtr ReadSeqIdPairListFromFile (FILE *fp)
{
  ReadBufferData rbd;
  CharPtr        linestring, cp, id2, buf = NULL;
  Int4           len, buf_len = 0;
  SeqIdPairPtr   pair;
  ValNodePtr     pair_list = NULL, last = NULL, vnp;
  SeqIdReplaceListPtr replace_list = NULL;
  
  if (fp == NULL) return NULL;

  rbd.fp = fp;
  rbd.current_data = NULL;

  linestring = AbstractReadFunction (&rbd);
  /* NOTE(review): the EOF test compares a char with EOF; presumably the
     reader marks end of input this way - confirm. */
  while (linestring != NULL && linestring[0] != EOF) {
    cp = linestring + StringSpn (linestring, " \t");
    if (*cp != 0) {
      len = StringCSpn (cp, " \t");
      id2 = cp + len + StringSpn (cp + len, " \t");
      if (*id2 != 0) {
        /* Grow the scratch buffer only when the token does not fit. */
        if (len + 1 > buf_len) {
          buf = MemFree (buf);
          buf_len = len + 1;
          buf = (CharPtr) MemNew (sizeof (Char) * buf_len);
        }
        StringNCpy (buf, cp, len);
        buf[len] = 0;
        pair = SeqIdPairNew ();
        pair->sip_find = MakeSeqID (buf);
        SeqIdWrite (pair->sip_find, pair->buf_find, PRINTID_REPORT, sizeof (pair->buf_find) - 1);
        pair->sip_replace = MakeSeqID (id2);
        vnp = ValNodeNew (NULL);
        vnp->data.ptrvalue = pair;
        if (last == NULL) {
          pair_list = vnp;
        } else {
          last->next = vnp;
        }
        last = vnp;
      }
    }
    free (linestring);
    linestring = AbstractReadFunction (&rbd);     
  }
  /* The scratch token buffer is no longer needed. */
  buf = MemFree (buf);

  pair_list = ValNodeSort (pair_list, SortSeqIdPairList);

  replace_list = SeqIdReplaceListNew (pair_list);
  pair_list = SeqIdPairListFree (pair_list);

  return replace_list;
}
Exemplo n.º 12
0
/* Feature callback: prints the intervals of a coding-region feature to
 * cfp->ofp.
 *
 * Output is the short FASTA id of the feature's product ("?" when there is
 * no product id) on one line, then one line per location piece with 1-based
 * start and stop separated by a tab; "<" and ">" prefix the start/stop of
 * pieces reported as 5'/3' partial.
 * Non-CDS features are ignored, as are features whose location does not
 * resolve to a Bioseq. userdata must be a CSpeedFlagPtr with ofp set.
 */
static void DoVisitCodingRegions (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  CSpeedFlagPtr  cfp;
  SeqLocPtr      feat_loc, piece;
  Int4           from, to;
  Boolean        is_partial5, is_partial3;
  Char           label [64];
  CharPtr        mark5, mark3;
  BioseqPtr      parent;
  SeqIdPtr       product_id;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return;
  cfp = (CSpeedFlagPtr) userdata;
  if (cfp == NULL || cfp->ofp == NULL) return;

  feat_loc = sfp->location;
  parent = BioseqFindFromSeqLoc (feat_loc);
  if (parent == NULL) return;

  /* Default label, replaced when the product carries an id. */
  StringCpy (label, "?");
  product_id = (sfp->product != NULL) ? SeqLocId (sfp->product) : NULL;
  if (product_id != NULL) {
    SeqIdWrite (product_id, label, PRINTID_FASTA_SHORT, sizeof (label) - 1);
  }

  fprintf (cfp->ofp, "%s\n", label);
  for (piece = SeqLocFindNext (feat_loc, NULL);
       piece != NULL;
       piece = SeqLocFindNext (feat_loc, piece)) {
    from = GetOffsetInBioseq (piece, parent, SEQLOC_START) + 1;
    to = GetOffsetInBioseq (piece, parent, SEQLOC_STOP) + 1;
    CheckSeqLocForPartial (piece, &is_partial5, &is_partial3);
    mark5 = is_partial5 ? "<" : "";
    mark3 = is_partial3 ? ">" : "";
    fprintf (cfp->ofp, "%s%ld\t%s%ld\n", mark5, (long) from, mark3, (long) to);
  }
}
Exemplo n.º 13
0
/* Writes a "QA - <id> - <prefix>[ - <suffix>]" line to sdp->fp, where <id>
 * is the long FASTA form of bsp's id. The suffix part is omitted when
 * suffix has no text. Does nothing when bsp, sdp or prefix is NULL.
 */
static void PrintGraphMessage (BioseqPtr bsp, ScanDataPtr sdp,
                               CharPtr prefix, CharPtr suffix)

{
  Char  id_text [41];

  if (bsp == NULL || sdp == NULL || prefix == NULL) return;

  SeqIdWrite (bsp->id, id_text, PRINTID_FASTA_LONG, sizeof (id_text));

  fprintf (sdp->fp, "QA - %s - %s", id_text, prefix);
  if (StringHasNoText (suffix)) {
    fprintf (sdp->fp, "\n");
    return;
  }
  fprintf (sdp->fp, " - %s", suffix);
  fprintf (sdp->fp, "\n");
}
Exemplo n.º 14
0
/* Writes a two-column line for sip to fp: a gi goes in the second column
 * (leading tab), any other id type goes in the first column (trailing tab).
 * Does nothing when fp or sip is NULL.
 */
static void PrintBioseqErrorLine (FILE *fp, SeqIdPtr sip)
{
  Char  id_text[255];

  if (fp == NULL || sip == NULL) {
    return;
  }

  SeqIdWrite (sip, id_text, PRINTID_REPORT, sizeof (id_text) - 1);

  if (sip->choice != SEQID_GI) {
    fprintf (fp, "%s\t\n", id_text);
    return;
  }
  fprintf (fp, "\t%s\n", id_text);
}
Exemplo n.º 15
0
/*******************************************************************************

  Function : DDV_DrawSequenceName()
  
  Purpose : draw the name of the sequence (left column of the DDV panel),
            underline it for the master row, and draw the small selection
            oval next to it
  
  Parameters : GrData; graphical data (font metrics are read from udv_font)
				pgp; ParaG supplying the sequence id and the scale style
				top, left; coord to start the draw
				cur_row; row being drawn
				CurEditRow; row whose name is drawn in magenta
				CurMasterRow; row whose name is underlined in blue
  
  Return value : none

*******************************************************************************/
static void  DDV_DrawSequenceName(UnDViewerGraphDataPtr GrData,ParaGPtr pgp,
	Int2 top,Int2 left,Int4 cur_row,Int4 CurEditRow,Int4 CurMasterRow)
{
SeqIdPtr  best_id = NULL;
RecT      box;
Int2      text_x,text_y,line_count,text_width;/*text position/size*/
Char      label[21];
BioseqPtr bsp;

	/*get a name : best accession of the Bioseq, else best id of the ParaG*/
	bsp = BioseqLockById(pgp->sip);
	if (bsp != NULL) {
		best_id = SeqIdFindBestAccession(bsp->id);
		BioseqUnlock(bsp);
	}
	if (best_id == NULL)
		best_id = SeqIdFindBest(pgp->sip, 0);
	SeqIdWrite(best_id, label, PRINTID_TEXTID_ACCESSION, 20);

	/*compute position : one extra line when the scale is on top*/
	line_count = (pgp->ScaleStyle==SCALE_POS_TOP) ? 2 : 1;

	/*draw name, right-aligned against 'left'*/
	text_width = StringWidth(label);
	text_x = left - text_width;
	text_y = top + line_count*GrData->udv_font.LineHeight;
	MoveTo(text_x,text_y);
	if (cur_row==CurEditRow){
		Magenta();
	}
	PaintString(label);
	if (cur_row==CurMasterRow){
		Blue();
		MoveTo(text_x,text_y);
		LineTo(text_x+text_width,text_y);
	}

	/*draw a little box (for selection a full sequence)*/
	left += GrData->udv_font.cxChar;
	top += GrData->udv_font.cxChar/2;
	LoadRect(&box,left,top,left+GrData->udv_font.cxChar,
		top+GrData->udv_font.cxChar);
	Blue();
	PaintOval(&box);
	Black();
}
Exemplo n.º 16
0
/* Bioseq callback: builds a definition line with NewCreateDefLine and, when
 * cfp->ofp is set, writes ">id title" to it.
 *
 * Skips Bioseqs whose parent set is a segset or parts set when cfp->skip
 * contains 's', and virtual Bioseqs when it contains 'v'. A blank title is
 * printed as "?". The definition line is built even when there is no output
 * file. userdata must be a CSpeedFlagPtr.
 */
static void DoNewFastaDefline (
  BioseqPtr bsp,
  Pointer userdata
)

{
  BioseqSetPtr   bssp;
  CSpeedFlagPtr  cfp;
  Char           id [128];
  CharPtr        title;

  if (bsp == NULL) return;
  cfp = (CSpeedFlagPtr) userdata;
  if (cfp == NULL) return;

  if (StringChr (cfp->skip, 's') != NULL) {
    if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
      bssp = (BioseqSetPtr) bsp->idx.parentptr;
      if (bssp != NULL) {
        if (bssp->_class == BioseqseqSet_class_segset ||
            bssp->_class == BioseqseqSet_class_parts) return;
      }
    }
  }
  if (StringChr (cfp->skip, 'v') != NULL) {
    if (bsp->repr == Seq_repr_virtual) return;
  }

  id [0] = '\0';
  SeqIdWrite (bsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
  title = NewCreateDefLine (NULL, bsp, FALSE, FALSE);
  if (StringHasNoText (title)) {
    /* Release a blank (but allocated) title before substituting "?". */
    title = MemFree (title);
    title = StringSave ("?");
  }

  if (cfp->ofp != NULL) {
    fprintf (cfp->ofp, ">%s %s\n", id, title);
  }

  MemFree (title);
}
Exemplo n.º 17
0
/* Bioseq callback: writes a protein Bioseq in FASTA form to fe->fp.
 * When the Bioseq has a protein feature, a custom ">id [prot=label]" header
 * is written and the sequence is streamed with the seventh argument of
 * BioseqFastaStreamEx FALSE; otherwise that argument is TRUE.
 * NOTE(review): that argument presumably controls whether the stream
 * function emits its own defline - confirm.
 * Non-protein Bioseqs and a NULL data pointer are ignored.
 */
static void WriteOneProteinWithProduct (BioseqPtr bsp, Pointer data)
{
    SeqMgrFeatContext     feat_ctx;
    FastaExportOptionsPtr opts;
    SeqFeatPtr            prot_feat;
    Char                  label [128];

    if (bsp == NULL || ! ISA_aa (bsp->mol)) {
        return;
    }
    opts = (FastaExportOptionsPtr) data;
    if (opts == NULL) {
        return;
    }

    prot_feat = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_PROT, &feat_ctx);
    if (prot_feat != NULL) {
        SeqIdWrite (bsp->id, label, PRINTID_FASTA_LONG, sizeof (label) - 1);
        fprintf (opts->fp, ">%s [prot=%s]\n", label, feat_ctx.label);
        BioseqFastaStreamEx (bsp, opts->fp, opts->flags, opts->linelen, opts->blocklen, opts->grouplen,
                             FALSE, FALSE, FALSE);
        return;
    }

    BioseqFastaStreamEx (bsp, opts->fp, opts->flags, opts->linelen, opts->blocklen, opts->grouplen,
                         TRUE, FALSE, FALSE);
}
Exemplo n.º 18
0
/* Writes a "<prefix> - <id>[ - <suffix>]" line to sdp->fp. <id> is the long
 * FASTA id of the Bioseq the feature is located on, or sdp->buf when that
 * Bioseq cannot be found. The suffix part is omitted when suffix has no
 * text. Does nothing when sfp, sdp or prefix is NULL.
 */
static void PrintFeatureMessage (SeqFeatPtr sfp, ScanDataPtr sdp,
                                 CharPtr prefix, CharPtr suffix)

{
  BioseqPtr  parent;
  Char       id_text [41];
  CharPtr    label;

  if (sfp == NULL || sdp == NULL || prefix == NULL) return;

  parent = BioseqFindFromSeqLoc (sfp->location);
  if (parent == NULL) {
    label = sdp->buf;
  } else {
    SeqIdWrite (parent->id, id_text, PRINTID_FASTA_LONG, sizeof (id_text));
    label = id_text;
  }
  fprintf (sdp->fp, "%s - %s", prefix, label);

  if (StringHasNoText (suffix)) {
    fprintf (sdp->fp, "\n");
    return;
  }
  fprintf (sdp->fp, " - %s", suffix);
  fprintf (sdp->fp, "\n");
}
Exemplo n.º 19
0
/* Builds the definition line for bsp with both CreateDefLineExEx and
 * NewCreateDefLine and reports any difference.
 *
 * When the two results differ, a "<  id old" / ">  id new" pair is written
 * to stdout and, if set, to cfp->ofp. A blank new-style title is compared
 * and printed as "?". ignoreExisting is passed through to both generators.
 * userdata must be a CSpeedFlagPtr.
 */
static void DoFastaComp (
  BioseqPtr bsp,
  Pointer userdata,
  Boolean ignoreExisting
)

{
  Char           buf [4096];
  CSpeedFlagPtr  cfp;
  Char           id [128];
  CharPtr        title;

  if (bsp == NULL) return;
  cfp = (CSpeedFlagPtr) userdata;
  if (cfp == NULL) return;

  id [0] = '\0';
  SeqIdWrite (bsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
  buf [0] = '\0';
  CreateDefLineExEx (NULL, bsp, buf, sizeof (buf) - 1, 0,
                     NULL, NULL, ignoreExisting, FALSE);
  title = NewCreateDefLine (NULL, bsp, ignoreExisting, FALSE);
  if (StringHasNoText (title)) {
    /* Release a blank (but allocated) title before substituting "?". */
    title = MemFree (title);
    title = StringSave ("?");
  }

  if (StringCmp (buf, title) != 0) {
    if (cfp->ofp != NULL) {
      fprintf (cfp->ofp, "<  %s %s\n", id, buf);
      fprintf (cfp->ofp, ">  %s %s\n", id, title);
    }
    printf ("<  %s %s\n", id, buf);
    printf (">  %s %s\n", id, title);
    fflush (stdout);
  }

  MemFree (title);
}
Exemplo n.º 20
0
/* Checks a read that still carries a local id and, when OkToReplaceId
 * accepts it, normalizes the read in place.
 *
 * On acceptance the read's complement flag is set to match the pair filled
 * in by OkToReplaceId, then either the pair's ti is copied (ti > 0) or
 * read_id is replaced by the long FASTA form of the id; read->local is then
 * cleared.
 *
 * Returns FALSE when read is NULL, has no id text, or the new id string
 * could not be allocated (the read is then left unchanged); TRUE otherwise,
 * including when the read is not local or the id was not accepted.
 */
static Boolean ValidateContigReadId (TContigReadPtr read, char *has_errors)
{
  SeqIdPairData pair;
  Char          id_buf[255];
  char         *new_id;
  Boolean       rval = TRUE;

  if (read == NULL || StringHasNoText (read->read_id)) {
    return FALSE;
  }
  if (!read->local) {
    return TRUE;
  }

  pair.sip_find = NULL;
  pair.is_complement = FALSE;
  pair.is_consensus = FALSE;
  pair.trim3 = 0;
  pair.trim5 = 0;
  pair.sip_replace = MakeSeqID (read->read_id);
  pair.ti = 0;
  if (OkToReplaceId (&pair, read->read_seq, has_errors)) {
    new_id = NULL;
    if (pair.ti <= 0) {
      SeqIdWrite (pair.sip_replace, id_buf, PRINTID_FASTA_LONG, sizeof (id_buf) - 1);
      /* Allocate before touching the read so a failure loses nothing. */
      new_id = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
      if (new_id == NULL) {
        rval = FALSE;
      } else {
        sprintf (new_id, "%s", id_buf);
      }
    }

    if (rval) {
      if (pair.is_complement && !read->is_complement) {
        read->is_complement = TRUE;
      } else if (!pair.is_complement && read->is_complement) {
        read->is_complement = FALSE;
      }
      if (pair.ti > 0) {
        read->ti = pair.ti;
      } else {
        free (read->read_id);
        read->read_id = new_id;
      }
      read->local = FALSE;
    }
  }
  pair.sip_replace = SeqIdFree (pair.sip_replace);
  return rval;
}
Exemplo n.º 21
0
/* Bioseq callback: builds a definition line with CreateDefLine and, when
 * cfp->ofp is set, writes ">id defline" to it. The definition line is built
 * even when there is no output file. userdata must be a CSpeedFlagPtr.
 */
static void DoFastaDefline (
  BioseqPtr bsp,
  Pointer userdata
)

{
  Char           defline [4096];
  CSpeedFlagPtr  cfp;
  Char           label [128];

  cfp = (CSpeedFlagPtr) userdata;
  if (bsp == NULL || cfp == NULL) return;

  label [0] = '\0';
  defline [0] = '\0';
  SeqIdWrite (bsp->id, label, PRINTID_FASTA_LONG, sizeof (label) - 1);
  CreateDefLine (NULL, bsp, defline, sizeof (defline) - 1, 0, NULL, NULL);

  if (cfp->ofp == NULL) return;
  fprintf (cfp->ofp, ">%s %s\n", label, defline);
}
Exemplo n.º 22
0
/* Applies the id replacements in pair_list to a contig and all its reads.
 *
 * The consensus id, when present, is looked up in pair_list; if a pair is
 * found and either no_lookup is set or OkToReplaceId accepts it, the
 * contig's complement flag is toggled for complemented pairs and the
 * consensus id is replaced by the long FASTA form of the replacement.
 * Every read is then passed to UpdateContigReadId with the same options.
 *
 * Returns FALSE when contig is NULL, when the consensus id could not be
 * replaced (no acceptable pair, or out of memory - the old id is then
 * kept), or when any read update returns FALSE. Returns TRUE immediately
 * when pair_list is NULL.
 */
NLM_EXTERN Boolean UpdateContigIds (TContigPtr contig, SeqIdReplaceListPtr pair_list, Boolean no_lookup, Boolean is_srr, char *has_errors)
{
  Int4 i;
  SeqIdPairPtr pair;
  SeqIdPtr     sip_find;
  Char         id_buf[255];
  char        *new_id;
  Boolean      rval = TRUE;

  if (contig == NULL) return FALSE;
  if (pair_list == NULL) return TRUE;

  if (contig->consensus_id != NULL) {
    sip_find = MakeSeqID (contig->consensus_id);
    pair = FindReplacementInSeqIdReplaceList (sip_find, pair_list);
    if (pair != NULL && (no_lookup || OkToReplaceId (pair, contig->consensus_seq, has_errors))) {
      SeqIdWrite (pair->sip_replace, id_buf, PRINTID_FASTA_LONG, sizeof (id_buf) - 1);
      /* Allocate before touching the contig so a failure loses nothing. */
      new_id = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
      if (new_id == NULL) {
        rval = FALSE;
      } else {
        sprintf (new_id, "%s", id_buf);
        if (pair->is_complement) {
          contig->is_complement = contig->is_complement ? FALSE : TRUE;
        }
        free (contig->consensus_id);
        contig->consensus_id = new_id;
      }
    } else {
      rval = FALSE;
    }
    sip_find = SeqIdFree (sip_find);
  }
  for (i = 0; i < contig->num_reads; i++) {
    rval &= UpdateContigReadId (contig->reads[i], pair_list, no_lookup, is_srr, has_errors);
  }
  return rval;
}
Exemplo n.º 23
0
/* Per-record callback: runs the scan checks on one Seq-entry.
 *
 * Increments sdp->recordCount, stores the long FASTA id of the first Bioseq
 * in sdp->buf, then runs the visitor callbacks in a fixed order. Features
 * are indexed before the pseudo-CDS/peptide visitors, and the entry is
 * cleaned up and re-indexed before the protein visitor. Note that
 * SeriousSeqEntryCleanup modifies sep.
 * userdata must be a ScanDataPtr; the call is ignored when either argument
 * is NULL.
 */
static void DoRecord (SeqEntryPtr sep, Pointer userdata)

{
  BioseqPtr     bsp;
  SeqEntryPtr   nsep;
  ScanDataPtr   sdp;

  sdp = (ScanDataPtr) userdata;
  /* Both are dereferenced unconditionally below. */
  if (sep == NULL || sdp == NULL) return;
  (sdp->recordCount)++;

  nsep = FindNthBioseq (sep, 1);
  if (nsep != NULL && IS_Bioseq (nsep)) {
    bsp = (BioseqPtr) nsep->data.ptrvalue;
    if (bsp != NULL) {
      SeqIdWrite (bsp->id, sdp->buf, PRINTID_FASTA_LONG, sizeof (sdp->buf));
    }
  }
#ifdef OS_UNIX
  /* printf ("%s\n", sdp->buf); */
#endif

  VisitPubdescsInSep (sep, (Pointer) sdp, DoThesis);

  /* check for 'genomic DNA' in DoTitle suppressed for bulk submissions */

  sdp->bulk = FALSE;
  VisitDescriptorsInSep (sep, (Pointer) sdp, LookForBulk);
  VisitDescriptorsInSep (sep, (Pointer) sdp, DoTitle);

  VisitFeaturesInSep (sep, (Pointer) sdp, DoImpCDSandTrna);

  /* index for pseudo cds, impfeat peptides codon frame */

  SeqMgrIndexFeatures (0, sep->data.ptrvalue);

  VisitFeaturesInSep (sep, (Pointer) sdp, DoPseudoCDS);

  VisitFeaturesInSep (sep, (Pointer) sdp, DoPeptide);

  /* now cleanup, index for overlapping peptides */

  SeriousSeqEntryCleanup (sep, NULL, NULL);
  SeqMgrIndexFeatures (0, sep->data.ptrvalue);

  VisitBioseqsInSep (sep, (Pointer) sdp, DoProteins);

  /*
  VisitBioseqsInSep (sep, (Pointer) sdp, DoGraphs);
  */

#if 0
  {
    Boolean  hasUser = FALSE;

    VisitFeaturesInSep (sep, (Pointer) &hasUser, DoUser);

    if (hasUser && sdp->aop != NULL && sdp->atp_se != NULL) {
      SeqEntryAsnWrite (sep, sdp->aop, sdp->atp_se);
    }
  }
#endif
}
Exemplo n.º 24
0
/* Builds the feature index for entityID unless one already exists. */
static void IndexFeaturesIfNeeded (Uint2 entityID)
{
  if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
    SeqMgrIndexFeatures (entityID, 0);
  }
}

/* Runs the operations selected by the option strings in cfp on one record.
 *
 * Each option string (clean, index, seq, feat, desc, verify) is scanned for
 * single-letter flags and the matching step is run, in the fixed order
 * below. Several flags may be present at once. Output goes to cfp->ofp,
 * which is flushed at the end when set.
 * Does nothing when sep or cfp is NULL.
 */
static void DoProcess (
  SeqEntryPtr sep,
  Uint2 entityID,
  CSpeedFlagPtr cfp
)

{
  Char            id [64];
  ErrSev          oldErrSev;
  ValidStructPtr  vsp;

  if (sep == NULL || cfp == NULL) return;

  /* cleanup options */

  if (StringChr (cfp->clean, 't') != NULL) {
    VisitDescriptorsInSep (sep, NULL, MarkTitles);
    DeleteMarkedObjects (entityID, 0, NULL);
  }
  if (StringChr (cfp->clean, 'a') != NULL) {
    AssignIDsInEntity (entityID, 0, NULL);
  }
  if (StringChr (cfp->clean, 'b') != NULL) {
    BasicSeqEntryCleanup (sep);
  }
  if (StringChr (cfp->clean, 's') != NULL) {
    SeriousSeqEntryCleanup (sep, NULL, NULL);
  }

  /* indexing options */

  if (StringChr (cfp->index, 'f') != NULL) {
    SeqMgrIndexFeatures (entityID, 0);
  }

  /* sequence options; upper-case variants index (or strip titles and
     re-index) before running the same visitor as the lower-case one */

  if (StringChr (cfp->seq, 'c') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaExist);
  }
  if (StringChr (cfp->seq, 'C') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaRegen);
  }
  if (StringChr (cfp->seq, 's') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaSeq);
  }
  if (StringChr (cfp->seq, 'S') != NULL) {
    IndexFeaturesIfNeeded (entityID);
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaSeq);
  }
  if (StringChr (cfp->seq, 'r') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaRaw);
  }
  if (StringChr (cfp->seq, 'd') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
  }
  if (StringChr (cfp->seq, 'D') != NULL) {
    IndexFeaturesIfNeeded (entityID);
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
  }
  if (StringChr (cfp->seq, 'T') != NULL) {
    VisitDescriptorsInSep (sep, NULL, MarkTitles);
    DeleteMarkedObjects (entityID, 0, NULL);
    SeqMgrIndexFeatures (entityID, 0);
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
  }
  if (StringChr (cfp->seq, 'x') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoNewFastaDefline);
  }
  if (StringChr (cfp->seq, 'X') != NULL) {
    VisitDescriptorsInSep (sep, NULL, MarkTitles);
    DeleteMarkedObjects (entityID, 0, NULL);
    SeqMgrIndexFeatures (entityID, 0);
    VisitBioseqsInSep (sep, (Pointer) cfp, DoNewFastaDefline);
  }
  
  if (StringChr (cfp->seq, 'f') != NULL) {
    VisitFeaturesInSep (sep, (Pointer) cfp, DoFastaFeat);
  }
  if (StringChr (cfp->seq, 't') != NULL) {
    VisitFeaturesInSep (sep, (Pointer) cfp, DoFastaTrans);
  }

  /* feature options */

  if (StringChr (cfp->feat, 'v') != NULL) {
    VisitFeaturesInSep (sep, NULL, DoVisitFeaturesTest);
  }
  if (StringChr (cfp->feat, 'g') != NULL) {
    IndexFeaturesIfNeeded (entityID);
    VisitFeaturesInSep (sep, (Pointer) cfp, DoGeneOverlapPrintTest);
  }
  if (StringChr (cfp->feat, 'h') != NULL) {
    IndexFeaturesIfNeeded (entityID);
    VisitFeaturesInSep (sep, (Pointer) cfp, DoGeneOverlapSpeedTest);
  }
  /* 'x', 'o' and 'd' are recognized but currently do nothing */
  if (StringChr (cfp->feat, 'x') != NULL) {
  }
  if (StringChr (cfp->feat, 'o') != NULL) {
  }
  if (StringChr (cfp->feat, 'd') != NULL) {
  }
  if (StringChr (cfp->feat, 't') != NULL) {
    SeqEntryToGnbk (sep, NULL, FTABLE_FMT, SEQUIN_MODE, NORMAL_STYLE,
                    0, 0, SHOW_PROT_FTABLE, NULL, cfp->ofp);
  }
  if (StringChr (cfp->feat, 's') != NULL) {
    IndexFeaturesIfNeeded (entityID);
    cfp->nucbsp = FindNucBioseq (sep);
    if (cfp->nucbsp != NULL) {
      BioseqToGeneticCode (cfp->nucbsp, &(cfp->genCode), NULL, NULL, NULL, 0, NULL);
      SeqIdWrite (cfp->nucbsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
      /* ofp may be NULL (see the final flush); DoSuggestIntervals checks
         it on its own. */
      if (cfp->ofp != NULL) {
        fprintf (cfp->ofp, "%s\n", id);
      }
      VisitBioseqsInSep (sep, (Pointer) cfp, DoSuggestIntervals);
      cfp->nucbsp = NULL;
      cfp->genCode = 0;
    }
  }
  if (StringChr (cfp->feat, 'S') != NULL) {
    /* same as 's', bracketed by the batch-suggest set/clear calls */
    IndexFeaturesIfNeeded (entityID);
    cfp->nucbsp = FindNucBioseq (sep);
    if (cfp->nucbsp != NULL) {
      BioseqToGeneticCode (cfp->nucbsp, &(cfp->genCode), NULL, NULL, NULL, 0, NULL);
      SetBatchSuggestNucleotide (cfp->nucbsp, cfp->genCode);
      SeqIdWrite (cfp->nucbsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
      if (cfp->ofp != NULL) {
        fprintf (cfp->ofp, "%s\n", id);
      }
      VisitBioseqsInSep (sep, (Pointer) cfp, DoSuggestIntervals);
      ClearBatchSuggestNucleotide ();
      cfp->nucbsp = NULL;
      cfp->genCode = 0;
    }
  }
  if (StringChr (cfp->feat, 'c') != NULL) {
    VisitFeaturesInSep (sep, (Pointer) cfp, DoVisitCodingRegions);
  }

  /* descriptor options: recognized but currently do nothing */

  if (StringChr (cfp->desc, 'b') != NULL) {
  }
  if (StringChr (cfp->desc, 't') != NULL) {
  }

  /* verification options */

  if (StringChr (cfp->verify, 'v') != NULL) {
    IndexFeaturesIfNeeded (entityID);
    vsp = ValidStructNew ();
    if (vsp != NULL) {
      vsp->useSeqMgrIndexes = TRUE;
      vsp->suppressContext = TRUE;
      vsp->seqSubmitParent = TRUE;
      vsp->testLatLonSubregion = TRUE;
      /* silence the message level while validating; restored below */
      oldErrSev = ErrSetMessageLevel (SEV_NONE);
      vsp->errfunc = ValidCallback;
      vsp->userdata = (Pointer) cfp->ofp;
      /* vsp->convertGiToAccn = FALSE; */
      ValidateSeqEntry (sep, vsp);
      ValidStructFree (vsp);
      ErrSetMessageLevel (oldErrSev);
    }
  }
  if (StringChr (cfp->verify, 'b') != NULL) {
    IndexFeaturesIfNeeded (entityID);
    SeqEntryToGnbk (sep, NULL, GENBANK_FMT, SEQUIN_MODE, NORMAL_STYLE,
                    0, 0, 0, NULL, cfp->ofp);
  }

  if (cfp->ofp != NULL) {
    fflush (cfp->ofp);
  }
}
Exemplo n.º 25
0
/*
 * ProcessMultipleRecord
 *
 * Opens filename as an ASN.1 stream (binary or text according to
 * cfp->binary) and walks it with AsnReadId, starting from cfp->atp_bss.
 * Every element whose type is cfp->atp_se is read as a SeqEntry, handed to
 * DoProcess cfp->maxcount times, and then released with
 * ObjMgrFreeByEntityID.  All other elements are skipped with AsnReadVal.
 *
 * When cfp->compressed is set the file is read through a pipe from an
 * external decompressor (OS_UNIX builds only): the program named by the
 * NCBI_UNCOMPRESS_BINARY environment variable if set, otherwise gzcat or
 * zcat, whichever answers to "-h" first.
 *
 * When cfp->logfp is set, the file name, the ID written for the Bioseq
 * returned by FindNthBioseq (sep, 1) of each record, and a closing summary
 * (longest processing time and record count) are written to it.
 *
 * Failure to find a decompressor, to open the input, or to create the
 * ASN.1 reader is reported through Message and ends the function early.
 */
static void ProcessMultipleRecord (
  CharPtr filename,
  CSpeedFlagPtr cfp
)

{
  AsnIoPtr     aip;
  AsnTypePtr   atp;
  BioseqPtr    bsp;
  Char         buf [41];
  Uint2        entityID;
  FILE         *fp;
  SeqEntryPtr  fsep;
  Char         longest [41];
  Int4         numrecords, x;
  SeqEntryPtr  sep;
  time_t       starttime, stoptime, worsttime;
#ifdef OS_UNIX
  Char         cmmd [256];
  int          cmmdlen;
  CharPtr      gzcatprog;
  int          ret;
  Boolean      usedPopen = FALSE;
#endif

  if (cfp == NULL) return;

  if (StringHasNoText (filename)) return;

#ifndef OS_UNIX
  if (cfp->compressed) {
    Message (MSG_POSTERR, "Can only decompress on-the-fly on UNIX machines");
    return;
  }
#endif

#ifdef OS_UNIX
  if (cfp->compressed) {
    /* pick the decompressor: explicit override first, then gzcat, then zcat */
    gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY");
    if (gzcatprog == NULL) {
      ret = system ("gzcat -h >/dev/null 2>&1");
      if (ret == 0) {
        gzcatprog = "gzcat";
      } else if (ret == -1) {
        Message (MSG_POSTERR, "Unable to fork or exec gzcat in ScanBioseqSetRelease");
        return;
      } else {
        ret = system ("zcat -h >/dev/null 2>&1");
        if (ret == 0) {
          gzcatprog = "zcat";
        } else if (ret == -1) {
          Message (MSG_POSTERR, "Unable to fork or exec zcat in ScanBioseqSetRelease");
          return;
        } else {
          Message (MSG_POSTERR, "Unable to find zcat or gzcat in ScanBioseqSetRelease - please edit your PATH environment variable");
          return;
        }
      }
    }

    /* NOTE(review): filename is placed into a shell command line unquoted;
       confirm that callers never pass untrusted file names. */
    /* bounded formatting: a long program path or file name must not
       overrun cmmd */
    cmmdlen = snprintf (cmmd, sizeof (cmmd), "%s %s", gzcatprog, filename);
    if (cmmdlen < 0 || (size_t) cmmdlen >= sizeof (cmmd)) {
      Message (MSG_POSTERR, "Decompression command too long for input file '%s'", filename);
      return;
    }

    fp = popen (cmmd, /* cfp->binary? "rb" : */ "r");
    usedPopen = TRUE;
  } else {
    fp = FileOpen (filename, cfp->binary? "rb" : "r");
  }
#else
  fp = FileOpen (filename, cfp->binary? "rb" : "r");
#endif
  if (fp == NULL) {
    Message (MSG_POSTERR, "FileOpen failed for input file '%s'", filename);
    return;
  }

  aip = AsnIoNew (cfp->binary? ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL);
  if (aip == NULL) {
    Message (MSG_ERROR, "AsnIoNew failed for input file '%s'", filename);
    /* the stream was opened successfully, so it must not be leaked */
#ifdef OS_UNIX
    if (usedPopen) {
      pclose (fp);
    } else {
      FileClose (fp);
    }
#else
    FileClose (fp);
#endif
    return;
  }

  if (cfp->logfp != NULL) {
    fprintf (cfp->logfp, "%s\n\n", filename);
    fflush (cfp->logfp);
  }

  /* buf is only filled in when a record yields a Bioseq; start it out empty
     so that copying it into longest never reads uninitialized memory */
  buf [0] = '\0';
  longest [0] = '\0';
  worsttime = 0;
  numrecords = 0;

  atp = cfp->atp_bss;

  while ((atp = AsnReadId (aip, cfp->amp, atp)) != NULL) {
    if (atp == cfp->atp_se) {

      sep = SeqEntryAsnRead (aip, atp);
      if (sep != NULL) {

        entityID = ObjMgrGetEntityIDForChoice (sep);

        /* label the record for the log and for the timing summary */
        fsep = FindNthBioseq (sep, 1);
        if (fsep != NULL && fsep->choice == 1) {
          bsp = (BioseqPtr) fsep->data.ptrvalue;
          if (bsp != NULL) {
            SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
            if (cfp->logfp != NULL) {
              fprintf (cfp->logfp, "%s\n", buf);
              fflush (cfp->logfp);
            }
          }
        }

        starttime = GetSecs ();

        for (x = 0; x < cfp->maxcount; x++) {
          DoProcess (sep, entityID, cfp);
        }
        stoptime = GetSecs ();

        /* remember the slowest record seen so far */
        if (stoptime - starttime > worsttime) {
          worsttime = stoptime - starttime;
          StringCpy (longest, buf);
        }
        numrecords++;

        ObjMgrFreeByEntityID (entityID);
      }

    } else {

      AsnReadVal (aip, atp, NULL);
    }
  }

  AsnIoFree (aip, FALSE);

  /* a stream from popen has to be closed with pclose */
#ifdef OS_UNIX
  if (usedPopen) {
    pclose (fp);
  } else {
    FileClose (fp);
  }
#else
  FileClose (fp);
#endif
  if (cfp->logfp != NULL && (! StringHasNoText (longest))) {
    fprintf (cfp->logfp, "Longest processing time %ld seconds on %s\n",
             (long) worsttime, longest);
    fprintf (cfp->logfp, "Total number of records %ld\n", (long) numrecords);
    fflush (cfp->logfp);
  }
}
Exemplo n.º 26
0
/*
 * ProcessAccession
 *
 * Resolves accn to a GI and, unless the record is filtered out, fetches it
 * with PubSeqSynchronousQuery and passes it to DoSeqEntryToGnbk as
 * GENBANK_FMT (do_nuc) and/or GENPEPT_FMT (do_prot).
 *
 *   accn     - a string made only of digits is read as a GI; anything else
 *              goes through SeqIdFromAccessionDotVersion and GetGIForSeqId.
 *              NULL, or a value that does not give a GI >= 1, is ignored.
 *   extra    - passed through to DoSeqEntryToGnbk.
 *   only_new - requests the extra filtering described below.
 *   get_var  - when set, the fetch uses flags = 1 instead of 0.
 *
 * Filtering with only_new:
 *   - numeric input: the accession.version written for the GI is cut at the
 *     '.', the bare accession is looked up again, and the record is skipped
 *     when that lookup gives back the same GI.
 *   - accession input: the record is skipped when the accession.version
 *     written for the resolved GI equals accn (case-insensitive).
 */
static void ProcessAccession (
  CharPtr accn,
  XtraPtr extra,
  Boolean only_new,
  Boolean get_var,
  Boolean do_nuc,
  Boolean do_prot
)

{
  Char         ch;
  Int4         flags = 0;
  Int4         gi = 0;
  Char         id [41];
  Boolean      is_numeric = TRUE;
  Int4         newgi = 0;
  CharPtr      ptr;
  SeqEntryPtr  sep;
  SeqIdPtr     sip;
  Char         tmp [41];
  long         val;

  /* nothing to look up; also keeps the scan below from reading through NULL */
  if (accn == NULL) return;

  /* decide whether the input consists of digits only */
  ptr = accn;
  ch = *ptr;
  while (ch != '\0' && is_numeric) {
    if (! IS_DIGIT (ch)) {
      is_numeric = FALSE;
    }
    ptr++;
    ch = *ptr;
  }

  if (is_numeric) {
    if (sscanf (accn, "%ld", &val) == 1) {
      gi = (Int4) val;
      /* reject numbers that do not survive the narrowing to Int4, so that
         an oversized input cannot alias some unrelated GI */
      if ((long) gi != val) return;
      if (gi < 1) return;
      if (only_new) {
        sip = GetSeqIdForGI (gi);
        if (sip != NULL) {
          SeqIdWrite (sip, tmp, PRINTID_TEXTID_ACC_VER, sizeof (tmp));
          SeqIdFree (sip);
          ptr = StringChr (tmp, '.');
          if (ptr != NULL) {
            /* drop the version and look the bare accession up again */
            *ptr = '\0';
            sip = SeqIdFromAccessionDotVersion (tmp);
            newgi = GetGIForSeqId (sip);
            SeqIdFree (sip);
            if (newgi == gi) return;
          }
        }
      }
    }
  } else {
    sip = SeqIdFromAccessionDotVersion (accn);
    gi = GetGIForSeqId (sip);
    SeqIdFree (sip);
    /* unresolved accession: nothing can be fetched, so skip the second
       lookup as well */
    if (gi < 1) return;
    if (only_new) {
      sip = GetSeqIdForGI (gi);
      if (sip != NULL) {
        SeqIdWrite (sip, id, PRINTID_TEXTID_ACC_VER, sizeof (id));
        SeqIdFree (sip);
        if (StringICmp (accn, id) == 0) return;
      }
    }
  }
  if (gi < 1) return;

  if (get_var) {
    flags = 1;
  }
  sep = PubSeqSynchronousQuery (gi, 0, flags);
  if (sep == NULL) return;

  if (do_nuc) {
    DoSeqEntryToGnbk (sep, GENBANK_FMT, extra);
  }
  if (do_prot) {
    DoSeqEntryToGnbk (sep, GENPEPT_FMT, extra);
  }

  SeqEntryFree (sep);
}
Exemplo n.º 27
0
/*********************************************************************
*
*	make_cds_paragraph(sfp, aa_start, aa_stop)
*	return a newly allocated buffer for the display of the 3-codon
*	under one amino acid format: one labelled line with the amino
*	acid residues of the product, followed by three labelled lines
*	for every codon vector returned by aa_to_codon. Lines are
*	separated by new line characters.
*	This is the layout requested for the sequin doc object.
*
*	sfp: the feature; must have data.choice == 3 and a product
*	aa_start, aa_stop: start and stop in the amino acid sequence
*
*	returns NULL if sfp is not usable, if the product Bioseq can
*	not be locked, or if the buffer can not be allocated. The
*	buffer is not freed here; that is left to the caller.
*
*********************************************************************/
NLM_EXTERN CharPtr make_cds_paragraph(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
	BioseqPtr pbsp;
	SeqPortPtr spp;
	ValNodePtr cvp_node, curr;
	CodonVectorPtr cvp;
	CharPtr docbuf = NULL;
	Int4 num, buf_size;
	Uint1 residue;
	Char p_name[30];
	Int4 i;
	CharPtr buf;
	Int4 pos;
	Int4 max_len = 150;
	Boolean extra_space;

	/* NOTE(review): 3 is presumably the coding region feature choice - confirm */
	if(sfp == NULL || sfp->data.choice !=3)
		return NULL;
	if(sfp->product == NULL)
		return NULL;
	pbsp = BioseqLockById(SeqLocId(sfp->product));
	if(pbsp == NULL)
		return NULL;

	cvp_node = aa_to_codon(sfp, aa_start, aa_stop);

	/*one line for the amino acids plus three for each codon vector*/
	num = 1;
	for(curr = cvp_node; curr !=NULL; curr = curr->next)
		num +=3;
	/* NOTE(review): max_len characters are reserved per line, but nothing
	   below checks that a line actually fits - confirm against the callers */
	buf_size = num * max_len;

	docbuf = MemNew((size_t)(buf_size) * sizeof(Char));
	if(docbuf == NULL)
	{
		/*release what was acquired above before giving up*/
		free_cvp_list(cvp_node);
		BioseqUnlock(pbsp);
		return NULL;
	}

	MuskSeqIdWrite(pbsp->id, p_name, B_SPACE, PRINTID_TEXTID_ACCESSION, TRUE, FALSE);
	pos = 0;
	pos+= print_label_to_buffer(docbuf+pos, p_name, (aa_start+1), 0, FALSE, 
		FALSE, B_SPACE, POS_SPACE);

	/*print the amino acid sequence into buffer*/
	spp = SeqPortNew(pbsp, aa_start, aa_stop, Seq_strand_plus, Seq_code_ncbieaa);
	while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF )
		docbuf[pos++] = residue;
	docbuf[pos++] = '\n';
	SeqPortFree(spp);

	/*print the three lines of every codon vector*/
	for(curr = cvp_node; curr !=NULL; curr = curr->next)
	{
		cvp = curr->data.ptrvalue;
		SeqIdWrite (cvp->sip, p_name, PRINTID_FASTA_SHORT, 10);
		extra_space = (cvp->aa_index == 0);
		for(i=0; i<3; ++i)
		{
			buf = cvp->buf[i] + cvp->aa_index;
			/*only the line matching the frame carries the id and position*/
			if(i == cvp->frame)
			{
				pos+= print_label_to_buffer(docbuf+pos, p_name, 
					cvp->dna_pos, cvp->strand, extra_space, FALSE, B_SPACE, POS_SPACE);
			}
			else
				pos+= print_label_to_buffer(docbuf+pos, NULL, -1, 
				0, extra_space, FALSE, B_SPACE, POS_SPACE);
			sprintf(docbuf+pos, "%s\n", buf);
			pos += (StringLen(buf) +1);

		}
	}

	docbuf[pos++] = '\n';
	docbuf[pos] = '\0';
	
	free_cvp_list(cvp_node);
	BioseqUnlock(pbsp);

	return docbuf;
}
Exemplo n.º 28
0
/** Creates the header part of an XML report for a BLAST search.
 * @param program Program name; "rpsblast" is reported as "blastp" and
 *                "rpstblastn" as "blastx" [in]
 * @param database Database name; skipped when NULL [in]
 * @param query_loc Query Seq-loc; the query fields are left untouched when
 *                  it is NULL [in]
 * @param flags Flag to indicate whether query sequence should be included in
 *              the output. [in]
 * @param search_param Search parameters; the parameter block is left
 *                     unfilled when it is NULL [in]
 * @return The new BlastOutput structure, or NULL if program is NULL or the
 *         structure cannot be allocated.
 */
static BlastOutput* 
s_CreateBlastOutputHead(const char* program, const char* database, 
                        SeqLoc* query_loc, Int4 flags, 
                        const Blast_SearchParams* search_param)
{
    BlastOutput* boutp;
    Char buffer[1024];
    char* program_to_use = NULL;
    
    /* The program name is compared and copied below. */
    if (program == NULL)
        return NULL;

    if((boutp = BlastOutputNew()) == NULL)
        return NULL;
    
    /* The RPS variants are reported under the name of the underlying
       program. */
    if (strcmp(program, "rpsblast") == 0)
        program_to_use = strdup("blastp");
    else if (strcmp(program, "rpstblastn") == 0)
        program_to_use = strdup("blastx");
    else
        program_to_use = strdup(program);

    /* Query ID, definition line, length and, on request, sequence. */
    if (query_loc) {
       SeqId* sip = SeqLocId(query_loc);
       Bioseq* bsp;
       SeqIdWrite(sip, buffer, PRINTID_FASTA_LONG, sizeof(buffer));
       boutp->query_ID = strdup(buffer);

       bsp = BioseqLockById(sip);

       if(bsp != NULL) {
          if (BioseqGetTitle(bsp) != NULL)
             boutp->query_def = strdup(BioseqGetTitle(bsp));
          else
             boutp->query_def = strdup("No definition line found");
       }
       BioseqUnlock(bsp);

       boutp->query_len = SeqLocLen(query_loc);

       if(flags & BXML_INCLUDE_QUERY) {
           boutp->query_seq = (char *) calloc(boutp->query_len+1, 1);
           /* Only stream the sequence if there is a buffer to receive it. */
           if (boutp->query_seq != NULL) {
               SeqPortStreamLoc(query_loc, 
                                STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
                                boutp->query_seq, NULL);
           }
       } else {
          boutp->query_seq = NULL;    /* Do we need sequence here??? */
       }
    }
    /* Program name. Use the local version of the program. No need to copy it
       since it was locally allocated. */
    boutp->program = program_to_use;

    /* Database name */
    if (database)
        boutp->db = strdup(database);

    /* Version text; bounded so that long version or date strings cannot
       overrun the buffer. */
    snprintf(buffer, sizeof(buffer), "%s %s [%s]", program_to_use, 
             BlastGetVersionNumber(), BlastGetReleaseDate());
    boutp->version = strdup(buffer);

    /* Reference */
    boutp->reference = BlastGetReference(FALSE);

    /* Filling parameters */
    boutp->param = ParametersNew();    
    if (boutp->param != NULL && search_param != NULL) {
        boutp->param->expect = search_param->expect;
        boutp->param->gap_open = search_param->gap_open;
        boutp->param->gap_extend = search_param->gap_extension;
        if (search_param->matrix)
            boutp->param->matrix = strdup(search_param->matrix);
        boutp->param->sc_match = search_param->match;
        boutp->param->sc_mismatch = search_param->mismatch;
        boutp->param->include = search_param->ethresh;
        if (search_param->filter_string)
            boutp->param->filter = strdup(search_param->filter_string);
    }
    
    return boutp;
}
Exemplo n.º 29
0
/** Callback that writes one line per HSP of the current hit list to
 * search->output, in the form
 *    'subject'=='[+-]query' (s_off q_start s_end q_end) score
 * HSPs that are NULL, that exceed a positive search->pbp->cutoff_e, or whose
 * query id is missing are skipped. The query and subject coordinates stored
 * in each printed HSP are modified in place while they are converted for
 * output.
 * @param ptr The BlastSearchBlk of the running search [in]
 * @return Always 0.
 */
static int LIBCALLBACK
MegaBlastPrintEndpoints(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   /* Only filled in when the subject comes from a database; must start out
      NULL because it is tested and freed on every path below. */
   CharPtr subject_descr = NULL;
   SeqIdPtr sip, query_id;
   CharPtr query_buffer, title;
   CharPtr subject_buffer = NULL;
   Int4 query_length, q_start, q_end, q_shift=0, s_shift=0;
   Int4 subject_end;
   Int4 hsp_index;
   Boolean numeric_sip_type = FALSE;
   BLAST_HSPPtr hsp; 
   Int2 context;
   Char context_sign;
   Int4 subject_gi = 0, score;
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL || search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   if (search->rdfp)
      readdb_get_descriptor(search->rdfp, search->subject_id, &sip,
                            &subject_descr);
   else 
      sip = SeqIdSetDup(search->subject_info->sip);
   
   /* Choose the text that identifies the subject. */
   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) {
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type = GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI), 
                                                  &subject_gi, &subject_buffer);
   } else {
      DbtagPtr db_tag = (DbtagPtr) sip->data.ptrvalue;
      /* NOTE(review): this branch is only reached when db compares equal to
         "BL_ORD_ID", so the THC/TI test below looks unreachable - confirm
         the intended condition. */
      if (db_tag->db && 
          (!StringCmp(db_tag->db, "THC") || 
           !StringICmp(db_tag->db, "TI")) && 
          db_tag->tag->id != 0) {
         subject_buffer = (CharPtr) Malloc(16);
         sprintf(subject_buffer, "%ld", (long) db_tag->tag->id);
      } else {
         /* Use the first token of the description as the id; from here on
            both pointers refer to that token, so it is freed only once. */
         subject_buffer = StringTokMT(subject_descr, " \t", &subject_descr);
         subject_descr = subject_buffer;
      }
   }

   search->current_hitlist->hspcnt_max = search->current_hitlist->hspcnt;

   /* Only for the two sequences case, get offset shift if subject 
      is a subsequence */
   if (!search->rdfp && search->query_slp->next) {
      s_shift = SeqLocStart(search->query_slp->next);
      subject_end = SeqLocStop(search->query_slp->next);
   } else {
      s_shift = 0;
      subject_end = 
         readdb_get_sequence_length(search->rdfp, search->subject_id);
   }
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);

   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 && 
                        hsp->evalue > search->pbp->cutoff_e)) 
         continue;
      
      /* Correct query context is already found in BlastGetNonSumStatsEvalue */
      context = hsp->context; 
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue; 
      /* From here on hsp->context only holds the strand bit. */
      hsp->context = context & 1;      
      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;
      hsp->subject.end = hsp->subject.offset + hsp->subject.length;

      if (hsp->context) {
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset = 
            hsp->query.end - hsp->query.length + 1;
         context_sign = '-'; 
      } else {
         hsp->query.end = (++hsp->query.offset) + hsp->query.length - 1;
         /* Clip an HSP that runs past the end of the query. */
         if (hsp->query.end > query_length) {
            hsp->subject.end -= (hsp->query.end - query_length);
            hsp->query.end = query_length;
         }
         context_sign = '+';  
      }
      
      /* Clip an HSP that runs past the end of the subject. */
      if (hsp->subject.end > subject_end) {
         hsp->query.end -= (hsp->subject.end - subject_end);
         hsp->subject.end = subject_end;
      }
      hsp->subject.offset++;
      
      /* Choose the text that identifies the query. */
      query_buffer = NULL;
      if (query_id->choice == SEQID_LOCAL && 
          search->pbp->mb_params->full_seqids) {
         BioseqPtr query_bsp = BioseqLockById(query_id);
         title = StringSave(BioseqGetTitle(query_bsp));
         if (title)
            query_buffer = StringTokMT(title, " ", &title);
         else {
            Int4 query_gi;
            GetAccessionFromSeqId(query_bsp->id, &query_gi,
                                  &query_buffer);
         }  
         BioseqUnlock(query_bsp);
      } else {
         query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         if (!search->pbp->mb_params->full_seqids)
            SeqIdWrite(query_id, query_buffer, PRINTID_TEXTID_ACCESSION,
                       BUFFER_LENGTH);
         else 
            SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
      }

      /* With both gap costs at zero the reported number is derived from the
         HSP lengths, reward and penalty instead of the raw score. */
      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                  search->sbp->reward / 2 - hsp->score) / 
            (search->sbp->reward - search->sbp->penalty);
      else 
         score = hsp->score;

      /* On the minus strand the query endpoints are printed swapped. */
      if (context_sign == '+') {
         q_start = hsp->query.offset;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset;
      }

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }

      hsp->subject.offset += s_shift;
      hsp->subject.end += s_shift;

      if (numeric_sip_type)
         fprintf(fp, "'%ld'=='%c%s' (%d %d %d %d) %d\n", (long) subject_gi, 
                 context_sign, query_buffer, hsp->subject.offset, q_start, 
                 hsp->subject.end, q_end, score);
      else 
         fprintf(fp, "'%s'=='%c%s' (%d %d %d %d) %d\n", 
                 subject_buffer, context_sign, query_buffer, 
                 hsp->subject.offset, q_start, 
                 hsp->subject.end, q_end, score);
      MemFree(query_buffer);
   }
   /* subject_buffer may alias subject_descr (token case above). */
   if (!numeric_sip_type && subject_buffer != subject_descr)
      MemFree(subject_buffer);
   MemFree(subject_descr);
   sip = SeqIdSetFree(sip);
   return 0;
}
Exemplo n.º 30
0
/** Callback that writes, for every HSP of the current hit list, a header
 * line followed by an "a { ... }" record with the score ("s"), begin ("b")
 * and end ("e") coordinates and one "l" line per aligned segment with its
 * percent identity. A record is only written to search->output when
 * 100 * (identical positions) >= (alignment length) * perc_identity.
 * HSPs that are NULL, that exceed a positive search->pbp->cutoff_e, or whose
 * query id is missing are skipped. The query coordinates stored in each
 * processed HSP are modified in place.
 * @param ptr The BlastSearchBlk of the running search [in]
 * @return 0 if the hit list is missing or empty, 1 otherwise.
 */
static int LIBCALLBACK
MegaBlastPrintSegments(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   ReadDBFILEPtr rdfp = search->rdfp;
   BLAST_HSPPtr hsp; 
   Int4 i, subject_gi = 0;
   Int2 context;
   CharPtr query_buffer, title;
   SeqIdPtr sip, query_id; 
   Int4 hsp_index, score;
   Uint1Ptr query_seq, subject_seq = NULL;
   FloatHi perc_ident;
   Char strand;
   GapXEditScriptPtr esp;
   Int4 q_start, q_end, s_start, s_end, query_length, numseg;
   Int4 q_off, num_ident, align_length, total_ident, q_shift=0, s_shift=0;
   Int4Ptr length, start;
   Uint1Ptr strands;
   /* subject_descr is only filled in when the subject comes from a
      database; both pointers must start out NULL because they are tested
      and freed on every path below. */
   CharPtr subject_descr = NULL, subject_buffer = NULL, buffer;
   Char tmp_buffer[BUFFER_LENGTH];
   Int4 buffer_size, max_buffer_size = LARGE_BUFFER_LENGTH;
   Boolean numeric_sip_type = FALSE;
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL || search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   subject_seq = search->subject->sequence_start + 1;

   if (rdfp)
      readdb_get_descriptor(rdfp, search->subject_id, &sip, &subject_descr);
   else 
      sip = SeqIdSetDup(search->subject_info->sip);

   /* Choose the text that identifies the subject. */
   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) { 
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type = GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI), 
                                                  &subject_gi, &subject_buffer);
   } else {
      /* Use the first token of the description as the id; from here on both
         pointers refer to that token, so it is freed only once. */
      subject_buffer = StringTokMT(subject_descr, " \t", &subject_descr);
      subject_descr = subject_buffer;
   }

   /* Output buffer, reused for every HSP and grown on demand. */
   buffer = (CharPtr) Malloc(LARGE_BUFFER_LENGTH);

   /* Only for the two sequences case, get offset shift if subject 
      is a subsequence */
   if (!rdfp && search->query_slp->next)
      s_shift = SeqLocStart(search->query_slp->next);
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);
   
   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 && 
                        hsp->evalue > search->pbp->cutoff_e)) {
         continue;
      }
      context = hsp->context;
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue; 
      /* From here on hsp->context only holds the strand bit. */
      hsp->context = context & 1;

      /* With both gap costs at zero the reported number is derived from the
         HSP lengths, reward and penalty instead of the raw score. */
      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                  search->sbp->reward / 2 - hsp->score) / 
            (search->sbp->reward - search->sbp->penalty);
      else 
         score = hsp->score;

      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;

      /* Keep the original query offset for GXECollectDataForSeqalign. */
      q_off = hsp->query.offset;
      if (hsp->context) {
         strand = '-'; 
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset = 
            hsp->query.end - hsp->query.length;
      } else {
         strand = '+';  
         hsp->query.end = hsp->query.offset + hsp->query.length;
      }

      /* On the minus strand the query endpoints are printed swapped. */
      if (strand == '+') {
         q_start = hsp->query.offset + 1;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset + 1;
      }
      s_start = hsp->subject.offset + 1;
      s_end = hsp->subject.offset + hsp->subject.length;

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }

      s_start += s_shift;
      s_end += s_shift;

      /* Choose the text that identifies the query. */
      if (query_id->choice == SEQID_LOCAL && 
          search->pbp->mb_params->full_seqids) {
         BioseqPtr query_bsp = BioseqLockById(query_id);
         title = StringSave(BioseqGetTitle(query_bsp));
         if (title)
            query_buffer = StringTokMT(title, " ", &title);
         else {
            Int4 query_gi;
            GetAccessionFromSeqId(query_bsp->id, &query_gi,
                                  &query_buffer);
         }  
         BioseqUnlock(query_bsp);
      } else {
         query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         if (!search->pbp->mb_params->full_seqids)
            SeqIdWrite(query_id, query_buffer, PRINTID_TEXTID_ACCESSION,
                       BUFFER_LENGTH);
         else 
            SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
      }

      /* Header and the s/b/e lines of the record. */
      if (numeric_sip_type)
         sprintf(buffer, "\n#'>%ld'=='%c%s' (%d %d %d %d) %d\na {\n  s %d\n  b %d %d\n  e %d %d\n", 
                 (long) subject_gi, strand, query_buffer, 
                 s_start, q_start, s_end, q_end, score, score, 
                 s_start, q_start, s_end, q_end);
      else 
         sprintf(buffer, "\n#'>%s'=='%c%s' (%d %d %d %d) %d\na {\n  s %d\n  b %d %d\n  e %d %d\n", 
                 subject_buffer, strand, query_buffer, 
                 s_start, q_start, s_end, q_end, score, score, 
                 s_start, q_start, s_end, q_end);
      buffer_size = StringLen(buffer);

      query_seq = search->context[context].query->sequence;

      /* Count the edit script entries. */
      esp = hsp->gap_info->esp;
        
      for (numseg=0; esp; esp = esp->next, numseg++);

      GXECollectDataForSeqalign(hsp->gap_info, hsp->gap_info->esp, numseg,
                                &start, &length, &strands, 
                                &q_off, &hsp->subject.offset);

      /* Clip the first segment at the start of the query and the last one
         at its end. */
      if (start[0] < 0) {
         length[0] += start[0];
         start[1] -= start[0];
         start[0] = 0;
      } 
      if (start[2*(numseg-1)] + length[numseg-1] > query_length) 
         length[numseg-1] = query_length - start[2*(numseg-1)];
      
      total_ident = 0;
      align_length = 0;
      for (i=0; i<numseg; i++) {
         align_length += length[i];
         if (strand == '+') {
            q_start = start[2*i] + 1;
            q_end = q_start + length[i] - 1;
         } else {
            q_start = query_length - start[2*i];
            q_end = q_start - length[i] + 1;
         }
         /* Only segments present in both sequences get an "l" line. */
         if (start[2*i] != -1 && start[2*i+1] != -1) {
            num_ident = MegaBlastGetNumIdentical(query_seq, subject_seq, 
                                                 start[2*i], start[2*i+1], 
                                                 length[i], FALSE);
            perc_ident = (FloatHi) num_ident / length[i] * 100;
            total_ident += num_ident;
            sprintf(tmp_buffer, "  l %d %d %d %d (%.0f)\n", start[2*i+1]+1, 
                    q_start, start[2*i+1]+length[i],
                    q_end, perc_ident);     
            /* Grow the output buffer before appending; 2 bytes are kept in
               reserve for the closing "}" and the terminator.
               NOTE(review): the Realloc result is not checked. */
            if ((buffer_size += StringLen(tmp_buffer)) > max_buffer_size - 2) {
               max_buffer_size *= 2;
               buffer = (CharPtr) Realloc(buffer, max_buffer_size);
            }
            StringCat(buffer, tmp_buffer);
         }
      }
      /* Print the record only if it reaches the identity threshold. */
      if (100*total_ident >= 
          align_length*search->pbp->mb_params->perc_identity) {
         StringCat(buffer, "}");
         fprintf(fp, "%s\n", buffer);
      }
      MemFree(start);
      MemFree(length);
      MemFree(strands);
      MemFree(query_buffer);
   } /* End loop on hsp's */
   /* subject_buffer may alias subject_descr (token case above). */
   if (!numeric_sip_type && subject_buffer != subject_descr)
      MemFree(subject_buffer);
   MemFree(subject_descr);
   MemFree(buffer);
   sip = SeqIdSetFree(sip);
   fflush(fp);
   return 1;
}