/* Produces the identifier text for a Seq-id and hands it back through
 * *buffer_ptr (set to NULL when sip is NULL or nothing could be produced).
 *
 * sip            - Seq-id (chain) to describe
 * buffer_ptr     - receives the resulting string
 * ncbi_gi        - prefer the gi-based form of the identifier
 * accession_only - prefer the accession.version form of the identifier
 * search_for_id  - when FALSE the Seq-id itself is ignored and only the
 *                  Bioseq title fallback is tried
 */
void Blast_SeqIdGetDefLine(SeqId* sip, char** buffer_ptr, Boolean ncbi_gi,
                           Boolean accession_only, Boolean search_for_id)
{
   char* id_text = NULL;
   Int4 gi = 0;
   Boolean id_is_numeric = FALSE;
   Boolean use_real_id = FALSE;

   *buffer_ptr = NULL;

   if (sip == NULL)
      return;

   /* Check for ad hoc ID's generated by formatdb if the user does not
      provide any: general ids in the "BL_ORD_ID" database are skipped. */
   if (search_for_id) {
      if (sip->choice != SEQID_GENERAL)
         use_real_id = TRUE;
      else if (StringCmp(((Dbtag*)sip->data.ptrvalue)->db, "BL_ORD_ID") != 0)
         use_real_id = TRUE;
   }

   if (use_real_id) {
      if (sip->choice == SEQID_LOCAL || (!accession_only && !ncbi_gi)) {
         /* Full FASTA-style identifier. */
         id_text = (char*) malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(sip, id_text, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else if (accession_only) {
         id_text = (char*) malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(SeqIdFindBestAccession(sip), id_text,
                    PRINTID_TEXTID_ACC_VER, BUFFER_LENGTH);
      } else {
         /* Neither of the tests above held, so ncbi_gi must be set here. */
         id_is_numeric = GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                               &gi, &id_text);
      }
   }

   /* A numeric identifier is rendered as its decimal gi. */
   if (id_is_numeric && gi > 0) {
      id_text = (char*) malloc(16);
      sprintf(id_text, "%ld", (long) gi);
   }

   if (id_text == NULL) {
      /* If it's still NULL make a last ditch effort to get info. */
      char* title = NULL;
      Bioseq* bsp = BioseqLockById(sip);

      if (bsp != NULL) {
         if (BioseqGetTitle(bsp) == NULL)
            title = strdup("No definition line found");
         else
            title = strdup(BioseqGetTitle(bsp));
      }
      BioseqUnlock(bsp);

      /* Use first token as id. */
      if (title != NULL)
         id_text = StringTokMT(title, " \t\n\r", &title);
   }

   *buffer_ptr = id_text;
}
/* Builds one list of column values per source descriptor found on bsp.
 *
 * Each entry of the returned list holds (as its pointer value) a ValNode
 * list: the sequence identifier text, optionally the gi text when want_gi
 * is set, followed by whatever CollectBioSourceValues returns for the
 * descriptor and field_list.  Returns NULL when bsp is NULL or has no
 * source descriptors.
 */
static ValNodePtr CollectBioseqLineValues (BioseqPtr bsp, ValNodePtr field_list, Boolean want_gi)
{
  SeqDescrPtr        src_desc;
  SeqMgrDescContext  desc_ctx;
  Char               acc_text[255], gi_text[255];
  SeqIdPtr           cur, gi_id = NULL, acc_id = NULL;
  ValNodePtr         rows = NULL, row;

  if (bsp == NULL) {
    return NULL;
  }

  /* A GenBank id always wins; EMBL/SwissProt/DDBJ/PIR ids are only taken
     while no accession-style id has been chosen yet. */
  for (cur = bsp->id; cur != NULL; cur = cur->next) {
    switch (cur->choice) {
      case SEQID_GENBANK:
        acc_id = cur;
        break;
      case SEQID_EMBL:
      case SEQID_SWISSPROT:
      case SEQID_DDBJ:
      case SEQID_PIR:
        if (acc_id == NULL) {
          acc_id = cur;
        }
        break;
      case SEQID_GI:
        gi_id = cur;
        break;
      default:
        break;
    }
  }

  /* First column: chosen accession, else (when there is no gi either) the
     best available id, else empty. */
  if (acc_id != NULL) {
    SeqIdWrite (acc_id, acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
  } else if (gi_id == NULL) {
    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
  } else {
    acc_text[0] = 0;
  }

  /* Second column: gi, or empty. */
  if (gi_id != NULL) {
    SeqIdWrite (gi_id, gi_text, PRINTID_REPORT, sizeof (gi_text) - 1);
  } else {
    gi_text[0] = 0;
  }

  src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &desc_ctx);
  while (src_desc != NULL) {
    row = NULL;
    ValNodeAddPointer (&row, 0, StringSave (acc_text));
    if (want_gi) {
      ValNodeAddPointer (&row, 0, StringSave (gi_text));
    }
    ValNodeLink (&row, CollectBioSourceValues (src_desc->data.ptrvalue, field_list));
    ValNodeAddPointer (&rows, 0, row);

    src_desc = SeqMgrGetNextDescriptor (bsp, src_desc, Seq_descr_source, &desc_ctx);
  }

  return rows;
}
/* Writes one text line to fp for every source descriptor on bsp.
 *
 * Each line starts with the sequence identifier text (followed by a tab and
 * the gi text when want_gi is set); the rest of the line is produced by
 * PrintBioSourceLine for the descriptor and field_list.  Does nothing when
 * fp or bsp is NULL.
 */
static void PrintBioseqLines (FILE *fp, BioseqPtr bsp, ValNodePtr field_list, Boolean want_gi)
{
  SeqDescrPtr        src_desc;
  SeqMgrDescContext  desc_ctx;
  Char               acc_text[255], gi_text[255];
  SeqIdPtr           cur, gi_id = NULL, acc_id = NULL;

  if (fp == NULL || bsp == NULL) {
    return;
  }

  /* A GenBank id always wins; EMBL/SwissProt/DDBJ/PIR ids are only taken
     while no accession-style id has been chosen yet. */
  for (cur = bsp->id; cur != NULL; cur = cur->next) {
    switch (cur->choice) {
      case SEQID_GENBANK:
        acc_id = cur;
        break;
      case SEQID_EMBL:
      case SEQID_SWISSPROT:
      case SEQID_DDBJ:
      case SEQID_PIR:
        if (acc_id == NULL) {
          acc_id = cur;
        }
        break;
      case SEQID_GI:
        gi_id = cur;
        break;
      default:
        break;
    }
  }

  /* First column: chosen accession, else (when there is no gi either) the
     best available id, else empty. */
  if (acc_id != NULL) {
    SeqIdWrite (acc_id, acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
  } else if (gi_id == NULL) {
    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
  } else {
    acc_text[0] = 0;
  }

  /* Second column: gi, or empty. */
  if (gi_id != NULL) {
    SeqIdWrite (gi_id, gi_text, PRINTID_REPORT, sizeof (gi_text) - 1);
  } else {
    gi_text[0] = 0;
  }

  src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &desc_ctx);
  while (src_desc != NULL) {
    if (want_gi) {
      fprintf (fp, "%s\t%s", acc_text, gi_text);
    } else {
      fprintf (fp, "%s", acc_text);
    }
    PrintBioSourceLine (fp, src_desc->data.ptrvalue, field_list);
    fprintf (fp, "\n");

    src_desc = SeqMgrGetNextDescriptor (bsp, src_desc, Seq_descr_source, &desc_ctx);
  }
}
/* Replaces the identifier of one contig read using the replacement list.
 *
 * read       - read to update; FALSE is returned when it is NULL or has an
 *              empty read_id
 * pair_list  - find/replace list searched for the read's current id
 * no_lookup  - when set, the replacement is accepted without calling
 *              OkToReplaceId
 * is_srr     - when set, the replacement text is stored in read->srr and
 *              read->read_id is cleared; otherwise it replaces read->read_id
 * has_errors - passed through to OkToReplaceId
 *
 * When the matching pair carries a positive ti, only read->ti is set and no
 * identifier string is written.  A read with no accepted replacement is left
 * untouched and TRUE is still returned.
 *
 * Returns FALSE for an unusable read or when memory for the replacement
 * text cannot be allocated; in the latter case the read is left unmodified.
 */
static Boolean UpdateContigReadId (TContigReadPtr read, SeqIdReplaceListPtr pair_list, Boolean no_lookup, Boolean is_srr, char *has_errors)
{
  SeqIdPairPtr pair;
  SeqIdPtr     sip_find;
  Char         id_buf[255];
  char         *new_id;
  Boolean      rval = TRUE;

  if (read == NULL || StringHasNoText (read->read_id)) {
    return FALSE;
  }

  sip_find = MakeSeqID (read->read_id);
  pair = FindReplacementInSeqIdReplaceList (sip_find, pair_list);

  if (pair != NULL && (no_lookup || OkToReplaceId (pair, read->read_seq, has_errors))) {
    new_id = NULL;

    if (pair->ti <= 0) {
      /* Build the replacement text before touching the read, so that an
         allocation failure cannot leave it half-updated. */
      id_buf[0] = '\0';
      if (pair->sip_replace->choice == SEQID_LOCAL) {
        SeqIdWrite (pair->sip_replace, id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
      } else {
        SeqIdWrite (pair->sip_replace, id_buf, PRINTID_FASTA_LONG, sizeof (id_buf) - 1);
      }
      new_id = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
      if (new_id == NULL) {
        rval = FALSE;
      } else {
        sprintf (new_id, "%s", id_buf);
      }
    }

    if (rval) {
      /* A complemented replacement flips the read's orientation. */
      if (pair->is_complement) {
        if (read->is_complement) {
          read->is_complement = FALSE;
        } else {
          read->is_complement = TRUE;
        }
      }

      if (pair->ti > 0) {
        read->ti = pair->ti;
      } else if (is_srr) {
        free (read->srr);
        read->srr = new_id;
        free (read->read_id);
        read->read_id = NULL;
      } else {
        free (read->read_id);
        read->read_id = new_id;
      }
      read->local = FALSE;
    }
  }

  sip_find = SeqIdFree (sip_find);
  return rval;
}
/* Looks up the replacement entry whose buf_find text matches sip.
 *
 * The id is rendered with PRINTID_REPORT and located by a case-insensitive
 * binary search over pair_list->list[0 .. num_ids-1].
 * NOTE(review): this relies on the list being ordered consistently with
 * StringICmp on buf_find - confirm against the sort used when it is built.
 *
 * Returns a pointer to the matching entry inside pair_list->list, or NULL
 * when sip or pair_list is NULL, the list is empty, or nothing matches.
 */
static SeqIdPairPtr FindReplacementInSeqIdReplaceList (SeqIdPtr sip, SeqIdReplaceListPtr pair_list)
{
  Int4 lo, hi, mid;
  Char buf_find[100];
  int  cmp;

  if (sip == NULL || pair_list == NULL) return NULL;

  /* An empty list must not be indexed at all. */
  if (pair_list->num_ids < 1) return NULL;

  SeqIdWrite (sip, buf_find, PRINTID_REPORT, sizeof (buf_find) - 1);

  lo = 0;
  hi = pair_list->num_ids - 1;
  while (lo <= hi) {
    /* Written this way so the midpoint cannot overflow. */
    mid = lo + (hi - lo) / 2;
    cmp = StringICmp (buf_find, pair_list->list[mid].buf_find);
    if (cmp == 0) {
      return pair_list->list + mid;
    }
    if (cmp < 0) {
      hi = mid - 1;
    } else {
      lo = mid + 1;
    }
  }

  return NULL;
}
/** Fills the Iteration ASN.1 structure, for part of the BLAST XML report * corresponding to one query. * @param seqalign Seq-align list with results [in] * @param sum_returns Search summary data [in] * @param is_ooframe Was out-of-frame gapping used in this search? [in] * @param ungapped Was this an ungapped search? [in] * @param iter_num Index of this "iteration" (query). [in] * @param message Error or warning message [in] * @param query Query Bioseq [in] * @param mask_loc List of masking locations [in] * @return Populated structure. */ static Iteration* s_XMLBuildOneQueryIteration(SeqAlign* seqalign, Blast_SummaryReturn* sum_returns, Boolean is_ooframe, Boolean ungapped, Int4 iter_num, char* message, Bioseq* query, ValNode* mask_loc) { Iteration* iterp = IterationNew(); iterp->iter_num = iter_num; if (query) { char buffer[1024]; SeqIdWrite(query->id, buffer, PRINTID_FASTA_LONG, sizeof(buffer)); iterp->query_ID = strdup(buffer); if(BioseqGetTitle(query) != NULL) iterp->query_def = strdup(BioseqGetTitle(query)); else iterp->query_def = strdup("No definition line found"); iterp->query_len = query->length; } if(seqalign != NULL) { iterp->hits = BXMLSeqAlignToHits(seqalign, ungapped, is_ooframe, mask_loc); } iterp->stat = s_XMLBuildStatistics(sum_returns, ungapped); if (message) iterp->message = strdup(message); return iterp; }
/* Emits an ACE-format error record on stdout for a rejected replacement id.
 *
 * The id is rendered in long FASTA form and handed, together with
 * has_errors, to PrintACEFormatErrorXMLStart; the reason text is then
 * printed, followed by PrintACEFormatErrorXMLEnd.
 */
static void ReportInvalidReplacement (SeqIdPtr sip, CharPtr reason, char *has_errors)
{
  Char id_label[128];

  SeqIdWrite (sip, id_label, PRINTID_FASTA_LONG, sizeof (id_label) - 1);

  PrintACEFormatErrorXMLStart (id_label, has_errors);
  printf ("%s", reason);
  PrintACEFormatErrorXMLEnd ();
}
/* Bioseq visitor: for a protein Bioseq, asks SuggestCodingRegion for a
 * coding region on cfp->nucbsp and prints its intervals to cfp->ofp.
 *
 * Output is the protein's short FASTA id on one line, then one line per
 * location piece: 1-based start and stop separated by a tab, prefixed by
 * "<" / ">" when that end of the piece is partial.
 *
 * userdata must be a CSpeedFlagPtr; nothing happens unless its ofp and
 * nucbsp are both set and bsp is an amino-acid Bioseq with a usable id.
 */
static void DoSuggestIntervals (
  BioseqPtr bsp,
  Pointer userdata
)

{
  CharPtr        mark5, mark3;
  CSpeedFlagPtr  flags;
  Char           prot_id [64];
  SeqLocPtr      cds_loc, piece;
  Boolean        open5, open3;
  SeqAnnotPtr    annot;
  SeqFeatPtr     cds;
  SeqIdPtr       best;
  Int4           from, to;

  if (bsp == NULL || ! ISA_aa (bsp->mol)) return;

  flags = (CSpeedFlagPtr) userdata;
  if (flags == NULL || flags->ofp == NULL || flags->nucbsp == NULL) return;

  best = SeqIdFindBest (bsp->id, 0);
  if (best == NULL) return;
  SeqIdWrite (best, prot_id, PRINTID_FASTA_SHORT, sizeof (prot_id) - 1);

  annot = SuggestCodingRegion (flags->nucbsp, bsp, flags->genCode);
  if (annot == NULL) return;

  /* Only annotations of type 1 are read as carrying features here. */
  cds = NULL;
  if (annot->type == 1) {
    cds = (SeqFeatPtr) annot->data;
  }

  if (cds != NULL && cds->data.choice == SEQFEAT_CDREGION && cds->location != NULL) {
    cds_loc = cds->location;
    fprintf (flags->ofp, "%s\n", prot_id);

    for (piece = SeqLocFindNext (cds_loc, NULL);
         piece != NULL;
         piece = SeqLocFindNext (cds_loc, piece)) {
      from = GetOffsetInBioseq (piece, flags->nucbsp, SEQLOC_START) + 1;
      to = GetOffsetInBioseq (piece, flags->nucbsp, SEQLOC_STOP) + 1;

      CheckSeqLocForPartial (piece, &open5, &open3);
      mark5 = open5 ? "<" : "";
      mark3 = open3 ? ">" : "";

      fprintf (flags->ofp, "%s%ld\t%s%ld\n", mark5, (long) from, mark3, (long) to);
    }
  }

  SeqAnnotFree (annot);
}
/* Creates a VecScreen queue element for bsp, appends it to *queue and
 * registers the connection with QUERY_AddToQueue so that
 * FirstVecScreenCallback is invoked with the new element.
 *
 * Nothing is done when queue, resultproc, conn or bsp is NULL, or when the
 * element cannot be allocated.  announceproc and userdata are stored as
 * given.
 */
static void VecScreen_AddToQueue (
  VQUEUE * queue,
  VecScreenResultProc resultproc,
  VecScreenAnnounceProc announceproc,
  Nlm_VoidPtr userdata,
  CONN conn,
  BioseqPtr bsp
)

{
  VQueuePtr       entry;
  VQueuePtr PNTR  head;
  VQueuePtr       tail;

  if (queue == NULL || resultproc == NULL || conn == NULL || bsp == NULL) return;

  /* allocate queue element */

  entry = (VQueuePtr) MemNew (sizeof (VecScreenQueue));
  if (entry == NULL) return;

  entry->rid [0] = '\0';
  SeqIdWrite (bsp->id, entry->seqid, PRINTID_FASTA_LONG, 40);

  /* timing starts now; the first poll is scheduled 15 seconds out */
  entry->estTime = 0;
  entry->initialTime = GetSecs ();
  entry->postedTime = entry->initialTime;
  entry->secondsToWait = 15;

  entry->resultproc = resultproc;
  entry->announceproc = announceproc;
  entry->userdata = userdata;
  entry->connqueue = NULL;
  entry->done = FALSE;

  /* add to polling queue (at the tail) */

  head = (VQueuePtr PNTR) queue;
  if (*head == NULL) {
    *head = entry;
  } else {
    for (tail = *head; tail->next != NULL; tail = tail->next) {
      continue;
    }
    tail->next = entry;
  }

  /* queue the request for a rID */

  QUERY_AddToQueue (&(entry->connqueue), conn, FirstVecScreenCallback, (Pointer) entry, TRUE);
}
/* Returns a newly allocated string describing sip for error messages.
 *
 * The best id of the chain (as chosen by SeqIdFindBest with SEQID_GI) is
 * written in long FASTA form into a 32-character buffer obtained from
 * MemNew.  The buffer is returned to the caller and is not freed here.
 *
 * Returns NULL when the buffer cannot be allocated.
 */
extern CharPtr ErrorDescString (SeqIdPtr sip)
{
  enum { ERR_DESC_LEN = 32 };

  SeqIdPtr  bestid;
  CharPtr   errbuf;

  errbuf = (CharPtr) MemNew ((size_t) (sizeof (Char) * ERR_DESC_LEN));
  if (errbuf == NULL) {
    /* SeqIdWrite must not be handed a NULL output buffer. */
    return NULL;
  }

  bestid = SeqIdFindBest (sip, SEQID_GI);
  SeqIdWrite (bestid, errbuf, PRINTID_FASTA_LONG, ERR_DESC_LEN - 1);

  return errbuf;
}
/* Reads a find/replace identifier list from fp.
 *
 * Each usable line holds two fields separated by spaces or tabs: the id to
 * look for and the id to put in its place (the second field runs to the end
 * of the line).  Blank lines and lines with only one field are ignored.
 * Reading stops at the first NULL line or at a line whose first character
 * equals EOF.
 *
 * The collected pairs are sorted with SortSeqIdPairList and turned into a
 * replace list by SeqIdReplaceListNew; the temporary pair list is then
 * released with SeqIdPairListFree.
 *
 * Returns the new replace list, or NULL when fp is NULL.
 */
NLM_EXTERN SeqIdReplaceListPtr ReadSeqIdPairListFromFile (FILE *fp)
{
  ReadBufferData      rbd;
  CharPtr             linestring, cp, id2, buf = NULL;
  Int4                len, buf_len = 0;
  SeqIdPairPtr        pair;
  ValNodePtr          pair_list = NULL, last = NULL, vnp;
  SeqIdReplaceListPtr replace_list = NULL;

  if (fp == NULL) return NULL;

  rbd.fp = fp;
  rbd.current_data = NULL;

  linestring = AbstractReadFunction (&rbd);
  while (linestring != NULL && linestring[0] != EOF) {
    /* skip leading blanks, then split off the first field */
    cp = linestring + StringSpn (linestring, " \t");
    if (*cp != 0) {
      len = StringCSpn (cp, " \t");
      id2 = cp + len + StringSpn (cp + len, " \t");
      if (*id2 != 0) {
        /* grow the scratch buffer only when this field is longer than
           any seen so far */
        if (len + 1 > buf_len) {
          buf = MemFree (buf);
          buf_len = len + 1;
          buf = (CharPtr) MemNew (sizeof (Char) * buf_len);
        }
        StringNCpy (buf, cp, len);
        buf[len] = 0;

        pair = SeqIdPairNew ();
        pair->sip_find = MakeSeqID (buf);
        SeqIdWrite (pair->sip_find, pair->buf_find, PRINTID_REPORT, sizeof (pair->buf_find) - 1);
        pair->sip_replace = MakeSeqID (id2);

        /* append to the tail to keep file order until sorting */
        vnp = ValNodeNew (NULL);
        vnp->data.ptrvalue = pair;
        if (last == NULL) {
          pair_list = vnp;
        } else {
          last->next = vnp;
        }
        last = vnp;
      }
    }
    free (linestring);
    linestring = AbstractReadFunction (&rbd);
  }

  /* The loop can stop on a non-NULL line (EOF marker); release it, and
     release the scratch buffer, neither of which is needed any more. */
  free (linestring);
  buf = MemFree (buf);

  pair_list = ValNodeSort (pair_list, SortSeqIdPairList);
  replace_list = SeqIdReplaceListNew (pair_list);
  pair_list = SeqIdPairListFree (pair_list);

  return replace_list;
}
/* Feature visitor: prints the intervals of a coding region to cfp->ofp.
 *
 * Output is the product's short FASTA id ("?" when the feature has no
 * product or the product has no id) on one line, then one line per location
 * piece: 1-based start and stop on the Bioseq found from the location,
 * separated by a tab and prefixed by "<" / ">" when that end is partial.
 *
 * userdata must be a CSpeedFlagPtr with ofp set; features that are not
 * coding regions, or whose location does not resolve to a Bioseq, are
 * skipped.
 */
static void DoVisitCodingRegions (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  BioseqPtr      target;
  CharPtr        mark5, mark3;
  CSpeedFlagPtr  flags;
  Char           label [64];
  SeqLocPtr      cds_loc, piece;
  Boolean        open5, open3;
  SeqIdPtr       prod_id;
  Int4           from, to;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return;

  flags = (CSpeedFlagPtr) userdata;
  if (flags == NULL || flags->ofp == NULL) return;

  cds_loc = sfp->location;
  target = BioseqFindFromSeqLoc (cds_loc);
  if (target == NULL) return;

  /* label defaults to "?" unless the product supplies an id */
  StringCpy (label, "?");
  prod_id = NULL;
  if (sfp->product != NULL) {
    prod_id = SeqLocId (sfp->product);
  }
  if (prod_id != NULL) {
    SeqIdWrite (prod_id, label, PRINTID_FASTA_SHORT, sizeof (label) - 1);
  }

  fprintf (flags->ofp, "%s\n", label);

  for (piece = SeqLocFindNext (cds_loc, NULL);
       piece != NULL;
       piece = SeqLocFindNext (cds_loc, piece)) {
    from = GetOffsetInBioseq (piece, target, SEQLOC_START) + 1;
    to = GetOffsetInBioseq (piece, target, SEQLOC_STOP) + 1;

    CheckSeqLocForPartial (piece, &open5, &open3);
    mark5 = open5 ? "<" : "";
    mark3 = open3 ? ">" : "";

    fprintf (flags->ofp, "%s%ld\t%s%ld\n", mark5, (long) from, mark3, (long) to);
  }
}
/* Writes a "QA - <id> - <prefix>[ - <suffix>]" line to sdp->fp.
 *
 * <id> is the long FASTA form of bsp's id.  The suffix part is omitted when
 * suffix is NULL or blank.  Nothing is written when bsp, sdp or prefix is
 * NULL.
 */
static void PrintGraphMessage (BioseqPtr bsp, ScanDataPtr sdp, CharPtr prefix, CharPtr suffix)
{
  Char     id_text [41];
  Boolean  have_suffix;

  if (bsp == NULL || sdp == NULL || prefix == NULL) return;

  SeqIdWrite (bsp->id, id_text, PRINTID_FASTA_LONG, sizeof (id_text));
  fprintf (sdp->fp, "QA - %s - %s", id_text, prefix);

  have_suffix = (Boolean) (! StringHasNoText (suffix));
  if (have_suffix) {
    fprintf (sdp->fp, " - %s", suffix);
  }

  fprintf (sdp->fp, "\n");
}
/* Writes a two-column line to fp that names a sequence by id only.
 *
 * A gi id is written after a leading tab; any other id is written followed
 * by a tab.  Nothing is written when fp or sip is NULL.
 */
static void PrintBioseqErrorLine (FILE *fp, SeqIdPtr sip)
{
  Char id_label[255];

  if (fp == NULL || sip == NULL) {
    return;
  }

  SeqIdWrite (sip, id_label, PRINTID_REPORT, sizeof (id_label) - 1);

  if (sip->choice != SEQID_GI) {
    fprintf (fp, "%s\t\n", id_label);
    return;
  }

  fprintf (fp, "\t%s\n", id_label);
}
/******************************************************************************* Function : DDV_DrawSequenceName() Purpose : draw the name of the sequence (left column of the DDV panel) Parameters : GrData; graphical data (font size, etc) ScaleStyle;style of the ParaG scale top, left; coord to start the draw Return value : none *******************************************************************************/ static void DDV_DrawSequenceName(UnDViewerGraphDataPtr GrData,ParaGPtr pgp, Int2 top,Int2 left,Int4 cur_row,Int4 CurEditRow,Int4 CurMasterRow) { SeqIdPtr sip = NULL; RecT rc; Int2 x,y,decal=1,size;/*text position/size*/ Char szAccess[21]; BioseqPtr bsp; /*get a name*/ bsp = BioseqLockById(pgp->sip); if(bsp) { sip = SeqIdFindBestAccession(bsp->id); BioseqUnlock(bsp); } if (!sip) sip = SeqIdFindBest(pgp->sip, 0); SeqIdWrite(sip, szAccess,PRINTID_TEXTID_ACCESSION, 20); /*compute position*/ if (pgp->ScaleStyle==SCALE_POS_TOP) decal++; /*draw name*/ size=StringWidth(szAccess); x=left/*-GrData->udv_scale.cxLeftScale*/-size; y=top+decal*GrData->udv_font.LineHeight; MoveTo(x,y); if (cur_row==CurEditRow){ Magenta(); } PaintString (szAccess); if (cur_row==CurMasterRow){ Blue(); MoveTo(x,y); LineTo(x+size,y); } /*draw a little box (for selection a full sequence)*/ left+=GrData->udv_font.cxChar; top+=GrData->udv_font.cxChar/2; LoadRect(&rc,left,top,left+GrData->udv_font.cxChar, top+GrData->udv_font.cxChar); Blue(); PaintOval(&rc); Black(); }
/* Bioseq visitor: writes ">id title" for bsp to cfp->ofp, with the title
 * produced by NewCreateDefLine ("?" when that title is NULL or blank).
 *
 * userdata must be a CSpeedFlagPtr.  Its skip string filters the input:
 *   's' - skip Bioseqs whose parent set is a segset or a parts set
 *   'v' - skip virtual Bioseqs
 * Nothing is written when cfp->ofp is NULL.
 */
static void DoNewFastaDefline (
  BioseqPtr bsp,
  Pointer userdata
)

{
  BioseqSetPtr   bssp;
  CSpeedFlagPtr  cfp;
  Char           id [128];
  CharPtr        title;

  if (bsp == NULL) return;
  cfp = (CSpeedFlagPtr) userdata;
  if (cfp == NULL) return;

  if (StringChr (cfp->skip, 's') != NULL) {
    if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
      bssp = (BioseqSetPtr) bsp->idx.parentptr;
      if (bssp != NULL) {
        if (bssp->_class == BioseqseqSet_class_segset ||
            bssp->_class == BioseqseqSet_class_parts) return;
      }
    }
  }

  if (StringChr (cfp->skip, 'v') != NULL) {
    if (bsp->repr == Seq_repr_virtual) return;
  }

  id [0] = '\0';
  SeqIdWrite (bsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);

  title = NewCreateDefLine (NULL, bsp, FALSE, FALSE);
  if (StringHasNoText (title)) {
    /* A blank (but allocated) title must be released before it is
       replaced, otherwise it is leaked. */
    title = MemFree (title);
    title = StringSave ("?");
  }

  if (cfp->ofp != NULL) {
    fprintf (cfp->ofp, ">%s %s\n", id, title);
  }

  MemFree (title);
}
/* Bioseq visitor: writes a protein Bioseq in FASTA form to fe->fp.
 *
 * data must be a FastaExportOptionsPtr.  When the Bioseq carries a protein
 * feature, a ">id [prot=label]" defline is written here (label taken from
 * the feature context) and the sequence is streamed with the seventh
 * BioseqFastaStreamEx argument FALSE; otherwise that argument is TRUE and
 * no defline is written by this function.
 * Non-protein Bioseqs and NULL arguments are ignored.
 */
static void WriteOneProteinWithProduct (BioseqPtr bsp, Pointer data)
{
  FastaExportOptionsPtr  opts;
  SeqFeatPtr             prot_feat;
  SeqMgrFeatContext      feat_ctx;
  Char                   id_text [128];

  if (bsp == NULL || ! ISA_aa (bsp->mol)) return;

  opts = (FastaExportOptionsPtr) data;
  if (opts == NULL) return;

  prot_feat = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_PROT, &feat_ctx);
  if (prot_feat == NULL) {
    BioseqFastaStreamEx (bsp, opts->fp, opts->flags, opts->linelen,
                         opts->blocklen, opts->grouplen, TRUE, FALSE, FALSE);
    return;
  }

  SeqIdWrite (bsp->id, id_text, PRINTID_FASTA_LONG, sizeof (id_text) - 1);
  fprintf (opts->fp, ">%s [prot=%s]\n", id_text, feat_ctx.label);
  BioseqFastaStreamEx (bsp, opts->fp, opts->flags, opts->linelen,
                       opts->blocklen, opts->grouplen, FALSE, FALSE, FALSE);
}
/* Writes a "<prefix> - <id>[ - <suffix>]" line to sdp->fp.
 *
 * <id> is the long FASTA id of the Bioseq found from the feature location,
 * or sdp->buf when no Bioseq is found.  The suffix part is omitted when
 * suffix is NULL or blank.  Nothing is written when sfp, sdp or prefix is
 * NULL.
 */
static void PrintFeatureMessage (SeqFeatPtr sfp, ScanDataPtr sdp, CharPtr prefix, CharPtr suffix)
{
  BioseqPtr  owner;
  Char       id_text [41];

  if (sfp == NULL || sdp == NULL || prefix == NULL) return;

  owner = BioseqFindFromSeqLoc (sfp->location);
  if (owner == NULL) {
    /* fall back to the identifier already stored in the scan data */
    fprintf (sdp->fp, "%s - %s", prefix, sdp->buf);
  } else {
    SeqIdWrite (owner->id, id_text, PRINTID_FASTA_LONG, sizeof (id_text));
    fprintf (sdp->fp, "%s - %s", prefix, id_text);
  }

  if (! StringHasNoText (suffix)) {
    fprintf (sdp->fp, " - %s", suffix);
  }

  fprintf (sdp->fp, "\n");
}
/* Compares the definition line from CreateDefLineExEx with the one from
 * NewCreateDefLine for bsp and reports any difference.
 *
 * When the two texts differ, a "< id old" / "> id new" pair is written to
 * cfp->ofp (if set) and always to stdout.  A NULL or blank new title is
 * treated as "?".  ignoreExisting is passed through to both generators.
 * userdata must be a CSpeedFlagPtr.
 */
static void DoFastaComp (
  BioseqPtr bsp,
  Pointer userdata,
  Boolean ignoreExisting
)

{
  Char           buf [4096];
  CSpeedFlagPtr  cfp;
  Char           id [128];
  CharPtr        title;

  if (bsp == NULL) return;
  cfp = (CSpeedFlagPtr) userdata;
  if (cfp == NULL) return;

  id [0] = '\0';
  SeqIdWrite (bsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);

  buf [0] = '\0';
  CreateDefLineExEx (NULL, bsp, buf, sizeof (buf) - 1, 0,
                     NULL, NULL, ignoreExisting, FALSE);

  title = NewCreateDefLine (NULL, bsp, ignoreExisting, FALSE);
  if (StringHasNoText (title)) {
    /* A blank (but allocated) title must be released before it is
       replaced, otherwise it is leaked. */
    title = MemFree (title);
    title = StringSave ("?");
  }

  if (StringCmp (buf, title) != 0) {
    if (cfp->ofp != NULL) {
      fprintf (cfp->ofp, "<  %s %s\n", id, buf);
      fprintf (cfp->ofp, ">  %s %s\n", id, title);
    }
    printf ("<  %s %s\n", id, buf);
    printf (">  %s %s\n", id, title);
    fflush (stdout);
  }

  MemFree (title);
}
/* Validates the identifier of a read that is still marked as local.
 *
 * read       - read to check; FALSE is returned when it is NULL or has an
 *              empty read_id, TRUE immediately when it is not local
 * has_errors - passed through to OkToReplaceId
 *
 * The read's own id is offered to OkToReplaceId as the replacement.  When
 * it is accepted, the read's orientation is synchronised with the pair,
 * then either read->ti is set (pair ti > 0) or read->read_id is rewritten
 * in long FASTA form, and the read is no longer marked local.
 *
 * Returns FALSE for an unusable read or when memory for the rewritten id
 * cannot be allocated; in the latter case the read is left unmodified.
 */
static Boolean ValidateContigReadId (TContigReadPtr read, char *has_errors)
{
  SeqIdPairData pair;
  Char          id_buf[255];
  char          *new_id;
  Boolean       rval = TRUE;

  if (read == NULL || StringHasNoText (read->read_id)) {
    return FALSE;
  }
  if (!read->local) {
    return TRUE;
  }

  pair.sip_find = NULL;
  pair.is_complement = FALSE;
  pair.is_consensus = FALSE;
  pair.trim3 = 0;
  pair.trim5 = 0;
  pair.sip_replace = MakeSeqID (read->read_id);
  pair.ti = 0;

  if (OkToReplaceId (&pair, read->read_seq, has_errors)) {
    new_id = NULL;

    if (pair.ti <= 0) {
      /* Build the new text before touching the read, so that an
         allocation failure cannot leave it half-updated. */
      id_buf[0] = '\0';
      SeqIdWrite (pair.sip_replace, id_buf, PRINTID_FASTA_LONG, sizeof (id_buf) - 1);
      new_id = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
      if (new_id == NULL) {
        rval = FALSE;
      } else {
        sprintf (new_id, "%s", id_buf);
      }
    }

    if (rval) {
      if (pair.is_complement && !read->is_complement) {
        read->is_complement = TRUE;
      } else if (!pair.is_complement && read->is_complement) {
        read->is_complement = FALSE;
      }

      if (pair.ti > 0) {
        read->ti = pair.ti;
      } else {
        free (read->read_id);
        read->read_id = new_id;
      }
      read->local = FALSE;
    }
  }

  pair.sip_replace = SeqIdFree (pair.sip_replace);
  return rval;
}
/* Bioseq visitor: writes ">id defline" for bsp to cfp->ofp, with the
 * definition line produced by CreateDefLine.
 *
 * userdata must be a CSpeedFlagPtr.  The id and definition line are always
 * generated; they are only written when cfp->ofp is set.
 */
static void DoFastaDefline (
  BioseqPtr bsp,
  Pointer userdata
)

{
  Char           defline [4096];
  CSpeedFlagPtr  flags;
  Char           id_text [128];

  if (bsp == NULL) return;

  flags = (CSpeedFlagPtr) userdata;
  if (flags == NULL) return;

  id_text [0] = '\0';
  SeqIdWrite (bsp->id, id_text, PRINTID_FASTA_LONG, sizeof (id_text) - 1);

  defline [0] = '\0';
  CreateDefLine (NULL, bsp, defline, sizeof (defline) - 1, 0, NULL, NULL);

  if (flags->ofp == NULL) return;

  fprintf (flags->ofp, ">%s %s\n", id_text, defline);
}
/* Applies the replacement list to a contig's consensus id and to the ids
 * of all of its reads.
 *
 * contig     - contig to update; FALSE is returned when it is NULL
 * pair_list  - find/replace list; TRUE is returned at once when it is NULL
 * no_lookup  - when set, replacements are accepted without OkToReplaceId
 * is_srr     - passed through to UpdateContigReadId
 * has_errors - passed through to OkToReplaceId / UpdateContigReadId
 *
 * Returns TRUE only when the consensus id (if present) was replaced and
 * every read update reported success.  A failure to allocate memory for the
 * new consensus id leaves the consensus unchanged and yields FALSE.
 */
NLM_EXTERN Boolean UpdateContigIds (TContigPtr contig, SeqIdReplaceListPtr pair_list, Boolean no_lookup, Boolean is_srr, char *has_errors)
{
  Int4         i;
  SeqIdPairPtr pair;
  SeqIdPtr     sip_find;
  Char         id_buf[255];
  char         *new_id;
  Boolean      rval = TRUE;

  if (contig == NULL) return FALSE;
  if (pair_list == NULL) return TRUE;

  if (contig->consensus_id != NULL) {
    sip_find = MakeSeqID (contig->consensus_id);
    pair = FindReplacementInSeqIdReplaceList (sip_find, pair_list);

    if (pair != NULL && (no_lookup || OkToReplaceId (pair, contig->consensus_seq, has_errors))) {
      /* Build the new text before touching the contig, so that an
         allocation failure cannot leave it half-updated. */
      id_buf[0] = '\0';
      SeqIdWrite (pair->sip_replace, id_buf, PRINTID_FASTA_LONG, sizeof (id_buf) - 1);
      new_id = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
      if (new_id == NULL) {
        rval = FALSE;
      } else {
        sprintf (new_id, "%s", id_buf);

        /* A complemented replacement flips the contig's orientation. */
        if (pair->is_complement) {
          if (contig->is_complement) {
            contig->is_complement = FALSE;
          } else {
            contig->is_complement = TRUE;
          }
        }

        free (contig->consensus_id);
        contig->consensus_id = new_id;
      }
    } else {
      rval = FALSE;
    }

    sip_find = SeqIdFree (sip_find);
  }

  /* Every read is processed even after an earlier failure. */
  for (i = 0; i < contig->num_reads; i++) {
    rval &= UpdateContigReadId (contig->reads[i], pair_list, no_lookup, is_srr, has_errors);
  }

  return rval;
}
/* Per-record callback: runs the scan checks on one Seq-entry.
 *
 * userdata is used as a ScanDataPtr (it is dereferenced without a NULL
 * check).  Steps, in order:
 *   - increments sdp->recordCount;
 *   - writes the long FASTA id of the first Bioseq (FindNthBioseq (sep, 1))
 *     into sdp->buf, when there is one;
 *   - visits publication descriptors with DoThesis;
 *   - clears sdp->bulk, then visits descriptors with LookForBulk and DoTitle;
 *   - visits features with DoImpCDSandTrna;
 *   - indexes features, then visits features with DoPseudoCDS and DoPeptide;
 *   - runs SeriousSeqEntryCleanup, re-indexes features, and visits Bioseqs
 *     with DoProteins.
 * The OS_UNIX printf, the DoGraphs visit and the "#if 0" DoUser section are
 * disabled in the source.
 * NOTE(review): in this copy the preprocessor directives share a physical
 * line with code; they need lines of their own to act as directives -
 * confirm against the upstream file before reformatting.
 */
static void DoRecord (SeqEntryPtr sep, Pointer userdata) { BioseqPtr bsp; SeqEntryPtr nsep; ScanDataPtr sdp; sdp = (ScanDataPtr) userdata; (sdp->recordCount)++; nsep = FindNthBioseq (sep, 1); if (nsep != NULL && IS_Bioseq (nsep)) { bsp = (BioseqPtr) nsep->data.ptrvalue; if (bsp != NULL) { SeqIdWrite (bsp->id, sdp->buf, PRINTID_FASTA_LONG, sizeof (sdp->buf)); } } #ifdef OS_UNIX /* printf ("%s\n", sdp->buf); */ #endif VisitPubdescsInSep (sep, (Pointer) sdp, DoThesis); /* check for 'genomic DNA' in DoTitle suppressed for bulk submissions */ sdp->bulk = FALSE; VisitDescriptorsInSep (sep, (Pointer) sdp, LookForBulk); VisitDescriptorsInSep (sep, (Pointer) sdp, DoTitle); VisitFeaturesInSep (sep, (Pointer) sdp, DoImpCDSandTrna); /* index for pseudo cds, impfeat peptides codon frame */ SeqMgrIndexFeatures (0, sep->data.ptrvalue); VisitFeaturesInSep (sep, (Pointer) sdp, DoPseudoCDS); VisitFeaturesInSep (sep, (Pointer) sdp, DoPeptide); /* now cleanup, index for overlapping peptides */ SeriousSeqEntryCleanup (sep, NULL, NULL); SeqMgrIndexFeatures (0, sep->data.ptrvalue); VisitBioseqsInSep (sep, (Pointer) sdp, DoProteins); /* VisitBioseqsInSep (sep, (Pointer) sdp, DoGraphs); */ #if 0 { Boolean hasUser = FALSE; VisitFeaturesInSep (sep, (Pointer) &hasUser, DoUser); if (hasUser && sdp->aop != NULL && sdp->atp_se != NULL) { SeqEntryAsnWrite (sep, sdp->aop, sdp->atp_se); } } #endif }
/* Runs every operation selected by the flag strings in cfp on one record.
 *
 * sep      - record to process
 * entityID - entity id of the record, used for indexing and deletion
 * cfp      - option block; each option string is scanned for single-letter
 *            flags, and the actions run in the fixed order below
 *
 *   clean : t  delete titles marked by MarkTitles
 *           a  AssignIDsInEntity
 *           b  BasicSeqEntryCleanup
 *           s  SeriousSeqEntryCleanup
 *   index : f  index features
 *   seq   : c  DoFastaExist      C  DoFastaRegen
 *           s  DoFastaSeq        S  index if needed, then DoFastaSeq
 *           r  DoFastaRaw
 *           d  DoFastaDefline    D  index if needed, then DoFastaDefline
 *           T  delete titles, re-index, DoFastaDefline
 *           x  DoNewFastaDefline X  delete titles, re-index, DoNewFastaDefline
 *           f  DoFastaFeat       t  DoFastaTrans
 *   feat  : v  DoVisitFeaturesTest
 *           g  DoGeneOverlapPrintTest   h  DoGeneOverlapSpeedTest
 *           t  feature-table output via SeqEntryToGnbk
 *           s  suggest intervals        S  same, in batch-suggest mode
 *           c  DoVisitCodingRegions
 *           (x, o, d are recognised but perform no action)
 *   desc  : (b, t are recognised but perform no action)
 *   verify: v  validator with errors routed to ValidCallback
 *           b  GenBank flatfile output via SeqEntryToGnbk
 *
 * cfp->ofp is flushed at the end when it is set.  The nucleotide id line
 * of the feat 's'/'S' actions is only written when cfp->ofp is set.
 */
static void DoProcess (
  SeqEntryPtr sep,
  Uint2 entityID,
  CSpeedFlagPtr cfp
)

{
  Char            id [64];
  ErrSev          oldErrSev;
  ValidStructPtr  vsp;

  if (sep == NULL || cfp == NULL) return;

  /* ---- cleanup ---- */

  if (StringChr (cfp->clean, 't') != NULL) {
    VisitDescriptorsInSep (sep, NULL, MarkTitles);
    DeleteMarkedObjects (entityID, 0, NULL);
  }
  if (StringChr (cfp->clean, 'a') != NULL) {
    AssignIDsInEntity (entityID, 0, NULL);
  }
  if (StringChr (cfp->clean, 'b') != NULL) {
    BasicSeqEntryCleanup (sep);
  }
  if (StringChr (cfp->clean, 's') != NULL) {
    SeriousSeqEntryCleanup (sep, NULL, NULL);
  }

  /* ---- indexing ---- */

  if (StringChr (cfp->index, 'f') != NULL) {
    SeqMgrIndexFeatures (entityID, 0);
  }

  /* ---- sequence / FASTA ---- */

  if (StringChr (cfp->seq, 'c') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaExist);
  }
  if (StringChr (cfp->seq, 'C') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaRegen);
  }
  if (StringChr (cfp->seq, 's') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaSeq);
  }
  if (StringChr (cfp->seq, 'S') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaSeq);
  }
  if (StringChr (cfp->seq, 'r') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaRaw);
  }
  if (StringChr (cfp->seq, 'd') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
  }
  if (StringChr (cfp->seq, 'D') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
  }
  if (StringChr (cfp->seq, 'T') != NULL) {
    VisitDescriptorsInSep (sep, NULL, MarkTitles);
    DeleteMarkedObjects (entityID, 0, NULL);
    SeqMgrIndexFeatures (entityID, 0);
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
  }
  if (StringChr (cfp->seq, 'x') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoNewFastaDefline);
  }
  if (StringChr (cfp->seq, 'X') != NULL) {
    VisitDescriptorsInSep (sep, NULL, MarkTitles);
    DeleteMarkedObjects (entityID, 0, NULL);
    SeqMgrIndexFeatures (entityID, 0);
    VisitBioseqsInSep (sep, (Pointer) cfp, DoNewFastaDefline);
  }
  if (StringChr (cfp->seq, 'f') != NULL) {
    VisitFeaturesInSep (sep, (Pointer) cfp, DoFastaFeat);
  }
  if (StringChr (cfp->seq, 't') != NULL) {
    VisitFeaturesInSep (sep, (Pointer) cfp, DoFastaTrans);
  }

  /* ---- features ---- */

  if (StringChr (cfp->feat, 'v') != NULL) {
    VisitFeaturesInSep (sep, NULL, DoVisitFeaturesTest);
  }
  if (StringChr (cfp->feat, 'g') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    VisitFeaturesInSep (sep, (Pointer) cfp, DoGeneOverlapPrintTest);
  }
  if (StringChr (cfp->feat, 'h') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    VisitFeaturesInSep (sep, (Pointer) cfp, DoGeneOverlapSpeedTest);
  }
  /* feat flags 'x', 'o' and 'd' are accepted but have no action */
  if (StringChr (cfp->feat, 'x') != NULL) {
  }
  if (StringChr (cfp->feat, 'o') != NULL) {
  }
  if (StringChr (cfp->feat, 'd') != NULL) {
  }
  if (StringChr (cfp->feat, 't') != NULL) {
    SeqEntryToGnbk (sep, NULL, FTABLE_FMT, SEQUIN_MODE, NORMAL_STYLE, 0, 0, SHOW_PROT_FTABLE, NULL, cfp->ofp);
  }
  if (StringChr (cfp->feat, 's') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    cfp->nucbsp = FindNucBioseq (sep);
    if (cfp->nucbsp != NULL) {
      BioseqToGeneticCode (cfp->nucbsp, &(cfp->genCode), NULL, NULL, NULL, 0, NULL);
      SeqIdWrite (cfp->nucbsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
      /* ofp may be unset (see the final flush), so guard the write */
      if (cfp->ofp != NULL) {
        fprintf (cfp->ofp, "%s\n", id);
      }
      VisitBioseqsInSep (sep, (Pointer) cfp, DoSuggestIntervals);
      cfp->nucbsp = NULL;
      cfp->genCode = 0;
    }
  }
  if (StringChr (cfp->feat, 'S') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    cfp->nucbsp = FindNucBioseq (sep);
    if (cfp->nucbsp != NULL) {
      BioseqToGeneticCode (cfp->nucbsp, &(cfp->genCode), NULL, NULL, NULL, 0, NULL);
      SetBatchSuggestNucleotide (cfp->nucbsp, cfp->genCode);
      SeqIdWrite (cfp->nucbsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
      /* ofp may be unset (see the final flush), so guard the write */
      if (cfp->ofp != NULL) {
        fprintf (cfp->ofp, "%s\n", id);
      }
      VisitBioseqsInSep (sep, (Pointer) cfp, DoSuggestIntervals);
      ClearBatchSuggestNucleotide ();
      cfp->nucbsp = NULL;
      cfp->genCode = 0;
    }
  }
  if (StringChr (cfp->feat, 'c') != NULL) {
    VisitFeaturesInSep (sep, (Pointer) cfp, DoVisitCodingRegions);
  }

  /* ---- descriptors ---- */

  /* desc flags 'b' and 't' are accepted but have no action */
  if (StringChr (cfp->desc, 'b') != NULL) {
  }
  if (StringChr (cfp->desc, 't') != NULL) {
  }

  /* ---- verification ---- */

  if (StringChr (cfp->verify, 'v') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    vsp = ValidStructNew ();
    if (vsp != NULL) {
      vsp->useSeqMgrIndexes = TRUE;
      vsp->suppressContext = TRUE;
      vsp->seqSubmitParent = TRUE;
      vsp->testLatLonSubregion = TRUE;
      /* silence the error poster while validating; restored below */
      oldErrSev = ErrSetMessageLevel (SEV_NONE);
      vsp->errfunc = ValidCallback;
      vsp->userdata = (Pointer) cfp->ofp;
      /* vsp->convertGiToAccn = FALSE; */
      ValidateSeqEntry (sep, vsp);
      ValidStructFree (vsp);
      ErrSetMessageLevel (oldErrSev);
    }
  }
  if (StringChr (cfp->verify, 'b') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    SeqEntryToGnbk (sep, NULL, GENBANK_FMT, SEQUIN_MODE, NORMAL_STYLE, 0, 0, 0, NULL, cfp->ofp);
  }

  if (cfp->ofp != NULL) {
    fflush (cfp->ofp);
  }
}
/* Reads a release file containing many Seq-entries and runs DoProcess on
 * each of them cfp->maxcount times, timing every record.
 *
 * filename - input file; ignored when NULL or blank
 * cfp      - option block; binary selects binary ASN.1 input, compressed
 *            pipes the file through a decompressor (UNIX builds only)
 *
 * With cfp->compressed set, the decompressor is the program named by the
 * NCBI_UNCOMPRESS_BINARY environment variable, else gzcat, else zcat; the
 * command line is built in a fixed-size buffer and the file is rejected
 * when the command would not fit.
 *
 * When cfp->logfp is set, the file name and the long FASTA id of the first
 * Bioseq of every record are logged, followed by a summary naming the
 * record with the longest processing time (only when one was recorded).
 */
static void ProcessMultipleRecord (
  CharPtr filename,
  CSpeedFlagPtr cfp
)

{
  AsnIoPtr     aip;
  AsnTypePtr   atp;
  BioseqPtr    bsp;
  Char         buf [41];
  Uint2        entityID;
  FILE         *fp;
  SeqEntryPtr  fsep;
  Char         longest [41];
  Int4         numrecords, x;
  SeqEntryPtr  sep;
  time_t       starttime, stoptime, worsttime;
#ifdef OS_UNIX
  Char         cmmd [256];
  CharPtr      gzcatprog;
  CharPtr      unzipper = NULL;
  int          ret;
  Boolean      usedPopen = FALSE;
#endif

  if (cfp == NULL) return;

  if (StringHasNoText (filename)) return;

#ifndef OS_UNIX
  if (cfp->compressed) {
    Message (MSG_POSTERR, "Can only decompress on-the-fly on UNIX machines");
    return;
  }
#endif

#ifdef OS_UNIX
  if (cfp->compressed) {
    gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY");
    if (gzcatprog != NULL) {
      unzipper = gzcatprog;
    } else {
      ret = system ("gzcat -h >/dev/null 2>&1");
      if (ret == 0) {
        unzipper = "gzcat";
      } else if (ret == -1) {
        Message (MSG_POSTERR, "Unable to fork or exec gzcat in ScanBioseqSetRelease");
        return;
      } else {
        ret = system ("zcat -h >/dev/null 2>&1");
        if (ret == 0) {
          unzipper = "zcat";
        } else if (ret == -1) {
          Message (MSG_POSTERR, "Unable to fork or exec zcat in ScanBioseqSetRelease");
          return;
        } else {
          Message (MSG_POSTERR, "Unable to find zcat or gzcat in ScanBioseqSetRelease - please edit your PATH environment variable");
          return;
        }
      }
    }
    /* program + ' ' + filename + terminating NUL must fit in cmmd */
    if (StringLen (unzipper) + StringLen (filename) + 2 > sizeof (cmmd)) {
      Message (MSG_POSTERR, "Decompression command is too long for input file '%s'", filename);
      return;
    }
    sprintf (cmmd, "%s %s", unzipper, filename);
    fp = popen (cmmd, /* cfp->binary? "rb" : */ "r");
    usedPopen = TRUE;
  } else {
    fp = FileOpen (filename, cfp->binary? "rb" : "r");
  }
#else
  fp = FileOpen (filename, cfp->binary? "rb" : "r");
#endif
  if (fp == NULL) {
    Message (MSG_POSTERR, "FileOpen failed for input file '%s'", filename);
    return;
  }

  aip = AsnIoNew (cfp->binary? ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL);
  if (aip == NULL) {
    Message (MSG_ERROR, "AsnIoNew failed for input file '%s'", filename);
    /* do not leak the stream that was just opened */
#ifdef OS_UNIX
    if (usedPopen) {
      pclose (fp);
    } else {
      FileClose (fp);
    }
#else
    FileClose (fp);
#endif
    return;
  }

  if (cfp->logfp != NULL) {
    fprintf (cfp->logfp, "%s\n\n", filename);
    fflush (cfp->logfp);
  }

  /* buf is copied into longest below even when a record has no Bioseq,
     so it must start out as a valid (empty) string */
  buf [0] = '\0';
  longest [0] = '\0';
  worsttime = 0;
  numrecords = 0;

  atp = cfp->atp_bss;

  while ((atp = AsnReadId (aip, cfp->amp, atp)) != NULL) {
    if (atp == cfp->atp_se) {

      sep = SeqEntryAsnRead (aip, atp);
      if (sep != NULL) {

        entityID = ObjMgrGetEntityIDForChoice (sep);

        fsep = FindNthBioseq (sep, 1);
        if (fsep != NULL && fsep->choice == 1) {
          bsp = (BioseqPtr) fsep->data.ptrvalue;
          if (bsp != NULL) {
            SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
            if (cfp->logfp != NULL) {
              fprintf (cfp->logfp, "%s\n", buf);
              fflush (cfp->logfp);
            }
          }
        }

        starttime = GetSecs ();
        for (x = 0; x < cfp->maxcount; x++) {
          DoProcess (sep, entityID, cfp);
        }
        stoptime = GetSecs ();

        /* remember the slowest record seen so far */
        if (stoptime - starttime > worsttime) {
          worsttime = stoptime - starttime;
          StringCpy (longest, buf);
        }
        numrecords++;

        ObjMgrFreeByEntityID (entityID);
      }

    } else {

      AsnReadVal (aip, atp, NULL);
    }
  }

  AsnIoFree (aip, FALSE);

#ifdef OS_UNIX
  if (usedPopen) {
    pclose (fp);
  } else {
    FileClose (fp);
  }
#else
  FileClose (fp);
#endif

  if (cfp->logfp != NULL && (! StringHasNoText (longest))) {
    fprintf (cfp->logfp, "Longest processing time %ld seconds on %s\n",
             (long) worsttime, longest);
    fprintf (cfp->logfp, "Total number of records %ld\n", (long) numrecords);
    fflush (cfp->logfp);
  }
}
/*****************************************************************************
*
*   ProcessAccession (accn, extra, only_new, get_var, do_nuc, do_prot)
*     Resolves accn to a GI, fetches the record with PubSeqSynchronousQuery
*     and passes it to DoSeqEntryToGnbk in GENBANK_FMT (do_nuc) and/or
*     GENPEPT_FMT (do_prot).
*
*     accn made up only of digits is parsed as a GI; anything else is
*     converted with SeqIdFromAccessionDotVersion and looked up with
*     GetGIForSeqId.  Nothing is fetched when the resulting GI is < 1.
*
*     With only_new set the record is skipped when
*       - accn is a GI and the unversioned accession of that GI maps back
*         to the same GI, or
*       - accn is an accession that matches (ignoring case) the
*         accession.version written for the GI it resolved to.
*
*     get_var selects flags value 1 for the query, otherwise 0.
*
*****************************************************************************/

static void ProcessAccession (
  CharPtr accn,
  XtraPtr extra,
  Boolean only_new,
  Boolean get_var,
  Boolean do_nuc,
  Boolean do_prot
)

{
  Boolean      all_digits = TRUE;
  CharPtr      cp;
  Char         current [41];
  CharPtr      dot;
  SeqEntryPtr  entry;
  Int4         gi = 0;
  Int4         latest_gi;
  long         parsed;
  Int4         query_flags;
  SeqIdPtr     sid;

  /* a single non-digit character makes this an accession */
  for (cp = accn; *cp != '\0'; cp++) {
    if (! IS_DIGIT (*cp)) {
      all_digits = FALSE;
      break;
    }
  }

  if (! all_digits) {

    sid = SeqIdFromAccessionDotVersion (accn);
    gi = GetGIForSeqId (sid);
    SeqIdFree (sid);

    if (only_new) {
      sid = GetSeqIdForGI (gi);
      if (sid != NULL) {
        SeqIdWrite (sid, current, PRINTID_TEXTID_ACC_VER, sizeof (current));
        SeqIdFree (sid);
        if (StringICmp (accn, current) == 0) return;
      }
    }

  } else if (sscanf (accn, "%ld", &parsed) == 1) {

    gi = (Int4) parsed;
    if (gi < 1) return;

    if (only_new) {
      sid = GetSeqIdForGI (gi);
      if (sid != NULL) {
        SeqIdWrite (sid, current, PRINTID_TEXTID_ACC_VER, sizeof (current));
        SeqIdFree (sid);

        /* strip ".version" and see which GI the bare accession yields */
        dot = StringChr (current, '.');
        if (dot != NULL) {
          *dot = '\0';
          sid = SeqIdFromAccessionDotVersion (current);
          latest_gi = GetGIForSeqId (sid);
          SeqIdFree (sid);
          if (latest_gi == gi) return;
        }
      }
    }
  }

  if (gi < 1) return;

  query_flags = get_var ? 1 : 0;

  entry = PubSeqSynchronousQuery (gi, 0, query_flags);
  if (entry == NULL) return;

  if (do_nuc) {
    DoSeqEntryToGnbk (entry, GENBANK_FMT, extra);
  }
  if (do_prot) {
    DoSeqEntryToGnbk (entry, GENPEPT_FMT, extra);
  }

  SeqEntryFree (entry);
}
/*********************************************************************
*
*	make_cds_paragraph(sfp, aa_start, aa_stop)
*	Builds a text buffer that shows a stretch of the protein product
*	of sfp with the three codon rows printed underneath the amino
*	acid row (the layout used by the Sequin document object).  The
*	text contains its own newline characters and is NUL terminated.
*
*	sfp:               feature to display; must have data.choice 3
*	                   and a product location
*	aa_start, aa_stop: start and stop in the amino acid sequence
*
*	Returns a buffer allocated with MemNew, or NULL when sfp is
*	unsuitable, the product Bioseq cannot be locked, or the buffer
*	cannot be allocated.  Presumably the caller releases it with
*	MemFree.
*
*	NOTE(review): the buffer is sized as (1 + 3 * number of codon
*	nodes) * 150 characters and none of the writes below is bounds
*	checked; confirm that no label + sequence row can exceed 150.
*
*********************************************************************/
NLM_EXTERN CharPtr make_cds_paragraph(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
	BioseqPtr pbsp;
	SeqPortPtr spp;
	ValNodePtr cvp_node, curr;
	CodonVectorPtr cvp;
	CharPtr docbuf = NULL;
	Int4 num, buf_size;
	Uint1 residue;
	Char p_name[30];
	Int4 i;
	CharPtr buf;
	Int4 pos;
	Int4 max_len = 150;
	Boolean extra_space;

	if(sfp == NULL || sfp->data.choice !=3)
		return NULL;
	if(sfp->product == NULL)
		return NULL;

	pbsp = BioseqLockById(SeqLocId(sfp->product));
	if(pbsp == NULL)
		return NULL;

	cvp_node = aa_to_codon(sfp, aa_start, aa_stop);

	/* one row for the protein plus three rows per codon node */
	num = 1;
	for(curr = cvp_node; curr !=NULL; curr = curr->next)
		num +=3;
	buf_size = num * max_len;

	docbuf = MemNew((size_t)(buf_size) * sizeof(Char));
	if(docbuf == NULL)
	{
		/* release what was acquired above instead of writing through NULL */
		free_cvp_list(cvp_node);
		BioseqUnlock(pbsp);
		return NULL;
	}

	MuskSeqIdWrite(pbsp->id, p_name, B_SPACE, PRINTID_TEXTID_ACCESSION, TRUE, FALSE);
	pos = 0;
	pos+= print_label_to_buffer(docbuf+pos, p_name, (aa_start+1), 0, FALSE, FALSE, B_SPACE, POS_SPACE);

	/*print the amino acid sequence into buffer*/
	spp = SeqPortNew(pbsp, aa_start, aa_stop, Seq_strand_plus, Seq_code_ncbieaa);
	while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF )
		docbuf[pos++] = residue;
	docbuf[pos++] = '\n';
	SeqPortFree(spp);

	/*print the three buffers of every codon node, one row each*/
	for(curr = cvp_node; curr !=NULL; curr = curr->next)
	{
		cvp = curr->data.ptrvalue;
		SeqIdWrite (cvp->sip, p_name, PRINTID_FASTA_SHORT, 10);
		extra_space = (cvp->aa_index == 0);
		for(i=0; i<3; ++i)
		{
			/* skip the first aa_index characters of each row */
			buf = cvp->buf[i] + cvp->aa_index;
			if(i == cvp->frame)
			{
				/* only the row matching cvp->frame carries the ID and position label */
				pos+= print_label_to_buffer(docbuf+pos, p_name, cvp->dna_pos,
					cvp->strand, extra_space, FALSE, B_SPACE, POS_SPACE);
			}
			else
				pos+= print_label_to_buffer(docbuf+pos, NULL, -1, 0,
					extra_space, FALSE, B_SPACE, POS_SPACE);
			sprintf(docbuf+pos, "%s\n", buf);
			pos += (StringLen(buf) +1);
		}
	}
	docbuf[pos++] = '\n';
	docbuf[pos] = '\0';

	free_cvp_list(cvp_node);
	BioseqUnlock(pbsp);
	return docbuf;
}
/** Creates the header part of an XML report for a BLAST search.
 * @param program Program name; "rpsblast" is reported as "blastp" and
 *                "rpstblastn" as "blastx" [in]
 * @param database Database name, may be NULL [in]
 * @param query_loc Query Seq-loc, may be NULL [in]
 * @param flags Flag to indicate whether query sequence should be included in
 *              the output (BXML_INCLUDE_QUERY). [in]
 * @param search_param Search parameters copied into the report; when NULL the
 *                     parameter block is left as returned by
 *                     ParametersNew [in]
 * @return Newly allocated report header, or NULL if the BlastOutput
 *         structure could not be allocated. Individual string fields stay
 *         unset when their own allocation fails.
 */
static BlastOutput*
s_CreateBlastOutputHead(const char* program, const char* database,
                        SeqLoc* query_loc, Int4 flags,
                        const Blast_SearchParams* search_param)
{
    BlastOutput* boutp;
    Char buffer[1024];
    char* program_to_use = NULL;
    const char* version_number;
    const char* release_date;

    if ((boutp = BlastOutputNew()) == NULL)
        return NULL;

    if (strcmp(program, "rpsblast") == 0)
        program_to_use = strdup("blastp");
    else if (strcmp(program, "rpstblastn") == 0)
        program_to_use = strdup("blastx");
    else
        program_to_use = strdup(program);

    if (query_loc) {
        SeqId* sip = SeqLocId(query_loc);
        Bioseq* bsp;

        SeqIdWrite(sip, buffer, PRINTID_FASTA_LONG, sizeof(buffer));
        boutp->query_ID = strdup(buffer);

        bsp = BioseqLockById(sip);
        if (bsp != NULL) {
            if (BioseqGetTitle(bsp) != NULL)
                boutp->query_def = strdup(BioseqGetTitle(bsp));
            else
                boutp->query_def = strdup("No definition line found");
        }
        BioseqUnlock(bsp);

        boutp->query_len = SeqLocLen(query_loc);

        if (flags & BXML_INCLUDE_QUERY) {
            boutp->query_seq = (char *) calloc(boutp->query_len+1, 1);
            /* Only stream the residues if there is somewhere to put them. */
            if (boutp->query_seq != NULL) {
                SeqPortStreamLoc(query_loc,
                                 STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
                                 boutp->query_seq, NULL);
            }
        } else {
            boutp->query_seq = NULL;    /* Do we need sequence here??? */
        }
    }

    /* Program name. Use the local version of the program. No need to copy it
       since it was locally allocated. */
    boutp->program = program_to_use;

    /* Database name */
    if (database)
        boutp->db = strdup(database);

    /* Version text. It is built in a buffer of exactly the required size
       because the program name comes from the caller and is not bounded. */
    version_number = BlastGetVersionNumber();
    release_date = BlastGetReleaseDate();
    if (program_to_use != NULL && version_number != NULL &&
        release_date != NULL) {
        /* two spaces, '[' , ']' and the terminating NUL: 5 extra bytes */
        size_t version_len = strlen(program_to_use) + strlen(version_number) +
                             strlen(release_date) + 5;

        boutp->version = (char*) malloc(version_len);
        if (boutp->version != NULL) {
            sprintf(boutp->version, "%s %s [%s]", program_to_use,
                    version_number, release_date);
        }
    }

    /* Reference */
    boutp->reference = BlastGetReference(FALSE);

    /* Filling parameters */
    boutp->param = ParametersNew();
    if (boutp->param != NULL && search_param != NULL) {
        boutp->param->expect = search_param->expect;
        boutp->param->gap_open = search_param->gap_open;
        boutp->param->gap_extend = search_param->gap_extension;
        if (search_param->matrix)
            boutp->param->matrix = strdup(search_param->matrix);
        boutp->param->sc_match = search_param->match;
        boutp->param->sc_mismatch = search_param->mismatch;
        boutp->param->include = search_param->ethresh;
        if (search_param->filter_string)
            boutp->param->filter = strdup(search_param->filter_string);
    }

    return boutp;
}
/* MegaBlastPrintEndpoints
 *
 * Callback that writes one line per HSP of search->current_hitlist to
 * search->output in the form
 *     'subject'=='[+-]query' (s_start q_start s_end q_end) score
 *
 * The subject label is the GI when GetAccessionFromSeqId reports a numeric
 * ID, otherwise a Seq-id string or the first token of the database
 * description.  HSPs that are NULL, exceed a positive cutoff_e, or whose
 * query ID is missing are skipped.  The HSP's context, query and subject
 * coordinates are rewritten in place while printing.
 *
 * ptr is the BlastSearchBlkPtr of the running search.  Always returns 0.
 */
static int LIBCALLBACK
MegaBlastPrintEndpoints(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   CharPtr subject_descr = NULL;   /* description allocation, if any */
   CharPtr descr_rest = NULL;      /* tokenizer state, never freed */
   CharPtr subject_buffer = NULL;  /* label printed for the subject */
   Boolean subject_in_descr = FALSE; /* label points into subject_descr */
   SeqIdPtr sip, query_id;
   CharPtr query_buffer;           /* label printed for the query */
   CharPtr query_alloc;            /* allocation backing query_buffer */
   Int4 query_length, q_start, q_end, q_shift=0, s_shift=0;
   Int4 subject_end;
   Int4 hsp_index;
   Boolean numeric_sip_type = FALSE;
   BLAST_HSPPtr hsp;
   Int2 context;
   Char context_sign;
   Int4 subject_gi = 0, score;
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL ||
       search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   /* subject_descr is only filled in on the database path; it stays NULL
      when the subject comes from search->subject_info */
   if (search->rdfp)
      readdb_get_descriptor(search->rdfp, search->subject_id, &sip,
                            &subject_descr);
   else
      sip = SeqIdSetDup(search->subject_info->sip);

   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) {
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type =
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                  &subject_gi, &subject_buffer);
   } else {
      DbtagPtr db_tag = (DbtagPtr) sip->data.ptrvalue;
      /* NOTE(review): this branch is entered only when db compares equal to
         "BL_ORD_ID", so the THC/TI test below appears unreachable. */
      if (db_tag->db &&
          (!StringCmp(db_tag->db, "THC") ||
           !StringICmp(db_tag->db, "TI")) &&
          db_tag->tag->id != 0) {
         subject_buffer = (CharPtr) Malloc(16);
         sprintf(subject_buffer, "%ld", (long) db_tag->tag->id);
      } else if (subject_descr != NULL) {
         /* Use the first token of the description as the label.  The
            allocation base stays in subject_descr so that it, and not the
            token, is what gets freed. */
         subject_buffer = StringTokMT(subject_descr, " \t", &descr_rest);
         subject_in_descr = TRUE;
      }
   }

   /* No description, or an empty one: fall back to the Seq-id itself so the
      output below never receives a NULL string. */
   if (!numeric_sip_type && subject_buffer == NULL) {
      subject_in_descr = FALSE;
      subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
      SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
   }

   search->current_hitlist->hspcnt_max = search->current_hitlist->hspcnt;

   /* Only for the two sequences case, get offset shift if subject
      is a subsequence */
   if (!search->rdfp && search->query_slp->next) {
      s_shift = SeqLocStart(search->query_slp->next);
      subject_end = SeqLocStop(search->query_slp->next);
   } else {
      s_shift = 0;
      subject_end =
         readdb_get_sequence_length(search->rdfp, search->subject_id);
   }
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);

   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 &&
                        hsp->evalue > search->pbp->cutoff_e))
         continue;

      /* Correct query context is already found in BlastGetNonSumStatsEvalue */
      context = hsp->context;
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue;
      /* from here on hsp->context only carries the strand bit */
      hsp->context = context & 1;
      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;
      hsp->subject.end = hsp->subject.offset + hsp->subject.length;

      if (hsp->context) {
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset =
            hsp->query.end - hsp->query.length + 1;
         context_sign = '-';
      } else {
         hsp->query.offset++;
         hsp->query.end = hsp->query.offset + hsp->query.length - 1;
         /* clip the alignment to the end of the query */
         if (hsp->query.end > query_length) {
            hsp->subject.end -= (hsp->query.end - query_length);
            hsp->query.end = query_length;
         }
         context_sign = '+';
      }

      /* clip the alignment to the end of the subject */
      if (hsp->subject.end > subject_end) {
         hsp->query.end -= (hsp->subject.end - subject_end);
         hsp->subject.end = subject_end;
      }
      hsp->subject.offset++;

      query_buffer = NULL;
      query_alloc = NULL;
      if (query_id->choice == SEQID_LOCAL &&
          search->pbp->mb_params->full_seqids) {
         /* local IDs are labelled with the first word of the title */
         BioseqPtr query_bsp = BioseqLockById(query_id);
         if (query_bsp != NULL) {
            query_alloc = StringSave(BioseqGetTitle(query_bsp));
            if (query_alloc != NULL) {
               CharPtr title_rest = NULL;
               query_buffer = StringTokMT(query_alloc, " ", &title_rest);
            } else {
               Int4 query_gi;
               GetAccessionFromSeqId(query_bsp->id, &query_gi,
                                     &query_buffer);
               query_alloc = query_buffer;
            }
            BioseqUnlock(query_bsp);
         }
      }
      if (query_buffer == NULL) {
         /* Either this is not a local ID printed in full form, or no label
            could be derived from the Bioseq: write the Seq-id instead. */
         MemFree(query_alloc);
         query_alloc = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         if (!search->pbp->mb_params->full_seqids)
            SeqIdWrite(query_id, query_alloc, PRINTID_TEXTID_ACCESSION,
                       BUFFER_LENGTH);
         else
            SeqIdWrite(query_id, query_alloc, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
         query_buffer = query_alloc;
      }

      /* with both gap costs at zero a value derived from the lengths,
         reward and penalty is reported in place of the raw score */
      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                  search->sbp->reward / 2 - hsp->score) /
            (search->sbp->reward - search->sbp->penalty);
      else
         score = hsp->score;

      if (context_sign == '+') {
         q_start = hsp->query.offset;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset;
      }

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }

      hsp->subject.offset += s_shift;
      hsp->subject.end += s_shift;

      if (numeric_sip_type)
         fprintf(fp, "'%ld'=='%c%s' (%d %d %d %d) %d\n", (long) subject_gi,
                 context_sign, query_buffer, hsp->subject.offset, q_start,
                 hsp->subject.end, q_end, score);
      else
         fprintf(fp, "'%s'=='%c%s' (%d %d %d %d) %d\n",
                 subject_buffer, context_sign, query_buffer,
                 hsp->subject.offset, q_start,
                 hsp->subject.end, q_end, score);
      MemFree(query_alloc);
   }

   /* a label that points into the description is released together with
      the description */
   if (!numeric_sip_type && !subject_in_descr)
      MemFree(subject_buffer);
   MemFree(subject_descr);
   sip = SeqIdSetFree(sip);
   return 0;
}
/* MegaBlastPrintSegments
 *
 * Callback that writes, for every HSP of search->current_hitlist, a header
 * line, an "a { ... }" block with the score and end points, and one "l"
 * line per ungapped segment (with its percent identity) to search->output.
 * A block is only printed when the overall identity of the HSP reaches
 * search->pbp->mb_params->perc_identity.
 *
 * HSPs that are NULL, exceed a positive cutoff_e, or whose query ID is
 * missing are skipped.  hsp->context and the query coordinates of each HSP
 * are rewritten in place.
 *
 * ptr is the BlastSearchBlkPtr of the running search.  Returns 0 when the
 * hit list is empty and 1 after the hit list has been processed.
 */
static int LIBCALLBACK
MegaBlastPrintSegments(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   ReadDBFILEPtr rdfp = search->rdfp;
   BLAST_HSPPtr hsp;
   Int4 i, subject_gi = 0;
   Int2 context;
   CharPtr query_buffer;           /* label printed for the query */
   CharPtr query_alloc;            /* allocation backing query_buffer */
   SeqIdPtr sip, query_id;
   Int4 hsp_index, score;
   Uint1Ptr query_seq, subject_seq = NULL;
   FloatHi perc_ident;
   Char strand;
   GapXEditScriptPtr esp;
   Int4 q_start, q_end, s_start, s_end, query_length, numseg;
   Int4 q_off, num_ident, align_length, total_ident, q_shift=0, s_shift=0;
   Int4Ptr length, start;
   Uint1Ptr strands;
   CharPtr subject_descr = NULL;   /* description allocation, if any */
   CharPtr descr_rest = NULL;      /* tokenizer state, never freed */
   CharPtr subject_buffer = NULL;  /* label printed for the subject */
   Boolean subject_in_descr = FALSE; /* label points into subject_descr */
   CharPtr buffer;
   Char tmp_buffer[BUFFER_LENGTH];
   Int4 buffer_size, max_buffer_size = LARGE_BUFFER_LENGTH;
   Boolean numeric_sip_type = FALSE;
   Boolean out_of_memory;
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL ||
       search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   subject_seq = search->subject->sequence_start + 1;

   /* subject_descr is only filled in on the database path; it stays NULL
      when the subject comes from search->subject_info */
   if (rdfp)
      readdb_get_descriptor(rdfp, search->subject_id, &sip, &subject_descr);
   else
      sip = SeqIdSetDup(search->subject_info->sip);

   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) {
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type =
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI),
                                  &subject_gi, &subject_buffer);
   } else if (subject_descr != NULL) {
      /* Use the first token of the description as the label.  The
         allocation base stays in subject_descr so that it, and not the
         token, is what gets freed. */
      subject_buffer = StringTokMT(subject_descr, " \t", &descr_rest);
      subject_in_descr = TRUE;
   }

   /* No description, or an empty one: fall back to the Seq-id itself so the
      output below never receives a NULL string. */
   if (!numeric_sip_type && subject_buffer == NULL) {
      subject_in_descr = FALSE;
      subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
      SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
   }

   buffer = (CharPtr) Malloc(LARGE_BUFFER_LENGTH);

   /* Only for the two sequences case, get offset shift if subject
      is a subsequence */
   if (!rdfp && search->query_slp->next)
      s_shift = SeqLocStart(search->query_slp->next);
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);

   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 &&
                        hsp->evalue > search->pbp->cutoff_e)) {
         continue;
      }
      context = hsp->context;
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue;
      /* from here on hsp->context only carries the strand bit */
      hsp->context = context & 1;

      /* with both gap costs at zero a value derived from the lengths,
         reward and penalty is reported in place of the raw score */
      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                  search->sbp->reward / 2 - hsp->score) /
            (search->sbp->reward - search->sbp->penalty);
      else
         score = hsp->score;

      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;

      q_off = hsp->query.offset;
      if (hsp->context) {
         strand = '-';
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset =
            hsp->query.end - hsp->query.length;
      } else {
         strand = '+';
         hsp->query.end = hsp->query.offset + hsp->query.length;
      }

      if (strand == '+') {
         q_start = hsp->query.offset + 1;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset + 1;
      }
      s_start = hsp->subject.offset + 1;
      s_end = hsp->subject.offset + hsp->subject.length;

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }
      s_start += s_shift;
      s_end += s_shift;

      query_buffer = NULL;
      query_alloc = NULL;
      if (query_id->choice == SEQID_LOCAL &&
          search->pbp->mb_params->full_seqids) {
         /* local IDs are labelled with the first word of the title */
         BioseqPtr query_bsp = BioseqLockById(query_id);
         if (query_bsp != NULL) {
            query_alloc = StringSave(BioseqGetTitle(query_bsp));
            if (query_alloc != NULL) {
               CharPtr title_rest = NULL;
               query_buffer = StringTokMT(query_alloc, " ", &title_rest);
            } else {
               Int4 query_gi;
               GetAccessionFromSeqId(query_bsp->id, &query_gi,
                                     &query_buffer);
               query_alloc = query_buffer;
            }
            BioseqUnlock(query_bsp);
         }
      }
      if (query_buffer == NULL) {
         /* Either this is not a local ID printed in full form, or no label
            could be derived from the Bioseq: write the Seq-id instead. */
         MemFree(query_alloc);
         query_alloc = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         if (!search->pbp->mb_params->full_seqids)
            SeqIdWrite(query_id, query_alloc, PRINTID_TEXTID_ACCESSION,
                       BUFFER_LENGTH);
         else
            SeqIdWrite(query_id, query_alloc, PRINTID_FASTA_LONG,
                       BUFFER_LENGTH);
         query_buffer = query_alloc;
      }

      if (numeric_sip_type)
         sprintf(buffer, "\n#'>%ld'=='%c%s' (%d %d %d %d) %d\na {\n s %d\n b %d %d\n e %d %d\n",
                 (long) subject_gi, strand, query_buffer,
                 s_start, q_start, s_end, q_end, score, score,
                 s_start, q_start, s_end, q_end);
      else
         sprintf(buffer, "\n#'>%s'=='%c%s' (%d %d %d %d) %d\na {\n s %d\n b %d %d\n e %d %d\n",
                 subject_buffer, strand, query_buffer,
                 s_start, q_start, s_end, q_end, score, score,
                 s_start, q_start, s_end, q_end);
      buffer_size = StringLen(buffer);

      query_seq = search->context[context].query->sequence;

      /* count the edit script entries, then expand them into the
         start/length/strand arrays (freed at the end of this iteration) */
      esp = hsp->gap_info->esp;
      for (numseg=0; esp; esp = esp->next, numseg++);

      GXECollectDataForSeqalign(hsp->gap_info, hsp->gap_info->esp, numseg,
                                &start, &length, &strands,
                                &q_off, &hsp->subject.offset);

      /* clip the first and last segment to the query boundaries */
      if (start[0] < 0) {
         length[0] += start[0];
         start[1] -= start[0];
         start[0] = 0;
      }
      if (start[2*(numseg-1)] + length[numseg-1] > query_length)
         length[numseg-1] = query_length - start[2*(numseg-1)];

      total_ident = 0;
      align_length = 0;
      out_of_memory = FALSE;
      for (i=0; i<numseg; i++) {
         align_length += length[i];
         if (strand == '+') {
            q_start = start[2*i] + 1;
            q_end = q_start + length[i] - 1;
         } else {
            q_start = query_length - start[2*i];
            q_end = q_start - length[i] + 1;
         }
         /* a start of -1 on either sequence marks a gap segment; only
            aligned segments get an "l" line */
         if (start[2*i] != -1 && start[2*i+1] != -1) {
            num_ident = MegaBlastGetNumIdentical(query_seq, subject_seq,
                                                 start[2*i], start[2*i+1],
                                                 length[i], FALSE);
            perc_ident = (FloatHi) num_ident / length[i] * 100;
            total_ident += num_ident;
            sprintf(tmp_buffer, " l %d %d %d %d (%.0f)\n", start[2*i+1]+1,
                    q_start, start[2*i+1]+length[i],
                    q_end, perc_ident);
            buffer_size += StringLen(tmp_buffer);
            if (buffer_size > max_buffer_size - 2) {
               /* grow through a temporary so the old buffer is not lost if
                  the reallocation fails */
               CharPtr grown = (CharPtr) Realloc(buffer, 2 * max_buffer_size);
               if (grown == NULL) {
                  out_of_memory = TRUE;
                  break;
               }
               buffer = grown;
               max_buffer_size *= 2;
            }
            StringCat(buffer, tmp_buffer);
         }
      }
      /* an HSP whose text could not be completed is not printed */
      if (!out_of_memory &&
          100*total_ident >=
          align_length*search->pbp->mb_params->perc_identity) {
         StringCat(buffer, "}");
         fprintf(fp, "%s\n", buffer);
      }
      MemFree(start);
      MemFree(length);
      MemFree(strands);
      MemFree(query_alloc);
   } /* End loop on hsp's */

   /* a label that points into the description is released together with
      the description */
   if (!numeric_sip_type && !subject_in_descr)
      MemFree(subject_buffer);
   MemFree(subject_descr);
   MemFree(buffer);
   sip = SeqIdSetFree(sip);
   fflush(fp);
   return 1;
}