/* Produces one row of values for every BioSource descriptor found on bsp.
 *
 * Each row is itself a ValNode list: the accession-style id text, then
 * (only when want_gi is TRUE) the GI id text, then whatever
 * CollectBioSourceValues returns for that descriptor and field_list.
 * The rows are returned as the data.ptrvalue entries of the result list.
 * Returns NULL when bsp is NULL or no source descriptor is found.
 */
static ValNodePtr CollectBioseqLineValues (BioseqPtr bsp, ValNodePtr field_list, Boolean want_gi)
{
  SeqMgrDescContext  desc_ctx;
  SeqDescrPtr        src_desc;
  SeqIdPtr           id, acc_id = NULL, gi_id = NULL;
  ValNodePtr         rows = NULL, row;
  Char               acc_txt[255], gi_txt[255];

  if (bsp == NULL) {
    return NULL;
  }

  /* A GenBank id always replaces the remembered accession id; EMBL,
   * SwissProt, DDBJ and PIR ids are only taken while none is remembered.
   * The last GI id seen is kept separately.
   */
  for (id = bsp->id; id != NULL; id = id->next) {
    if (id->choice == SEQID_GENBANK) {
      acc_id = id;
    } else if (acc_id == NULL
               && (id->choice == SEQID_EMBL
                   || id->choice == SEQID_SWISSPROT
                   || id->choice == SEQID_DDBJ
                   || id->choice == SEQID_PIR)) {
      acc_id = id;
    } else if (id->choice == SEQID_GI) {
      gi_id = id;
    }
  }

  if (acc_id != NULL || gi_id != NULL) {
    if (acc_id != NULL) {
      SeqIdWrite (acc_id, acc_txt, PRINTID_REPORT, sizeof (acc_txt) - 1);
    } else {
      acc_txt[0] = 0;
    }
    if (gi_id != NULL) {
      SeqIdWrite (gi_id, gi_txt, PRINTID_REPORT, sizeof (gi_txt) - 1);
    } else {
      gi_txt[0] = 0;
    }
  } else {
    /* neither kind of id present: fall back to whatever SeqIdFindBest picks */
    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), acc_txt, PRINTID_REPORT, sizeof (acc_txt) - 1);
    gi_txt[0] = 0;
  }

  src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &desc_ctx);
  while (src_desc != NULL) {
    row = NULL;
    ValNodeAddPointer (&row, 0, StringSave (acc_txt));
    if (want_gi) {
      ValNodeAddPointer (&row, 0, StringSave (gi_txt));
    }
    ValNodeLink (&row, CollectBioSourceValues (src_desc->data.ptrvalue, field_list));
    ValNodeAddPointer (&rows, 0, row);
    src_desc = SeqMgrGetNextDescriptor (bsp, src_desc, Seq_descr_source, &desc_ctx);
  }

  return rows;
}
static Boolean BL2SEQ_MakeSeqLoc(const BioseqPtr bsp1, const BioseqPtr bsp2, SeqLocPtr *slp1, SeqLocPtr *slp2, Uint1 strand_option) { const char* k_delimiters = " ,;"; CharPtr location; Int4 from, to; *slp1 = NULL; *slp2 = NULL; location = myargs[ARG_LOC1].strvalue; if (location) { from = atoi(StringTokMT(location, k_delimiters, &location)) - 1; to = atoi(location) - 1; from = MAX(from, 0); if (to < 0) to = bsp1->length - 1; to = MIN(to, bsp1->length - 1); if (from >= bsp1->length) { ErrPostEx(SEV_FATAL, 1, 0, "Location outside of the first sequence range\n"); return FALSE; } *slp1 = SeqLocIntNew(from, to, strand_option, SeqIdFindBestAccession(bsp1->id)); } else if (strand_option != Seq_strand_both) { *slp1 = SeqLocIntNew(0, bsp1->length-1, strand_option, SeqIdFindBestAccession(bsp1->id)); } else ValNodeAddPointer(slp1, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBestAccession(bsp1->id))); location = myargs[ARG_LOC2].strvalue; if (location) { from = atoi(StringTokMT(location, k_delimiters, &location)) - 1; to = atoi(location) - 1; from = MAX(from, 0); if (to < 0) to = bsp2->length - 1; to = MIN(to, bsp2->length - 1); if (from >= bsp2->length) { ErrPostEx(SEV_FATAL, 1, 0, "Location outside of the second sequence range\n"); return FALSE; } *slp2 = SeqLocIntNew(from, to, Seq_strand_plus, SeqIdFindBestAccession(bsp2->id)); } else ValNodeAddPointer(slp2, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBestAccession(bsp2->id))); return TRUE; }
/* Loads a list of field rules into the tag-list dialog d.
 *
 * data is treated as a ValNode list whose data.ptrvalue entries are passed
 * to TagStringFromFieldRule; every non-NULL string that comes back becomes
 * one entry of the dialog's tag list. The previous tag list is freed first,
 * and the scroll bar is resized and shown or hidden to match the new
 * number of entries.
 */
static void CommentFieldsToDialog (DialoG d, Pointer data)
{
  TagListPtr  taglist;
  ValNodePtr  rule_node;
  CharPtr     tag_text;
  Int2        extra_rows;

  taglist = (TagListPtr) GetObjectExtra (d);
  if (taglist == NULL) {
    return;
  }

  /* throw away the old contents before repopulating */
  taglist->vnp = ValNodeFreeData (taglist->vnp);
  SendMessageToDialog (taglist->dialog, VIB_MSG_RESET);

  rule_node = (ValNodePtr) data;
  while (rule_node != NULL) {
    tag_text = TagStringFromFieldRule (rule_node->data.ptrvalue);
    if (tag_text != NULL) {
      ValNodeAddPointer (&(taglist->vnp), 0, tag_text);
    }
    rule_node = rule_node->next;
  }

  SendMessageToDialog (taglist->dialog, VIB_MSG_REDRAW);

  /* number of entries that do not fit into the visible rows, never negative */
  extra_rows = (Int2) (ValNodeLen (taglist->vnp) - taglist->rows);
  taglist->max = (extra_rows > (Int2) 0) ? extra_rows : (Int2) 0;

  CorrectBarMax (taglist->bar, taglist->max);
  CorrectBarPage (taglist->bar, taglist->rows - 1, taglist->rows - 1);

  if (taglist->max > 0) {
    SafeShow (taglist->bar);
  } else {
    SafeHide (taglist->bar);
  }
}
/* Returns the values of one BioSource as a ValNode list of strings.
 *
 * The first node holds the taxonomy id (from GetTaxIdFromOrgRef) printed
 * as decimal text. One further node is appended per entry of field_list,
 * holding the result of GetSourceQualFromBioSource for that field; a node
 * is appended even when that result is NULL, so positions stay aligned
 * with field_list.
 *
 * biop is dereferenced unconditionally and must not be NULL.
 */
static ValNodePtr CollectBioSourceValues (BioSourcePtr biop, ValNodePtr field_list)
{
  ValNodePtr  values = NULL;
  ValNodePtr  field;
  Char        taxid_txt[30];

  sprintf (taxid_txt, "%d", GetTaxIdFromOrgRef(biop->org));
  ValNodeAddPointer (&values, 0, StringSave (taxid_txt));

  for (field = field_list; field != NULL; field = field->next) {
    ValNodeAddPointer (&values, 0,
                       GetSourceQualFromBioSource (biop, field->data.ptrvalue, NULL));
  }

  return values;
}
/* Gather callback that collects quality-score graphs lying on a target
 * location.
 *
 * A SeqGraph is recorded when its title is "Phrap Quality" or "Gap4"
 * (case-insensitive), flags[2] equals 3 (byte data, per the original
 * comment) and its location is on the same Bioseq as ggp->slp. For each
 * such graph a GphItem holding the graph and its left/right offsets within
 * ggp->bsp is appended to ggp->vnp.
 *
 * Always returns TRUE.
 */
static Boolean GetGraphsProc (GatherObjectPtr gop)
{
  GphGetPtr    collector;
  SeqGraphPtr  graph;
  GphItemPtr   item;

  if (gop == NULL || gop->itemtype != OBJ_SEQGRAPH) return TRUE;

  collector = (GphGetPtr) gop->userdata;
  graph = (SeqGraphPtr) gop->dataptr;
  if (collector == NULL || graph == NULL) return TRUE;

  /* only phrap or gap4 currently allowed */
  if (StringICmp (graph->title, "Phrap Quality") != 0 &&
      StringICmp (graph->title, "Gap4") != 0) return TRUE;

  /* data type must be bytes */
  if (graph->flags[2] != 3) return TRUE;

  if (! SeqIdForSameBioseq (SeqLocId (graph->loc), SeqLocId (collector->slp))) return TRUE;

  item = (GphItemPtr) MemNew (sizeof (GphItem));
  if (item == NULL) return TRUE;

  item->sgp = graph;
  item->left = GetOffsetInBioseq (graph->loc, collector->bsp, SEQLOC_LEFT_END);
  item->right = GetOffsetInBioseq (graph->loc, collector->bsp, SEQLOC_RIGHT_END);
  ValNodeAddPointer (&(collector->vnp), 0, (Pointer) item);

  return TRUE;
}
/* Counts an occurrence of str in sdp->lcList.
 *
 * If an entry whose text equals str already exists, its count is
 * incremented. Otherwise a new StringSet is appended with count 1, text
 * copied from str (bounded by the size of the text field) and firstID
 * copied from sdp->buf.
 *
 * Does nothing when sdp is NULL or str has no text.
 */
static void RecordThesis (ScanDataPtr sdp, CharPtr str)
{
  ValNodePtr    node;
  StringSetPtr  entry;

  if (sdp == NULL || StringHasNoText (str)) return;

  /* look for an existing entry first */
  node = sdp->lcList;
  while (node != NULL) {
    entry = (StringSetPtr) node->data.ptrvalue;
    if (entry != NULL && StringCmp (entry->text, str) == 0) {
      (entry->count)++;
      return;
    }
    node = node->next;
  }

  /* not seen before: start a new entry */
  entry = MemNew (sizeof (StringSet));
  if (entry == NULL) return;

  StringCpy (entry->firstID, sdp->buf);
  StringNCpy_0 (entry->text, str, sizeof (entry->text));
  entry->count = 1;

  ValNodeAddPointer (&(sdp->lcList), 0, (Pointer) entry);
}
/* Bioseq callback that appends one AlphaProtData record per protein
 * Bioseq to the list passed through userdata.
 *
 * The record stores the Bioseq and a copy of a protein name: the first
 * entry of the ProtRef name list of the first protein feature on the
 * Bioseq, or that feature's context label when the name list is empty.
 * prot_name stays NULL when no protein feature (or no ProtRef) is found.
 *
 * Non-protein Bioseqs and NULL arguments are ignored.
 */
static void GetProtListCallback (BioseqPtr bsp, Pointer userdata)
{
  SeqMgrFeatContext  feat_ctx;
  ValNodePtr PNTR    out_list;
  AlphaProtPtr       record;
  SeqFeatPtr         prot_feat;
  ProtRefPtr         prot_ref;

  if (bsp == NULL || userdata == NULL || ! ISA_aa (bsp->mol)) return;

  out_list = (ValNodePtr PNTR) userdata;

  record = (AlphaProtPtr) MemNew (sizeof (AlphaProtData));
  if (record == NULL) return;

  record->bsp = bsp;
  record->prot_name = NULL;

  prot_feat = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PROT, 0, &feat_ctx);
  if (prot_feat != NULL && prot_feat->data.value.ptrvalue != NULL) {
    prot_ref = (ProtRefPtr) prot_feat->data.value.ptrvalue;
    if (prot_ref->name == NULL) {
      record->prot_name = StringSave (feat_ctx.label);
    } else {
      record->prot_name = StringSave (prot_ref->name->data.ptrvalue);
    }
  }

  ValNodeAddPointer (out_list, 0, record);
}
/* Copies a list of bond nodes (PVNMB) so that the copies refer to atoms of
 * a second graph list.
 *
 * For every source bond a new node and a new bond record (NewMBD) are
 * created. The copy's parent is set to parentPtr, and bWhat and the node's
 * choice are carried over. Each endpoint atom is re-resolved in
 * pdnmgNewHead: the graph is located by the choice value of the graph node
 * that owns the source atom (GetPDNMGFromIndex), then the atom is looked
 * up by name within that graph (GetPMADFromName). The new bond is also
 * appended to the pvnBonds list of both resolved atoms.
 *
 * Returns the head of the new list in source order, or NULL for an empty
 * input list.
 *
 * NOTE(review): no result of ValNodeNew, NewMBD, GetPDNMGFromIndex or
 * GetPMADFromName is checked before it is dereferenced.
 */
PVNMB CpyPVNMBList(PVNMB pvnmb, PDNMG pdnmgNewHead, Pointer parentPtr)
{
    PVNMB  pvnmbSrc, pvnmbCopy, pvnmbHead = NULL, pvnmbTail = NULL;
    PMBD   pmbdSrc, pmbdCopy;
    PMGD   pmgdOwner = NULL;
    PDNMG  pdnmgFrom = NULL, pdnmgTo = NULL;

    for (pvnmbSrc = pvnmb; pvnmbSrc; pvnmbSrc = pvnmbSrc->next) {
        pmbdSrc = (PMBD)(pvnmbSrc->data.ptrvalue);

        pvnmbCopy = ValNodeNew(NULL);
        pvnmbCopy->choice = pvnmbSrc->choice;

        pmbdCopy = NewMBD();
        pmbdCopy->pfbParent = (PFB)parentPtr;
        pmbdCopy->pvnmbLink = pvnmbCopy;
        pmbdCopy->bWhat = pmbdSrc->bWhat;

        /* "from" atom: same graph index, same atom name, in the new graph list */
        pmgdOwner = (PMGD)(pmbdSrc->pmadFrom->pfbParent);
        pdnmgFrom = GetPDNMGFromIndex(pdnmgNewHead, ((PDNMG)(pmgdOwner->pdnmgLink))->choice);
        pmbdCopy->pmadFrom = GetPMADFromName(((PMGD)(pdnmgFrom->data.ptrvalue))->pvnmaAHead, pmbdSrc->pmadFrom->pcAName);

        /* "to" atom, resolved the same way */
        pmgdOwner = (PMGD)(pmbdSrc->pmadTo->pfbParent);
        pdnmgTo = GetPDNMGFromIndex(pdnmgNewHead, ((PDNMG)(pmgdOwner->pdnmgLink))->choice);
        pmbdCopy->pmadTo = GetPMADFromName(((PMGD)(pdnmgTo->data.ptrvalue))->pvnmaAHead, pmbdSrc->pmadTo->pcAName);

        /* Add PMBD to the appropriate PMADs */
        ValNodeAddPointer(&pmbdCopy->pmadFrom->pvnBonds, 0, (VoidPtr)pmbdCopy);
        ValNodeAddPointer(&pmbdCopy->pmadTo->pvnBonds, 0, (VoidPtr)pmbdCopy);

        pvnmbCopy->data.ptrvalue = (VoidPtr)pmbdCopy;

        /* append to the output list */
        if (pvnmbHead == NULL) {
            pvnmbHead = pvnmbCopy;
        } else {
            pvnmbTail->next = pvnmbCopy;
        }
        pvnmbTail = pvnmbCopy;
    }

    return pvnmbHead;
}
/* Gather callback: records the current item in the ValNode list passed
 * through gcp->userdata, using the item's type as the node choice and the
 * item pointer as the node data. Nothing is recorded when userdata is
 * NULL. Always returns TRUE.
 */
static Boolean AddToSaveList (GatherContextPtr gcp)
{
  ValNodePtr PNTR save_list = (ValNodePtr PNTR) gcp->userdata;

  if (save_list != NULL) {
    ValNodeAddPointer (save_list, gcp->thistype, gcp->thisitem);
  }
  return TRUE;
}
/* Converts a comma-separated list of source field names into a field list.
 *
 * str - field names separated by ','. Names are looked up exactly as
 *       written between the commas (no trimming is done here).
 *
 * Each name accepted by GetSourceQualTypeByName yields one node with
 * choice FieldType_source_qual, whose data points to a text-qualifier
 * choice node carrying the qualifier value. Names that are not recognized
 * are reported through Message (MSG_ERROR, ...) and skipped.
 *
 * Returns the list (ownership passes to the caller), or NULL when str has
 * no text, no name was recognized, or the working copy could not be
 * allocated. A field whose node cannot be allocated is skipped rather
 * than dereferencing a NULL pointer.
 */
static ValNodePtr FieldsFromFieldListString (CharPtr str)
{
  CharPtr     cpy, val, comma;
  Int4        qual;
  ValNodePtr  field_list = NULL, qc;

  if (StringHasNoText (str)) {
    return NULL;
  }

  cpy = StringSave (str);
  if (cpy == NULL) {
    return NULL;
  }

  val = cpy;
  do {
    /* temporarily terminate the current name at the next comma, if any */
    comma = StringChr (val, ',');
    if (comma != NULL) {
      *comma = 0;
    }

    qual = GetSourceQualTypeByName(val);
    if (qual < 0) {
      Message (MSG_ERROR, "%s is not a recognized source field name", val);
    } else {
      qc = ValNodeNew (NULL);
      if (qc != NULL) {
        qc->choice = SourceQualChoice_textqual;
        qc->data.intvalue = qual;
        if (ValNodeAddPointer (&field_list, FieldType_source_qual, qc) == NULL) {
          /* could not be linked in: do not leak the choice node */
          qc = ValNodeFree (qc);
        }
      }
    }

    if (comma != NULL) {
      *comma = ',';
      val = comma + 1;
    }
  } while (comma != NULL);

  cpy = MemFree (cpy);
  return field_list;
}
/* Bioseq callback: appends bsp to the ValNode list passed through
 * userdata when the Bioseq has raw representation and is not a protein.
 * Everything else, and NULL arguments, are ignored.
 */
static void CollectBioseqsForConversion (BioseqPtr bsp, Pointer userdata)
{
  if (bsp == NULL || bsp->repr != Seq_repr_raw || ISA_aa (bsp->mol) || userdata == NULL) {
    return;
  }

  ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, bsp);
}
/* Reads an ASN.1 structure of type orig into a ValNode list.
 *
 * aip, amp  - stream and module passed to AsnReadId / AsnReadVal.
 * orig      - type of the enclosing structure; reading stops when
 *             AsnReadId returns this type again.
 * isError   - optional; set to FALSE on entry and to TRUE when reading
 *             fails.
 * readfunc  - called for every element; its result becomes the
 *             data.ptrvalue of a new node (choice 0). A NULL result is
 *             treated as a read failure.
 * freefunc  - passed to AsnGenericValNodeSetFree to release what was read
 *             so far when a failure occurs.
 *
 * Returns the list head. On failure the partial list is freed and the
 * value returned by AsnGenericValNodeSetFree is returned. NULL is returned
 * immediately (with *isError left FALSE) when aip, readfunc or freefunc
 * is NULL.
 */
NLM_EXTERN ValNodePtr LIBCALL AsnGenericValNodeSetAsnRead (
  AsnIoPtr aip,
  AsnModulePtr amp,
  AsnTypePtr orig,
  BoolPtr isError,
  AsnReadFunc readfunc,
  AsnOptFreeFunc freefunc
)
{
  AsnTypePtr  cur_atp = orig;
  DataVal     scratch;
  Pointer     item;
  ValNodePtr  node, first = NULL, tail = NULL;
  Boolean     failed = FALSE;

  if (isError != NULL) {
    *isError = FALSE;
  }
  if (aip == NULL || readfunc == NULL || freefunc == NULL) return NULL;

  if (AsnReadVal (aip, cur_atp, &scratch) <= 0) {   /* read START STRUCT */
    failed = TRUE;
  } else {
    for (;;) {
      cur_atp = AsnReadId (aip, amp, cur_atp);
      if (cur_atp == orig) break;

      item = (Pointer) readfunc (aip, cur_atp);
      if (item == NULL) {
        failed = TRUE;
        break;
      }

      /* append after the current tail so the list is not re-walked */
      node = ValNodeAddPointer (&tail, 0, item);
      if (first == NULL) {
        first = node;
      }
      tail = node;
    }

    if (! failed && AsnReadVal (aip, cur_atp, &scratch) <= 0) {   /* read END STRUCT */
      failed = TRUE;
    }
  }

  if (failed) {
    first = AsnGenericValNodeSetFree (first, freefunc);
    if (isError != NULL) {
      *isError = TRUE;
    }
  }

  return (Pointer) first;
}
/* Builds a "Phrap Quality" SeqGraph from the quality scores of a contig.
 *
 * The scores are copied, one byte each, into a new ByteStore that becomes
 * the graph's values; numval is the number of bytes written and min/max
 * are the smallest and largest score seen. The graph location is an
 * interval covering all of bsp (0 .. length - 1) on a copy of bsp->id.
 *
 * Returns NULL when contig or bsp is NULL or the contig has no scores.
 */
static SeqGraphPtr SeqGraphFromContig (TContigPtr contig, BioseqPtr bsp)
{
  Uint1         chunk[128];
  ByteStorePtr  store;
  SeqGraphPtr   graph;
  SeqIntPtr     whole;
  Int4          src_idx = 0, fill;
  Int2          lowest = INT2_MAX;
  Int2          highest = INT2_MIN;
  Int2          score;

  if (bsp == NULL || contig == NULL || contig->qual_scores == NULL || contig->num_qual_scores == 0) {
    return NULL;
  }

  graph = SeqGraphNew ();
  store = BSNew (1000);

  /* copy the scores through a small buffer, tracking the extremes */
  while (src_idx < contig->num_qual_scores) {
    for (fill = 0; fill < sizeof (chunk) && src_idx < contig->num_qual_scores; fill++, src_idx++) {
      score = (Int2) contig->qual_scores[src_idx];
      if (score > highest) highest = score;
      if (score < lowest) lowest = score;
      chunk[fill] = (Uint1) contig->qual_scores[src_idx];
    }
    BSWrite (store, (Pointer) chunk, (Int4) fill);
  }

  /* numval is taken before the trailing BSPutByte call */
  graph->numval = BSLen (store);
  BSPutByte (store, EOF);

  graph->title = StringSave ("Phrap Quality");
  graph->compr = 1;
  graph->flags [0] = 0;
  graph->flags [1] = 0;
  graph->flags [2] = 3;
  graph->axis.intvalue = 0;
  graph->min.intvalue = lowest;
  graph->max.intvalue = highest;
  graph->a = 1.0;
  graph->b = 0;
  graph->values = (Pointer) store;

  whole = SeqIntNew ();
  whole->from = 0;
  whole->to = bsp->length - 1;
  whole->id = SeqIdDup (bsp->id);
  ValNodeAddPointer (&(graph->loc), SEQLOC_INT, (Pointer) whole);

  return graph;
}
/** Appends one interval Seq-loc per PHI BLAST pattern occurrence to a list.
 * Each interval spans [offset, offset + length - 1] of the occurrence and
 * carries a copy of the Seq-id of the query location.
 * @param pattern_info Pattern occurrences (num_patterns entries are
 *                     read). [in]
 * @param query_seqloc Query SeqLoc, used only to obtain the Seq-id. [in]
 * @param seed_seqloc_ptr List the new SEQLOC_INT nodes are appended
 *                        to. [in|out]
 * @return Always 0.
 */
static Int2
s_PHIBlastCreateSeedSeqLoc(const SPHIQueryInfo* pattern_info,
                           SeqLoc* query_seqloc, SeqLoc** seed_seqloc_ptr)
{
    Int4 occ_index = 0;

    while (occ_index < pattern_info->num_patterns) {
        const SPHIPatternInfo* occ = &pattern_info->occurrences[occ_index];
        SeqInt* interval = SeqIntNew();

        interval->id = SeqIdDup(SeqLocId(query_seqloc));
        interval->from = occ->offset;
        interval->to = occ->offset + occ->length - 1;
        ValNodeAddPointer(seed_seqloc_ptr, SEQLOC_INT, interval);

        ++occ_index;
    }

    return 0;
}
/* Collects the contents of the tag-list dialog d as a list of field rules.
 *
 * Each entry of the dialog's tag list is passed to FieldRuleFromTagString;
 * every non-NULL rule is appended (choice 0) to the result.
 *
 * Returns the new list, or NULL when the dialog has no tag-list data or no
 * entry produced a rule.
 */
static Pointer CommentFieldsFromDialog (DialoG d)
{
  TagListPtr    taglist;
  ValNodePtr    tag_node;
  ValNodePtr    rules = NULL;
  FieldRulePtr  one_rule;

  taglist = (TagListPtr) GetObjectExtra (d);
  if (taglist == NULL) {
    return NULL;
  }

  tag_node = taglist->vnp;
  while (tag_node != NULL) {
    one_rule = FieldRuleFromTagString (tag_node->data.ptrvalue);
    if (one_rule != NULL) {
      ValNodeAddPointer (&rules, 0, one_rule);
    }
    tag_node = tag_node->next;
  }

  return rules;
}
/** Splits the PHI BLAST results corresponding to different pattern occurrences * in query, converts them to Seq-aligns and puts in a list of ValNodes. * @param results All results from different pattern occurrences * mixed together. On return points to NULL. [in] * @param pattern_info Query pattern occurrences information [in] * @param program Program type (phiblastp or phiblastn) [in] * @param query_seqloc List of query locations [in] * @param rdfp blast db object [in] * @param phivnps List of ValNodes containing Seq-aligns. [out] * @return Status, 0 on success, -1 on failure. */ static Int2 s_PHIResultsToSeqAlign(const BlastHSPResults* results, const SPHIQueryInfo* pattern_info, EBlastProgramType program, SeqLoc* query_seqloc, ReadDBFILE* rdfp, ValNode* *phivnps) { Int2 status = 0; /* Split results into an array of BlastHSPResults structures corresponding to different pattern occurrences. */ BlastHSPResults* *phi_results = PHIBlast_HSPResultsSplit(results, pattern_info); if (phi_results) { int pattern_index; /* Index over pattern occurrences. */ for (pattern_index = 0; pattern_index < pattern_info->num_patterns; ++pattern_index) { SBlastSeqalignArray* seqalign_arr = NULL; SeqAlign* seqalign = NULL; BlastHSPResults* one_phi_results = phi_results[pattern_index]; if (one_phi_results) { /* PHI BLAST is always gapped, and never out-of-frame, hence * TRUE and FALSE values for the respective booleans in the next * call. */ status = BLAST_ResultsToSeqAlign(program, &one_phi_results, query_seqloc, rdfp, NULL, TRUE, FALSE, &seqalign_arr); if (seqalign_arr) { seqalign = seqalign_arr->array[0]; seqalign_arr->array[0] = NULL; SBlastSeqalignArrayFree(seqalign_arr); } ValNodeAddPointer(phivnps, pattern_index, seqalign); } } sfree(phi_results); } return status; }
/* Legacy two-sequence BLAST driver.
 *
 * Reads all settings from the global myargs[] array, obtains the query and
 * subject sequences, runs the search (over whole sequences, or over
 * sub-ranges when ARG_LOC1 / ARG_LOC2 is given), optionally writes the
 * result as an ASN.1 Seq-annot, and prints either the traditional text
 * report or tabular output to the ARG_OUT file.
 *
 * Returns 0 on completion and 1 on any of the checked failures.
 *
 * NOTE(review): every early "return" below leaves without releasing what
 * was allocated up to that point (program_name, options, outfp, ...).
 */
Int2 Main_old (void)
{
   AsnIoPtr aip;
   BioseqPtr fake_bsp = NULL, fake_subject_bsp = NULL, query_bsp = NULL, subject_bsp = NULL;
   BioseqPtr bsp1, bsp2;
   BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
   BLAST_OptionsBlkPtr options=NULL;
   Boolean seq1_is_na, seq2_is_na;
   CharPtr params_buffer=NULL;
   DbtagPtr dbtagptr;
   Uint1 align_type;
   Uint4 align_options;
   SeqAlignPtr seqalign;
   SeqAnnotPtr seqannot;
   SeqEntryPtr sep = NULL, sep1 = NULL;
   CharPtr program_name, blast_outputfile;
   FILE *outfp;
   ValNodePtr mask_loc, mask_loc_start, vnp, other_returns=NULL, error_returns=NULL;
   BLAST_MatrixPtr matrix;
   Int4Ptr PNTR txmatrix;
   /* results callback handed to the search functions; never set to anything but NULL here */
   int (LIBCALLBACK *handle_results)PROTO((VoidPtr search)) = NULL;
   Boolean entrez_lookup = FALSE;
   Boolean html, seqannot_output, believe_query;
   Uint1 tabular_output;
   Boolean gapped_calculation;

   entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
   html = (Boolean) myargs[ARG_HTML].intvalue;
   seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);
   blast_outputfile = myargs [ARG_OUT].strvalue;

   /* only the five classic program names are accepted */
   program_name = StringSave(myargs[ARG_PROGRAM].strvalue);
   if (StringCmp(program_name, "blastn") && StringCmp(program_name, "blastp") && StringCmp(program_name, "blastx") && StringCmp(program_name, "tblastn") && StringCmp(program_name, "tblastx")) {
      ErrPostEx(SEV_FATAL, 1, 0, "Program name must be blastn, blastp, blastx, tblastn or tblastx\n");
      return (1);
   }
   align_type = BlastGetTypes(program_name, &seq1_is_na, &seq2_is_na);

   if ((outfp = FileOpen(blast_outputfile, "w")) == NULL) {
      ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
      return (1);
   }

   gapped_calculation = (Boolean) myargs[ARG_GAPPED].intvalue;
   /* query ids are taken as given when ASN.1 output or Entrez lookup is requested */
   believe_query = (seqannot_output || entrez_lookup);

   options = BLASTOptionNewEx(program_name, gapped_calculation, (Boolean) myargs[ARG_USEMEGABLAST].intvalue);
   /* NOTE(review): options is dereferenced here without a NULL check */
   if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp, &sep, &sep1, &(options->query_lcase_mask), believe_query) == FALSE) {
      ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
      return (1);
   }

   if (!entrez_lookup) {
      if (!believe_query)
         fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);

      /* Stand-in subject Bioseq: it borrows descr and seq_data from
         subject_bsp (pointers are copied, not the data) and gets a single
         general id "BL_ORD_ID" whose tag string is the subject title.
         NOTE(review): fake_subject_bsp is not freed anywhere in this function. */
      fake_subject_bsp = BioseqNew();
      fake_subject_bsp->descr = subject_bsp->descr;
      fake_subject_bsp->repr = subject_bsp->repr;
      fake_subject_bsp->mol = subject_bsp->mol;
      fake_subject_bsp->length = subject_bsp->length;
      fake_subject_bsp->seq_data = subject_bsp->seq_data;
      fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
      dbtagptr = DbtagNew();
      dbtagptr->db = StringSave("BL_ORD_ID");
      dbtagptr->tag = ObjectIdNew();
      if (BioseqGetTitle(subject_bsp) != NULL)
         dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
      else
         dbtagptr->tag->str = StringSave("No definition line found");
      ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
      bsp1 = (believe_query ? query_bsp : fake_bsp);
      bsp2 = fake_subject_bsp;
   } else {
      bsp1 = query_bsp;
      bsp2 = subject_bsp;
   }

   tabular_output = (Uint1) myargs[ARG_FORMAT].intvalue;

   /* transfer the command line settings into the options block;
      arguments left at 0 (or -1 for gap costs) keep the defaults */
   if (myargs[ARG_SEARCHSP].floatvalue)
      options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;
   options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
   options->expect_value = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;
   if (StringICmp("blastn", program_name) == 0) {
      options->penalty = myargs[ARG_MISMATCH].intvalue;
      options->reward = myargs[ARG_MATCH].intvalue;
   }
   options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;
   options->discontinuous = FALSE;
   if (myargs[ARG_XDROP].intvalue != 0) {
      options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
   }
   if (myargs[ARG_WORDSIZE].intvalue != 0)
      options->wordsize = (Int2) myargs[ARG_WORDSIZE].intvalue;
   if (options->is_megablast_search) {
      options->cutoff_s2 = options->wordsize*options->reward;
   }
   /* drop the matrix name already in the options before setting gap parameters for the requested matrix */
   options->matrix = MemFree(options->matrix);
   BLASTOptionSetGapParams(options, myargs[ARG_MATRIX].strvalue, 0, 0);
   if (myargs[ARG_GAPOPEN].intvalue != -1)
      options->gap_open = myargs[ARG_GAPOPEN].intvalue;
   if (myargs[ARG_GAPEXT].intvalue != -1)
      options->gap_extend = myargs[ARG_GAPEXT].intvalue;
   options->strand_option = myargs[ARG_STRAND].intvalue;
   /* Input longest intron length is in nucleotide scale; in the lower level code it will be used in protein scale */
   if (myargs[ARG_INTRON].intvalue > 0)
      options->longest_intron = myargs[ARG_INTRON].intvalue;

   /* run the search: whole sequences unless a sub-range was requested */
   if (!myargs[ARG_LOC1].strvalue && !myargs[ARG_LOC2].strvalue) {
      seqalign = BlastTwoSequencesWithCallback(bsp1, bsp2, program_name, options, &other_returns, &error_returns, handle_results);
   } else {
      SeqLocPtr slp1=NULL, slp2=NULL;
      if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, options->strand_option) == FALSE)
         return 1;
      seqalign = BlastTwoSequencesByLocWithCallback(slp1, slp2, program_name, options, &other_returns, &error_returns, handle_results, NULL);
      SeqLocFree(slp1);
      SeqLocFree(slp2);
   }

   /* report and release any error messages produced by the search */
   if (error_returns) {
      BlastErrorPrint(error_returns);
      for (vnp = error_returns; vnp; vnp = vnp->next) {
         BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
      }
      ValNodeFree(error_returns);
   }

   ka_params = NULL;
   ka_params_gap = NULL;
   params_buffer = NULL;
   mask_loc = NULL;
   matrix = NULL;
   txmatrix = NULL;

   /* pick the individual items out of other_returns by their choice tag;
      the payload pointers are taken over and released individually below */
   for (vnp=other_returns; vnp; vnp = vnp->next) {
      switch (vnp->choice) {
         case TXKABLK_NOGAP:
            ka_params = vnp->data.ptrvalue;
            break;
         case TXKABLK_GAP:
            ka_params_gap = vnp->data.ptrvalue;
            break;
         case TXPARAMETERS:
            params_buffer = vnp->data.ptrvalue;
            break;
         case TXMATRIX:
            matrix = vnp->data.ptrvalue;
            if (matrix && !tabular_output)
               txmatrix = BlastMatrixToTxMatrix(matrix);
            break;
         case SEQLOC_MASKING_NOTSET:
         case SEQLOC_MASKING_PLUS1:
         case SEQLOC_MASKING_PLUS2:
         case SEQLOC_MASKING_PLUS3:
         case SEQLOC_MASKING_MINUS1:
         case SEQLOC_MASKING_MINUS2:
         case SEQLOC_MASKING_MINUS3:
            ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
            break;
         default:
            break;
      }
   }

   /* seqannot and align_options are only assigned inside this block */
   if (!tabular_output || seqannot_output) {
      align_options = 0;
      align_options += TXALIGN_MATRIX_VAL;
      align_options += TXALIGN_SHOW_QS;
      align_options += TXALIGN_COMPRESS;
      align_options += TXALIGN_END_NUM;
      if (StringICmp("blastx", program_name) == 0) {
         align_options += TXALIGN_BLASTX_SPECIAL;
      }
      if (html)
         align_options += TXALIGN_HTML;

      /* wrap the alignments in a Seq-annot (type 2) for output/formatting */
      seqannot = SeqAnnotNew();
      seqannot->type = 2;
      AddAlignInfoToSeqAnnot(seqannot, align_type);
      seqannot->data = seqalign;
      aip = NULL;
      if (seqannot_output)
         aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w");
      if (aip && seqannot) {
         SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
         AsnIoReset(aip);
         aip = AsnIoClose(aip);
      }
   }

   if (!tabular_output) {
      /* traditional report: query acknowledgement, alignments, statistics */
      AcknowledgeBlastQuery(query_bsp, 70, outfp, believe_query, html);
      ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, txmatrix, mask_loc, FormatScoreFunc);
      seqannot = SeqAnnotFree(seqannot);
      if (txmatrix)
         txmatrix = TxMatrixDestruct(txmatrix);
      init_buff_ex(85);
      if (ka_params) {
         PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
      }
      if (ka_params_gap) {
         PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
      }
      PrintTildeSepLines(params_buffer, 70, outfp);
      free_buff();
   } else {
      PrintTabularOutputHeader(NULL, query_bsp, NULL, program_name, 0, believe_query, outfp);
      BlastPrintTabulatedResults(seqalign, query_bsp, NULL, 1, program_name, !gapped_calculation, believe_query, 0, 0, outfp, FALSE);
      /* NOTE(review): when ASN.1 output was also requested, seqannot was
         created above with data pointing at seqalign; it is not freed on
         this path, and seqalign itself is freed here. */
      SeqAlignSetFree(seqalign);
   }

   /* release everything taken out of other_returns, then the list nodes */
   matrix = BLAST_MatrixDestruct(matrix);
   MemFree(ka_params);
   MemFree(ka_params_gap);
   MemFree(params_buffer);
   mask_loc_start = mask_loc;
   while (mask_loc) {
      SeqLocSetFree(mask_loc->data.ptrvalue);
      mask_loc = mask_loc->next;
   }
   ValNodeFree(mask_loc_start);
   fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
   other_returns = ValNodeFree(other_returns);
   options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask);
   options = BLASTOptionDelete(options);
   MemFree(program_name);
   FileClose(outfp);

   /* the two Bioseqs are freed directly after an Entrez lookup, otherwise through their Seq-entries */
   if (entrez_lookup) {
      BioseqFree(query_bsp);
      BioseqFree(subject_bsp);
   } else {
      SeqEntryFree(sep);
      SeqEntryFree(sep1);
   }
   return 0;
}
/* Two-sequence BLAST driver built on the newer summary-options API.
 *
 * Reads all settings from the global myargs[] array, obtains the query and
 * subject sequences, builds a Seq-loc for each, optionally computes a
 * repeat mask for the query, runs BLAST_TwoSeqLocSets, optionally writes
 * the first Seq-align set as ASN.1, and formats the results.
 *
 * Returns 0 on completion; 1 or a non-zero status on the checked failures.
 *
 * NOTE(review): every early "return" below leaves without running the
 * cleanup at the end of the function, including
 * GeneticCodeSingletonFini().
 */
Int2 Main_new(void)
{
   BioseqPtr query_bsp=NULL, subject_bsp=NULL;
   BioseqPtr bsp1=NULL, bsp2=NULL;
   BioseqPtr fake_bsp=NULL, fake_subject_bsp=NULL;
   BlastFormattingInfo* format_info = NULL;
   BLAST_SummaryOptions* options=NULL;
   Blast_SummaryReturn* extra_returns = Blast_SummaryReturnNew();
   Boolean believe_query= FALSE;
   Boolean seq1_is_na, seq2_is_na; /* seq1/2 is DNA if TRUE. */
   Boolean seqannot_output; /* SeqAlign will be output. */
   Boolean entrez_lookup; /* Query/subject fetched from Entrez. */
   Boolean mask_at_hash=FALSE; /* masking only on lookup table if TRUE. */
   DbtagPtr dbtagptr;
   EBlastProgramType program_number;
   Int2 status; /* status of the individual steps; returned on the checked failures */
   EAlignView align_view = eAlignViewPairwise; /* Used for formatting */
   SeqAlignPtr seqalign=NULL;
   SeqEntryPtr sep=NULL, sep1=NULL;
   SeqLocPtr slp1, slp2; /* Used for actual search; set by BL2SEQ_MakeSeqLoc. */
   SeqLocPtr filter_loc=NULL; /* Location of regions filtered (returned by engine) */
   SeqLocPtr lcase_mask=NULL; /* For lower-case masking info from query FASTA. */
   SeqLoc* repeat_mask = NULL; /* Repeat mask locations */
   Uint1 strand_option = 0; /* FIXME */
   SBlastOptions* search_options = NULL; /* Needed for formatting. */
   SBlastSeqalignArray* seqalign_arr = NULL;

   GeneticCodeSingletonInit();

   strand_option = (Uint1) myargs[ARG_STRAND].intvalue;
   entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
   seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);
   /* query ids are taken as given when ASN.1 output or Entrez lookup is requested */
   believe_query = (seqannot_output || entrez_lookup);

   /* Non-zero value for -m option means tabular output. */
   if (myargs[ARG_FORMAT].intvalue != 0)
      align_view = eAlignViewTabularWithComments;

   /* NOTE(review): the return value of BlastProgram2Number is not checked */
   BlastProgram2Number(myargs[ARG_PROGRAM].strvalue, &program_number);

   /* derive the molecule type of each input from the program type */
   seq1_is_na = (program_number == eBlastTypeBlastn || program_number == eBlastTypeBlastx || program_number == eBlastTypeRpsTblastn || program_number == eBlastTypeTblastx);
   seq2_is_na = (program_number == eBlastTypeBlastn || program_number == eBlastTypeTblastn || program_number == eBlastTypeTblastx);

   if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp, &sep, &sep1, &lcase_mask, believe_query) == FALSE) {
      ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
      return (1);
   }

   if (!entrez_lookup) {
      if (!believe_query)
         fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);

      /* Stand-in subject Bioseq: it borrows descr and seq_data from
         subject_bsp (pointers are copied, not the data) and gets a single
         general id "BL_ORD_ID" whose tag string is the subject title.
         NOTE(review): fake_subject_bsp is not freed anywhere in this function. */
      fake_subject_bsp = BioseqNew();
      fake_subject_bsp->descr = subject_bsp->descr;
      fake_subject_bsp->repr = subject_bsp->repr;
      fake_subject_bsp->mol = subject_bsp->mol;
      fake_subject_bsp->length = subject_bsp->length;
      fake_subject_bsp->seq_data = subject_bsp->seq_data;
      fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
      dbtagptr = DbtagNew();
      dbtagptr->db = StringSave("BL_ORD_ID");
      dbtagptr->tag = ObjectIdNew();
      if (BioseqGetTitle(subject_bsp) != NULL)
         dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
      else
         dbtagptr->tag->str = StringSave("No definition line found");
      ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
      bsp1 = (believe_query ? query_bsp : fake_bsp);
      bsp2 = fake_subject_bsp;
   } else {
      /* Query and subject Bioseqs are already "fake". */
      bsp1 = query_bsp;
      bsp2 = subject_bsp;
   }

   if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, strand_option) == FALSE)
      return 1;

   if (Bl2SEQ_SummaryOptionsSet(&options, program_number) == FALSE)
      return 1;

   /* Find repeat mask, if necessary */
   /* NOTE(review): &extra_returns->error is formed before extra_returns is
      tested for NULL in the error branch below */
   if ((status = Blast_FindRepeatFilterSeqLoc(slp1, myargs[ARG_FILTER].strvalue, &repeat_mask, &extra_returns->error)) != 0) {
      if (extra_returns && extra_returns->error) {
         ErrSev max_sev = SBlastMessageErrPost(extra_returns->error);
         /* only messages of severity SEV_ERROR or above abort the run */
         if (max_sev >= SEV_ERROR)
            return status;
      }
   }

   /* Combine repeat mask with lower case mask */
   if (repeat_mask)
      lcase_mask = ValNodeLink(&lcase_mask, repeat_mask);

   status = BLAST_TwoSeqLocSets(options, slp1, slp2, lcase_mask, &seqalign_arr, &filter_loc, &mask_at_hash, &extra_returns);

   /* Free the lower case mask in SeqLoc form. */
   lcase_mask = Blast_ValNodeMaskListFree(lcase_mask);

   /* Post warning or error messages, no matter what the search status was. */
   SBlastMessageErrPost(extra_returns->error);

   if (status != 0) {
      ErrPostEx(SEV_FATAL, 1, 0, "BLAST_TwoSeqLocSets failed");
      return status;
   }

   /* optional ASN.1 output of the first Seq-align set */
   if (myargs[ARG_ASNOUT].strvalue && seqalign_arr) {
      /* NOTE(review): asnout is used without checking that AsnIoOpen succeeded */
      AsnIoPtr asnout = AsnIoOpen(myargs[ARG_ASNOUT].strvalue, (char*)"w");
      GenericSeqAlignSetAsnWrite(seqalign_arr->array[0], asnout);
      asnout = AsnIoClose(asnout);
   }

   /* Pass NULL for the database name, since there is no database. */
   BlastFormattingInfoNewBasic(align_view, options, slp1, myargs[ARG_OUT].strvalue, &search_options, &format_info);

   /* Always show gis in the output, hence pass TRUE for respective argument. */
   BlastFormattingInfoSetUpOptions(format_info, 0, 1, (Boolean) myargs[ARG_HTML].intvalue, (Boolean) myargs[ARG_USEMEGABLAST].intvalue, TRUE, believe_query);

   /* If masking was at hash only, free the masking locations,
    * to prevent them from being used for formatting.
    * (The decision is read back from search_options; the local mask_at_hash
    * filled in by BLAST_TwoSeqLocSets is not consulted.)
    */
   if (SBlastOptionsGetMaskAtHash(search_options))
      filter_loc = Blast_ValNodeMaskListFree(filter_loc);

   /* Format the results */
   /* NOTE(review): the first status is overwritten by the second, and
      neither is examined; the function returns 0 regardless */
   status = BLAST_FormatResults(seqalign_arr, 1, slp1, filter_loc, format_info, extra_returns);
   status = Blast_PrintOutputFooter(format_info, extra_returns);

   /* Free masking locations if they haven't been freed already. */
   filter_loc = Blast_ValNodeMaskListFree(filter_loc);

   format_info = BlastFormattingInfoFree(format_info);
   extra_returns = Blast_SummaryReturnFree(extra_returns);
   search_options = SBlastOptionsFree(search_options);

   /* the two Bioseqs are freed directly after an Entrez lookup, otherwise through their Seq-entries */
   if (entrez_lookup) {
      BioseqFree(query_bsp);
      BioseqFree(subject_bsp);
   } else {
      SeqEntryFree(sep);
      SeqEntryFree(sep1);
   }

   options = BLAST_SummaryOptionsFree(options);
   seqalign_arr = SBlastSeqalignArrayFree(seqalign_arr);
   slp1 = SeqLocSetFree(slp1);
   slp2 = SeqLocSetFree(slp2);
   fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
   GeneticCodeSingletonFini();
   return 0;
}
/*******************************************************************************

  Function : DDV_GetRulerForEditor()

  Purpose : build the ruler descriptor list used by the editor from the
            viewer's descriptor list, restricted to the display range
            [from_disp, to_disp]. Each viewer descriptor overlapping the
            range is clipped to it; a clipped piece with the same
            aligned/unaligned state as the previously emitted descriptor
            only extends that descriptor to the right instead of creating
            a new one.

  Return value : an allocated list of DDVRulerDescr nodes ready for use by
            the editor. NULL if nothing falls into the range or if an
            allocation fails (the partial list is freed in that case).

*******************************************************************************/
NLM_EXTERN ValNodePtr DDV_GetRulerForEditor(ValNodePtr descr_head,Int4 from_disp,
		Int4 to_disp)
{
	DDVRulerDescrPtr src,last_out,fresh;
	ValNodePtr       src_node,out_head,out_tail;
	Int4             copy_start,copy_stop,ali_start;

	out_head=NULL;
	last_out=NULL;

	for(src_node=descr_head;src_node!=NULL;src_node=src_node->next){
		src=(DDVRulerDescrPtr)src_node->data.ptrvalue;

		/*not yet in the region of interest ?*/
		if (src->disp_stop<from_disp) continue;

		/*clip the descriptor to the requested range*/
		copy_start=_max_(from_disp,src->disp_start);
		copy_stop=_min_(src->disp_stop,to_disp);

		/*aligned pieces keep track of their alignment coordinate, shifted
		by whatever was clipped off on the left; unaligned ones use -1*/
		if (src->bUnAligned==FALSE)
			ali_start=src->align_start+(copy_start-src->disp_start);
		else
			ali_start=(Int4)-1;

		if (last_out && last_out->bUnAligned==src->bUnAligned){
			/*same kind as the previous output node: just extend it*/
			last_out->disp_stop=copy_stop;
		}
		else{
			fresh=(DDVRulerDescrPtr)MemNew(sizeof(DDVRulerDescr));
			if (!fresh){
				if (out_head) ValNodeFreeData(out_head);
				return(NULL);
			}
			fresh->disp_start=copy_start;
			fresh->disp_stop=copy_stop;
			fresh->align_start=ali_start;
			fresh->bUnAligned=src->bUnAligned;
			if (!out_head){
				out_tail=ValNodeAddPointer(&out_head,0,(Pointer)fresh);
			}
			else{
				out_tail=ValNodeAddPointer(&out_tail,0,(Pointer)fresh);
			}
			last_out=fresh;
		}

		/*this descriptor reaches the end of the range: done*/
		if (src->disp_stop>=to_disp) break;
	}

	return(out_head);
}
/******************************************************************************* Function : DDV_ComputeRuler() Purpose : compute the Ruler descriptor (usefull for discontinuous align) Parameters : Return value : a list of RUler descriptor *******************************************************************************/ extern ValNodePtr DDV_ComputeRuler(SeqAlignPtr sap,DDV_Disp_OptPtr ddop) { DDVRulerDescrPtr drdp; ValNodePtr vnp=NULL,vnp_head=NULL; Int4 disp_start=0,length,TotAliLength=0,r=0; Boolean bUnAligned; /* make ruler for LEFT_TAIL */ if ((ddop->DispDiscStyle == MSA_TXT_STYLE_2) && (ddop->ShowLeftTail)) { length = AlnMgrGetMaxTailLength(sap, LEFT_TAIL); if (length) { drdp=(DDVRulerDescrPtr)MemNew(sizeof(DDVRulerDescr)); drdp->disp_start=disp_start; drdp->disp_stop=disp_start+length-1; drdp->bUnAligned=TRUE; drdp->align_start = -1; disp_start += length; vnp_head=ValNodeAddPointer(NULL,0,(Pointer)drdp); vnp=vnp_head; } } if (AlnMgrIsSAPDiscAli(sap)){ /* make ruler for multiple blocks */ while(AlnMgrGetNextLengthBit(sap,&length,&r)){ if (length<0){ bUnAligned=TRUE; switch(ddop->DispDiscStyle){/*user's display choice*/ case MSA_TXT_STYLE_1: length=ddop->SpacerSize; break; case MSA_TXT_STYLE_2: length=ABS(length); break; } } else{ bUnAligned=FALSE; } drdp=(DDVRulerDescrPtr)MemNew(sizeof(DDVRulerDescr)); drdp->disp_start=disp_start; drdp->disp_stop=disp_start+length-1; drdp->bUnAligned=bUnAligned; if (bUnAligned==FALSE) drdp->align_start=TotAliLength;/*SeqAlign Coord*/ else drdp->align_start=-1;/*drdp->disp_start;*//*Disp Coord*/ if (!vnp_head){ vnp_head=ValNodeAddPointer(NULL,0,(Pointer)drdp); vnp=vnp_head; } else{ vnp=ValNodeAddPointer(&vnp,0,(Pointer)drdp); } disp_start+=length; if (bUnAligned==FALSE) TotAliLength+=length; } } else if (sap->type == SAT_MASTERSLAVE){ /* make ruler for a single block */ drdp=(DDVRulerDescrPtr)MemNew(sizeof(DDVRulerDescr)); drdp->disp_start=disp_start; length=AlnMgrGetAlnLength(sap,FALSE); drdp->disp_stop=disp_start+length-1; 
drdp->bUnAligned = FALSE; drdp->align_start=0; if (!vnp_head){ vnp_head=ValNodeAddPointer(NULL,0,(Pointer)drdp); vnp=vnp_head; } else{ vnp=ValNodeAddPointer(&vnp,0,(Pointer)drdp); } } /* make ruler for RIGHT_TAIL */ if ((ddop->DispDiscStyle == MSA_TXT_STYLE_2) && (ddop->ShowRightTail)) { length = AlnMgrGetMaxTailLength(sap, RIGHT_TAIL); if (length) { drdp=(DDVRulerDescrPtr)MemNew(sizeof(DDVRulerDescr)); drdp->disp_start=disp_start; drdp->disp_stop=disp_start+length-1; drdp->bUnAligned=TRUE; drdp->align_start = -1; disp_start += length; vnp = ValNodeAddPointer(&vnp,0,(Pointer)drdp); } } return(vnp_head); }
static Int2 Main_old (void) { AsnIoPtr aip, xml_aip = NULL; BioseqPtr query_bsp, PNTR query_bsp_array; BioSourcePtr source; BLAST_MatrixPtr matrix; BLAST_OptionsBlkPtr options; BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL; BlastPruneSapStructPtr prune; Boolean db_is_na, query_is_na, show_gi, believe_query=FALSE; Boolean html=FALSE; CharPtr params_buffer=NULL; Int4 number_of_descriptions, number_of_alignments; SeqAlignPtr seqalign, PNTR seqalign_array; SeqAnnotPtr seqannot; SeqEntryPtr PNTR sepp; TxDfDbInfoPtr dbinfo=NULL, dbinfo_head; Uint1 align_type, align_view, out_type; Uint4 align_options, print_options; ValNodePtr mask_loc, mask_loc_start, next_mask_loc; ValNodePtr vnp, other_returns, error_returns; CharPtr blast_program, blast_database, blast_inputfile, blast_outputfile; FILE *infp, *outfp, *mqfp=NULL; Int4 index, num_bsps, total_length, total_processed = 0; Int2 ctr = 1; Char prefix[2]; SeqLocPtr last_mask, mask_slp; Boolean done, hits_found; Boolean lcase_masking; MBXmlPtr mbxp = NULL; Boolean traditional_formatting; blast_program = "blastn"; blast_database = myargs [ARG_DB].strvalue; blast_inputfile = myargs [ARG_QUERY].strvalue; blast_outputfile = myargs [ARG_OUT].strvalue; if (myargs[ARG_HTML].intvalue) html = TRUE; if ((infp = FileOpen(blast_inputfile, "r")) == NULL) { ErrPostEx(SEV_FATAL, 1, 0, "mgblast: Unable to open input file %s\n", blast_inputfile); return (1); } align_view = (Int1) myargs[ARG_FORMAT].intvalue; /* Geo mod: -- replaced myargs[ARG_OUTTYPE].intvalue with out_type from now on */ out_type=(Int1) myargs[ARG_OUTTYPE].intvalue; if (out_type==MGBLAST_FLTHITS || out_type==MGBLAST_HITGAPS) { align_view = 12 + (out_type-MGBLAST_FLTHITS ); out_type=MBLAST_ALIGNMENTS; //Attention: 12 MUST be the -m mgblast tab option for MGBLAST_FLTHITS format // and MGBLAST_HITGAPS = MGBLAST_FLTHITS+1 if (align_view>12) { // this is MGBLAST_HITGAPS output gap_Info=TRUE; if (dbgaps_buf==NULL) dbgaps_buf=(CharPtr) Malloc(dbgaps_bufsize + 1); if 
(qgaps_buf==NULL) qgaps_buf=(CharPtr) Malloc(qgaps_bufsize + 1); } } outfp = NULL; traditional_formatting = (out_type == MBLAST_ALIGNMENTS || out_type == MBLAST_DELAYED_TRACEBACK); if ((!traditional_formatting || (align_view != 7 && align_view != 10 && align_view != 11)) && blast_outputfile != NULL) { if ((outfp = FileOpen(blast_outputfile, "w")) == NULL) { ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile); return (1); } } //align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na); align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na); /* if (!traditional_formatting) believe_query = TRUE; else believe_query = (Boolean) myargs[ARG_BELIEVEQUERY].intvalue; */ //Geo mod: believe_query=FALSE; //If ASN.1 output is requested and believe_query is not set to TRUE, // exit with an error. if (!believe_query && (myargs[ARG_ASNOUT].strvalue || align_view == 10 || align_view == 11)) { ErrPostEx(SEV_FATAL, 1, 0, "-J option must be TRUE to produce ASN.1 output; before " "changing -J to TRUE please also ensure that all query " "sequence identifiers are unique"); return -1; } options = BLASTOptionNewEx(blast_program, TRUE, TRUE); if (options == NULL) return 3; options->do_sum_stats = FALSE; options->is_neighboring = FALSE; options->expect_value = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue; number_of_descriptions = myargs[ARG_DESCRIPTIONS].intvalue; number_of_alignments = myargs[ARG_ALIGNMENTS].intvalue; options->hitlist_size = MAX(number_of_descriptions, number_of_alignments); if (myargs[ARG_XDROP].intvalue != 0) options->gap_x_dropoff = myargs[ARG_XDROP].intvalue; if (myargs[ARG_XDROP_UNGAPPED].intvalue != 0) options->dropoff_2nd_pass = myargs[ARG_XDROP_UNGAPPED].intvalue; if (myargs[ARG_XDROP_FINAL].intvalue != 0) options->gap_x_dropoff_final = myargs[ARG_XDROP_FINAL].intvalue; if (StringICmp(myargs[ARG_FILTER].strvalue, "T") == 0) options->filter_string = StringSave("D"); else options->filter_string = 
StringSave(myargs[ARG_FILTER].strvalue); show_gi = (Boolean) myargs[ARG_SHOWGIS].intvalue; options->penalty = myargs[ARG_MISMATCH].intvalue; options->reward = myargs[ARG_MATCH].intvalue; if (myargs[ARG_GAPOPEN].intvalue >= 0) options->gap_open = myargs[ARG_GAPOPEN].intvalue; if (myargs[ARG_GAPEXT].intvalue >= 0) options->gap_extend = myargs[ARG_GAPEXT].intvalue; if (options->gap_open == 0 && options->reward % 2 == 0 && options->gap_extend == options->reward / 2 - options->penalty) /* This is the default value */ options->gap_extend = 0; options->genetic_code = 1; options->db_genetic_code = 1; /* Default; it's not needed here anyway */ options->number_of_cpus = myargs[ARG_THREADS].intvalue; if (myargs[ARG_WORDSIZE].intvalue != 0) options->wordsize = myargs[ARG_WORDSIZE].intvalue; if (myargs[ARG_MINSCORE].intvalue == 0) options->cutoff_s2 = options->wordsize*options->reward; else options->cutoff_s2 = myargs[ARG_MINSCORE].intvalue; options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue; options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue; options->perform_culling = FALSE; /* Kludge */ options->block_width = myargs[ARG_MAXPOS].intvalue; options->strand_option = myargs[ARG_STRAND].intvalue; options->window_size = myargs[ARG_WINDOW].intvalue; #ifdef DO_NOT_SUPPRESS_BLAST_OP options->mb_template_length = myargs[ARG_TEMPL_LEN].intvalue; if (myargs[ARG_TEMPL_LEN].intvalue != 0) options->mb_one_base_step = (Boolean) myargs[ARG_EVERYBASE].intvalue; options->mb_disc_type = myargs[ARG_TEMPL_TYPE].intvalue; #endif lcase_masking = (Boolean) myargs[ARG_LCASE].intvalue; /* Allow dynamic programming gapped extension only with affine gap scores */ if (options->gap_open != 0 || options->gap_extend != 0) options->mb_use_dyn_prog = (Boolean) myargs[ARG_DYNAMIC].intvalue; print_options = 0; align_options = 0; align_options += TXALIGN_COMPRESS; align_options += TXALIGN_END_NUM; if (show_gi) { align_options += TXALIGN_SHOW_GI; print_options += TXALIGN_SHOW_GI; } if 
(align_view) { align_options += TXALIGN_MASTER; if (align_view == 1 || align_view == 3) align_options += TXALIGN_MISMATCH; if (align_view == 3 || align_view == 4 || align_view == 6) align_options += TXALIGN_FLAT_INS; if (align_view == 5 || align_view == 6) align_options += TXALIGN_BLUNT_END; } else { align_options += TXALIGN_MATRIX_VAL; align_options += TXALIGN_SHOW_QS; } if (html) { align_options += TXALIGN_HTML; print_options += TXALIGN_HTML; } if (myargs[ARG_GILIST].strvalue) options->gifile = StringSave(myargs[ARG_GILIST].strvalue); if (out_type == MBLAST_ENDPOINTS) options->no_traceback = 1; else if (out_type == MBLAST_DELAYED_TRACEBACK) options->no_traceback = 2; else options->no_traceback = 0; options->megablast_full_deflines = (Boolean) myargs[ARG_FULLID].intvalue; options->perc_identity = (FloatLo) myargs[ARG_PERC_IDENT].floatvalue; options->hsp_num_max = myargs[ARG_MAXHSP].intvalue; if (!believe_query) options->megablast_full_deflines = TRUE; /*if (options->megablast_full_deflines) believe_query = FALSE;*/ query_bsp_array = (BioseqPtr PNTR) MemNew((MAX_NUM_QUERIES+1)*sizeof(BioseqPtr)); sepp = (SeqEntryPtr PNTR) MemNew(MAX_NUM_QUERIES*sizeof(SeqEntryPtr)); StrCpy(prefix, ""); global_fp = outfp; options->output = outfp; if (traditional_formatting) { if (align_view < 7) { if (html) { fprintf(outfp, "<HTML>\n<TITLE>MEGABLAST Search Results</TITLE>\n"); fprintf(outfp, "<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" " "VLINK=\"#660099\" ALINK=\"#660099\">\n"); fprintf(outfp, "<PRE>\n"); } init_buff_ex(90); BlastPrintVersionInfo("mgblast", html, outfp); fprintf(outfp, "\n"); MegaBlastPrintReference(html, 90, outfp); fprintf(outfp, "\n"); if(!PrintDbInformation(blast_database, !db_is_na, 70, outfp, html)) return 1; free_buff(); #ifdef OS_UNIX fprintf(global_fp, "%s", "Searching"); #endif } } aip = NULL; if (myargs[ARG_ASNOUT].strvalue != NULL) { if ((aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w")) == NULL) { ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open 
output file %s\n", myargs[ARG_ASNOUT].strvalue); return 1; } } else if (align_view == 10 || align_view == 11) { const char* mode = (align_view == 10) ? "w" : "wb"; if ((aip = AsnIoOpen (blast_outputfile, (char*) mode)) == NULL) { ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile); return 1; } } if (align_view == 7) { xml_aip = AsnIoOpen(blast_outputfile, "wx"); } if (myargs[ARG_QUERYLOC].strvalue) { Int4 start, end; Megablast_GetLoc(myargs[ARG_QUERYLOC].strvalue, &start, &end); options->required_start = start - 1; options->required_end = end -1; } done = FALSE; while (!done) { num_bsps = 0; total_length = 0; done = TRUE; SeqMgrHoldIndexing(TRUE); mask_slp = last_mask = NULL; while ((sepp[num_bsps]=FastaToSeqEntryForDb(infp, query_is_na, NULL, believe_query, prefix, &ctr, &mask_slp)) != NULL) { if (!lcase_masking) /* Lower case ignored */ mask_slp = SeqLocFree(mask_slp); if (mask_slp) { if (!last_mask) options->query_lcase_mask = last_mask = mask_slp; else { last_mask->next = mask_slp; last_mask = last_mask->next; } mask_slp = NULL; } query_bsp = NULL; SeqEntryExplore(sepp[num_bsps], &query_bsp, FindNuc); //debug: /* char query_buffer[255]; SeqIdWrite(query_bsp->id, query_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH); fprintf(stderr, "===> query_buf=%s\n", query_buffer); */ if (query_bsp == NULL) { ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n"); return 2; } source = BioSourceNew(); source->org = OrgRefNew(); source->org->orgname = OrgNameNew(); source->org->orgname->gcode = options->genetic_code; ValNodeAddPointer(&(query_bsp->descr), Seq_descr_source, source); query_bsp_array[num_bsps++] = query_bsp; total_length += query_bsp->length; if (total_length > myargs[ARG_MAXQUERY].intvalue || num_bsps >= MAX_NUM_QUERIES) { done = FALSE; break; } } if (num_bsps == 0) break; SeqMgrHoldIndexing(FALSE); other_returns = NULL; error_returns = NULL; if (out_type==MBLAST_ENDPOINTS) seqalign_array = BioseqMegaBlastEngine(query_bsp_array, 
blast_program, blast_database, options, &other_returns, &error_returns, dummy_callback, NULL, NULL, 0, MegaBlastPrintEndpoints); else if (out_type==MBLAST_SEGMENTS) seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program, blast_database, options, &other_returns, &error_returns, dummy_callback, NULL, NULL, 0, MegaBlastPrintSegments); else if (out_type==MBLAST_ALIGN_INFO) { /* -- Geo mod: do not print header PrintTabularOutputHeader(blast_database, (num_bsps==1) ? query_bsp_array[0] : NULL, NULL, "megablast", 0, believe_query, global_fp);*/ seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program, blast_database, options, &other_returns, &error_returns, dummy_callback, NULL, NULL, 0, MegaBlastPrintAlignInfo); } else if (out_type==MBLAST_ALIGNMENTS) { seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program, blast_database, options, &other_returns, &error_returns, align_view < 7 ? tick_callback : NULL, NULL, NULL, 0, NULL); } #ifdef OS_UNIX fflush(global_fp); #endif if (error_returns) { BlastErrorPrint(error_returns); for (vnp = error_returns; vnp; vnp = vnp->next) { BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue); } ValNodeFree(error_returns); } if (traditional_formatting) { dbinfo = NULL; ka_params = NULL; ka_params_gap = NULL; params_buffer = NULL; mask_loc = NULL; matrix = NULL; for (vnp=other_returns; vnp; vnp = vnp->next) { switch (vnp->choice) { case TXDBINFO: dbinfo = vnp->data.ptrvalue; break; case TXKABLK_NOGAP: ka_params = vnp->data.ptrvalue; break; case TXKABLK_GAP: ka_params_gap = vnp->data.ptrvalue; break; case TXPARAMETERS: params_buffer = vnp->data.ptrvalue; break; case TXMATRIX: matrix = vnp->data.ptrvalue; break; case SEQLOC_MASKING_NOTSET: case SEQLOC_MASKING_PLUS1: case SEQLOC_MASKING_PLUS2: case SEQLOC_MASKING_PLUS3: case SEQLOC_MASKING_MINUS1: case SEQLOC_MASKING_MINUS2: case SEQLOC_MASKING_MINUS3: ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue); break; default: break; } } 
#ifdef OS_UNIX if(align_view < 7) { fprintf(global_fp, "%s\n", " done"); } #endif if (myargs[ARG_MASKEDQUERY].strvalue) { if ((mqfp = FileOpen(myargs[ARG_MASKEDQUERY].strvalue, "w")) == NULL) ErrPostEx(SEV_WARNING, 1, 0, "Unable to open file %s for masked query\n", myargs[ARG_MASKEDQUERY].strvalue); } hits_found = FALSE; mask_loc_start = next_mask_loc = mask_loc; mask_loc = NULL; if (align_view == 7) { mbxp = PSIXmlInit(xml_aip, "megablast", blast_database, options, query_bsp_array[0], 0); } if (seqalign_array) { //results returned back for processing ReadDBBioseqFetchEnable ("megablast", blast_database, db_is_na, TRUE); for (index=0; index<num_bsps; index++) { seqalign = seqalign_array[index]; if (next_mask_loc && SeqIdComp(SeqLocId((SeqLocPtr)next_mask_loc->data.ptrvalue), query_bsp_array[index]->id) == SIC_YES) { mask_loc = (SeqLocPtr) MemDup(next_mask_loc, sizeof(SeqLoc)); next_mask_loc = next_mask_loc->next; mask_loc->next = NULL; } if (mqfp) { /* convert mask locations from all sources into a single seqloc */ mask_slp = NULL; if (mask_loc) mask_slp = blastMergeFilterLocs(mask_slp, (SeqLocPtr)mask_loc->data.ptrvalue, FALSE, 0, 0); PrintMaskedSequence(query_bsp_array[index], mask_slp, mqfp, 50, lcase_masking); SeqLocSetFree(mask_slp); } if (seqalign==NULL) { mask_loc = MemFree(mask_loc); continue; } hits_found = TRUE; if (align_view < 7) { init_buff_ex(70); AcknowledgeBlastQuery(query_bsp_array[index], 70, outfp, believe_query, html); free_buff(); } if (align_view == 8 || align_view == 9) { if (align_view == 9) PrintTabularOutputHeader(blast_database, query_bsp_array[index], NULL, blast_program, 0, believe_query, global_fp); /* debug: char qbuf[512]; strcpy(qbuf, BioseqGetTitle(query_bsp_array[index])); fprintf(stderr, "---> Here: query title=%s\n", qbuf); */ BlastPrintTabulatedResults(seqalign, query_bsp_array[index], NULL, number_of_alignments, blast_program, !options->gapped_calculation, believe_query, 0, 0, global_fp, (align_view == 9)); ObjMgrFreeCache(0); 
SeqAlignSetFree(seqalign); mask_loc = MemFree(mask_loc); continue; } //Geo mod: else if (align_view>=12) { MGBlastPrintTab(seqalign, query_bsp_array[index], number_of_alignments, !options->gapped_calculation, global_fp); ObjMgrFreeCache(0); SeqAlignSetFree(seqalign); mask_loc = MemFree(mask_loc); continue; } else if(align_view == 7) { IterationPtr iterp; iterp = BXMLBuildOneQueryIteration(seqalign, NULL, FALSE, !options->gapped_calculation, index, NULL, query_bsp_array[index], mask_loc); IterationAsnWrite(iterp, mbxp->aip, mbxp->atp); AsnIoFlush(mbxp->aip); IterationFree(iterp); SeqAlignSetFree(seqalign); mask_loc = MemFree(mask_loc); continue; } seqannot = SeqAnnotNew(); seqannot->type = 2; AddAlignInfoToSeqAnnot(seqannot, align_type); seqannot->data = seqalign; if (aip) { SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL); AsnIoReset(aip); } if (outfp) { /* Uncacheing causes problems with ordinal nos. vs. gi's. */ prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_descriptions, NULL); ObjMgrSetHold(); init_buff_ex(85); PrintDefLinesFromSeqAlign(prune->sap, 80, outfp, print_options, FIRST_PASS, NULL); free_buff(); prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_alignments, prune); seqannot->data = prune->sap; if (align_view != 0) ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, NULL, mask_loc, NULL); else ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, NULL, mask_loc, FormatScoreFunc); seqannot->data = seqalign; prune = BlastPruneSapStructDestruct(prune); ObjMgrClearHold(); ObjMgrFreeCache(0); } seqannot = SeqAnnotFree(seqannot); mask_loc = MemFree(mask_loc); } /* End loop on seqaligns for different queries */ ReadDBBioseqFetchDisable(); } if (mbxp != NULL) { MBXmlClose(mbxp, other_returns, !options->gapped_calculation); } if (mqfp) FileClose(mqfp); if (!hits_found && align_view < 7) fprintf(outfp, "\n\n ***** No hits found ******\n\n"); matrix = BLAST_MatrixDestruct(matrix); if(html) fprintf(outfp, 
"<PRE>\n"); init_buff_ex(85); dbinfo_head = dbinfo; if(align_view < 7) { while (dbinfo) { PrintDbReport(dbinfo, 70, outfp); dbinfo = dbinfo->next; } } dbinfo_head = TxDfDbInfoDestruct(dbinfo_head); if (ka_params) { if(align_view < 7) PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE); MemFree(ka_params); } if (ka_params_gap) { if(align_view < 7) PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE); MemFree(ka_params_gap); } if(align_view < 7) PrintTildeSepLines(params_buffer, 70, outfp); MemFree(params_buffer); free_buff(); mask_loc = mask_loc_start; while (mask_loc) { SeqLocSetFree(mask_loc->data.ptrvalue); mask_loc = mask_loc->next; } ValNodeFree(mask_loc_start); } else { //not traditional formatting /* Just destruct all other_returns parts */ for (vnp=other_returns; vnp; vnp = vnp->next) { switch (vnp->choice) { case TXDBINFO: TxDfDbInfoDestruct(vnp->data.ptrvalue); break; case TXKABLK_NOGAP: case TXKABLK_GAP: case TXPARAMETERS: MemFree(vnp->data.ptrvalue); break; case TXMATRIX: BLAST_MatrixDestruct(vnp->data.ptrvalue); break; case SEQLOC_MASKING_NOTSET: case SEQLOC_MASKING_PLUS1: case SEQLOC_MASKING_PLUS2: case SEQLOC_MASKING_PLUS3: case SEQLOC_MASKING_MINUS1: case SEQLOC_MASKING_MINUS2: case SEQLOC_MASKING_MINUS3: mask_loc = vnp->data.ptrvalue; SeqLocSetFree(mask_loc); default: break; } } } other_returns = ValNodeFree(other_returns); MemFree(seqalign_array); options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask); /* Freeing SeqEntries can be very expensive, do this only if this is not the last iteration of search */ if (!done) { for (index=0; index<num_bsps; index++) { sepp[index] = SeqEntryFree(sepp[index]); query_bsp_array[index] = NULL; } } total_processed += num_bsps; } /* End of loop on complete searches */ aip = AsnIoClose(aip); /*if (align_view == 7) xml_aip = AsnIoClose(xml_aip);*/ if (align_view < 7 && html) fprintf(outfp, "</PRE>\n</BODY>\n</HTML>\n"); if 
(align_view < 7 && myargs[ARG_LOGINFO].intvalue) fprintf(outfp, "Mega BLAST run finished, processed %d queries\n", total_processed); MemFree(query_bsp_array); MemFree(sepp); MemFree(qgaps_buf); MemFree(dbgaps_buf); options = BLASTOptionDelete(options); FileClose(infp); FileClose(outfp); return 0; }
/******************************************************************
*
*	aa_to_codon(sfp, aa_start, aa_stop)
*	generate a list of CodonVector to show the codons of an
*	amino acid sequence
*	sfp: the Seq-feat for cds (its data.choice must be 3 and its
*	     data must be a CdRegion; location and product are read)
*	aa_start: the start position of protein sequence
*	aa_stop: the stop position of protein sequence
*
*	One CodonVector is produced for every interval of the feature
*	location that covers part of [aa_start, aa_stop] and whose
*	Bioseq can be locked. Each holds three strings (buf[0..2]),
*	one per codon position, with the bases in lower case.
*
*	returns the list (CodonVectorPtr in data.ptrvalue), or NULL
*	if sfp is NULL or not a usable cds, if the scratch buffers
*	cannot be allocated, or if no interval matched. If a
*	CodonVector cannot be allocated, the list built so far is
*	returned.
*
******************************************************************/
NLM_EXTERN ValNodePtr aa_to_codon(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
	BioseqPtr bsp;
	Int4 frame_offset, start_offset;
	SeqLocPtr slp = NULL;
	SeqLocPtr cdloc;
	CdRegionPtr crp;
	Uint1 frame;

	Boolean is_end;			/**is the end for process reached?**/
	Int4 p_start=0, p_stop=0;		/**protein start & stop in defined
					corresponding CdRegion Seq-loc**/
	Int4 line_len;
	Int4 cur_pos;			/**current protein position in process**/
	Int4 cd_len;		/**length of the cDNA for the coding region**/

	Int2 i, j;
	Int2 k, n;
	CharPtr PNTR buf;

	Boolean is_new;		/**Is cur_pos at the begin of new Seq-loc?**/
	SeqPortPtr spp;
	Uint1 residue;
	Boolean end_partial;
	Int4 d_start, seq_pos;
	ValNodePtr head= NULL;
	CodonVectorPtr cvp;
	Boolean prt_stop_codon;
	Uint2 exon;

	/* a NULL feature is rejected like any other non-cds input */
	if(sfp == NULL || sfp->data.choice !=3)
		return NULL;

	crp = sfp->data.value.ptrvalue;
	if(!crp)
		return NULL;
	frame = crp->frame;
	cdloc = sfp->location;
	if(cdloc == NULL )
		return NULL;

	/* frame 0 and frame 1 both mean "no leading partial codon" */
	if(frame>0)
		frame_offset = frame-1;
	else
		frame_offset = 0;
	start_offset = frame_offset;

	prt_stop_codon = (aa_stop == SeqLocStop(sfp->product));
	line_len = (aa_stop - aa_start + 1) + 1;
	/* +1 for the possible partial start codon*/
	if(prt_stop_codon)/*can be either as a stop codon or partial stop*/
		++line_len;

	/* scratch buffers, one per codon position, reused for every exon */
	buf = MemNew((size_t)3 * sizeof(CharPtr));
	if(buf == NULL)
		return NULL;
	for(i =0; i<3; ++i)
		buf[i] = MemNew((size_t)(line_len + 1) * sizeof (Char));
	if(buf[0] == NULL || buf[1] == NULL || buf[2] == NULL)
	{
		for(i=0; i<3; ++i)
			MemFree(buf[i]);
		MemFree(buf);
		return NULL;
	}

	cur_pos= aa_start;
	cd_len = 0;
	is_end = FALSE;
	p_start = 0;
	slp = NULL;
	exon = 0;
	while(!is_end && ((slp = SeqLocFindNext(cdloc, slp))!=NULL))
	{
		++exon;
		cd_len += SeqLocLen(slp);
		/* last protein position (possibly partial) reached by the
		   coding bases seen so far */
		end_partial = ((cd_len - start_offset)%3 != 0);
		p_stop = (cd_len - start_offset)/3 -1;
		if(end_partial)
			++p_stop;
		if(p_stop > aa_stop || (p_stop == aa_stop && !end_partial))
		{
			p_stop = aa_stop;		/**check if the end is reached**/
			is_end = TRUE;
		}

		if(p_stop >= cur_pos)	/*get the exon*/
		{
			bsp = BioseqLockById(SeqLocId(slp));
			if(bsp)
			{
				is_new = (p_start == cur_pos);	/*start a new exon?*/
				cvp = MemNew(sizeof(CodonVector));
				if(cvp == NULL)
				{
					/* out of memory: stop here, keep what was built */
					BioseqUnlock(bsp);
					break;
				}
				cvp->sip = SeqIdDup(find_sip(bsp->id));
				cvp->strand = SeqLocStrand(slp);
				cvp->exonCount = exon;
				if(is_new)
				{
					if(frame_offset == 0)
						cvp->frame = 0;
					else
						cvp->frame = 3- (Uint1)frame_offset;
				}
				else
					cvp->frame = 0;
				if(cur_pos==0 && frame_offset > 0)	/*partial start codon*/
					cvp->aa_index = 0;
				else
					cvp->aa_index = 1;

				if(is_new)	/**special case of the first partial**/
					d_start = SeqLocStart(slp);
				else
				{
					if(frame_offset && p_start >0)
						++p_start;
					d_start = SeqLocStart(slp) + 3*(cur_pos - p_start) + frame_offset;
				}
				/**p_start is the start position of aa in the current Seq-loc
				cur_pos is the current aa that is in process. The offset will
				help to located the position on the DNA Seq-loc for translation
				d_start is the position of the starting DNA in the coordinates
				of DNA segment, used for mark the sequence
				**/

				seq_pos = d_start - SeqLocStart(slp);	/**the pos in spp**/
				if(SeqLocStrand(slp)== Seq_strand_minus)
					d_start = SeqLocStop(slp) - seq_pos;
				cvp->dna_pos = d_start;

				/*position in buffer; the difference is taken on the full
				Int4 values before it is narrowed*/
				n = (Int2)(cur_pos - aa_start + cvp->aa_index);
				for(i =0; i<3; ++i)
					make_empty(buf[i], (Int2)line_len);
				spp = SeqPortNewByLoc(slp, Seq_code_iupacna);
				SeqPortSeek(spp, seq_pos, SEEK_SET);

				/**store the partial codons**/
				if(is_new && frame_offset > 0)
				{
					/*the leading bases complete a codon started in the
					previous interval: they fill the LAST codon positions*/
					k = (Int2)frame_offset;
					while(k > 0)
					{
						residue = SeqPortGetResidue(spp);
						buf[3-k][n] = TO_LOWER(residue);
						--k;
					}
					++n;
					if(cur_pos!=0)
						++cur_pos;
				}

				/**load the codons**/
				k =0;
				while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF && cur_pos <= p_stop)
				{
					j= (Uint1)k%3;
					buf[j][n] = TO_LOWER(residue);
					if(j ==2)
					{	/**the last base**/
						++n;
						if(!prt_stop_codon|| !is_end)	/*for the last codon*/
						/**prt_end controls to print the whole loc**/
							++cur_pos;
					}
					++k;
				}	/**end of while**/
				SeqPortFree(spp);

				for(i =0; i<3; ++i)
					cvp->buf[i] = StringSave(buf[i]);
				ValNodeAddPointer(&head, 0, (Pointer)cvp);
				BioseqUnlock(bsp);
			}/*end of if(bsp)*/
		}/**end of if for matched intervals**/

		/*prepare for the next interval: a codon split across the
		boundary belongs to both intervals*/
		if(end_partial)
			p_start = p_stop;
		else
			p_start = p_stop +1;

		frame_offset = (cd_len - start_offset)%3;
		if(frame_offset >0)
			frame_offset = 3-frame_offset;
	}/**end of while(slp && !is_end) **/

	for(i=0; i<3; ++i)
		MemFree(buf[i]);
	MemFree(buf);

	return head;
}
/*
 * slpDust: turn a chain of dusted regions into a Seq-loc.
 *
 *   spp          not used by this function
 *   slp          Seq-loc to fill; a new node is allocated when it is NULL
 *   id           Seq-id duplicated into every interval that is created
 *   vnp          list receiving the intervals of a packed-int location
 *   reg          first dusted region; nreg regions are read through ->next
 *   nreg         number of regions; 0 leaves slp untouched
 *   loopDustMax  when it is 1 and there is exactly one region, a plain
 *                SEQLOC_INT is produced instead of a SEQLOC_PACKED_INT
 *
 * Returns slp (possibly newly allocated). On an allocation failure the
 * error is posted and slp is returned as it stands at that point.
 */
static SeqLocPtr slpDust (SeqPortPtr spp, SeqLocPtr slp, SeqIdPtr id,
                          ValNodePtr PNTR vnp, DREGION PNTR reg,
                          Int4 nreg, Int4 loopDustMax)
{
  SeqIntPtr interval;
  Int4      remaining;
  Boolean   single_int;

  /* nothing was dusted: nothing to point to */
  if (!nreg)
    return slp;

  /* loopDustMax == 1 forces PACKED_INT IN - PACKED_INT OUT as needed */
  single_int = (Boolean) (nreg == 1 && loopDustMax == 1);

  if (slp == NULL)
  {
    slp = ValNodeNew (NULL);
    if (slp == NULL)
    {
      ErrPostEx (SEV_ERROR, 6, 1, "val node new failed");
      ErrShow ();
      return slp;
    }
  }

  slp->choice = single_int ? SEQLOC_INT : SEQLOC_PACKED_INT;

  /* one interval per region, walking the region chain */
  for (remaining = nreg; remaining > 0; remaining--, reg = reg->next)
  {
    interval = SeqIntNew ();
    if (interval == NULL)
    {
      ErrPostEx (SEV_FATAL, 6, 2, "memory allocation error");
      ErrShow ();
      return slp;
    }
    interval->id = SeqIdDup (id);
    interval->from = reg->from;
    interval->to = reg->to;
    if (!single_int)
      ValNodeAddPointer (vnp, SEQLOC_INT, interval);
  }

  if (single_int)
    slp->data.ptrvalue = (Pointer) interval;  /* the only interval built */
  else
    slp->data.ptrvalue = *vnp;

  return slp;
}