Exemple #1
0
/* Builds one row of text values for every BioSource descriptor found on bsp.
 *
 * Each row is a ValNode list of strings: an accession-style ID first, then
 * (only when want_gi is set) the GI text, followed by whatever
 * CollectBioSourceValues produces for field_list.  The rows themselves are
 * returned as the data.ptrvalue members of the resulting ValNode list.
 *
 * Returns NULL when bsp is NULL or no source descriptor is found.
 */
static ValNodePtr CollectBioseqLineValues (BioseqPtr bsp, ValNodePtr field_list, Boolean want_gi)
{
  SeqMgrDescContext context;
  SeqDescrPtr       src_desc;
  SeqIdPtr          id, gi_id = NULL, acc_id = NULL;
  Char              acc_text[255], gi_text[255];
  ValNodePtr        rows = NULL, row;

  if (bsp == NULL) {
    return NULL;
  }

  /* A GenBank ID always wins; the other accession types are only taken
   * while no accession-style ID has been chosen yet.
   */
  for (id = bsp->id; id != NULL; id = id->next) {
    switch (id->choice) {
      case SEQID_GENBANK:
        acc_id = id;
        break;
      case SEQID_EMBL:
      case SEQID_SWISSPROT:
      case SEQID_DDBJ:
      case SEQID_PIR:
        if (acc_id == NULL) {
          acc_id = id;
        }
        break;
      case SEQID_GI:
        gi_id = id;
        break;
      default:
        break;
    }
  }

  acc_text[0] = 0;
  gi_text[0] = 0;
  if (acc_id == NULL && gi_id == NULL) {
    /* Neither kind of ID is present: fall back to the best available one. */
    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
  } else {
    if (acc_id != NULL) {
      SeqIdWrite (acc_id, acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
    }
    if (gi_id != NULL) {
      SeqIdWrite (gi_id, gi_text, PRINTID_REPORT, sizeof (gi_text) - 1);
    }
  }

  src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
  while (src_desc != NULL) {
    row = NULL;
    ValNodeAddPointer (&row, 0, StringSave (acc_text));
    if (want_gi) {
      ValNodeAddPointer (&row, 0, StringSave (gi_text));
    }
    ValNodeLink (&row, CollectBioSourceValues (src_desc->data.ptrvalue, field_list));
    ValNodeAddPointer (&rows, 0, row);

    src_desc = SeqMgrGetNextDescriptor (bsp, src_desc, Seq_descr_source, &context);
  }
  return rows;
}
Exemple #2
0
static Boolean
BL2SEQ_MakeSeqLoc(const BioseqPtr bsp1, const BioseqPtr bsp2, SeqLocPtr *slp1, SeqLocPtr *slp2, Uint1 strand_option)
{
       const char* k_delimiters = " ,;";
       CharPtr location;
       Int4 from, to;

       *slp1 = NULL;
       *slp2 = NULL;

       location = myargs[ARG_LOC1].strvalue;
       if (location) {
           from = atoi(StringTokMT(location, k_delimiters, &location)) - 1;
           to = atoi(location) - 1;

            from = MAX(from, 0);
            if (to < 0) 
                 to = bsp1->length - 1;
            to = MIN(to, bsp1->length - 1);
            if (from >= bsp1->length) {
                 ErrPostEx(SEV_FATAL, 1, 0, 
                           "Location outside of the first sequence range\n");
                 return FALSE;
            }
            *slp1 = SeqLocIntNew(from, to, strand_option,
                                 SeqIdFindBestAccession(bsp1->id));
        } else if (strand_option != Seq_strand_both) {
            *slp1 = SeqLocIntNew(0, bsp1->length-1, strand_option,
                                 SeqIdFindBestAccession(bsp1->id));
        } else
              ValNodeAddPointer(slp1, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBestAccession(bsp1->id)));

        location = myargs[ARG_LOC2].strvalue;
        if (location) {
            from = atoi(StringTokMT(location, k_delimiters, &location)) - 1;
            to = atoi(location) - 1;

            from = MAX(from, 0);
            if (to < 0) 
                to = bsp2->length - 1;
            to = MIN(to, bsp2->length - 1);
            if (from >= bsp2->length) {
               ErrPostEx(SEV_FATAL, 1, 0, 
                           "Location outside of the second sequence range\n");
               return FALSE;
            }
            *slp2 = SeqLocIntNew(from, to, Seq_strand_plus, SeqIdFindBestAccession(bsp2->id));
         } else
            ValNodeAddPointer(slp2, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBestAccession(bsp2->id)));

       return TRUE;
}
Exemple #3
0
/* Loads a list of field rules into the tag-list dialog.
 *
 * data is treated as a ValNode list whose data.ptrvalue members are passed to
 * TagStringFromFieldRule; every non-NULL tag string is appended to the
 * dialog's own list.  The previous contents are freed first, and the scroll
 * bar is resized and shown or hidden to match the new number of rows.
 */
static void CommentFieldsToDialog (DialoG d, Pointer data)
{
  TagListPtr tag_list = (TagListPtr) GetObjectExtra (d);
  ValNodePtr rule_node;
  CharPtr    tag_text;
  Int2       hidden_rows;

  if (tag_list == NULL) {
    return;
  }

  /* Drop the old contents before refilling. */
  tag_list->vnp = ValNodeFreeData (tag_list->vnp);
  SendMessageToDialog (tag_list->dialog, VIB_MSG_RESET);

  rule_node = (ValNodePtr) data;
  while (rule_node != NULL) {
    tag_text = TagStringFromFieldRule (rule_node->data.ptrvalue);
    if (tag_text != NULL) {
      ValNodeAddPointer (&(tag_list->vnp), 0, tag_text);
    }
    rule_node = rule_node->next;
  }

  SendMessageToDialog (tag_list->dialog, VIB_MSG_REDRAW);

  /* Number of entries that do not fit in the visible rows, never negative. */
  hidden_rows = (Int2) (ValNodeLen (tag_list->vnp) - tag_list->rows);
  if (hidden_rows < 0) {
    hidden_rows = 0;
  }
  tag_list->max = hidden_rows;

  CorrectBarMax (tag_list->bar, tag_list->max);
  CorrectBarPage (tag_list->bar, tag_list->rows - 1, tag_list->rows - 1);
  if (tag_list->max > 0) {
    SafeShow (tag_list->bar);
  } else {
    SafeHide (tag_list->bar);
  }
}
Exemple #4
0
/* Returns a ValNode list of strings describing one BioSource: the taxonomy
 * ID (as decimal text) first, then one entry per item of field_list holding
 * the result of GetSourceQualFromBioSource for that field.  An entry is added
 * for every field even when the lookup yields NULL, so positions in the
 * result stay aligned with field_list.
 */
static ValNodePtr CollectBioSourceValues (BioSourcePtr biop, ValNodePtr field_list)
{
  ValNodePtr values = NULL;
  ValNodePtr field;
  Char       taxid_text[30];

  sprintf (taxid_text, "%d", GetTaxIdFromOrgRef(biop->org));
  ValNodeAddPointer (&values, 0, StringSave (taxid_text));

  for (field = field_list; field != NULL; field = field->next) {
    ValNodeAddPointer (&values, 0,
                       GetSourceQualFromBioSource (biop, field->data.ptrvalue, NULL));
  }
  return values;
}
Exemple #5
0
/* Gather callback that collects quality-score graphs.
 *
 * For every Seq-graph titled "Phrap Quality" or "Gap4" (case-insensitive)
 * whose flags[2] is 3 and whose location is on the same Bioseq as the
 * location stored in the user data, a GphItem recording the graph and its
 * left/right offsets is appended to the user data's list.
 *
 * Always returns TRUE, including when an item is skipped or cannot be
 * allocated.
 */
static Boolean GetGraphsProc (GatherObjectPtr gop)

{
  GphGetPtr    collector;
  SeqGraphPtr  graph;
  GphItemPtr   item;

  if (gop == NULL) return TRUE;
  if (gop->itemtype != OBJ_SEQGRAPH) return TRUE;

  collector = (GphGetPtr) gop->userdata;
  graph = (SeqGraphPtr) gop->dataptr;
  if (collector == NULL || graph == NULL) return TRUE;

  /* only phrap or gap4 currently allowed */
  if (StringICmp (graph->title, "Phrap Quality") != 0 &&
      StringICmp (graph->title, "Gap4") != 0) {
    return TRUE;
  }

  /* data type must be bytes */
  if (graph->flags[2] != 3) return TRUE;

  if (! SeqIdForSameBioseq (SeqLocId (graph->loc), SeqLocId (collector->slp))) {
    return TRUE;
  }

  item = (GphItemPtr) MemNew (sizeof (GphItem));
  if (item != NULL) {
    item->sgp = graph;
    item->left = GetOffsetInBioseq (graph->loc, collector->bsp, SEQLOC_LEFT_END);
    item->right = GetOffsetInBioseq (graph->loc, collector->bsp, SEQLOC_RIGHT_END);
    ValNodeAddPointer (&(collector->vnp), 0, (Pointer) item);
  }
  return TRUE;
}
Exemple #6
0
/* Counts occurrences of a string in sdp->lcList.
 *
 * If an entry whose text equals str already exists, its count is incremented.
 * Otherwise a new StringSet entry is appended with count 1, the text copied
 * from str, and firstID copied from sdp->buf.
 *
 * Nothing happens when sdp is NULL, str has no text, or allocation fails.
 */
static void RecordThesis (ScanDataPtr sdp, CharPtr str)

{
  StringSetPtr  ssp;
  ValNodePtr    vnp;

  if (sdp == NULL || StringHasNoText (str)) return;

  for (vnp = sdp->lcList; vnp != NULL; vnp = vnp->next) {
    ssp = (StringSetPtr) vnp->data.ptrvalue;
    if (ssp == NULL) continue;
    if (StringCmp (ssp->text, str) == 0) {
      (ssp->count)++;
      return;
    }
  }

  ssp = MemNew (sizeof (StringSet));
  if (ssp == NULL) return;
  /* Bounded copy, like the one used for text below: sdp->buf is not
     guaranteed here to fit into firstID, and an unbounded StringCpy would
     write past the end of the field.
     NOTE(review): assumes firstID is an inline character array, as text is. */
  StringNCpy_0 (ssp->firstID, sdp->buf, sizeof (ssp->firstID));
  StringNCpy_0 (ssp->text, str, sizeof (ssp->text));
  ssp->count = 1;

  ValNodeAddPointer (&(sdp->lcList), 0, (Pointer) ssp);
}
Exemple #7
0
/* Bioseq callback that appends one AlphaProtData record per protein Bioseq to
 * the ValNode list addressed by userdata.
 *
 * The record's prot_name is taken from the first name of the first protein
 * feature found on the Bioseq, or from the feature context label when that
 * feature carries no name; it stays NULL when no usable protein feature is
 * found.  The record is added to the list in every case.
 */
static void GetProtListCallback (BioseqPtr bsp, Pointer userdata)
{
  SeqMgrFeatContext context;
  ValNodePtr PNTR   prot_list;
  AlphaProtPtr      entry;
  SeqFeatPtr        prot_feat;
  ProtRefPtr        prot_ref;

  if (bsp == NULL || userdata == NULL) return;
  if (! ISA_aa (bsp->mol)) return;

  prot_list = (ValNodePtr PNTR) userdata;
  entry = (AlphaProtPtr) MemNew (sizeof (AlphaProtData));
  if (entry == NULL) return;

  entry->bsp = bsp;
  entry->prot_name = NULL;

  prot_feat = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PROT, 0, &context);
  if (prot_feat != NULL && prot_feat->data.value.ptrvalue != NULL) 
  {
    prot_ref = (ProtRefPtr) prot_feat->data.value.ptrvalue;
    entry->prot_name = (prot_ref->name != NULL)
                         ? StringSave (prot_ref->name->data.ptrvalue)
                         : StringSave (context.label);
  }
  ValNodeAddPointer (prot_list, 0, entry);
}
Exemple #8
0
/* Copies a list of bond nodes so that the copies refer to atoms of a new
 * graph list.
 *
 * For every node of pvnmb a new node and a new bond record are created.  The
 * bond's two atoms are located in pdnmgNewHead by looking up the graph with
 * the same choice value as the original atom's parent graph, then finding the
 * atom with the same name in that graph.  The new bond is also appended to
 * the bond lists of both atoms it connects.
 *
 * parentPtr is stored as the parent of every new bond record.
 * Returns the head of the new list, or NULL when pvnmb is empty.
 */
PVNMB CpyPVNMBList(PVNMB pvnmb, PDNMG pdnmgNewHead, Pointer parentPtr) {
	PVNMB srcNode, copyNode, copyHead = NULL;
	PVNMB *tailLink = &copyHead;	/* where the next copied node gets attached */
	PMBD srcBond, copyBond;
	PMGD srcGraph;
	PDNMG fromGraphNode, toGraphNode;

	for (srcNode = pvnmb; srcNode != NULL; srcNode = srcNode->next) {
		srcBond = (PMBD)(srcNode->data.ptrvalue);

		copyNode = ValNodeNew(NULL);
		copyNode->choice = srcNode->choice;

		copyBond = NewMBD();
		copyBond->pfbParent = (PFB)parentPtr;
		copyBond->pvnmbLink = copyNode;
		copyBond->bWhat = srcBond->bWhat;

		/* "From" atom: same graph index, same atom name, in the new graphs */
		srcGraph = (PMGD)(srcBond->pmadFrom->pfbParent);
		fromGraphNode = GetPDNMGFromIndex(pdnmgNewHead, ((PDNMG)(srcGraph->pdnmgLink))->choice);
		copyBond->pmadFrom = GetPMADFromName(((PMGD)(fromGraphNode->data.ptrvalue))->pvnmaAHead, srcBond->pmadFrom->pcAName);

		/* "To" atom, located the same way */
		srcGraph = (PMGD)(srcBond->pmadTo->pfbParent);
		toGraphNode = GetPDNMGFromIndex(pdnmgNewHead, ((PDNMG)(srcGraph->pdnmgLink))->choice);
		copyBond->pmadTo = GetPMADFromName(((PMGD)(toGraphNode->data.ptrvalue))->pvnmaAHead, srcBond->pmadTo->pcAName);

		/* Register the new bond with both of its atoms */
		ValNodeAddPointer(&copyBond->pmadFrom->pvnBonds, 0, (VoidPtr)copyBond);
		ValNodeAddPointer(&copyBond->pmadTo->pvnBonds, 0, (VoidPtr)copyBond);

		copyNode->data.ptrvalue = (VoidPtr)copyBond;

		/* Append to the new list, keeping the original order */
		*tailLink = copyNode;
		tailLink = &copyNode->next;
	}

	return copyHead;
}
Exemple #9
0
/* Gather callback: appends the current item to the ValNode list addressed by
 * gcp->userdata, using the item's type as the node choice.  Does nothing when
 * no list was supplied.  Always returns TRUE.
 */
static Boolean AddToSaveList (GatherContextPtr gcp)

{
  ValNodePtr PNTR save_list = (ValNodePtr PNTR) gcp->userdata;

  if (save_list != NULL) {
    ValNodeAddPointer (save_list, gcp->thistype, gcp->thisitem);
  }
  return TRUE;
}
Exemple #10
0
/* Converts a comma-separated list of source qualifier names into a field
 * list.
 *
 * Every name recognized by GetSourceQualTypeByName yields one node of choice
 * FieldType_source_qual that points to a text-qualifier node holding the
 * qualifier value.  An error message is shown for each unrecognized name and
 * that name is skipped.  Returns NULL when str has no text.
 */
static ValNodePtr FieldsFromFieldListString (CharPtr str)
{
  ValNodePtr fields = NULL, qual_node;
  CharPtr    buffer, token, separator;
  Int4       qual_type;

  if (StringHasNoText (str)) {
    return NULL;
  }

  /* Work on a private copy so tokens can be terminated in place. */
  buffer = StringSave (str);
  token = buffer;
  do {
    separator = StringChr (token, ',');
    if (separator != NULL) {
      *separator = 0;
    }

    qual_type = GetSourceQualTypeByName(token);
    if (qual_type < 0) {
      Message (MSG_ERROR, "%s is not a recognized source field name", token);
    } else {
      qual_node = ValNodeNew (NULL);
      qual_node->choice = SourceQualChoice_textqual;
      qual_node->data.intvalue = qual_type;
      ValNodeAddPointer (&fields, FieldType_source_qual, qual_node);
    }

    if (separator != NULL) {
      *separator = ',';
      token = separator + 1;
    }
  } while (separator != NULL);

  buffer = MemFree (buffer);
  return fields;
}
Exemple #11
0
/* Bioseq callback: appends bsp to the ValNode list addressed by userdata when
 * the Bioseq has a raw representation and is not an amino acid sequence.
 */
static void CollectBioseqsForConversion (BioseqPtr bsp, Pointer userdata)
{
  if (bsp == NULL) return;
  if (bsp->repr != Seq_repr_raw) return;
  if (ISA_aa (bsp->mol)) return;
  if (userdata == NULL) return;

  ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, bsp);
}
Exemple #12
0
/* Reads an ASN.1 SET/SEQUENCE of elements into a ValNode list.
 *
 * Each element is read with readfunc and stored as the data.ptrvalue of a new
 * node (choice 0), in input order.
 *
 * aip       stream to read from
 * amp       module passed through to AsnReadId
 * orig      type of the enclosing structure; reading stops when AsnReadId
 *           returns it again
 * isError   optional; set to FALSE on entry and to TRUE if reading fails
 * readfunc  reads one element
 * freefunc  frees one element; used to release a partial list on failure
 *
 * Returns the list head, or NULL on failure or when aip, readfunc or
 * freefunc is NULL (in the latter case *isError is left FALSE).
 */
NLM_EXTERN ValNodePtr LIBCALL AsnGenericValNodeSetAsnRead (
  AsnIoPtr aip,
  AsnModulePtr amp,
  AsnTypePtr orig,
  BoolPtr isError,
  AsnReadFunc readfunc,
  AsnOptFreeFunc freefunc
)

{
  AsnTypePtr  atp = orig, start_atp;
  DataVal     av;
  Pointer     val;
  ValNodePtr  vnp, head = NULL, last = NULL;

  if (isError != NULL) {
    *isError = FALSE;
  }
  if (aip == NULL || readfunc == NULL || freefunc == NULL) return NULL;

  if (AsnReadVal (aip, atp, &av) <= 0) goto erret; /* read START STRUCT */

  start_atp = orig;
  atp = start_atp;

  while ((atp = AsnReadId (aip, amp, atp)) != start_atp) {
    /* A NULL id means the read failed; without this check the loop would
       keep going and hand NULL to readfunc. */
    if (atp == NULL) goto erret;
    val = (Pointer) readfunc (aip, atp);
    if (val == NULL) goto erret;
    /* Appending at the current tail keeps each insertion constant time. */
    vnp = ValNodeAddPointer (&last, 0, val);
    if (head == NULL) {
      head = vnp;
    }
    last = vnp;
  }

  if (AsnReadVal (aip, atp, &av) <= 0) goto erret; /* read END STRUCT */

ret:
  return (Pointer) head;

erret:
  head = AsnGenericValNodeSetFree (head, freefunc);

  if (isError != NULL) {
    *isError = TRUE;
  }

  goto ret;
}
Exemple #13
0
/* Builds a byte-valued "Phrap Quality" Seq-graph from the quality scores of a
 * contig's consensus sequence.
 *
 * The scores are written to a byte store one byte each, the observed minimum
 * and maximum are recorded on the graph, and the graph location is an
 * interval covering the whole of bsp.  Returns NULL when contig or bsp is
 * NULL or the contig has no quality scores.
 */
static SeqGraphPtr SeqGraphFromContig (TContigPtr contig, BioseqPtr bsp)
{
  SeqGraphPtr       graph;
  ByteStorePtr      store;
  SeqIntPtr         interval;
  Uint1             chunk[128]; 
  Int4              score_idx, chunk_len;
  Int2              score;
  Int2              highest = INT2_MIN;
  Int2              lowest = INT2_MAX;

  if (contig == NULL || bsp == NULL) {
    return NULL;
  }
  if (contig->num_qual_scores == 0 || contig->qual_scores == NULL) {
    return NULL;
  }

  graph = SeqGraphNew ();
  store = BSNew (1000);

  /* Copy the scores in chunks, tracking the range on the way. */
  for (score_idx = 0; score_idx < contig->num_qual_scores; ) {
    for (chunk_len = 0;
         chunk_len < sizeof (chunk) && score_idx < contig->num_qual_scores;
         chunk_len++, score_idx++) {
      score = (Int2) contig->qual_scores[score_idx];
      if (score > highest) {
        highest = score;
      }
      if (score < lowest) {
        lowest = score;
      }
      chunk[chunk_len] = (Uint1) contig->qual_scores[score_idx];
    }
    BSWrite (store, (Pointer) chunk, (Int4) chunk_len);
  }

  /* The value count is taken before the trailing BSPutByte call. */
  graph->numval = BSLen (store);
  BSPutByte (store, EOF);

  graph->title = StringSave ("Phrap Quality");
  graph->compr = 1;
  graph->flags [0] = 0;
  graph->flags [1] = 0;
  graph->flags [2] = 3;
  graph->axis.intvalue = 0;
  graph->min.intvalue = lowest;
  graph->max.intvalue = highest;
  graph->a = 1.0;
  graph->b = 0;
  graph->values = (Pointer) store;

  interval = SeqIntNew ();
  interval->from = 0;
  interval->to = bsp->length - 1;
  interval->id = SeqIdDup (bsp->id);
  ValNodeAddPointer (&(graph->loc), SEQLOC_INT, (Pointer) interval);

  return graph;
}
Exemple #14
0
/** Appends one interval Seq-loc per PHI BLAST pattern occurrence to a list,
 * so the occurrences can be used as features on the query Seq-loc.
 * Each interval spans [offset, offset + length - 1] of its occurrence and
 * carries a copy of the query's Seq-id.
 * @param pattern_info Pattern occurrences in the query. [in]
 * @param query_seqloc Query SeqLoc, used only to obtain the Seq-id. [in]
 * @param seed_seqloc_ptr List that the new SeqLoc's are appended to. [in|out]
 * @return Always 0.
 */
static Int2
s_PHIBlastCreateSeedSeqLoc(const SPHIQueryInfo* pattern_info, 
                           SeqLoc* query_seqloc, 
                           SeqLoc** seed_seqloc_ptr)
{
    Int4 occurrence_idx = 0;

    while (occurrence_idx < pattern_info->num_patterns) {
        const SPHIPatternInfo* occurrence = 
            pattern_info->occurrences + occurrence_idx;
        SeqInt* interval = SeqIntNew();

        interval->id = SeqIdDup(SeqLocId(query_seqloc));
        interval->from = occurrence->offset;
        interval->to = occurrence->offset + occurrence->length - 1;
        ValNodeAddPointer(seed_seqloc_ptr, SEQLOC_INT, interval);

        ++occurrence_idx;
    }
    return 0;
}
Exemple #15
0
/* Reads the tag-list dialog back into a list of field rules.
 *
 * Every entry of the dialog's list is passed to FieldRuleFromTagString; each
 * non-NULL rule is appended to the result.  Returns the resulting ValNode
 * list, or NULL when the dialog has no tag list or yields no rules.
 */
static Pointer CommentFieldsFromDialog (DialoG d)
{
  TagListPtr   tag_list = (TagListPtr) GetObjectExtra (d);
  ValNodePtr   rules = NULL, tag_node;
  FieldRulePtr parsed;

  if (tag_list == NULL) {
    return NULL;
  }

  tag_node = tag_list->vnp;
  while (tag_node != NULL) {
    parsed = FieldRuleFromTagString (tag_node->data.ptrvalue);
    if (parsed != NULL) {
      ValNodeAddPointer (&rules, 0, parsed);
    }
    tag_node = tag_node->next;
  }
  return rules;
}
Exemple #16
0
/** Splits the PHI BLAST results corresponding to different pattern occurrences
 * in query, converts them to Seq-aligns and puts in a list of ValNodes.
 * One ValNode is added for every pattern occurrence that has results; its
 * choice is the pattern index and its pointer is the Seq-align (NULL if the
 * conversion produced none).
 * @param results All results from different pattern occurrences 
 *                mixed together. [in]
 * @param pattern_info Query pattern occurrences information [in]
 * @param program Program type (phiblastp or phiblastn) [in]
 * @param query_seqloc List of query locations [in]
 * @param rdfp blast db object [in]
 * @param phivnps List of ValNodes containing Seq-aligns. [out]
 * @return 0 on success; otherwise the status of the first conversion that
 *         failed. All occurrences are still processed after a failure.
 */
static Int2
s_PHIResultsToSeqAlign(const BlastHSPResults* results, 
                       const SPHIQueryInfo* pattern_info,
                       EBlastProgramType program, SeqLoc* query_seqloc, 
                       ReadDBFILE* rdfp, ValNode* *phivnps)
{
    Int2 status = 0;
    /* Split results into an array of BlastHSPResults structures corresponding
       to different pattern occurrences. */
    BlastHSPResults* *phi_results = 
        PHIBlast_HSPResultsSplit(results, pattern_info);

    if (phi_results) {
        int pattern_index; /* Index over pattern occurrences. */

        for (pattern_index = 0; pattern_index < pattern_info->num_patterns;
             ++pattern_index) {
            SBlastSeqalignArray* seqalign_arr = NULL;
            SeqAlign* seqalign = NULL;
            BlastHSPResults* one_phi_results = phi_results[pattern_index];

            if (one_phi_results) {
                Int2 one_status;
                /* PHI BLAST is always gapped, and never out-of-frame, hence
                 * TRUE and FALSE values for the respective booleans in the next
                 * call.
                 */
                one_status =
                    BLAST_ResultsToSeqAlign(program, &one_phi_results, 
                                            query_seqloc, rdfp, NULL, TRUE, 
                                            FALSE, &seqalign_arr);
                /* Remember the first failure; a later successful conversion
                 * must not reset the overall status to 0.
                 */
                if (status == 0)
                    status = one_status;
                if (seqalign_arr)
                {
                    /* Take ownership of the first Seq-align before the
                     * array wrapper is freed.
                     */
                    seqalign = seqalign_arr->array[0];
                    seqalign_arr->array[0] = NULL;
                    SBlastSeqalignArrayFree(seqalign_arr);
                }
                ValNodeAddPointer(phivnps, pattern_index, seqalign);
            }
        }
        sfree(phi_results);
    }
    return status;
}
Exemple #17
0
/* Legacy driver: compares two sequences with the older BLAST engine.
 *
 * Steps, all driven by the global myargs array:
 *   1. validate the program name and open the output file;
 *   2. create the options block and fetch the query and subject sequences;
 *   3. unless sequences came from an Entrez lookup, build stand-in Bioseqs;
 *   4. copy command-line settings into the options block;
 *   5. run the search on whole sequences or on the requested locations;
 *   6. pick the Karlin parameters, matrix and masking info out of the
 *      "other returns" list and print the results (text or tabular),
 *      optionally writing the alignment as a Seq-annot;
 *   7. release what was allocated.
 *
 * Returns 0 on success and 1 on the failures detected below.
 *
 * NOTE(review): the early "return" statements do not release resources that
 * were already acquired at that point (program_name, outfp, options, ...).
 */
Int2 Main_old (void)
 
{
	
	AsnIoPtr aip;
	BioseqPtr fake_bsp = NULL, fake_subject_bsp = NULL, query_bsp = NULL, 
                  subject_bsp = NULL;
        BioseqPtr bsp1, bsp2;
	BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
	BLAST_OptionsBlkPtr options=NULL;
	Boolean seq1_is_na, seq2_is_na;
	CharPtr params_buffer=NULL;
        DbtagPtr        dbtagptr;
	Uint1 align_type;
	Uint4 align_options;
	SeqAlignPtr  seqalign;
        SeqAnnotPtr seqannot;
	SeqEntryPtr sep = NULL, sep1 = NULL;
	CharPtr program_name, blast_outputfile;
	FILE *outfp;
	ValNodePtr  mask_loc, mask_loc_start, vnp, other_returns=NULL, error_returns=NULL;
	BLAST_MatrixPtr matrix;
        Int4Ptr PNTR txmatrix;
        /* No results callback is installed; NULL is passed to the search. */
        int (LIBCALLBACK *handle_results)PROTO((VoidPtr search)) = NULL;
        Boolean entrez_lookup = FALSE;
        Boolean html, seqannot_output, believe_query;
        Uint1 tabular_output;
        Boolean gapped_calculation;

        entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
        html = (Boolean) myargs[ARG_HTML].intvalue;
        seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);

        blast_outputfile = myargs [ARG_OUT].strvalue;

	/* Only the five basic program names are accepted. */
	program_name = StringSave(myargs[ARG_PROGRAM].strvalue);
	if (StringCmp(program_name, "blastn") && 
	    StringCmp(program_name, "blastp") && 
	    StringCmp(program_name, "blastx") && 
	    StringCmp(program_name, "tblastn") && 
	    StringCmp(program_name, "tblastx")) {
		ErrPostEx(SEV_FATAL, 1, 0, "Program name must be blastn, blastp, blastx, tblastn or tblastx\n");
		return (1);
	}
	   
	align_type = BlastGetTypes(program_name, &seq1_is_na, &seq2_is_na);

	if ((outfp = FileOpen(blast_outputfile, "w")) == NULL)
	{
		ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
		return (1);
	}

        gapped_calculation = (Boolean) myargs[ARG_GAPPED].intvalue;
        /* Sequence IDs are taken at face value when the alignment is written
           as ASN.1 or when the sequences were looked up by accession. */
        believe_query = (seqannot_output || entrez_lookup); 

        /* NOTE(review): the result of BLASTOptionNewEx is dereferenced below
           without a NULL check. */
        options = BLASTOptionNewEx(program_name, gapped_calculation,
                                   (Boolean) myargs[ARG_USEMEGABLAST].intvalue);

        if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp,
                                &sep, &sep1, &(options->query_lcase_mask), 
                                believe_query) == FALSE)
        {
            ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
            return (1);
        }

        if (!entrez_lookup) {
            if (!believe_query)
                fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
            
            /* The stand-in subject shares descr and seq_data with
               subject_bsp (pointers are copied, not the data) and gets a
               general "BL_ORD_ID" id whose tag string is the subject title.
               It is not freed in this function. */
            fake_subject_bsp = BioseqNew();
            fake_subject_bsp->descr = subject_bsp->descr;
            fake_subject_bsp->repr = subject_bsp->repr;
            fake_subject_bsp->mol = subject_bsp->mol;
            fake_subject_bsp->length = subject_bsp->length;
            fake_subject_bsp->seq_data = subject_bsp->seq_data;
            fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
            dbtagptr = DbtagNew();
            dbtagptr->db = StringSave("BL_ORD_ID");
            dbtagptr->tag = ObjectIdNew();

            if (BioseqGetTitle(subject_bsp) != NULL)
              dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
            else
              dbtagptr->tag->str = StringSave("No definition line found");

            ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
            bsp1 = (believe_query ? query_bsp : fake_bsp);
            bsp2 = fake_subject_bsp;
        } else {
            bsp1 = query_bsp;
            bsp2 = subject_bsp;
        }

        /* Any non-zero format value selects tabular output below. */
        tabular_output = (Uint1) myargs[ARG_FORMAT].intvalue; 


    	if (myargs[ARG_SEARCHSP].floatvalue)
           options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;


	options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
	options->expect_value  = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;

        /* Match/mismatch scores are only applied for blastn. */
        if (StringICmp("blastn", program_name) == 0)
        {
                options->penalty = myargs[ARG_MISMATCH].intvalue;
                options->reward = myargs[ARG_MATCH].intvalue;
        }

	options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;

	options->discontinuous = FALSE;

        /* A value of 0 leaves the option at whatever the options block
           already holds. */
        if (myargs[ARG_XDROP].intvalue != 0)
	{
               options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
	}
        if (myargs[ARG_WORDSIZE].intvalue != 0)
               options->wordsize = (Int2) myargs[ARG_WORDSIZE].intvalue;

	if (options->is_megablast_search) {
	   options->cutoff_s2 = options->wordsize*options->reward;
        }
	/* Release the current matrix name before the gap parameters are set
	   from the requested matrix. */
	options->matrix = MemFree(options->matrix);
        BLASTOptionSetGapParams(options, myargs[ARG_MATRIX].strvalue, 0, 0); 

        /* -1 means "not given" for the gap costs. */
        if (myargs[ARG_GAPOPEN].intvalue != -1)
              options->gap_open = myargs[ARG_GAPOPEN].intvalue;
        if (myargs[ARG_GAPEXT].intvalue != -1)
               options->gap_extend = myargs[ARG_GAPEXT].intvalue;

	options->strand_option = myargs[ARG_STRAND].intvalue;

        /* Input longest intron length is in nucleotide scale; in the lower 
           level code it will be used in protein scale */
        if (myargs[ARG_INTRON].intvalue > 0) 
           options->longest_intron = myargs[ARG_INTRON].intvalue;


        /* Search whole sequences unless at least one location was given. */
        if (!myargs[ARG_LOC1].strvalue && !myargs[ARG_LOC2].strvalue) {
           seqalign = BlastTwoSequencesWithCallback(bsp1, bsp2, program_name, 
              options, &other_returns, &error_returns, handle_results);
        } else {
            SeqLocPtr slp1=NULL, slp2=NULL;
            if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, options->strand_option) == FALSE)
                return 1;
           seqalign = BlastTwoSequencesByLocWithCallback(slp1, slp2, program_name, options, &other_returns, &error_returns, handle_results, NULL);
           SeqLocFree(slp1);
           SeqLocFree(slp2);
        }

        /* Report and release any error messages from the search. */
        if (error_returns) {
           BlastErrorPrint(error_returns);
           for (vnp = error_returns; vnp; vnp = vnp->next) {
              BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
           }
           ValNodeFree(error_returns);
        }
       
        /* Sort the "other returns" by their choice value.  Pointers are
           borrowed from the list nodes here and freed individually further
           down; the list nodes themselves are freed with ValNodeFree. */
        ka_params = NULL;
        ka_params_gap = NULL;
        params_buffer = NULL;
        mask_loc = NULL;
        matrix = NULL;
        txmatrix = NULL;
        for (vnp=other_returns; vnp; vnp = vnp->next) {
           switch (vnp->choice) {
           case TXKABLK_NOGAP:
              ka_params = vnp->data.ptrvalue;
              break;
           case TXKABLK_GAP:
              ka_params_gap = vnp->data.ptrvalue;
              break;
           case TXPARAMETERS:
              params_buffer = vnp->data.ptrvalue;
              break;
           case TXMATRIX:
              matrix = vnp->data.ptrvalue;
              if (matrix && !tabular_output)
                 txmatrix = BlastMatrixToTxMatrix(matrix);
              break;
           case SEQLOC_MASKING_NOTSET:
           case SEQLOC_MASKING_PLUS1:
           case SEQLOC_MASKING_PLUS2:
           case SEQLOC_MASKING_PLUS3:
           case SEQLOC_MASKING_MINUS1:
           case SEQLOC_MASKING_MINUS2:
           case SEQLOC_MASKING_MINUS3:
              ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
              break;
           default:
              break;
           }
        }	
        /* The Seq-annot wrapper is needed for text output and for ASN.1
           output. */
        if (!tabular_output || seqannot_output) {
           align_options = 0;
           align_options += TXALIGN_MATRIX_VAL;
           align_options += TXALIGN_SHOW_QS;
           align_options += TXALIGN_COMPRESS;
           align_options += TXALIGN_END_NUM;
           if (StringICmp("blastx", program_name) == 0) {
              align_options += TXALIGN_BLASTX_SPECIAL;
           }
           
           if (html)
              align_options += TXALIGN_HTML;

           seqannot = SeqAnnotNew();
           seqannot->type = 2;
           AddAlignInfoToSeqAnnot(seqannot, align_type);
           seqannot->data = seqalign;
           aip = NULL;
           if (seqannot_output)
              aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w");
           
           if (aip && seqannot) {
              SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
              AsnIoReset(aip);
              aip = AsnIoClose(aip);
           }
        }
        /* NOTE(review): when tabular output and ASN.1 output are both
           requested, seqannot is created above but only freed in the text
           branch below, while seqalign (also stored in seqannot->data) is
           freed in the tabular branch. */
        if (!tabular_output) {    
           AcknowledgeBlastQuery(query_bsp, 70, outfp, believe_query, html);
           ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, txmatrix, mask_loc, FormatScoreFunc);
           
           seqannot = SeqAnnotFree(seqannot);
           if (txmatrix)
              txmatrix = TxMatrixDestruct(txmatrix);
           init_buff_ex(85);
        
           if (ka_params) {
              PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
           }
        
           if (ka_params_gap) {
              PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
           }
        
           PrintTildeSepLines(params_buffer, 70, outfp);
           free_buff();
        } else {
           PrintTabularOutputHeader(NULL, query_bsp, NULL, 
              program_name, 0, believe_query, outfp);

           BlastPrintTabulatedResults(seqalign, query_bsp, NULL, 
              1, program_name, !gapped_calculation,
              believe_query, 0, 0, outfp, FALSE);
           SeqAlignSetFree(seqalign);
        }

        /* Cleanup of everything picked out of other_returns. */
        matrix = BLAST_MatrixDestruct(matrix);
        MemFree(ka_params);
        MemFree(ka_params_gap);
        MemFree(params_buffer);
    
        /* Free the masking Seq-locs first, then the list nodes holding them. */
        mask_loc_start = mask_loc;
        while (mask_loc) {
           SeqLocSetFree(mask_loc->data.ptrvalue);
           mask_loc = mask_loc->next;
        }
        ValNodeFree(mask_loc_start);
        
        fake_bsp = BlastDeleteFakeBioseq(fake_bsp);

        other_returns = ValNodeFree(other_returns);
    options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask);
	options = BLASTOptionDelete(options);
	MemFree(program_name);
	FileClose(outfp);

        /* Sequences fetched by lookup are freed as Bioseqs; otherwise the
           Seq-entries are freed. */
        if (entrez_lookup) {
           BioseqFree(query_bsp);
           BioseqFree(subject_bsp);
        } else {
           SeqEntryFree(sep);
           SeqEntryFree(sep1);
        }
	return 0;
}
Exemple #18
0
Int2 Main_new(void)

{
        BioseqPtr query_bsp=NULL, subject_bsp=NULL;
        BioseqPtr bsp1=NULL, bsp2=NULL;
        BioseqPtr fake_bsp=NULL, fake_subject_bsp=NULL;
        BlastFormattingInfo* format_info = NULL;
        BLAST_SummaryOptions* options=NULL;
        Blast_SummaryReturn* extra_returns = Blast_SummaryReturnNew();
        Boolean believe_query= FALSE;
        Boolean seq1_is_na, seq2_is_na;  /* seq1/2 is DNA if TRUE. */
        Boolean seqannot_output;   /* SeqAlign will be output. */
        Boolean entrez_lookup;     /* QUery/subject fetched from Entrez. */
        Boolean mask_at_hash=FALSE;  /* masking only on lookup table if TRUE. */
        DbtagPtr        dbtagptr;
        EBlastProgramType program_number;
        Int2 status; /* return value */
        EAlignView align_view = eAlignViewPairwise; /* Used for formatting */
        SeqAlignPtr seqalign=NULL;
        SeqEntryPtr sep=NULL, sep1=NULL;
        SeqLocPtr slp1, slp2;   /* Used for actual search. */
        SeqLocPtr filter_loc=NULL;  /* Location of regions filtered (returned by engine) */
        SeqLocPtr lcase_mask=NULL;    /* For lower-case masking info from query FASTA. */
        SeqLoc* repeat_mask = NULL; /* Repeat mask locations */
        Uint1 strand_option = 0; /* FIXME */
        SBlastOptions* search_options = NULL; /* Needed for formatting. */
        SBlastSeqalignArray* seqalign_arr = NULL;
        GeneticCodeSingletonInit();
        
        strand_option = (Uint1) myargs[ARG_STRAND].intvalue;

        entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
        seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);
        believe_query = (seqannot_output || entrez_lookup); 
        /* Non-zero value for -m option means tabular output. */
        if (myargs[ARG_FORMAT].intvalue != 0)
           align_view = eAlignViewTabularWithComments; 

        BlastProgram2Number(myargs[ARG_PROGRAM].strvalue, &program_number);

        seq1_is_na = (program_number == eBlastTypeBlastn ||
                  program_number == eBlastTypeBlastx ||
                  program_number == eBlastTypeRpsTblastn ||
                  program_number == eBlastTypeTblastx);

        seq2_is_na = (program_number == eBlastTypeBlastn ||
               program_number == eBlastTypeTblastn ||
               program_number == eBlastTypeTblastx);

        if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp,
                                &sep, &sep1, &lcase_mask, believe_query) 
            == FALSE)
        {
                ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
                return (1);
        }

        if (!entrez_lookup) {
            if (!believe_query)
                fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
            
            fake_subject_bsp = BioseqNew();
            fake_subject_bsp->descr = subject_bsp->descr;
            fake_subject_bsp->repr = subject_bsp->repr;
            fake_subject_bsp->mol = subject_bsp->mol;
            fake_subject_bsp->length = subject_bsp->length;
            fake_subject_bsp->seq_data = subject_bsp->seq_data;
            fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
            dbtagptr = DbtagNew();
            dbtagptr->db = StringSave("BL_ORD_ID");
            dbtagptr->tag = ObjectIdNew();

            if (BioseqGetTitle(subject_bsp) != NULL)
              dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
            else
              dbtagptr->tag->str = StringSave("No definition line found");

            ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
            bsp1 = (believe_query ? query_bsp : fake_bsp);
            bsp2 = fake_subject_bsp;
        } else { /* Query and subject Bioseqs are already "fake". */
            bsp1 = query_bsp;
            bsp2 = subject_bsp;
        }

        if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, strand_option) == FALSE)
                return 1;

        if (Bl2SEQ_SummaryOptionsSet(&options, program_number) == FALSE)
                return 1;

        /* Find repeat mask, if necessary */
        if ((status = Blast_FindRepeatFilterSeqLoc(slp1, myargs[ARG_FILTER].strvalue,
                                &repeat_mask, &extra_returns->error)) != 0)
        {
            if (extra_returns && extra_returns->error)
            {
                   ErrSev max_sev = SBlastMessageErrPost(extra_returns->error);
                   if (max_sev >= SEV_ERROR)
                         return status;
            }
        }

        /* Combine repeat mask with lower case mask */
        if (repeat_mask)
            lcase_mask = ValNodeLink(&lcase_mask, repeat_mask);
        
        status = BLAST_TwoSeqLocSets(options, slp1, slp2, lcase_mask, &seqalign_arr, 
                                     &filter_loc, &mask_at_hash, 
                                     &extra_returns);

        /* Free the lower case mask in SeqLoc form. */
        lcase_mask = Blast_ValNodeMaskListFree(lcase_mask);

        /* Post warning or error messages, no matter what the search status 
           was. */
        SBlastMessageErrPost(extra_returns->error);

        if (status != 0)
        {
                ErrPostEx(SEV_FATAL, 1, 0, "BLAST_TwoSeqLocSets failed");
                return status;
        }

        if (myargs[ARG_ASNOUT].strvalue && seqalign_arr) {
            AsnIoPtr asnout =
               AsnIoOpen(myargs[ARG_ASNOUT].strvalue, (char*)"w");
            GenericSeqAlignSetAsnWrite(seqalign_arr->array[0], asnout);
            asnout = AsnIoClose(asnout);
        }

        /* Pass NULL for the database name, since there is no database. */
        BlastFormattingInfoNewBasic(align_view, options, slp1, 
                                    myargs[ARG_OUT].strvalue, &search_options,
                                    &format_info);
        
        /* Always show gis in the output, hence pass TRUE for respective 
           argument. */
        BlastFormattingInfoSetUpOptions(format_info, 0, 1,
                                        (Boolean) myargs[ARG_HTML].intvalue,
                                        (Boolean) myargs[ARG_USEMEGABLAST].intvalue,
                                        TRUE, believe_query);

        /* If masking was at hash only, free the masking locations,
         * to prevent them from being used for formatting.
         */
        if (SBlastOptionsGetMaskAtHash(search_options))
            filter_loc = Blast_ValNodeMaskListFree(filter_loc);

        /* Format the results */
        status = 
            BLAST_FormatResults(seqalign_arr, 1, slp1, filter_loc, format_info, 
                                extra_returns);
        
        status = Blast_PrintOutputFooter(format_info, extra_returns);

        /* Free masking locations if they haven't been freed already. */
        filter_loc = Blast_ValNodeMaskListFree(filter_loc);

        format_info = BlastFormattingInfoFree(format_info);
        extra_returns = Blast_SummaryReturnFree(extra_returns);
        search_options = SBlastOptionsFree(search_options);

        if (entrez_lookup) {
           BioseqFree(query_bsp);
           BioseqFree(subject_bsp);
        } else {
           SeqEntryFree(sep);
           SeqEntryFree(sep1);
        }

        options = BLAST_SummaryOptionsFree(options);
        seqalign_arr = SBlastSeqalignArrayFree(seqalign_arr);
        slp1 = SeqLocSetFree(slp1);
        slp2 = SeqLocSetFree(slp2);

        fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
        GeneticCodeSingletonFini();

        return 0;

}
Exemple #19
0
/*******************************************************************************

  Function : DDV_GetRulerForEditor()
  
  Purpose : create the ruler for the editor. The new descriptor list is built
      from the descriptor list of the viewer: each viewer node that does not
      end before from_disp is clipped to [from_disp,to_disp] and copied, and
      consecutive nodes sharing the same bUnAligned state are merged into a
      single node. Scanning stops at the first viewer node whose disp_stop
      reaches to_disp.

  Parameters : descr_head; head of the viewer's list of DDVRulerDescr nodes
               from_disp; first display coordinate of the region of interest
               to_disp; last display coordinate of the region of interest

  Return value : an allocated data block ready for use by the editor. Null if
  failure (the partially built list is released first), or if every viewer
  node ends before from_disp.

*******************************************************************************/
NLM_EXTERN ValNodePtr DDV_GetRulerForEditor(ValNodePtr descr_head,Int4 from_disp,
		Int4 to_disp)
{
DDVRulerDescrPtr drdp,prev_drdp,new_drdp;
ValNodePtr  vnp2,vnp3,vnpDesc;
Int4     beginCopy,endCopy,drdp_start_ali;

	vnpDesc=NULL;/*head of the list being built*/
	vnp3=NULL;/*last node of the list being built*/
	prev_drdp=NULL;/*last descriptor stored in the list being built*/
	
	for(vnp2=descr_head;vnp2!=NULL;vnp2=vnp2->next){
		drdp=(DDVRulerDescrPtr)vnp2->data.ptrvalue;

		/*not yet in the region of interest ?*/
		if (drdp->disp_stop<from_disp) continue;
		
		/*clip the viewer node to the requested display range*/
		beginCopy=_max_(from_disp,drdp->disp_start);
		endCopy=_min_(drdp->disp_stop,to_disp);

		/*in order to merge nodes of same type, i keep track of
		the previous populated drdp node. Same style==yes, then
		just extend to the right the previous node*/
		if (prev_drdp!=NULL && prev_drdp->bUnAligned==drdp->bUnAligned){
			prev_drdp->disp_stop=endCopy;
		}
		else{
			/*aligned nodes carry an alignment coordinate shifted by the
			amount clipped on the left; unaligned nodes carry -1*/
			if (drdp->bUnAligned==FALSE)
				drdp_start_ali=drdp->align_start+(beginCopy-drdp->disp_start);
			else
				drdp_start_ali=(Int4)-1;

			new_drdp=(DDVRulerDescrPtr)MemNew(sizeof(DDVRulerDescr));
			if (!new_drdp) goto erreur;
			
			new_drdp->disp_start=beginCopy;
			new_drdp->disp_stop=endCopy;
			new_drdp->align_start=drdp_start_ali;
			new_drdp->bUnAligned=drdp->bUnAligned;

			/*first node: create the list; otherwise append after the
			current last node*/
			if (!vnpDesc){
				vnp3=ValNodeAddPointer(&vnpDesc,0,(Pointer)new_drdp);
			}
			else{
				vnp3=ValNodeAddPointer(&vnp3,0,(Pointer)new_drdp);
			}
			if (!vnp3){
				/*the descriptor was not stored in the list, so the list
				clean-up below would not release it*/
				MemFree(new_drdp);
				goto erreur;
			}
			prev_drdp=new_drdp;
		}
		if (drdp->disp_stop>=to_disp) break;
	}

	return(vnpDesc);
erreur:
	if (vnpDesc)
		ValNodeFreeData(vnpDesc);
	return(NULL);
}
Exemple #20
0
/*******************************************************************************

  Function : DDV_ComputeRuler()
  
  Purpose : compute the Ruler descriptor (usefull for discontinuous align)
  
  Parameters : 
  
  Return value : a list of RUler descriptor

*******************************************************************************/
extern ValNodePtr DDV_ComputeRuler(SeqAlignPtr sap,DDV_Disp_OptPtr ddop)
{

DDVRulerDescrPtr drdp;
ValNodePtr       vnp=NULL,vnp_head=NULL;
Int4             disp_start=0,length,TotAliLength=0,r=0;
Boolean          bUnAligned;

  /* make ruler for LEFT_TAIL */
  if ((ddop->DispDiscStyle == MSA_TXT_STYLE_2) && (ddop->ShowLeftTail)) {
    length = AlnMgrGetMaxTailLength(sap, LEFT_TAIL);
    if (length) {
	    drdp=(DDVRulerDescrPtr)MemNew(sizeof(DDVRulerDescr));
	    drdp->disp_start=disp_start;
	    drdp->disp_stop=disp_start+length-1;
	    drdp->bUnAligned=TRUE;
      drdp->align_start = -1;
      disp_start += length;
	    vnp_head=ValNodeAddPointer(NULL,0,(Pointer)drdp);
	    vnp=vnp_head;
    }
  }

  if (AlnMgrIsSAPDiscAli(sap)){
    /* make ruler for multiple blocks */
		while(AlnMgrGetNextLengthBit(sap,&length,&r)){
			if (length<0){
				bUnAligned=TRUE;
				switch(ddop->DispDiscStyle){/*user's display choice*/
					case MSA_TXT_STYLE_1:
						length=ddop->SpacerSize;
						break;
					case MSA_TXT_STYLE_2:
						length=ABS(length);
						break;
				}
			}
			else{
				bUnAligned=FALSE;
			}

			drdp=(DDVRulerDescrPtr)MemNew(sizeof(DDVRulerDescr));
			drdp->disp_start=disp_start;
			drdp->disp_stop=disp_start+length-1;
			drdp->bUnAligned=bUnAligned;
			
			if (bUnAligned==FALSE)
				drdp->align_start=TotAliLength;/*SeqAlign Coord*/
			else
				drdp->align_start=-1;/*drdp->disp_start;*//*Disp Coord*/

			if (!vnp_head){
				vnp_head=ValNodeAddPointer(NULL,0,(Pointer)drdp);
				vnp=vnp_head;
			}
			else{
				vnp=ValNodeAddPointer(&vnp,0,(Pointer)drdp);
			}
			disp_start+=length;
			if (bUnAligned==FALSE) 
				TotAliLength+=length;
		}
	}

  else if (sap->type == SAT_MASTERSLAVE){
    /* make ruler for a single block */
		drdp=(DDVRulerDescrPtr)MemNew(sizeof(DDVRulerDescr));
		drdp->disp_start=disp_start;
		length=AlnMgrGetAlnLength(sap,FALSE);
		drdp->disp_stop=disp_start+length-1;
    drdp->bUnAligned = FALSE;
		drdp->align_start=0;
		if (!vnp_head){
			vnp_head=ValNodeAddPointer(NULL,0,(Pointer)drdp);
			vnp=vnp_head;
		}
		else{
			vnp=ValNodeAddPointer(&vnp,0,(Pointer)drdp);
		}
	}

  /* make ruler for RIGHT_TAIL */
  if ((ddop->DispDiscStyle == MSA_TXT_STYLE_2) && (ddop->ShowRightTail)) {
    length = AlnMgrGetMaxTailLength(sap, RIGHT_TAIL);
    if (length) {
	    drdp=(DDVRulerDescrPtr)MemNew(sizeof(DDVRulerDescr));
	    drdp->disp_start=disp_start;
	    drdp->disp_stop=disp_start+length-1;
	    drdp->bUnAligned=TRUE;
      drdp->align_start = -1;
      disp_start += length;
	    vnp = ValNodeAddPointer(&vnp,0,(Pointer)drdp);
    }
  }

	return(vnp_head);
}
Exemple #21
0
static Int2 Main_old (void)
 
{
   AsnIoPtr aip, xml_aip = NULL;
   BioseqPtr query_bsp, PNTR query_bsp_array;
   BioSourcePtr source;
   BLAST_MatrixPtr matrix;
   BLAST_OptionsBlkPtr options;
   BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
   BlastPruneSapStructPtr prune;
   Boolean db_is_na, query_is_na, show_gi, believe_query=FALSE;
   Boolean html=FALSE;
   CharPtr params_buffer=NULL;
   Int4 number_of_descriptions, number_of_alignments;
   SeqAlignPtr  seqalign, PNTR seqalign_array;
   SeqAnnotPtr seqannot;
   SeqEntryPtr PNTR sepp;
   TxDfDbInfoPtr dbinfo=NULL, dbinfo_head;
   Uint1 align_type, align_view, out_type;
   Uint4 align_options, print_options;
   ValNodePtr mask_loc, mask_loc_start, next_mask_loc;
   ValNodePtr vnp, other_returns, error_returns;
   
   CharPtr blast_program, blast_database, blast_inputfile, blast_outputfile;
   FILE *infp, *outfp, *mqfp=NULL;
   Int4 index, num_bsps, total_length, total_processed = 0;
   Int2 ctr = 1;
   Char prefix[2];
   SeqLocPtr last_mask, mask_slp;
   Boolean done, hits_found;
   Boolean lcase_masking;
   MBXmlPtr mbxp = NULL;
   Boolean traditional_formatting;

    blast_program = "blastn";
    blast_database = myargs [ARG_DB].strvalue;
    blast_inputfile = myargs [ARG_QUERY].strvalue;
    blast_outputfile = myargs [ARG_OUT].strvalue;
    if (myargs[ARG_HTML].intvalue)
        html = TRUE;

    if ((infp = FileOpen(blast_inputfile, "r")) == NULL) {
       ErrPostEx(SEV_FATAL, 1, 0, "mgblast: Unable to open input file %s\n", blast_inputfile);
       return (1);
    }

    align_view = (Int1) myargs[ARG_FORMAT].intvalue;
    /* Geo mod: 
      -- replaced myargs[ARG_OUTTYPE].intvalue with out_type from now on
    */
    out_type=(Int1) myargs[ARG_OUTTYPE].intvalue;
    if (out_type==MGBLAST_FLTHITS || out_type==MGBLAST_HITGAPS) {
      align_view = 12 + (out_type-MGBLAST_FLTHITS ); 
      out_type=MBLAST_ALIGNMENTS;
      //Attention: 12 MUST be the -m mgblast tab option for MGBLAST_FLTHITS format
      // and MGBLAST_HITGAPS = MGBLAST_FLTHITS+1
       if (align_view>12) { // this is MGBLAST_HITGAPS output
            gap_Info=TRUE;
            if (dbgaps_buf==NULL)
                  dbgaps_buf=(CharPtr) Malloc(dbgaps_bufsize + 1);
            if (qgaps_buf==NULL) 
                qgaps_buf=(CharPtr) Malloc(qgaps_bufsize + 1);
            }
      }

    outfp = NULL;

    traditional_formatting = 
        (out_type == MBLAST_ALIGNMENTS ||
         out_type == MBLAST_DELAYED_TRACEBACK);

    if ((!traditional_formatting ||
            (align_view != 7 && align_view != 10 && align_view != 11)) && 
            blast_outputfile != NULL) {
       if ((outfp = FileOpen(blast_outputfile, "w")) == NULL) {
          ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
          return (1);
       }
    }

    //align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na);
    align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na);
    /*
    if (!traditional_formatting)
        believe_query = TRUE;
    else
        believe_query = (Boolean) myargs[ARG_BELIEVEQUERY].intvalue;
    */
    //Geo mod: 
    believe_query=FALSE;
    //If ASN.1 output is requested and believe_query is not set to TRUE,
    //   exit with an error.    
    if (!believe_query && (myargs[ARG_ASNOUT].strvalue ||
                           align_view == 10 || align_view == 11)) {
        ErrPostEx(SEV_FATAL, 1, 0, 
                  "-J option must be TRUE to produce ASN.1 output; before "
                  "changing -J to TRUE please also ensure that all query "
                  "sequence identifiers are unique");
        return -1;
    }
        
    options = BLASTOptionNewEx(blast_program, TRUE, TRUE);
    if (options == NULL)
        return 3;

    options->do_sum_stats = FALSE;
    options->is_neighboring = FALSE;
        options->expect_value  = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;
    number_of_descriptions = myargs[ARG_DESCRIPTIONS].intvalue;    
    number_of_alignments = myargs[ARG_ALIGNMENTS].intvalue;    
    options->hitlist_size = MAX(number_of_descriptions, number_of_alignments);

    if (myargs[ARG_XDROP].intvalue != 0)
           options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
    if (myargs[ARG_XDROP_UNGAPPED].intvalue != 0)
           options->dropoff_2nd_pass = myargs[ARG_XDROP_UNGAPPED].intvalue;
        if (myargs[ARG_XDROP_FINAL].intvalue != 0)
           options->gap_x_dropoff_final = myargs[ARG_XDROP_FINAL].intvalue;

    if (StringICmp(myargs[ARG_FILTER].strvalue, "T") == 0)
       options->filter_string = StringSave("D");
    else
       options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
    
    show_gi = (Boolean) myargs[ARG_SHOWGIS].intvalue;
    options->penalty = myargs[ARG_MISMATCH].intvalue;
    options->reward = myargs[ARG_MATCH].intvalue;
        if (myargs[ARG_GAPOPEN].intvalue >= 0)
        options->gap_open = myargs[ARG_GAPOPEN].intvalue;
        if (myargs[ARG_GAPEXT].intvalue >= 0)
        options->gap_extend = myargs[ARG_GAPEXT].intvalue;

    if (options->gap_open == 0 && options->reward % 2 == 0 && 
        options->gap_extend == options->reward / 2 - options->penalty)
       /* This is the default value */
    options->gap_extend = 0;

    options->genetic_code = 1;
    options->db_genetic_code = 1; /* Default; it's not needed here anyway */
    options->number_of_cpus = myargs[ARG_THREADS].intvalue;
    if (myargs[ARG_WORDSIZE].intvalue != 0)
           options->wordsize = myargs[ARG_WORDSIZE].intvalue;
        if (myargs[ARG_MINSCORE].intvalue == 0)
           options->cutoff_s2 = options->wordsize*options->reward;
        else 
           options->cutoff_s2 = myargs[ARG_MINSCORE].intvalue;

        options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;
        options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;

    options->perform_culling = FALSE;
    /* Kludge */
    options->block_width  = myargs[ARG_MAXPOS].intvalue;

    options->strand_option = myargs[ARG_STRAND].intvalue;
        options->window_size = myargs[ARG_WINDOW].intvalue;
#ifdef DO_NOT_SUPPRESS_BLAST_OP        
        options->mb_template_length = myargs[ARG_TEMPL_LEN].intvalue;
        if (myargs[ARG_TEMPL_LEN].intvalue != 0)
            options->mb_one_base_step = (Boolean) myargs[ARG_EVERYBASE].intvalue;
        options->mb_disc_type = myargs[ARG_TEMPL_TYPE].intvalue;
#endif
        lcase_masking = (Boolean) myargs[ARG_LCASE].intvalue;
        /* Allow dynamic programming gapped extension only with affine 
           gap scores */
        if (options->gap_open != 0 || options->gap_extend != 0)
           options->mb_use_dyn_prog = (Boolean) myargs[ARG_DYNAMIC].intvalue;

        print_options = 0;
        align_options = 0;
        align_options += TXALIGN_COMPRESS;
        align_options += TXALIGN_END_NUM;
        if (show_gi) {
       align_options += TXALIGN_SHOW_GI;
       print_options += TXALIGN_SHOW_GI;
        }
            
        if (align_view) {
       align_options += TXALIGN_MASTER;
       if (align_view == 1 || align_view == 3)
          align_options += TXALIGN_MISMATCH;
       if (align_view == 3 || align_view == 4 || align_view == 6)
          align_options += TXALIGN_FLAT_INS;
       if (align_view == 5 || align_view == 6)
          align_options += TXALIGN_BLUNT_END;
        } else {
       align_options += TXALIGN_MATRIX_VAL;
       align_options += TXALIGN_SHOW_QS;
    }

    if (html) {
       align_options += TXALIGN_HTML;
       print_options += TXALIGN_HTML;
    }

    if (myargs[ARG_GILIST].strvalue)
       options->gifile = StringSave(myargs[ARG_GILIST].strvalue);
   
    if (out_type == MBLAST_ENDPOINTS)
      options->no_traceback = 1;
   else if (out_type == MBLAST_DELAYED_TRACEBACK)
       options->no_traceback = 2;
    else
       options->no_traceback = 0;

    options->megablast_full_deflines = (Boolean) myargs[ARG_FULLID].intvalue;
    options->perc_identity = (FloatLo) myargs[ARG_PERC_IDENT].floatvalue;
    options->hsp_num_max = myargs[ARG_MAXHSP].intvalue;

    if (!believe_query)
           options->megablast_full_deflines = TRUE;
        /*if (options->megablast_full_deflines)
          believe_query = FALSE;*/

    query_bsp_array = (BioseqPtr PNTR) MemNew((MAX_NUM_QUERIES+1)*sizeof(BioseqPtr));
    sepp = (SeqEntryPtr PNTR) MemNew(MAX_NUM_QUERIES*sizeof(SeqEntryPtr));

    StrCpy(prefix, "");

    global_fp = outfp;
        options->output = outfp;

    if (traditional_formatting) {
       if (align_view < 7) {
              if (html) {
                 fprintf(outfp, "<HTML>\n<TITLE>MEGABLAST Search Results</TITLE>\n");
                 fprintf(outfp, "<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" "
                         "VLINK=\"#660099\" ALINK=\"#660099\">\n");
                 fprintf(outfp, "<PRE>\n");
              }
              init_buff_ex(90);
              BlastPrintVersionInfo("mgblast", html, outfp);
              fprintf(outfp, "\n");
              MegaBlastPrintReference(html, 90, outfp);
              fprintf(outfp, "\n");
              
              if(!PrintDbInformation(blast_database, !db_is_na, 70, outfp, html))
                 return 1;
              
              free_buff();
    
#ifdef OS_UNIX
              fprintf(global_fp, "%s", "Searching");
#endif
           }
    }
    
        aip = NULL;
        if (myargs[ARG_ASNOUT].strvalue != NULL) {
           if ((aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w")) == NULL) {
              ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", myargs[ARG_ASNOUT].strvalue);
              return 1;
           }
        }
        else if (align_view == 10 || align_view == 11)
        {
            const char* mode = (align_view == 10) ? "w" : "wb";
            if ((aip = AsnIoOpen (blast_outputfile, (char*) mode)) == NULL) {
                    ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
                    return 1;
            }
        }


        if (align_view == 7) {
           xml_aip = AsnIoOpen(blast_outputfile, "wx");
        }

        if (myargs[ARG_QUERYLOC].strvalue) {       
            Int4 start, end;
            Megablast_GetLoc(myargs[ARG_QUERYLOC].strvalue, &start, &end);
            options->required_start = start - 1;
            options->required_end = end -1;
        }

    done = FALSE;
    while (!done) {
       num_bsps = 0;
       total_length = 0;
       done = TRUE;
       SeqMgrHoldIndexing(TRUE);
       mask_slp = last_mask = NULL;
   
       while ((sepp[num_bsps]=FastaToSeqEntryForDb(infp, query_is_na, NULL,
                               believe_query, prefix, &ctr, 
                               &mask_slp)) != NULL) {
              if (!lcase_masking) /* Lower case ignored */
                 mask_slp = SeqLocFree(mask_slp);
         if (mask_slp) {
           if (!last_mask)
              options->query_lcase_mask = last_mask = mask_slp;
           else {
              last_mask->next = mask_slp;
              last_mask = last_mask->next;
              }
           mask_slp = NULL;
           }
          query_bsp = NULL;
         SeqEntryExplore(sepp[num_bsps], &query_bsp, FindNuc);
         //debug:
         /*
         char query_buffer[255];
         SeqIdWrite(query_bsp->id, query_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
         fprintf(stderr, "===> query_buf=%s\n", query_buffer);
         */
         if (query_bsp == NULL) {
           ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
           return 2;
          }
          
          source = BioSourceNew();
          source->org = OrgRefNew();
          source->org->orgname = OrgNameNew();
          source->org->orgname->gcode = options->genetic_code;
          ValNodeAddPointer(&(query_bsp->descr), Seq_descr_source, source);
          
          query_bsp_array[num_bsps++] = query_bsp;
          
          total_length += query_bsp->length;
          if (total_length > myargs[ARG_MAXQUERY].intvalue || 
          num_bsps >= MAX_NUM_QUERIES) {
         done = FALSE;
         break;
          }
       }

           if (num_bsps == 0)
               break;

       SeqMgrHoldIndexing(FALSE);
       other_returns = NULL;
       error_returns = NULL;
       
       if (out_type==MBLAST_ENDPOINTS) 
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                             blast_database, options,
                             &other_returns, &error_returns,
                             dummy_callback, NULL, NULL, 0, 
                             MegaBlastPrintEndpoints);
       else if (out_type==MBLAST_SEGMENTS) 
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                             blast_database, options,
                             &other_returns, &error_returns,
                             dummy_callback, NULL, NULL, 0,
                             MegaBlastPrintSegments);
       else if (out_type==MBLAST_ALIGN_INFO) {
              /* -- Geo mod: do not print header
              PrintTabularOutputHeader(blast_database, 
                                       (num_bsps==1) ? query_bsp_array[0] : NULL,
                                       NULL, "megablast", 0, believe_query,
                                       global_fp);*/
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                             blast_database, options,
                             &other_returns, &error_returns,
                             dummy_callback, NULL, NULL, 0,
                                MegaBlastPrintAlignInfo);
       } else if (out_type==MBLAST_ALIGNMENTS) {
          seqalign_array = BioseqMegaBlastEngine(query_bsp_array, blast_program,
                  blast_database, options, &other_returns, 
                                  &error_returns, align_view < 7 ? tick_callback : NULL,
                                  NULL, NULL, 0, NULL);
          }
       
#ifdef OS_UNIX
       fflush(global_fp);
#endif

       if (error_returns) {
             BlastErrorPrint(error_returns);
              for (vnp = error_returns; vnp; vnp = vnp->next) {
                 BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
              }
              ValNodeFree(error_returns);
           }
              
              
       if (traditional_formatting) {
          dbinfo = NULL;
          ka_params = NULL;
          ka_params_gap = NULL;
          params_buffer = NULL;
          mask_loc = NULL;
          matrix = NULL;
          for (vnp=other_returns; vnp; vnp = vnp->next) {
           switch (vnp->choice) {
           case TXDBINFO:
              dbinfo = vnp->data.ptrvalue;
              break;
           case TXKABLK_NOGAP:
              ka_params = vnp->data.ptrvalue;
              break;
           case TXKABLK_GAP:
              ka_params_gap = vnp->data.ptrvalue;
              break;
           case TXPARAMETERS:
              params_buffer = vnp->data.ptrvalue;
              break;
           case TXMATRIX:
              matrix = vnp->data.ptrvalue;
              break;
           case SEQLOC_MASKING_NOTSET:
           case SEQLOC_MASKING_PLUS1:
           case SEQLOC_MASKING_PLUS2:
           case SEQLOC_MASKING_PLUS3:
           case SEQLOC_MASKING_MINUS1:
           case SEQLOC_MASKING_MINUS2:
           case SEQLOC_MASKING_MINUS3:
              ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
              break;
           default:
              break;
           }
          }    
          
#ifdef OS_UNIX
        if(align_view < 7) {
                 fprintf(global_fp, "%s\n", " done");
              }
#endif
          
        if (myargs[ARG_MASKEDQUERY].strvalue) {
                 if ((mqfp = FileOpen(myargs[ARG_MASKEDQUERY].strvalue, "w")) == NULL)
                    ErrPostEx(SEV_WARNING, 1, 0, "Unable to open file %s for masked query\n",
                              myargs[ARG_MASKEDQUERY].strvalue);
              }

        hits_found = FALSE;

        mask_loc_start = next_mask_loc = mask_loc;
        mask_loc = NULL;

        if (align_view == 7) {
           mbxp = PSIXmlInit(xml_aip, "megablast", blast_database, 
                             options, query_bsp_array[0], 0);
           }

        if (seqalign_array) { //results returned back for processing
             ReadDBBioseqFetchEnable ("megablast", blast_database, db_is_na, TRUE);
             for (index=0; index<num_bsps; index++) {
                    seqalign = seqalign_array[index];
                    if (next_mask_loc && 
                        SeqIdComp(SeqLocId((SeqLocPtr)next_mask_loc->data.ptrvalue), 
                                  query_bsp_array[index]->id) == SIC_YES) {
                       mask_loc = (SeqLocPtr) 
                       MemDup(next_mask_loc, sizeof(SeqLoc));
                       next_mask_loc = next_mask_loc->next;
                       mask_loc->next = NULL;
                    }
                    if (mqfp) {
                       /* convert mask locations from all sources into
                          a single seqloc */
                       mask_slp = NULL;
                       if (mask_loc) 
                          mask_slp = blastMergeFilterLocs(mask_slp, 
                              (SeqLocPtr)mask_loc->data.ptrvalue,
                              FALSE, 0, 0);
                       PrintMaskedSequence(query_bsp_array[index], mask_slp,
                                           mqfp, 50, lcase_masking);
                       SeqLocSetFree(mask_slp);
                       }
                    if (seqalign==NULL) {
                       mask_loc = MemFree(mask_loc);
                       continue;
                    }
                    hits_found = TRUE;
                    if (align_view < 7) {
                       init_buff_ex(70);
                       AcknowledgeBlastQuery(query_bsp_array[index], 70, outfp, 
                                             believe_query, html);
                       free_buff();
                       }
                    if (align_view == 8 || align_view == 9) {
                       if (align_view == 9)
                          PrintTabularOutputHeader(blast_database, 
                             query_bsp_array[index], NULL, blast_program, 0,
                             believe_query, global_fp);
                       /* debug:
                       char qbuf[512];
                       strcpy(qbuf, BioseqGetTitle(query_bsp_array[index]));
                       fprintf(stderr, "---> Here: query title=%s\n", qbuf);
                       */
                       BlastPrintTabulatedResults(seqalign, 
                           query_bsp_array[index], NULL, number_of_alignments,
                            blast_program, !options->gapped_calculation, 
                            believe_query, 0, 0, 
                            global_fp, (align_view == 9));
                            

                       ObjMgrFreeCache(0);

                       SeqAlignSetFree(seqalign);
                       mask_loc = MemFree(mask_loc);
                       continue;
                    } 
                       //Geo mod:   
                   else if (align_view>=12)  {
                        MGBlastPrintTab(seqalign, 
                            query_bsp_array[index], number_of_alignments,
                            !options->gapped_calculation, 
                            global_fp);
                        ObjMgrFreeCache(0);

                        SeqAlignSetFree(seqalign);
                        mask_loc = MemFree(mask_loc);
                        continue;
                        }
                    else if(align_view == 7) {
                       IterationPtr iterp;

                       iterp = BXMLBuildOneQueryIteration(seqalign, 
                                  NULL, FALSE, 
                                  !options->gapped_calculation, index, 
                                  NULL, query_bsp_array[index], mask_loc);
                       IterationAsnWrite(iterp, mbxp->aip, mbxp->atp);
                       AsnIoFlush(mbxp->aip);
                       IterationFree(iterp);
                       SeqAlignSetFree(seqalign);
                       mask_loc = MemFree(mask_loc);
                       continue;
                    }
                    seqannot = SeqAnnotNew();
                    seqannot->type = 2;
                    AddAlignInfoToSeqAnnot(seqannot, align_type);
                    seqannot->data = seqalign;
                    if (aip) {
                       SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
                       AsnIoReset(aip);
                    }
                    if (outfp) { /* Uncacheing causes problems with ordinal nos. vs. gi's. */
                       prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_descriptions, NULL);
                       ObjMgrSetHold();
                       init_buff_ex(85);
                       PrintDefLinesFromSeqAlign(prune->sap, 80,
                                                 outfp, print_options, FIRST_PASS, NULL);
                       free_buff();
                       
                       prune = BlastPruneHitsFromSeqAlign(seqalign, number_of_alignments, prune);
                       seqannot->data = prune->sap;
                       if (align_view != 0)
                          ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL,
                                                 NULL, align_options, NULL, 
                                                 mask_loc, NULL);
                       else
                          ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, NULL, mask_loc, FormatScoreFunc);
                       seqannot->data = seqalign;
                       prune = BlastPruneSapStructDestruct(prune);
                       ObjMgrClearHold();
                       ObjMgrFreeCache(0);
                    }
                    seqannot = SeqAnnotFree(seqannot);
                    mask_loc = MemFree(mask_loc);
                 } /* End loop on seqaligns for different queries */
                 ReadDBBioseqFetchDisable();
              } 

              if (mbxp != NULL) {
                 MBXmlClose(mbxp, other_returns, !options->gapped_calculation);
              }

              if (mqfp)
                 FileClose(mqfp);

              if (!hits_found && align_view < 7)
                 fprintf(outfp, "\n\n ***** No hits found ******\n\n");

              matrix = BLAST_MatrixDestruct(matrix);
          
              if(html) 
                 fprintf(outfp, "<PRE>\n");
              init_buff_ex(85);
              dbinfo_head = dbinfo;
              if(align_view < 7) {
                 while (dbinfo) {
                    PrintDbReport(dbinfo, 70, outfp);
                    dbinfo = dbinfo->next;
                 }
              }
              dbinfo_head = TxDfDbInfoDestruct(dbinfo_head);
              
              if (ka_params) {
                 if(align_view < 7)
                    PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
                 MemFree(ka_params);
              }
              if (ka_params_gap) {
                 if(align_view < 7)
                    PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
                 MemFree(ka_params_gap);
              }
              if(align_view < 7)
                 PrintTildeSepLines(params_buffer, 70, outfp);
              MemFree(params_buffer);
              free_buff();
              mask_loc = mask_loc_start;
              while (mask_loc) {
                 SeqLocSetFree(mask_loc->data.ptrvalue);
                 mask_loc = mask_loc->next;
              }
              ValNodeFree(mask_loc_start);
       } else { //not traditional formatting
          /* Just destruct all other_returns parts */
          for (vnp=other_returns; vnp; vnp = vnp->next) {
         switch (vnp->choice) {
         case TXDBINFO:
            TxDfDbInfoDestruct(vnp->data.ptrvalue);
            break;
         case TXKABLK_NOGAP:
         case TXKABLK_GAP:
         case TXPARAMETERS:
            MemFree(vnp->data.ptrvalue);
            break;
         case TXMATRIX:
            BLAST_MatrixDestruct(vnp->data.ptrvalue);
            break;
         case SEQLOC_MASKING_NOTSET:
         case SEQLOC_MASKING_PLUS1:
         case SEQLOC_MASKING_PLUS2:
         case SEQLOC_MASKING_PLUS3:
         case SEQLOC_MASKING_MINUS1:
         case SEQLOC_MASKING_MINUS2:
         case SEQLOC_MASKING_MINUS3:
                    mask_loc = vnp->data.ptrvalue;
                    SeqLocSetFree(mask_loc);
         default:
            break;
         }
          }
       }
       other_returns = ValNodeFree(other_returns);
       MemFree(seqalign_array);
           options->query_lcase_mask = 
              SeqLocSetFree(options->query_lcase_mask);

       /* Freeing SeqEntries can be very expensive, do this only if 
          this is not the last iteration of search */
       if (!done) { 
          for (index=0; index<num_bsps; index++) {
         sepp[index] = SeqEntryFree(sepp[index]);
         query_bsp_array[index] = NULL;
          }       
           }
           total_processed += num_bsps;
    } /* End of loop on complete searches */
        
        aip = AsnIoClose(aip);

        /*if (align_view == 7)
          xml_aip = AsnIoClose(xml_aip);*/

        if (align_view < 7 && html) 
           fprintf(outfp, "</PRE>\n</BODY>\n</HTML>\n");
        if (align_view < 7 && myargs[ARG_LOGINFO].intvalue)
           fprintf(outfp, "Mega BLAST run finished, processed %d queries\n",
                   total_processed);
    MemFree(query_bsp_array);
    MemFree(sepp);
    MemFree(qgaps_buf);
    MemFree(dbgaps_buf);
    options = BLASTOptionDelete(options);
    FileClose(infp);
        FileClose(outfp);
    
    return 0;
}
Exemple #22
0
/******************************************************************
*
*	aa_to_codon(sfp, aa_start, aa_stop)
*	Generate a list of CodonVector nodes showing the codons of an
*	amino acid sequence. One node is produced for every interval
*	of the coding-region location that reaches the requested
*	protein range and whose Bioseq can be locked.
*
*	sfp: the Seq-feat for cds (its data.choice must be 3)
*	aa_start: the start position of protein sequence
*	aa_stop: the stop position of protein sequence
*
*	Returns the head of a ValNode chain whose data.ptrvalue
*	entries are CodonVector structures (each holding three text
*	rows, one per codon position), or NULL when sfp is NULL, is
*	not a coding region, has no CdRegion data or location, the
*	requested range is empty, the work buffers cannot be
*	allocated, or no interval matched.
*
******************************************************************/
NLM_EXTERN ValNodePtr aa_to_codon(SeqFeatPtr sfp, Int4 aa_start, Int4 aa_stop)
{
  BioseqPtr bsp;

  Int4 frame_offset, start_offset;
  SeqLocPtr slp = NULL;
  SeqLocPtr cdloc;
  CdRegionPtr crp;
  Uint1 frame;

  Boolean is_end;			/**is the end for process reached?**/
  Int4 p_start=0, p_stop=0;		/**protein start & stop in defined
					corresponding CdRegion Seq-loc**/

  Int4 line_len;
  Int4 cur_pos;			/**current protein position in process**/
  Int4 cd_len;		/**length of the cDNA for the coding region**/

  Int2 i, j;
  Int2 k, n;
  CharPtr PNTR buf;

  Boolean is_new;		/**Is cur_pos at the begin of new Seq-loc?**/
  CharPtr temp;

  SeqPortPtr spp;
  Uint1 residue;

  Boolean end_partial;
  Int4 d_start, seq_pos;
  Int2 pos;

  ValNodePtr head= NULL;
  CodonVectorPtr cvp;
  Boolean prt_stop_codon;
  Uint2 exon;


   /* choice 3 is the only feature type handled here */
   if(sfp == NULL || sfp->data.choice !=3)
	return NULL;

   crp = sfp->data.value.ptrvalue;
   if(!crp)
	return NULL;
   frame = crp->frame;
   cdloc = sfp->location;
   if(cdloc == NULL )
	return NULL;

   /* An empty range can never satisfy p_stop >= cur_pos below, so the
      result is always NULL; returning now also keeps line_len positive
      for the buffer allocation. */
   if(aa_stop < aa_start)
	return NULL;

   if(frame>0)
	frame_offset = frame-1;
   else
	frame_offset = 0;
   start_offset = frame_offset;

   prt_stop_codon = (aa_stop == SeqLocStop(sfp->product));
   line_len = (aa_stop - aa_start + 1) + 1;
					/* +1 for the possible partial start codon*/
   if(prt_stop_codon)/*can be either as a stop codon or partial stop*/
	++line_len;

   /* three text rows, one per codon position */
   buf = MemNew((size_t)3 * sizeof(CharPtr));
   if(buf == NULL)
	return NULL;
   for(i =0; i<3; ++i)
   {
	buf[i] = MemNew((size_t)(line_len + 1) * sizeof (Char));
	if(buf[i] == NULL)
	{
	   /* release the rows already obtained before giving up */
	   for(j =0; j<i; ++j)
		MemFree(buf[j]);
	   MemFree(buf);
	   return NULL;
	}
   }


   cur_pos= aa_start;
   cd_len = 0;
   is_end = FALSE;
   p_start = 0;
   slp = NULL;
   exon = 0;
   while(!is_end && ((slp = SeqLocFindNext(cdloc, slp))!=NULL))
   {
	++exon;
	cd_len += SeqLocLen(slp);
	end_partial = ((cd_len - start_offset)%3 != 0);
	p_stop = (cd_len - start_offset)/3 -1;
	if(end_partial)
	   ++p_stop;
	if(p_stop > aa_stop || (p_stop == aa_stop && !end_partial))
	{
	   p_stop = aa_stop;		/**check if the end is reached**/
	   is_end = TRUE;
	}

	if(p_stop >= cur_pos)	/*get the exon*/
	{
	   bsp = BioseqLockById(SeqLocId(slp));
	   if(bsp)
	   {
		is_new = (p_start == cur_pos);	/*start a new exon?*/
		cvp = MemNew(sizeof(CodonVector));
		if(cvp == NULL)
		{
		   /* out of memory: return what has been built so far */
		   BioseqUnlock(bsp);
		   break;
		}
		cvp->sip = SeqIdDup(find_sip(bsp->id));
		cvp->strand = SeqLocStrand(slp);
		cvp->exonCount = exon;
		if(is_new)
		{
			if(frame_offset == 0)
				cvp->frame = 0;
			else
				cvp->frame = 3- (Uint1)frame_offset;
		}
		else
			cvp->frame = 0;
		if(cur_pos==0 && frame_offset > 0)	/*partial start codon*/
			cvp->aa_index = 0;
		else
			cvp->aa_index = 1;
		if(is_new)	/**special case of the first partial**/
		   d_start = SeqLocStart(slp);
		else
		{
		   if(frame_offset && p_start >0)
			++p_start;
		   d_start = SeqLocStart(slp) + 3*(cur_pos - p_start) + frame_offset;
		}
	    /**p_start is the start position of aa in the current Seq-loc
	       cur_pos is the current aa that is in process. The offset will
	       help to located the position on the DNA Seq-loc for translation
	       d_start is the position of the starting DNA in the coordinates
	       of DNA segment, used for mark the sequence
	       **/

		seq_pos = d_start - SeqLocStart(slp);	/**the pos in spp**/
		if(SeqLocStrand(slp)== Seq_strand_minus)
		   d_start = SeqLocStop(slp) - seq_pos;
		cvp->dna_pos = d_start;

		n = (Int2)cur_pos - (Int2)aa_start + cvp->aa_index;	/*position in buffer*/
		for(i =0; i<3; ++i)
			make_empty(buf[i], (Int2)line_len);
		spp = SeqPortNewByLoc(slp, Seq_code_iupacna);
		SeqPortSeek(spp, seq_pos, SEEK_SET);
		/**store the partial codons**/
		if(is_new && frame_offset > 0)
		{
		   k = (Int2)frame_offset;
		   while(k > 0)
		   {
			residue = SeqPortGetResidue(spp);
			temp = buf[3-k];	/**the position**/
			pos = n;
			/* never write outside the line_len text columns */
			if(pos >= 0 && pos < line_len)
			   temp[pos] = TO_LOWER(residue);
			--k;
		   }
		   ++n;
		   if(cur_pos!=0)
			++cur_pos;
		}


	     	/**load  the codons**/
		k =0;
		/* The column check comes first: when prt_stop_codon && is_end
		   cur_pos is not advanced, so the loop runs to the end of the
		   interval and must not run past the row buffers. */
		while(n >= 0 && n < line_len
		      && (residue = SeqPortGetResidue(spp)) != SEQPORT_EOF
		      && cur_pos <= p_stop)
		{
		   j= (Uint1)k%3;
		   temp = buf[j];
		   temp[n] = TO_LOWER(residue);
		   if(j ==2)
		   {		/**the last base**/
			++n;
		 	if(!prt_stop_codon|| !is_end) /*for the last codon*/
			/**prt_end controls to print the whole loc**/
		   	   ++cur_pos;
		   }
		   ++k;
		}	/**end of while**/

		SeqPortFree(spp);

		/* the node keeps its own copies; buf is reused for the next interval */
		for(i =0; i<3; ++i)
		   cvp->buf[i] = StringSave(buf[i]);
		ValNodeAddPointer(&head, 0, (Pointer)cvp);

		BioseqUnlock(bsp);
	   }/*end of if(bsp)*/
	}/**end of if for matched intervals**/

	if(end_partial)
	    p_start = p_stop;
	else
	    p_start = p_stop +1;

	/* bases still needed to complete the codon split across intervals */
	frame_offset = (cd_len - start_offset)%3;
	 if(frame_offset >0)
	    frame_offset = 3-frame_offset;

   }/**end of while(slp && !is_end) **/

   for(i=0; i<3; ++i)
	MemFree(buf[i]);
   MemFree(buf);

   return head;
}
Exemple #23
0
/* Record the dusted regions in a Seq-loc.
 *
 * When nreg is zero the incoming slp is returned unchanged.  Otherwise a
 * ValNode is created for slp if none was supplied, and one Seq-int (with a
 * duplicate of id and the from/to of the matching DREGION) is built for each
 * of the nreg regions reached through reg->next.
 *
 * With exactly one region and loopDustMax == 1 the result is a plain
 * SEQLOC_INT pointing at that Seq-int; in every other case the Seq-ints are
 * appended to *vnp and slp becomes a SEQLOC_PACKED_INT pointing at *vnp.
 *
 * On an allocation failure an error is posted and slp is returned as it
 * stands at that point.  spp is not used by this routine.
 */
static SeqLocPtr slpDust (SeqPortPtr spp, SeqLocPtr slp, SeqIdPtr id,
			  ValNodePtr PNTR vnp, DREGION PNTR reg,
			  Int4 nreg, Int4 loopDustMax)
{
	SeqIntPtr	interval;
	Int4		idx;
	Boolean		single_int;

	/* nothing dusted: hand the location back untouched */
	if (nreg == 0)
		return slp;

	/* loopDustMax == 1 forces PACKED_INT IN - PACKED_INT OUT as needed */
	single_int = (Boolean) (nreg == 1 && loopDustMax == 1);

	if (slp == NULL)
	{
		slp = ValNodeNew (NULL);
		if (slp == NULL)
		{
			ErrPostEx (SEV_ERROR, 6, 1,
				   "val node new failed");
			ErrShow ();
			return slp;
		}
	}

	slp->choice = single_int ? SEQLOC_INT : SEQLOC_PACKED_INT;

	for (idx = 0; idx < nreg; idx++, reg = reg->next)
	{
		interval = SeqIntNew ();
		if (interval == NULL)
		{
			ErrPostEx (SEV_FATAL, 6, 2,
				   "memory allocation error");
			ErrShow ();
			return slp;
		}
		interval->id = SeqIdDup (id);
		interval->from = reg->from;
		interval->to = reg->to;

		if (single_int)
		{
			/* single_int implies nreg == 1, so this runs once */
			slp->data.ptrvalue = (Pointer) interval;
		}
		else
		{
			ValNodeAddPointer (vnp, SEQLOC_INT, interval);
		}
	}

	if (!single_int)
		slp->data.ptrvalue = *vnp;

	return slp;
}