示例#1
0
/*****************************************************************
*
*	MapAnchorToLoc(annot, anchor_pos, anchor_bsp, other_bsp)
*	Translate anchor_pos, a position on anchor_bsp, into the
*	matching position on other_bsp. This is the inverse of
*	MapLocToAnchor.
*
*	annot:      chain of Seq-annots to search; only entries with
*	            type 2 whose GetEquivAlignType() is 1 are used
*	anchor_pos: 0-based position, must lie inside anchor_bsp
*	anchor_bsp: Bioseq the position is given on
*	other_bsp:  Bioseq the position is mapped to
*
*	Returns the mapped position from the first annot that yields
*	one, anchor_pos itself when both Bioseqs are the same object,
*	and -1 on bad arguments or when no annot produces a mapping.
*
******************************************************************/
Int4 MapAnchorToLoc(SeqAnnotPtr annot, Int4 anchor_pos, BioseqPtr anchor_bsp, BioseqPtr other_bsp)
{
    SeqAnnotPtr cur;
    SeqIdPtr from_id, to_id;
    Int4Ptr anchor_coords, other_coords;
    Uint2 coord_count;
    Int2 equiv_type;
    Int4 mapped = -1;

    if(annot == NULL || anchor_bsp == NULL || other_bsp == NULL)
        return -1;
    if(anchor_pos < 0 || anchor_pos > anchor_bsp->length-1)
        return -1;

    /* identical Bioseq: nothing to translate */
    if(anchor_bsp == other_bsp)
        return anchor_pos;

    /* prefer the id picked by SeqIdFindBest, fall back to the head of the id chain */
    from_id = SeqIdFindBest(anchor_bsp->id, SEQID_GI);
    if(from_id == NULL)
        from_id = anchor_bsp->id;

    to_id = SeqIdFindBest(other_bsp->id, SEQID_GI);
    if(to_id == NULL)
        to_id = other_bsp->id;

    for(cur = annot; cur != NULL; cur = cur->next)
    {
        if(cur->type != 2)
            continue;

        equiv_type = GetEquivAlignType(cur);
        if(equiv_type != 1)	/*only the "consistent" kind is usable*/
            continue;

        if(!get_anchor_coordinates((SeqAlignPtr) cur->data, from_id, to_id,
                                   &anchor_coords, &other_coords, &coord_count))
            continue;

        /* fewer than 10 coordinate entries are not used for mapping */
        if(coord_count >= 10)
            mapped = find_this_position_by_anchor (other_coords, anchor_coords, coord_count,
                                                   anchor_pos, other_bsp->length);
        MemFree(anchor_coords);
        MemFree(other_coords);
        if(mapped != -1)
            return mapped;
    }

    return mapped;
}
示例#2
0
/* Build a short, printable identifier for a Seq-id.
 *
 * sip:            Seq-id (chain) to describe; NULL yields *buffer_ptr == NULL.
 * buffer_ptr:     receives a heap-allocated string, or NULL when nothing
 *                 could be produced (including allocation failure). The
 *                 caller owns the returned string.
 * ncbi_gi:        prefer a numeric gi when one is available.
 * accession_only: print only the accession.version form.
 * search_for_id:  when FALSE the Seq-id itself is not consulted and only
 *                 the Bioseq title fallback is tried.
 *
 * Seq-ids of type "general" with db "BL_ORD_ID" are skipped; for those, and
 * whenever no id text was obtained, the first whitespace-delimited token of
 * the Bioseq title is returned instead.
 */
void 
Blast_SeqIdGetDefLine(SeqId* sip, char** buffer_ptr, Boolean ncbi_gi, 
                      Boolean accession_only, Boolean search_for_id)
{
   char* seqid_buffer = NULL;
   Int4 gi = 0;
   Boolean numeric_id_type = FALSE;

   *buffer_ptr = NULL;

   if (sip == NULL)
      return;

   /* Check for ad hoc ID's generated by formatdb if the user does not provide 
      any. */
   if (search_for_id && (sip->choice != SEQID_GENERAL ||
       StringCmp(((Dbtag*)sip->data.ptrvalue)->db, "BL_ORD_ID")))  
   {
      if ((!accession_only && !ncbi_gi) || sip->choice == SEQID_LOCAL) {
         seqid_buffer = (char*) malloc(BUFFER_LENGTH + 1);
         if (seqid_buffer == NULL)
            return;
         SeqIdWrite(sip, seqid_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else if (accession_only) {
         seqid_buffer = (char*) malloc(BUFFER_LENGTH + 1);
         if (seqid_buffer == NULL)
            return;
         SeqIdWrite(SeqIdFindBestAccession(sip), seqid_buffer, 
                    PRINTID_TEXTID_ACC_VER, BUFFER_LENGTH);
      } else {
         /* Only reachable with ncbi_gi set: the first test already took
            every case where both accession_only and ncbi_gi are FALSE. */
         numeric_id_type = 
            GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI), 
                                  &gi, &seqid_buffer);
      }
   }

   if (numeric_id_type && gi > 0) {
      /* NOTE(review): any text GetAccessionFromSeqId stored in seqid_buffer
         is overwritten here, as in the original; confirm it is NULL for
         numeric ids. */
      seqid_buffer = (char*) malloc(16);
      if (seqid_buffer == NULL)
         return;
      sprintf(seqid_buffer, "%ld", (long) gi);
   }   
   if (!seqid_buffer) {
      /* If it's still NULL make a last ditch effort to get info. */
      char* title = NULL;
      Bioseq* bsp = BioseqLockById(sip);
      if (bsp) {
         const char* bsp_title = BioseqGetTitle(bsp);
         title = strdup(bsp_title != NULL ? bsp_title
                                          : "No definition line found");
      }
      BioseqUnlock(bsp);
      
      if (title) { /* Use first token as id. */
         char* rest = NULL;
         char* token = StringTokMT(title, " \t\n\r", &rest);
         /* The token may start past leading whitespace, so hand back a
            fresh copy the caller can free, and release the title itself. */
         if (token != NULL)
            seqid_buffer = strdup(token);
         free(title);
      }
   }
   *buffer_ptr = seqid_buffer;

}
示例#3
0
/**********************************************************************
*
*	MapLocToAnchor(annot, slp, anchor_bsp)
*	Map slp onto the anchor Bioseq and return the result as a new
*	Seq-loc (plus strand) that refers to the anchor's Seq-id.
*
*	annot:      chain of Seq-annots; the first one with type 2 whose
*	            GetEquivAlignType() is 1 decides the outcome
*	slp:        location to map; must be a SEQLOC_PNT or SEQLOC_INT
*	anchor_bsp: the anchor Bioseq
*
*	Returns NULL on bad arguments, when no usable annot exists, or
*	when get_anchor_coordinates() fails on the first usable annot.
*
************************************************************************/
SeqLocPtr MapLocToAnchor(SeqAnnotPtr annot, SeqLocPtr slp, BioseqPtr anchor_bsp)
{
    SeqAnnotPtr cur;
    SeqIdPtr target_id;
    SeqLocPtr mapped;
    Int4Ptr anchor_coords, loc_coords;
    Uint2 coord_count;
    Int2 equiv_type;
    Int4 from, to;

    if(annot == NULL || slp == NULL || anchor_bsp == NULL)
        return NULL;
    if(!(slp->choice == SEQLOC_PNT || slp->choice == SEQLOC_INT))
        return NULL;

    target_id = SeqIdFindBest(anchor_bsp->id, SEQID_GI);
    if(target_id == NULL)
        target_id = anchor_bsp->id;

    for(cur = annot; cur != NULL; cur = cur->next)
    {
        if(cur->type != 2)
            continue;

        equiv_type = GetEquivAlignType(cur);
        if(equiv_type != 1)	/*only the "consistent" kind is usable*/
            continue;

        /* the first usable annot is final: failure here ends the search */
        if(!get_anchor_coordinates((SeqAlignPtr) cur->data, target_id, SeqLocId(slp),
                                   &anchor_coords, &loc_coords, &coord_count))
            return NULL;

        if(slp->choice == SEQLOC_INT)
        {
            from = find_this_position_by_anchor (anchor_coords, loc_coords, coord_count,
                                                 SeqLocStart(slp), anchor_bsp->length);
            /* a one-residue interval maps both ends to the same place */
            if(SeqLocStart(slp) != SeqLocStop(slp))
                to = find_this_position_by_anchor (anchor_coords, loc_coords, coord_count,
                                                   SeqLocStop(slp), anchor_bsp->length);
            else
                to = from;
            mapped = SeqLocIntNew(from, to, Seq_strand_plus, target_id);
        }
        else
        {
            from = find_this_position_by_anchor (anchor_coords, loc_coords, coord_count,
                                                 SeqLocStart(slp), anchor_bsp->length);
            mapped = SeqLocPntNew(from, Seq_strand_plus, target_id, FALSE);
        }

        MemFree(anchor_coords);
        MemFree(loc_coords);
        return mapped;
    }

    return NULL;
}
示例#4
0
/* Per-Bioseq callback: for a protein Bioseq, ask SuggestCodingRegion for a
   coding region on cfp->nucbsp and write it to cfp->ofp. Output is the
   protein id on one line, then one "start<TAB>stop" line (1-based) per
   interval, prefixed with '<' / '>' where the interval is 5' / 3' partial.
   userdata must be a CSpeedFlagPtr with ofp and nucbsp set. */
static void DoSuggestIntervals (
  BioseqPtr bsp,
  Pointer userdata
)

{
  SeqAnnotPtr    annot;
  SeqLocPtr      cds_loc, piece;
  SeqFeatPtr     feat;
  CSpeedFlagPtr  flags;
  Int4           from, to;
  Char           idbuf [64];
  Boolean        open5, open3;
  SeqIdPtr       best;

  if (bsp == NULL || ! ISA_aa (bsp->mol)) return;

  flags = (CSpeedFlagPtr) userdata;
  if (flags == NULL || flags->ofp == NULL || flags->nucbsp == NULL) return;

  best = SeqIdFindBest (bsp->id, 0);
  if (best == NULL) return;
  SeqIdWrite (best, idbuf, PRINTID_FASTA_SHORT, sizeof (idbuf) - 1);

  annot = SuggestCodingRegion (flags->nucbsp, bsp, flags->genCode);
  if (annot == NULL) return;

  /* only an annot of type 1 is read as a feature */
  feat = NULL;
  if (annot->type == 1) {
    feat = (SeqFeatPtr) annot->data;
  }

  cds_loc = NULL;
  if (feat != NULL && feat->data.choice == SEQFEAT_CDREGION) {
    cds_loc = feat->location;
  }

  if (cds_loc != NULL) {
    fprintf (flags->ofp, "%s\n", idbuf);
    for (piece = SeqLocFindNext (cds_loc, NULL);
         piece != NULL;
         piece = SeqLocFindNext (cds_loc, piece)) {
      from = GetOffsetInBioseq (piece, flags->nucbsp, SEQLOC_START) + 1;
      to = GetOffsetInBioseq (piece, flags->nucbsp, SEQLOC_STOP) + 1;
      CheckSeqLocForPartial (piece, &open5, &open3);
      fprintf (flags->ofp, "%s%ld\t%s%ld\n",
               open5 ? "<" : "", (long) from,
               open3 ? ">" : "", (long) to);
    }
  }

  SeqAnnotFree (annot);
}
示例#5
0
/* Build one row of values per source descriptor found on bsp.
   Each row is a ValNode list holding: the accession-style id text, the gi
   text (only when want_gi is set), then whatever CollectBioSourceValues
   returns for field_list. The rows are returned as a ValNode list whose
   elements point at the row lists; NULL when bsp is NULL or has no source
   descriptor. */
static ValNodePtr CollectBioseqLineValues (BioseqPtr bsp, ValNodePtr field_list, Boolean want_gi)
{
  SeqMgrDescContext desc_ctx;
  SeqDescrPtr       src_desc;
  SeqIdPtr          cur_id, acc_id = NULL, gi_id = NULL;
  Char              acc_text[255], gi_text[255];
  ValNodePtr        rows = NULL, row;

  if (bsp == NULL) {
    return NULL;
  }

  /* GenBank always wins; EMBL/SwissProt/DDBJ/PIR only fill an empty slot */
  for (cur_id = bsp->id; cur_id != NULL; cur_id = cur_id->next) {
    if (cur_id->choice == SEQID_GENBANK) {
      acc_id = cur_id;
      continue;
    }
    if (acc_id == NULL
        && (cur_id->choice == SEQID_EMBL
            || cur_id->choice == SEQID_SWISSPROT
            || cur_id->choice == SEQID_DDBJ
            || cur_id->choice == SEQID_PIR)) {
      acc_id = cur_id;
      continue;
    }
    if (cur_id->choice == SEQID_GI) {
      gi_id = cur_id;
    }
  }

  if (acc_id != NULL || gi_id != NULL) {
    if (acc_id != NULL) {
      SeqIdWrite (acc_id, acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
    } else {
      acc_text[0] = 0;
    }
    if (gi_id != NULL) {
      SeqIdWrite (gi_id, gi_text, PRINTID_REPORT, sizeof (gi_text) - 1);
    } else {
      gi_text[0] = 0;
    }
  } else {
    /* neither kind present: use whatever SeqIdFindBest picks */
    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
    gi_text[0] = 0;
  }

  src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &desc_ctx);
  while (src_desc != NULL) {
    row = NULL;
    ValNodeAddPointer (&row, 0, StringSave (acc_text));
    if (want_gi) {
      ValNodeAddPointer (&row, 0, StringSave (gi_text));
    }
    ValNodeLink (&row, CollectBioSourceValues (src_desc->data.ptrvalue, field_list));
    ValNodeAddPointer (&rows, 0, row);
    src_desc = SeqMgrGetNextDescriptor (bsp, src_desc, Seq_descr_source, &desc_ctx);
  }
  return rows;
}
示例#6
0
/* Return a newly allocated, short FASTA-style description of sip for use in
   error messages. The id chosen by SeqIdFindBest (sip, SEQID_GI) is written
   into a 32-byte buffer (SeqIdWrite is given a limit of 31 characters).
   The caller owns the result; NULL is returned when allocation fails. */
extern CharPtr ErrorDescString (SeqIdPtr sip)
{
  SeqIdPtr    bestid;
  CharPtr     errbuf;

  bestid = SeqIdFindBest(sip, SEQID_GI);

  errbuf = (CharPtr) MemNew ((size_t) (sizeof (Char) * 32));
  if (errbuf == NULL) {
    /* do not hand a NULL buffer to SeqIdWrite */
    return NULL;
  }
  SeqIdWrite (bestid, errbuf, PRINTID_FASTA_LONG, 32-1);

  return errbuf;
}
示例#7
0
/* Write one text line per source descriptor found on bsp to fp.
   Each line starts with the accession-style id (followed by a tab and the
   gi text when want_gi is set), continues with whatever PrintBioSourceLine
   emits for field_list, and ends with a newline. Does nothing when fp or
   bsp is NULL. */
static void PrintBioseqLines (FILE *fp, BioseqPtr bsp, ValNodePtr field_list, Boolean want_gi)
{
  SeqMgrDescContext desc_ctx;
  SeqDescrPtr       src_desc;
  SeqIdPtr          cur_id, acc_id = NULL, gi_id = NULL;
  Char              acc_text[255], gi_text[255];

  if (fp == NULL || bsp == NULL) {
    return;
  }

  /* GenBank always wins; EMBL/SwissProt/DDBJ/PIR only fill an empty slot */
  for (cur_id = bsp->id; cur_id != NULL; cur_id = cur_id->next) {
    if (cur_id->choice == SEQID_GENBANK) {
      acc_id = cur_id;
      continue;
    }
    if (acc_id == NULL
        && (cur_id->choice == SEQID_EMBL
            || cur_id->choice == SEQID_SWISSPROT
            || cur_id->choice == SEQID_DDBJ
            || cur_id->choice == SEQID_PIR)) {
      acc_id = cur_id;
      continue;
    }
    if (cur_id->choice == SEQID_GI) {
      gi_id = cur_id;
    }
  }

  if (acc_id != NULL || gi_id != NULL) {
    if (acc_id != NULL) {
      SeqIdWrite (acc_id, acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
    } else {
      acc_text[0] = 0;
    }
    if (gi_id != NULL) {
      SeqIdWrite (gi_id, gi_text, PRINTID_REPORT, sizeof (gi_text) - 1);
    } else {
      gi_text[0] = 0;
    }
  } else {
    /* neither kind present: use whatever SeqIdFindBest picks */
    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), acc_text, PRINTID_REPORT, sizeof (acc_text) - 1);
    gi_text[0] = 0;
  }

  src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &desc_ctx);
  while (src_desc != NULL) {
    if (want_gi) {
      fprintf (fp, "%s\t%s", acc_text, gi_text);
    } else {
      fprintf (fp, "%s", acc_text);
    }
    PrintBioSourceLine (fp, src_desc->data.ptrvalue, field_list);
    fprintf (fp, "\n");
    src_desc = SeqMgrGetNextDescriptor (bsp, src_desc, Seq_descr_source, &desc_ctx);
  }
}
示例#8
0
/*******************************************************************************

  Function : DDV_DrawSequenceName()
  
  Purpose : draw the name of the sequence (left column of the DDV panel)
  
  Parameters : GrData; graphical data (font size, etc)
				pgp; ParaG being drawn (supplies the Seq-id and ScaleStyle)
				top, left; coord to start the draw
				cur_row; row being drawn
				CurEditRow; row drawn in magenta when equal to cur_row
				CurMasterRow; row underlined in blue when equal to cur_row
  
  Return value : none

*******************************************************************************/
static void  DDV_DrawSequenceName(UnDViewerGraphDataPtr GrData,ParaGPtr pgp,
	Int2 top,Int2 left,Int4 cur_row,Int4 CurEditRow,Int4 CurMasterRow)
{
SeqIdPtr  sip = NULL;
RecT      rc;
Int2      x,y,decal=1,size;/*text position/size*/
Char      szAccess[21];
BioseqPtr bsp;

	if (GrData==NULL || pgp==NULL) return;

	/*never measure or paint an unset buffer*/
	szAccess[0]='\0';

	/*get a name; the id belongs to the Bioseq, so format it while the
	  Bioseq is still locked instead of after BioseqUnlock()*/
	bsp = BioseqLockById(pgp->sip);
	if (bsp) {
		sip = SeqIdFindBestAccession(bsp->id);
		if (sip)
			SeqIdWrite(sip, szAccess,PRINTID_TEXTID_ACCESSION, 20);
		BioseqUnlock(bsp);
	}
	if (!sip) {
		/*no Bioseq or no accession: fall back to the ParaG's own id*/
		sip = SeqIdFindBest(pgp->sip, 0);
		SeqIdWrite(sip, szAccess,PRINTID_TEXTID_ACCESSION, 20);
	}

	/*compute position*/
	if (pgp->ScaleStyle==SCALE_POS_TOP) decal++;
	
	/*draw name, right-aligned against 'left'*/
	size=StringWidth(szAccess);
	x=left/*-GrData->udv_scale.cxLeftScale*/-size;
	y=top+decal*GrData->udv_font.LineHeight;
	MoveTo(x,y);
	if (cur_row==CurEditRow){
		Magenta();
	}
	PaintString (szAccess);
	if (cur_row==CurMasterRow){
		/*underline the master row*/
		Blue();
		MoveTo(x,y);
		LineTo(x+size,y);
	}
	/*draw a little box (for selection a full sequence)*/
	left+=GrData->udv_font.cxChar;
	top+=GrData->udv_font.cxChar/2;
	LoadRect(&rc,left,top,left+GrData->udv_font.cxChar,
		top+GrData->udv_font.cxChar);	
	Blue();
	PaintOval(&rc);
	Black();
}
示例#9
0
/* Find the longest ORF recorded by LookForOrfs over the six frames of bsp
   and add a CDS feature for it, cross-referenced to a protein named
   "unknown". Frame slots 0-2 produce a plus-strand location, slots 3-5 a
   minus-strand one. genCode selects the translation table; altstart is
   passed through to the ORF scanner in OrfData. */
static void AnnotateBestOrf (
  BioseqPtr bsp,
  Int2 genCode,
  Boolean altstart
  
)

{
  Int2            frame, longest, winner, frame_shift;
  OrfData         orfs;
  TransTablePtr   table;
  SeqInt          interval;
  ValNode         loc;
  SeqFeatPtr      cds;
  CdRegionPtr     cdregion;
  ProtRefPtr      prot;
  SeqFeatXrefPtr  link;

  if (bsp == NULL) return;

  for (frame = 0; frame < 6; frame++) {
    orfs.curlen [frame] = INT4_MIN;
    orfs.bestlen [frame] = 0;
    orfs.currstart [frame] = 0;
    orfs.beststart [frame] = 0;
    orfs.sublen [frame] = INT4_MIN;
    orfs.inorf [frame] = FALSE;
  }
  orfs.altstart = altstart;

  /* use simultaneous 6-frame translation finite state machine */

  table = TransTableNew (genCode);
  if (table != NULL) {
    TransTableProcessBioseq (table, LookForOrfs, (Pointer) &orfs, bsp);
  }
  TransTableFree (table);

  /* first frame with the strictly greatest best length wins */

  longest = -1;
  winner = -1;
  for (frame = 0; frame < 6; frame++) {
    if (orfs.bestlen [frame] > longest) {
      longest = orfs.bestlen [frame];
      winner = frame;
    }
  }
  if (winner == -1) return;

  /* make feature location on largest ORF */

  frame_shift = (winner < 3) ? winner : (Int2) (winner - 3);

  MemSet ((Pointer) &interval, 0, sizeof (SeqInt));
  interval.from = orfs.beststart [winner] + frame_shift;
  interval.to = interval.from + (orfs.bestlen [winner]) * 3 + 2;
  interval.id = SeqIdFindBest (bsp->id, 0);
  interval.strand = (winner < 3) ? Seq_strand_plus : Seq_strand_minus;

  loc.choice = SEQLOC_INT;
  loc.extended = 0;
  loc.data.ptrvalue = (Pointer) &interval;
  loc.next = NULL;

  /* make CDS feature with unknown product */

  cds = CreateNewFeatureOnBioseq (bsp, SEQFEAT_CDREGION, &loc);
  if (cds == NULL) return;

  cdregion = CreateNewCdRgn (1, FALSE, genCode);
  if (cdregion == NULL) return;
  cdregion->frame = 1;
  cds->data.value.ptrvalue = (Pointer) cdregion;

  prot = ProtRefNew ();
  if (prot == NULL) return;

  link = SeqFeatXrefNew ();
  if (link == NULL) return;

  /* push the protein xref onto the front of the feature's xref list */
  link->data.choice = SEQFEAT_PROT;
  link->data.value.ptrvalue = (Pointer) prot;
  link->next = cds->xref;
  cds->xref = link;
  prot->name = ValNodeCopyStr (NULL, 0, "unknown");
}
示例#10
0
/* Append the id line and, unless idonly is set, the residues of a raw or
 * const Bioseq to caller-owned, growable C strings.
 *
 * bsp:      Bioseq to read; anything other than Seq_repr_raw/Seq_repr_const
 *           is ignored.
 * idonly:   when set, only "<n>)  <id> <title>\n" is appended to *seqid.
 * whichSeq: ordinal of the sequence wanted, or 0 for every sequence.
 * seqnum:   running ordinal, incremented for every raw/const Bioseq seen.
 * seq:      in/out heap string of residues (may be NULL on entry).
 * seqid:    in/out heap string of ids; parts are joined with "; ".
 * seqlen:   receives the number of residues now held in *seq.
 *
 * On allocation failure the strings already held by the caller stay valid:
 * *seqid is left untouched, and *seq keeps the (NUL-terminated) residues
 * collected so far.
 */
void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly,
              short whichSeq, short *seqnum,
              char **seq, char **seqid, long *seqlen)
{
  SeqPortPtr spp;
  SeqIdPtr bestid;
  Uint1 repr, code, residue;
  CharPtr tmp, title;
  long  outlen, outmax;
  char  localid[256], *sp, *grown;
  size_t used, room;

  /* !!! this may be called several times for a single sequence
    because SeqEntryExplore looks for parts and joins them...
    assume seq, seqid, seqlen may contain data (or NULL)
  */
  if (bsp == NULL) return;
  repr = Bioseq_repr(bsp);
  if (!(repr == Seq_repr_raw || repr == Seq_repr_const)) return;

  (*seqnum)++;
  if (!(whichSeq == *seqnum || whichSeq == 0)) return;

  bestid = SeqIdFindBest(bsp->id, (Uint1) 0);
  title = BioseqGetTitle(bsp);
  if (idonly) {
    sprintf(localid, " %d)  ", *seqnum);
    tmp= localid + strlen(localid)-1;
    }
  else {
    strcpy(localid," ");
    tmp= localid;
    }
  tmp = SeqIdPrint(bestid, tmp, PRINTID_FASTA_SHORT);
  tmp = StringMove(tmp, " ");

  /* Copy at most 200 title characters, never past the end of localid,
     and always leave localid NUL-terminated. */
  used = (size_t) (tmp - localid);
  if (used < sizeof(localid) - 1) {
    room = sizeof(localid) - 1 - used;
    if (room > 200) room = 200;
    StringNCpy(tmp, title, room);
    tmp[room] = '\0';
    }
  else {
    localid[sizeof(localid) - 1] = '\0';
    }
/* fprintf(stderr,"BioseqRawToRaw: localid='%s'\n",localid); */

          /* < seqid is variable sized */
  outmax= strlen(localid) + 3;
  if (*seqid==NULL) {
    *seqid= (char*) malloc(outmax);
    if (*seqid==NULL) return;
    strcpy(*seqid, localid);
    }
  else {
    outmax += strlen(*seqid) + 2;
    /* keep the old block reachable if realloc fails */
    grown= (char*) realloc( *seqid, outmax);
    if (grown==NULL) return;
    *seqid= grown;
    if (!idonly) strcat(*seqid, "; ");
    strcat(*seqid, localid);
    }

  if (idonly) {
    strcat(*seqid,"\n");
    return;
    }

  if (ISA_na(bsp->mol)) code = Seq_code_iupacna;
  else code = Seq_code_iupacaa;
  spp = SeqPortNew(bsp, 0, -1, 0, code);
  if (spp == NULL) return;
  SeqPortSeek(spp, 0, SEEK_SET);

  sp= *seq;
  if (sp==NULL) {  /* this is always true now !? */
    outlen= 0;
    outmax= 500;
    grown= (char*) malloc(outmax);
    }
  else {
    outlen= strlen(sp);
    outmax= outlen + 500;
    grown= (char*) realloc( sp, outmax);
    }
  if (grown==NULL) {
    /* *seq still owns its original block (if any) */
    SeqPortFree(spp);
    return;
    }
  sp= grown;

  while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
    /* always keep one spare byte for the terminating NUL */
    if (outlen + 1 >= outmax) {
      grown= (char*) realloc(sp, outlen + 500);
      if (grown==NULL) break;   /* keep what was read so far */
      sp= grown;
      outmax= outlen + 500;
      }
    sp[outlen++] = residue;
    }

  /* shrink to fit; if that fails the larger block is still fine */
  grown= (char*) realloc(sp, outlen+1);
  if (grown!=NULL) sp= grown;
  sp[outlen]= '\0';
  *seq= sp;
  *seqlen= outlen;
  SeqPortFree(spp);
  return;
}
示例#11
0
文件: mgblast.c 项目: gpertea/gsrc
/* Hit-list callback: print one line per HSP of the current hit list to
 * search->output, in the form
 *    '<subject>'=='<strand><query>' (s_start q_start s_end q_end) score
 * with 1-based coordinates. ptr is the BlastSearchBlkPtr. Always returns 0.
 *
 * The subject name is the full FASTA id, a gi/accession, a numeric THC/TI
 * tag, or the first token of the database description, depending on the
 * Seq-id type and the full_seqids option. HSPs above the e-value cutoff or
 * without a query id are skipped. The HSP's query/subject offsets and ends
 * are rewritten in place while printing.
 */
static int LIBCALLBACK
MegaBlastPrintEndpoints(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   /* subject_descr stays NULL when there is no database (two-sequence
      case), so it can be tested and freed safely. */
   CharPtr subject_descr = NULL;
   CharPtr descr_rest = NULL;
   SeqIdPtr sip = NULL, query_id;
   CharPtr query_buffer, title, title_rest;
   CharPtr subject_buffer = NULL;
   /* TRUE when subject_buffer is its own allocation rather than a pointer
      into subject_descr */
   Boolean subject_buffer_owned = FALSE;
   Int4 query_length, q_start, q_end, q_shift=0, s_shift=0;
   Int4 subject_end;
   Int4 hsp_index;
   Boolean numeric_sip_type = FALSE;
   BLAST_HSPPtr hsp; 
   Int2 context;
   Char context_sign;
   Int4 subject_gi = 0, score;
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL || search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   if (search->rdfp)
      readdb_get_descriptor(search->rdfp, search->subject_id, &sip,
                            &subject_descr);
   else 
      sip = SeqIdSetDup(search->subject_info->sip);

   if (sip == NULL) {
      /* nothing to name the subject with */
      MemFree(subject_descr);
      return 0;
   }
   
   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) {
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         if (subject_buffer != NULL)
            SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type = GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI), 
                                                  &subject_gi, &subject_buffer);
      subject_buffer_owned = TRUE;
   } else {
      DbtagPtr db_tag = (DbtagPtr) sip->data.ptrvalue;
      if (db_tag->db && 
          (!StringCmp(db_tag->db, "THC") || 
           !StringICmp(db_tag->db, "TI")) && 
          db_tag->tag->id != 0) {
         subject_buffer = (CharPtr) Malloc(16);
         if (subject_buffer != NULL)
            sprintf(subject_buffer, "%ld", (long) db_tag->tag->id);
         subject_buffer_owned = TRUE;
      } else if (subject_descr != NULL) {
         /* The token points into subject_descr (possibly past leading
            blanks); subject_descr itself remains the pointer to free. */
         subject_buffer = StringTokMT(subject_descr, " \t", &descr_rest);
      }
   }

   search->current_hitlist->hspcnt_max = search->current_hitlist->hspcnt;

   /* Only for the two sequences case, get offset shift if subject 
      is a subsequence */
   if (!search->rdfp && search->query_slp->next) {
      s_shift = SeqLocStart(search->query_slp->next);
      subject_end = SeqLocStop(search->query_slp->next);
   } else {
      s_shift = 0;
      subject_end = 
         readdb_get_sequence_length(search->rdfp, search->subject_id);
   }
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);

   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 && 
                        hsp->evalue > search->pbp->cutoff_e)) 
         continue;
      
      /* Correct query context is already found in BlastGetNonSumStatsEvalue */
      context = hsp->context; 
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue; 
      hsp->context = context & 1;      
      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;
      hsp->subject.end = hsp->subject.offset + hsp->subject.length;

      if (hsp->context) {
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset = 
            hsp->query.end - hsp->query.length + 1;
         context_sign = '-'; 
      } else {
         hsp->query.end = (++hsp->query.offset) + hsp->query.length - 1;
         if (hsp->query.end > query_length) {
            hsp->subject.end -= (hsp->query.end - query_length);
            hsp->query.end = query_length;
         }
         context_sign = '+';  
      }
      
      if (hsp->subject.end > subject_end) {
         hsp->query.end -= (hsp->subject.end - subject_end);
         hsp->subject.end = subject_end;
      }
      hsp->subject.offset++;
      
      query_buffer = NULL;
      if (query_id->choice == SEQID_LOCAL && 
          search->pbp->mb_params->full_seqids) {
         BioseqPtr query_bsp = BioseqLockById(query_id);
         if (query_bsp != NULL) {
            title = StringSave(BioseqGetTitle(query_bsp));
            if (title) {
               /* copy the first token so that the buffer freed below is
                  always the start of an allocation, then drop the title */
               CharPtr token;
               title_rest = NULL;
               token = StringTokMT(title, " ", &title_rest);
               if (token != NULL)
                  query_buffer = StringSave(token);
               MemFree(title);
            } else {
               Int4 query_gi;
               GetAccessionFromSeqId(query_bsp->id, &query_gi,
                                     &query_buffer);
            }
            BioseqUnlock(query_bsp);
         }
      }
      if (query_buffer == NULL) {
         /* regular case, and fallback when no title/accession was found */
         query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         if (query_buffer != NULL) {
            if (!search->pbp->mb_params->full_seqids)
               SeqIdWrite(query_id, query_buffer, PRINTID_TEXTID_ACCESSION,
                          BUFFER_LENGTH);
            else 
               SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                          BUFFER_LENGTH);
         }
      }

      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                  search->sbp->reward / 2 - hsp->score) / 
            (search->sbp->reward - search->sbp->penalty);
      else 
         score = hsp->score;

      if (context_sign == '+') {
         q_start = hsp->query.offset;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset;
      }

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }

      hsp->subject.offset += s_shift;
      hsp->subject.end += s_shift;

      if (numeric_sip_type)
         fprintf(fp, "'%ld'=='%c%s' (%d %d %d %d) %d\n", (long) subject_gi, 
                 context_sign, query_buffer ? query_buffer : "", 
                 hsp->subject.offset, q_start, 
                 hsp->subject.end, q_end, score);
      else 
         fprintf(fp, "'%s'=='%c%s' (%d %d %d %d) %d\n", 
                 subject_buffer ? subject_buffer : "", context_sign, 
                 query_buffer ? query_buffer : "", 
                 hsp->subject.offset, q_start, 
                 hsp->subject.end, q_end, score);
      MemFree(query_buffer);
   }
   if (subject_buffer_owned && !numeric_sip_type)
      MemFree(subject_buffer);
   MemFree(subject_descr);
   sip = SeqIdSetFree(sip);
   return 0;
}
示例#12
0
文件: mgblast.c 项目: gpertea/gsrc
/* Hit-list callback: for every HSP of the current hit list, build a block
 * of the form
 *    #'><subject>'=='<strand><query>' (s_start q_start s_end q_end) score
 *    a {
 *      s <score>
 *      b <s_start> <q_start>
 *      e <s_end> <q_end>
 *      l <s_from> <q_from> <s_to> <q_to> (<percent identity>)   (per segment)
 *    }
 * and print it to search->output when the overall identity reaches
 * mb_params->perc_identity. ptr is the BlastSearchBlkPtr. Returns 1, or 0
 * when the hit list is empty.
 *
 * HSPs above the e-value cutoff or without a query id are skipped. If the
 * text buffer cannot be allocated or grown, output stops and the function
 * cleans up and returns.
 */
static int LIBCALLBACK
MegaBlastPrintSegments(VoidPtr ptr)
{
   BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
   ReadDBFILEPtr rdfp = search->rdfp;
   BLAST_HSPPtr hsp; 
   Int4 i, subject_gi = 0;
   Int2 context;
   CharPtr query_buffer, title, title_rest;
   SeqIdPtr sip = NULL, query_id; 
   Int4 hsp_index, score;
   Uint1Ptr query_seq, subject_seq = NULL;
   FloatHi perc_ident;
   Char strand;
   GapXEditScriptPtr esp;
   Int4 q_start, q_end, s_start, s_end, query_length, numseg;
   Int4 q_off, num_ident, align_length, total_ident, q_shift=0, s_shift=0;
   Int4Ptr length, start;
   Uint1Ptr strands;
   /* subject_descr stays NULL when there is no database (two-sequence
      case), so it can be tested and freed safely. */
   CharPtr subject_descr = NULL, subject_buffer = NULL, buffer = NULL;
   CharPtr descr_rest = NULL;
   CharPtr grown;
   const char *subject_name, *query_name;
   Char tmp_buffer[BUFFER_LENGTH];
   Int4 buffer_size, max_buffer_size = LARGE_BUFFER_LENGTH;
   Boolean numeric_sip_type = FALSE;
   /* TRUE when subject_buffer is its own allocation rather than a pointer
      into subject_descr */
   Boolean subject_buffer_owned = FALSE;
   Boolean out_of_memory = FALSE;
   FILE *fp = (FILE *) search->output;

   if (search->current_hitlist == NULL || search->current_hitlist->hspcnt <= 0) {
      search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
      return 0;
   }

   subject_seq = search->subject->sequence_start + 1;

   if (rdfp)
      readdb_get_descriptor(rdfp, search->subject_id, &sip, &subject_descr);
   else 
      sip = SeqIdSetDup(search->subject_info->sip);

   if (sip == NULL) {
      /* nothing to name the subject with */
      MemFree(subject_descr);
      return 1;
   }

   if (sip->choice != SEQID_GENERAL ||
       StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
      if (search->pbp->mb_params->full_seqids) { 
         subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         if (subject_buffer != NULL)
            SeqIdWrite(sip, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
      } else
         numeric_sip_type = GetAccessionFromSeqId(SeqIdFindBest(sip, SEQID_GI), 
                                                  &subject_gi, &subject_buffer);
      subject_buffer_owned = TRUE;
   } else if (subject_descr != NULL) {
      /* The token points into subject_descr (possibly past leading blanks);
         subject_descr itself remains the pointer to free. */
      subject_buffer = StringTokMT(subject_descr, " \t", &descr_rest);
   }
   subject_name = subject_buffer ? subject_buffer : "";

   buffer = (CharPtr) Malloc(LARGE_BUFFER_LENGTH);
   if (buffer == NULL)
      goto cleanup;

   /* Only for the two sequences case, get offset shift if subject 
      is a subsequence */
   if (!rdfp && search->query_slp->next)
      s_shift = SeqLocStart(search->query_slp->next);
   /* Get offset shift if query is a subsequence */
   q_shift = SeqLocStart(search->query_slp);
   
   for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
      hsp = search->current_hitlist->hsp_array[hsp_index];
      if (hsp==NULL || (search->pbp->cutoff_e > 0 && 
                        hsp->evalue > search->pbp->cutoff_e)) {
         continue;
      }
      context = hsp->context;
      query_id = search->qid_array[context/2];

      if (query_id == NULL) /* Bad hsp, something wrong */
         continue; 
      hsp->context = context & 1;

      if (search->pbp->gap_open==0 && search->pbp->gap_extend==0)
         score = ((hsp->subject.length + hsp->query.length)*
                  search->sbp->reward / 2 - hsp->score) / 
            (search->sbp->reward - search->sbp->penalty);
      else 
         score = hsp->score;

      query_length = search->query_context_offsets[context+1] -
         search->query_context_offsets[context] - 1;

      q_off = hsp->query.offset;
      if (hsp->context) {
         strand = '-'; 
         hsp->query.end = query_length - hsp->query.offset;
         hsp->query.offset = 
            hsp->query.end - hsp->query.length;
      } else {
         strand = '+';  
         hsp->query.end = hsp->query.offset + hsp->query.length;
      }

      if (strand == '+') {
         q_start = hsp->query.offset + 1;
         q_end = hsp->query.end;
      } else {
         q_start = hsp->query.end;
         q_end = hsp->query.offset + 1;
      }
      s_start = hsp->subject.offset + 1;
      s_end = hsp->subject.offset + hsp->subject.length;

      /* Adjust offsets if query is a subsequence, only for first query */
      if (context < 2) {
         q_start += q_shift;
         q_end += q_shift;
      }

      s_start += s_shift;
      s_end += s_shift;

      query_buffer = NULL;
      if (query_id->choice == SEQID_LOCAL && 
          search->pbp->mb_params->full_seqids) {
         BioseqPtr query_bsp = BioseqLockById(query_id);
         if (query_bsp != NULL) {
            title = StringSave(BioseqGetTitle(query_bsp));
            if (title) {
               /* copy the first token so that the buffer freed below is
                  always the start of an allocation, then drop the title */
               CharPtr token;
               title_rest = NULL;
               token = StringTokMT(title, " ", &title_rest);
               if (token != NULL)
                  query_buffer = StringSave(token);
               MemFree(title);
            } else {
               Int4 query_gi;
               GetAccessionFromSeqId(query_bsp->id, &query_gi,
                                     &query_buffer);
            }
            BioseqUnlock(query_bsp);
         }
      }
      if (query_buffer == NULL) {
         /* regular case, and fallback when no title/accession was found */
         query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
         if (query_buffer != NULL) {
            if (!search->pbp->mb_params->full_seqids)
               SeqIdWrite(query_id, query_buffer, PRINTID_TEXTID_ACCESSION,
                          BUFFER_LENGTH);
            else 
               SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
                          BUFFER_LENGTH);
         }
      }
      query_name = query_buffer ? query_buffer : "";

      if (numeric_sip_type)
         sprintf(buffer, "\n#'>%ld'=='%c%s' (%d %d %d %d) %d\na {\n  s %d\n  b %d %d\n  e %d %d\n", 
                 (long) subject_gi, strand, query_name, 
                 s_start, q_start, s_end, q_end, score, score, 
                 s_start, q_start, s_end, q_end);
      else 
         sprintf(buffer, "\n#'>%s'=='%c%s' (%d %d %d %d) %d\na {\n  s %d\n  b %d %d\n  e %d %d\n", 
                 subject_name, strand, query_name, 
                 s_start, q_start, s_end, q_end, score, score, 
                 s_start, q_start, s_end, q_end);
      buffer_size = StringLen(buffer);

      query_seq = search->context[context].query->sequence;

      esp = hsp->gap_info->esp;
        
      for (numseg=0; esp; esp = esp->next, numseg++);

      GXECollectDataForSeqalign(hsp->gap_info, hsp->gap_info->esp, numseg,
                                &start, &length, &strands, 
                                &q_off, &hsp->subject.offset);

      /* clip the first and last segment to the query */
      if (start[0] < 0) {
         length[0] += start[0];
         start[1] -= start[0];
         start[0] = 0;
      } 
      if (start[2*(numseg-1)] + length[numseg-1] > query_length) 
         length[numseg-1] = query_length - start[2*(numseg-1)];
      
      total_ident = 0;
      align_length = 0;
      for (i=0; i<numseg; i++) {
         align_length += length[i];
         if (strand == '+') {
            q_start = start[2*i] + 1;
            q_end = q_start + length[i] - 1;
         } else {
            q_start = query_length - start[2*i];
            q_end = q_start - length[i] + 1;
         }
         /* gap segments (-1 start on either side) produce no "l" line */
         if (start[2*i] != -1 && start[2*i+1] != -1) {
            num_ident = MegaBlastGetNumIdentical(query_seq, subject_seq, 
                                                 start[2*i], start[2*i+1], 
                                                 length[i], FALSE);
            perc_ident = (FloatHi) num_ident / length[i] * 100;
            total_ident += num_ident;
            sprintf(tmp_buffer, "  l %d %d %d %d (%.0f)\n", start[2*i+1]+1, 
                    q_start, start[2*i+1]+length[i],
                    q_end, perc_ident);     
            if ((buffer_size += StringLen(tmp_buffer)) > max_buffer_size - 2) {
               /* grow through a temporary so a failed Realloc neither
                  loses the buffer nor is written through */
               grown = (CharPtr) Realloc(buffer, 2 * max_buffer_size);
               if (grown == NULL) {
                  out_of_memory = TRUE;
                  break;
               }
               buffer = grown;
               max_buffer_size *= 2;
            }
            StringCat(buffer, tmp_buffer);
         }
      }
      if (!out_of_memory && 100*total_ident >= 
          align_length*search->pbp->mb_params->perc_identity) {
         StringCat(buffer, "}");
         fprintf(fp, "%s\n", buffer);
      }
      MemFree(start);
      MemFree(length);
      MemFree(strands);
      MemFree(query_buffer);
      if (out_of_memory)
         break;
   } /* End loop on hsp's */

cleanup:
   if (subject_buffer_owned && !numeric_sip_type)
      MemFree(subject_buffer);
   MemFree(subject_descr);
   MemFree(buffer);
   sip = SeqIdSetFree(sip);
   fflush(fp);
   return 1;
}