/*
 * display_domains() -- build a text listing of the domain annotations
 * found in annot_arr_p[].
 *
 *   target       character written at the start of every line
 *   annot_arr_p  array of n_annots annotation entries; only entries
 *                whose label is '-' are listed
 *   n_annots     number of entries in annot_arr_p[]
 *
 * Every listed entry contributes one line of the form
 *   "<target>Domain:\t<pos+1>-<end+1>\t<comment>\n"
 *
 * Returns a heap-allocated, NUL-terminated string trimmed to its
 * contents (the caller must free() it), or NULL when memory cannot be
 * allocated.  An input without any '-' entries yields an empty string.
 */
char *display_domains(char target, struct annot_entry **annot_arr_p, int n_annots) {
  char *domain_s, *new_domain_s;
  char line[MAX_STR];
  int i, n_line;
  size_t n_domain_s = MAX_LSTR;   /* allocated size of domain_s */
  size_t domain_len = 0;          /* current length of the text in domain_s */
  size_t line_len, n_needed;

  /* since (currently) annot_var_s is MAX_LSTR, do the same for domain_s */
  if ((domain_s = (char *)calloc(n_domain_s, sizeof(char)))==NULL) {
    fprintf(stderr,"*** error [%s:%d] *** cannot allocate domain_s[%d]\n",__FILE__, __LINE__,(int)n_domain_s);
    return NULL;
  }

  for (i=0; i < n_annots; i++) {
    /* annot_arr_p[] has both domains and non domains, but n_domains only counts domains */
    if (annot_arr_p[i]->label != '-') continue;

    /* bounded formatting: a long comment can no longer overrun line[] */
    n_line = snprintf(line, sizeof(line), "%cDomain:\t%ld-%ld\t%s\n",
                      target, annot_arr_p[i]->pos+1, annot_arr_p[i]->end+1, annot_arr_p[i]->comment);
    if (n_line < 0) continue;     /* formatting error, nothing usable in line[] */
    if ((size_t)n_line >= sizeof(line)) {
      /* the entry was truncated; keep the one-entry-per-line layout */
      line[sizeof(line)-2] = '\n';
    }
    line_len = strlen(line);

    if (domain_len + line_len + 1 > n_domain_s) {
      /* grow by half (at least one byte) until the new line fits */
      n_needed = n_domain_s;
      while (domain_len + line_len + 1 > n_needed) {
        n_needed += n_needed/2 + 1;
      }
      if ((new_domain_s = (char *)realloc(domain_s, n_needed))==NULL) {
        fprintf(stderr,"*** error [%s:%d] *** cannot allocate domain_s[%d]\n",__FILE__, __LINE__,(int)n_needed);
        free(domain_s);
        return NULL;
      }
      domain_s = new_domain_s;
      n_domain_s = n_needed;
    }

    /* append line[] including its terminating NUL */
    memcpy(domain_s + domain_len, line, line_len + 1);
    domain_len += line_len;
  }

  /* trim to the size actually used; if the shrink fails the larger
     buffer is still valid and is returned as is */
  if ((new_domain_s = (char *)realloc(domain_s, domain_len + 1)) != NULL) {
    domain_s = new_domain_s;
  }
  domain_s[domain_len]='\0';

  return domain_s;
}
Example #2
0
/*
 * Fill in the key-file and data-file names of a hash table.
 *
 * ht      table whose sKeyfileName / sDatafileName fields are written
 * prefix  common file-name prefix; KEY_FILE_EXT and DATA_FILE_EXT are
 *         appended to it respectively
 *
 * Returns nothing.  NOTE(review): CHECK() is defined elsewhere; it
 * presumably jumps to LBL_ERROR when its condition is false -- confirm
 * against the macro definition.
 */
static void DEHT_formatFilenames(DEHT * ht, const char * prefix)
{
	TRACE_FUNC_ENTRY();
	CHECK(NULL != ht);
	CHECK(NULL != prefix);

	SAFE_STRNCPY(ht->sKeyfileName, prefix, sizeof(ht->sKeyfileName));
	SAFE_STRNCAT(ht->sKeyfileName, KEY_FILE_EXT, sizeof(ht->sKeyfileName));

	/* each buffer is bounded by its own size (the data-file name used to
	   be bounded by the size of the key-file name) */
	SAFE_STRNCPY(ht->sDatafileName, prefix, sizeof(ht->sDatafileName));
	SAFE_STRNCAT(ht->sDatafileName, DATA_FILE_EXT, sizeof(ht->sDatafileName));

	goto LBL_CLEANUP;
	
LBL_ERROR:
	TRACE_FUNC_ERROR();
	/* Fail silently. Our caller will be able to handle this. */

LBL_CLEANUP:
	TRACE_FUNC_EXIT();
	return;
}
/*
 * initpam() -- read a scoring matrix from the file mfname into
 * ppst->pam2[0][][].
 *
 *   mfname  name of the scoring matrix file
 *   ppst    parameter structure that receives the matrix, its name
 *           (pam_name), the highest/lowest score (pam_h/pam_l) and,
 *           for short alphabets, the DNA settings
 *
 * File layout as read here: leading lines starting with '#' are
 * skipped; the next line lists the residues (letters and '*'); each
 * following line holds a residue label followed by one integer per
 * residue.
 *
 * Returns 1 on success, 0 if the file cannot be opened, has no
 * alphabet line, or lists more than MAXSQ residues.  A missing matrix
 * row or value terminates the program with exit(1).
 */
int
initpam (char *mfname, struct pstruct *ppst)
{
    char    line[512], *lp;
    int     i, j, iaa, pval, p_i, p_j;
    int l_nsq;
    int have_alphabet;
    unsigned char l_sq[MAXSQ+2];    /* residues in [1..l_nsq-1], NUL terminated */
    int p_map[MAXSQ+2];             /* program index of each l_sq[] residue */
    int max_val, min_val;
    FILE   *fmat;

    pam_opts(mfname, ppst);

    if ((fmat = fopen (mfname, "r")) == NULL)
    {
        printf ("***WARNING*** cannot open scoring matrix file %s\n", mfname);
        fprintf (stderr,"***WARNING*** cannot open scoring matrix file %s\n", mfname);
        return 0;
    }

    /* removed because redundant, and causes crash under MacOSX -- because copying on top of itself */
    /*
       SAFE_STRNCPY (ppst->pamfile, mfname, MAX_FN);
    */
    SAFE_STRNCPY(ppst->pam_name, ppst->pamfile, MAX_FN);

    if (ppst->pam_ms) {
        SAFE_STRNCAT(ppst->pam_name,"-MS",MAX_FN-strlen(ppst->pam_name));
    }

    /*
       the size of the alphabet is determined in advance
    */
    ppst->nt_align = (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA);

    /*
      look for alphabet line, skipping the comments, alphabet ends up in line[]
    */
    have_alphabet = 0;
    while (fgets (line, sizeof(line), fmat) != NULL) {
        if (line[0] != '#') {
            have_alphabet = 1;
            break;
        }
    }
    if (!have_alphabet) {
        /* empty file or comments only: line[] holds nothing usable */
        fprintf (stderr,"***WARNING*** no alphabet line in scoring matrix file %s\n", mfname);
        fclose (fmat);
        return 0;
    }

    /* transfer the residue line into l_sq[] */
    l_nsq = 1;
    l_sq[0] = '\0';
    for (i=0; line[i] != '\0'; i++) {
        if (isalpha((unsigned char)line[i]) || line[i] == '*') {
            if (l_nsq > MAXSQ) {
                fprintf(stderr,"*** error [%s:%d] - more than %d residues in scoring matrix file %s\n",
                        __FILE__, __LINE__, MAXSQ, mfname);
                fclose (fmat);
                return 0;
            }
            l_sq[l_nsq++] = line[i];
        }
    }
    l_sq[l_nsq] = '\0';    /* terminate so &l_sq[1] can be searched as a string */

    /* if we have a DNA matrix, various defaults must be updated,
       particularly pascii, which is used to map the residue ordering
       in the matrix file to the residue ordering used by the
       program */

    if (l_nsq < 20) {
        if (ppst->dnaseq <= SEQT_PROT) {
            ppst->dnaseq = SEQT_DNA;
        }
        ppst->nt_align=1;
        pascii = nascii;	/* use correct DNA mapping, NCBIstdaa by default */
    }

    /* we no-longer re-initialize sascii[], we either use NCBIstdaa
       mapping for protein, or nascii for DNA */

    /* map every matrix residue to its program index once; residues that
       map outside the matrix are reported and scored as 'X', so that
       neither the row nor the column index can leave pam2[][][] */
    for (j=1; j < l_nsq; j++) {
        p_map[j] = pascii[l_sq[j]];
        if (p_map[j] < 0 || p_map[j] > MAXSQ) {
            fprintf(stderr,"*** error [%s:%d] - residue character %c out of range %d\n",
                    __FILE__, __LINE__, l_sq[j], p_map[j]);
            p_map[j] = pascii['X'];
        }
    }

    /* check for 2D pam  - if not found, allocate it */
    if (!ppst->have_pam2) {
        alloc_pam (MAXSQ+1, MAXSQ+1, ppst);
        ppst->have_pam2 = 1;
    }

    max_val = -1;
    min_val =  1;
    ppst->pam2[0][0][0] = -BIGNUM;
    /* make certain the [0] boundaries are -BIGNUM */
    for (j=1; j < l_nsq; j++) {
        p_j = p_map[j];
        ppst->pam2[0][0][p_j] = ppst->pam2[0][p_j][0] = -BIGNUM;
    }

    /*  read the scoring matrix values */
    for (iaa = 1; iaa < l_nsq; iaa++) {	/* read pam value line */
        p_i = p_map[iaa];
        if (fgets(line,sizeof(line),fmat)==NULL) {
            fprintf (stderr," error reading pam line: %s\n",line);
            exit (1);
        }
        /*     fprintf(stderr,"%d/%d %s",iaa,nsq,line); */
        strtok(line," \t\n");		/* skip the letter (residue) */

        for (j = 1; j < l_nsq; j++) {
            p_j = p_map[j];
            lp=strtok(NULL," \t\n");		/* get the number string */
            if (lp == NULL) {
                /* row is shorter than the alphabet; atoi(NULL) would crash */
                fprintf (stderr," error reading pam line: missing value %c:%c\n",
                         l_sq[iaa], l_sq[j]);
                exit (1);
            }
            pval=ppst->pam2[0][p_i][p_j]=atoi(lp);	/* convert to integer */
            if (pval > max_val) max_val = pval;
            if (pval < min_val) min_val = pval;
        }
    }
    ppst->pam_h = max_val;
    ppst->pam_l = min_val;

    if (ppst->dnaseq==0) {
        pam_sq = apam_sq;
        pam_sq_n = apam_sq_n;
        init_altpam(ppst);
    }
    else {
        pam_sq = npam_sq;
        pam_sq_n = npam_sq_n;
    }

    /* is protein but do not have '*' in alphabet*/
    p_i = pascii['*'];
    p_j = pascii['X'];
    /* the residues start at l_sq[1]; l_sq[0] is '\0', so searching from
       l_sq[0] would never find '*' and the file's '*' scores would
       always be overwritten */
    if (!ppst->nt_align && strchr((char *)&l_sq[1],'*')==NULL) {
        /* add it */
        for (i=0; i< l_nsq; i++) {
            ppst->pam2[0][p_i][i] = ppst->pam2[0][p_j][i];
            ppst->pam2[0][i][p_i] = ppst->pam2[0][i][p_j];
        }
    }

    /* make sure that X:X is < 0 if -S */
    if (ppst->ext_sq_set && ppst->pam2[0][p_j][p_j] >= 0) {
        ppst->pam2[0][p_j][p_j] = -1;
    }

    fclose (fmat);
    return 1;
}
/*
 * do_url1() -- write the HTML links (and optionally a JSON description)
 * that accompany one alignment.
 *
 *   fp           output stream
 *   m_msp        message structure (program id f_id0, library name,
 *                query title, query length n0, ...)
 *   ppst         not used in this function
 *   l_name       library sequence name; the part after the first '|'
 *                (or the whole name) is used for the links
 *   n1           library sequence length
 *   aln_p        alignment coordinates and offsets
 *   annot_var_s  annotation/variant text, may be NULL or empty
 *   q_annot_p    query annotations, may be NULL
 *   l_annot_p    library annotations, may be NULL
 *
 * The link templates are taken from the environment variables REF_URL,
 * SRCH_URL, SRCH_URL1 and DOMAIN_PLOT_URL and are used directly as
 * printf() format strings; the expected conversions are listed next to
 * each use.  NOTE(review): the environment must therefore be trusted --
 * a template whose conversions do not match the arguments is undefined
 * behaviour.
 *
 * If JSON_HTML is set in the environment, a <script> block describing
 * the alignment is written as well.
 */
void do_url1(FILE *fp, const struct mngmsg *m_msp, const struct pstruct *ppst,
             char *l_name, int n1,
             const struct a_struct *aln_p, const char *annot_var_s,
             const struct annot_str *q_annot_p,
             const struct annot_str *l_annot_p )
{
  char my_q_name[200], my_l_name[200], json_l_name[200];
  char *db, *bp;
  char pgm[10], o_pgm[10], lib[MAX_LSTR];
  char *enc_annot_s = NULL;                          /* web_encode()d annot_var_s */
  char *q_domain_s = NULL, *l_domain_s = NULL;       /* display_domains() output */
  char *tmp_domain_s = NULL, *enc_domain_s = NULL;   /* combined domains, raw and encoded */
  int  n_tmp_domain;
  size_t my_l_name_len;
  long q_offset, l_offset;
  char *ref_url, *lbp=NULL;
  char *srch_url, *srch_url1, *dom_url;

  /* set the database */
  if (m_msp->ldb_info.ldnaseq==SEQT_DNA) db="nucleotide";
  else db="Protein";

  /* set the program type; SAFE_STRNCPY is used for all three cases so
     that pgm[] is always terminated before it is copied and compared */
  if (strncmp(m_msp->f_id0,"rss",3)==0) {
    SAFE_STRNCPY(pgm,"fa",sizeof(pgm));
  }
  else if (strncmp(m_msp->f_id0,"rfx",3)==0) {
    SAFE_STRNCPY(pgm,"fx",sizeof(pgm));
  }
  else { SAFE_STRNCPY(pgm,m_msp->f_id0,sizeof(pgm)); }

  SAFE_STRNCPY(o_pgm, pgm, sizeof(o_pgm));

  /* get a library name (probably does not work for %, + abbreviations */
  if (m_msp->lname[0]!='%') {
    SAFE_STRNCPY(lib,m_msp->lname,sizeof(lib));
  }
  else {
    SAFE_STRNCPY(lib,"%25",sizeof(lib));
    SAFE_STRNCAT(lib,&m_msp->lname[1],sizeof(lib));
  }
  lib[sizeof(lib)-1]='\0';

  /* use the part of l_name after the first '|', if there is one */
  if ((lbp = strchr(l_name,'|'))==NULL) {
    lbp = l_name;
  }
  else {
    lbp++;
  }

  SAFE_STRNCPY(my_q_name,m_msp->qtitle,sizeof(my_q_name));
  if ((bp=strchr(my_q_name,' '))!=NULL) *bp='\0';

  SAFE_STRNCPY(my_l_name,lbp,sizeof(my_l_name));

  /* for translated searches (t*, fx, fy) remove the ":..." part and a
     trailing "_<c>" suffix from the name.  The "fy" test used to read
     !strcmp(pgm,"fy")==0, which is true for every program except "fy". */
  if (pgm[0]=='t' || strcmp(pgm,"fx")==0 || strcmp(pgm,"fy")==0 ) {
    if ((lbp=strchr(my_l_name,':'))!=NULL) *lbp='\0';
    my_l_name_len = strlen(my_l_name);
    /* names shorter than two characters have no suffix to remove */
    if (my_l_name_len >= 2 && my_l_name[my_l_name_len-2] == '_') {
      my_l_name[my_l_name_len-2] = '\0';
    }
  }

  /* change the program name for fastx, tfastx, tfasta */
  /* fastx returns proteins */
  if (strcmp(pgm,"fx")==0 || strcmp(pgm,"fy")==0) {SAFE_STRNCPY(pgm,"fa",sizeof(pgm));}
  else if (strcmp(pgm,"ff")==0) {SAFE_STRNCPY(pgm,"fa",sizeof(pgm));}
  else if (pgm[0]=='t') {
    SAFE_STRNCPY(pgm,"fx",sizeof(pgm));
    SAFE_STRNCPY(lib,DEF_PROT_LIB,sizeof(lib));
  }

  fflush(fp);

  q_offset = aln_p->q_offset;
  l_offset = aln_p->l_offset;

  /* set up ref_url, srch_url, srch_url1, dom_url */
  ref_url = getenv("REF_URL");
  srch_url = getenv("SRCH_URL");
  srch_url1 = getenv("SRCH_URL1");
  dom_url = getenv("DOMAIN_PLOT_URL");

  if (ref_url || srch_url || srch_url1 || dom_url) {
    fprintf(fp,"<!-- LINK_START %s -->",l_name);

    /* REF_URL should provide */
    /* "<A HREF=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=%s&fcmd=Search&doptcmd1=DocSum&term=%s\">Entrez lookup</A>&nbsp;&nbsp;" */
    if (ref_url != NULL) {fprintf(fp,ref_url,db,my_l_name);}

    /* SRCH_URL should provide */
    /* "<A HREF=\"http://localhost/fasta_www2/searchfa.cgi?query=%s&db=fasta_www.cgi&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d&o_pgm=%s\">Re-search database</A>&nbsp;&nbsp;" */
    if (srch_url != NULL) {
      fprintf(fp,srch_url,my_l_name,db,lib,pgm,
              l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1,m_msp->f_id0);
    }

    /* SRCH_URL1 should provide: */
    /*  "<A HREF=\"http://localhost/fasta_www2/searchxf.cgi?query=%s&db=%s&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d&o_pgm=%s\">General re-search</A>\n" */

    if (srch_url1 != NULL) {
      fprintf(fp,srch_url1,my_l_name,db,lib,pgm,
              l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1,m_msp->f_id0);
    }

    if (dom_url!=NULL) {
      /* encoded copies stay NULL when there is nothing to encode or the
         encoding fails; "" is printed in their place below */
      if (annot_var_s && annot_var_s[0]) {
        enc_annot_s = web_encode(annot_var_s);
      }

      if (q_annot_p && q_annot_p->n_domains > 0) {
        q_domain_s = display_domains('q',q_annot_p->s_annot_arr_p, q_annot_p->n_annot);
      }
      if (l_annot_p && l_annot_p->n_domains > 0) {
        l_domain_s = display_domains('l',l_annot_p->s_annot_arr_p, l_annot_p->n_annot);
      }

      /* combine domain strings */
      n_tmp_domain = 0;
      if (q_domain_s) n_tmp_domain += strlen(q_domain_s)+1;
      if (l_domain_s) n_tmp_domain += strlen(l_domain_s)+1;
      if (n_tmp_domain > 0) {
        if ((tmp_domain_s=(char *)calloc(n_tmp_domain,sizeof(char)))==NULL) {
          fprintf(stderr,"*** error [%s:%d] *** cannot allocate tmp_domain_s[%d]\n",
                  __FILE__, __LINE__,n_tmp_domain);
        }
        else {
          tmp_domain_s[0] = '\0';
          if (q_domain_s) SAFE_STRNCAT(tmp_domain_s, q_domain_s, n_tmp_domain);
          if (l_domain_s) SAFE_STRNCAT(tmp_domain_s, l_domain_s, n_tmp_domain);
          enc_domain_s = web_encode(tmp_domain_s);
        }
      }

      /* appropriate format string: */
      /* 
         pgm=%s	    -- program abbrev that created alignment
         q_name=%s     -- query info
         q_cstart=%ld
         q_cstop=%ld
         q_astart=%ld
         q_astop=%ld
         l_name=%s     -- library info
         l_cstart=%ld
         l_cstop=%ld
         l_astart=%ld
         l_astop=%ld
         region=%s       -- aligned domain and variant information
         doms=%s

         DOMAIN_PLOT_URL = "pgm=%s;q_name=%s;q_cstart=%ld;q_cstop=%ld&q_astart=%ld&q_astop=%ld&l_name=%s&l_cstart=%ld&l_cstop=%ld&l_astart=%ld&l_astop=%ld&regions=%s&doms=%s"
      */

      /* think about the alternative of running a script
         rather than embedding it */

      fprintf(fp,dom_url,o_pgm,
              my_q_name, q_offset+seq_pos(1,aln_p->qlrev,2),q_offset+seq_pos(m_msp->n0,aln_p->qlrev,2),
              q_offset+seq_pos(aln_p->amin0+1,aln_p->qlrev,1), q_offset+seq_pos(aln_p->amax0, aln_p->qlrev,2),
              my_l_name, l_offset+seq_pos(1,aln_p->llrev,2), l_offset+seq_pos(n1,aln_p->llrev,2),
              l_offset+seq_pos(aln_p->amin1+1,aln_p->llrev,1),l_offset+seq_pos(aln_p->amax1,aln_p->llrev,2),
              (enc_annot_s ? enc_annot_s : ""), (enc_domain_s ? enc_domain_s : ""));

      /* every pointer is either NULL or heap memory owned here */
      free(enc_domain_s);
      free(tmp_domain_s);
      free(l_domain_s);
      free(q_domain_s);
      free(enc_annot_s);
    }

    fprintf(fp,"\n<!-- LINK_STOP -->");
    fflush(fp);
  }

  /*
    if ((srch_url2 = getenv("SRCH_URL2"))==NULL)
    fprintf(fp,"<A HREF=\"http://fasta.bioch.virginia.edu/fasta/cgi/lalignx.cgi?seq1=\"%s\"&in_seq1=\"FASTA\"&seq2=\"%s\"&in_seq2=\"Accession\"&ssr2=%ld:%ld\">lalign</A>\n<p>\n",my_l_name,db,lib,pgm,l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1);
    else 
    fprintf(fp,srch_url1,my_l_name,db,lib,pgm,
    l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1);
  */


  if (getenv("JSON_HTML")) {

    /* replace '|' with '_' */
    SAFE_STRNCPY(json_l_name, l_name, sizeof(json_l_name));
    for (bp=strchr(json_l_name,'|'); bp; bp=strchr(bp+1,'|')) { *bp = '_'; }

    /* replace '.' with '_' */
    for (bp=strchr(json_l_name,'.'); bp; bp=strchr(bp+1,'.')) { *bp = '_'; }

    fprintf(fp,"\n<script type=\"text/javascript\">\n//<![CDATA[\n var json_%s = {\n",json_l_name);
    encode_json_str(fp, "db", db, 1);
    encode_json_str(fp, "l_acc", l_name, 0);
    encode_json_str(fp, "acc", my_l_name, 0);
    encode_json_str(fp, "lib", lib, 0);
    encode_json_str(fp, "pgm", pgm, 0);
    encode_json_str(fp, "o_pgm", m_msp->f_id0, 0);
    encode_json_aln(fp, aln_p, q_offset, l_offset, 0);
    if (annot_var_s && annot_var_s[0]) { encode_json_lines(fp, "annot", annot_var_s, 0); }
    if (q_annot_p && q_annot_p->n_domains > 0) { encode_json_domains(fp, "q_domains", q_annot_p, 0); }
    if (l_annot_p && l_annot_p->n_domains > 0) { encode_json_domains(fp, "l_domains", l_annot_p, 0); }

    fprintf(fp, "\n}\n//]]>\n</script>");
    fflush(fp);
  }
}
void showbest (FILE *fp, unsigned char **aa0, unsigned char *aa1save, int maxn,
	       struct beststr **bptr,int nbest,
	       int qlib, struct mngmsg *m_msp,
	       struct pstruct *ppst, struct db_str db,
	       char **info_gstring2
	       ,void **f_str
)
{
  unsigned char *aa1;
  int best_align_done = 0;
  int ntmp = 0;
  char bline[MAX_BLINE], fmt[40], pad[MAX_BLINE], fmt2[40], rline[40];
  char l_name[128], link_name[140];
  int istart = 0, istop, ib;
  int nshow;		/* number of sequences shown before prompt,
			   and ultimately displayed */
  int first_line, link_shown;
  int quiet;
  int r_margin;
  struct beststr *bbp;
  int n1tot;
  char *bp, *bline_p;
  char rel_label[12];
  char score_label[120];
  char tmp_str[20], *seq_code, *ann_code;
  int seq_code_len, ann_code_len;
  long loffset;		/* loffset is offset from beginning of real sequence */
  long l_off;		/* l_off is the the virtual coordinate of residue 1 */
  int n1, ranlib_done;
  struct rstruct rst;
  int l_score0, ngap;
  double lzscore, lzscore2, lbits;
  float percent, gpercent;
  struct a_struct *aln_p;
  struct a_res_str *cur_ares_p;
  struct rstruct *rst_p;
  int gi_num;
  char html_pre_E[120], html_post_E[120];
  int have_lalign = 0;

  struct lmf_str *m_fptr;

  /* for lalign alignments, only show stuff when -m != 11 */

  if (m_msp->markx & MX_M11OUT) return;
  if (strcmp(m_msp->label,"ls-w")==0) {
    have_lalign = 1;
    if ((m_msp->markx & MX_M9SUMM) == 0) return;
  }

  rel_label[0]='\0';
  SAFE_STRNCPY(score_label,"scores", sizeof(score_label));

  quiet = m_msp->quiet;

  if (m_msp->aln.llen > MAX_BLINE) m_msp->aln.llen = MAX_BLINE;

  if (ppst->zsflag < 0) r_margin = 10;
  else if (ppst->zsflag>=0  && m_msp->srelv > 1 ) r_margin = 19;
  else r_margin = 10;

  if (m_msp->markx & MX_M9SUMM && m_msp->show_code == SHOW_CODE_ID) {
#ifdef SHOWSIM
    r_margin += 15;
#else
    r_margin += 10;
#endif
  }
  else if (m_msp->markx & MX_MBLAST2) {
    r_margin -= 10;
  }
  else if (m_msp->markx & (MX_M9SUMM + MX_M8OUT)) {
    r_margin = 0;
  }

  if (m_msp->markx & MX_HTML) {
    strncpy(html_pre_E,"<font color=\"darkred\">",sizeof(html_pre_E));
    strncpy(html_post_E,"</font>",sizeof(html_post_E));

  }
  else {
    html_pre_E[0] = html_post_E[0] = '\0';
  }

  if (m_msp->nframe < 0) {
    sprintf(fmt,"%%-%ds (%%4d)",m_msp->aln.llen-r_margin);
  }
  else {
    sprintf(fmt,"%%-%ds (%%4d)",m_msp->aln.llen-(r_margin+4));
  }
  sprintf(fmt2,"%%-%ds",m_msp->aln.llen-r_margin+8);

  memset(pad,' ',m_msp->aln.llen-(r_margin+6));
  pad[m_msp->aln.llen-(r_margin+12)]='\0';
  if (have_lalign) {
    if (ppst->show_ident) {
      SAFE_STRNCPY(score_label,"alignments", sizeof(score_label));
      pad[m_msp->aln.llen-(r_margin+16)]='\0';
    }
    else {
      SAFE_STRNCPY(score_label,"non-identical alignments", sizeof(score_label));
      pad[m_msp->aln.llen-(r_margin+30)]='\0';
    }
  }

  nshow = min(m_msp->nshow,nbest);

  if ((bp = strchr (m_msp->qtitle, '\n')) != NULL) *bp = '\0';
  if (m_msp->markx & MX_M8OUT) {
    if ((bp = strchr (m_msp->qtitle, ' ')) != NULL) *bp = '\0';
  }

/*   fprintf (fp, "%3d %s\n", qlib,m_msp->qtitle); */

  if (m_msp->markx & MX_HTML) fprintf(fp,"<pre>");

  /* **************************************************************** */
  /* done with display format */
  /* **************************************************************** */

  /* **************************************************************** */
  /* prompt for number of best scores if quiet == 0 */
  /* **************************************************************** */

  if (quiet == 0) {	/* interactive */
    nshow = min(m_msp->nshow, nbest);
    printf(" How many scores would you like to see? [%d] ",nshow);
    fflush(stdout);
    if (fgets(rline,20,stdin)==NULL) exit(0);
    if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&nshow);
    if (nshow > nbest) nshow=nbest;
    if (nshow<=0) nshow = min(20,nbest);
  }

  /* display number of hits for -m 8C (Blast Tab-commented format) */
  if (m_msp->markx & MX_M8COMMENT) {
    /* line below copied from BLAST+ output */
    fprintf(fp,"# Fields: query id, subject id, %% identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score");
    if (m_msp->show_code == SHOW_CODE_ALIGN || m_msp->show_code == SHOW_CODE_CIGAR) { fprintf(fp," aln_code");}
    fprintf(fp,"\n");
    fprintf(fp,"# %d hits found\n",nshow);
  }

  /* **************************************************************** */
  /* have number of scores in interactive or quiet mode */
  /* display "The best scores are" */
  /* **************************************************************** */

  if (m_msp->markx & MX_MBLAST2) {
    fprintf(fp, "%81s\n"," Score     E");
    fprintf(fp, "Sequences producing significant alignments:                          (Bits)  Value\n\n");
  }
  else if (!(m_msp->markx & MX_M8OUT)) {
    if (ppst->zsflag >= 0) {
      if (m_msp->z_bits==1) {/* show bit score */
	fprintf(fp,"\nThe best%s %s are:%s%s bits %sE(%ld)%s",
		rel_label,score_label,pad,m_msp->label,html_pre_E,ppst->zdb_size,html_post_E);
	if (ppst->zsflag > 20) {
	  fprintf(fp," E2()");
	}
      }
      else {/* show z-score */
	fprintf(fp,"\nThe best%s %s are:%s%s z-sc %sE(%ld)%s",
		rel_label,score_label,pad,m_msp->label,html_pre_E,ppst->zdb_size,html_post_E);
	if (ppst->zsflag > 20) {
	  fprintf(fp," E2()");
	}
      }
      header_aux(fp);
      if (m_msp->markx & MX_M9SUMM) {
	if (m_msp->show_code == SHOW_CODE_ID) {
#ifdef SHOWSIM
	  fprintf(fp," %%_id  %%_sim  alen");
#else
	  fprintf(fp," %%_id  alen");
#endif
	}
	else {
	  if (m_msp->markx & MX_HTML && m_msp->show_code !=1) { fprintf(fp,"<!-- ");}
#ifndef SHOWSIM
	  fprintf(fp,"\t%%_id  %%_gid %4s  alen  an0  ax0  pn0  px0  an1  ax1 pn1 px1 gapq gapl  fs ",m_msp->f_id1);
#else
	  fprintf(fp,"\t%%_id  %%_sim %4s  alen  an0  ax0  pn0  px0  an1  ax1 pn1 px1 gapq gapl  fs ",m_msp->f_id1);
#endif
	}
	if (m_msp->show_code == SHOW_CODE_ALIGN) { fprintf(fp," aln_code"); }
	if (m_msp->markx & MX_HTML && m_msp->show_code!=1) { fprintf(fp," -->");}
      }
      fprintf(fp,"\n");
    }
    else {
      fprintf(fp,"\nThe best%s %s are:%s%s",rel_label,score_label,pad,m_msp->label);
      header_aux(fp);
      if (m_msp->markx & MX_M9SUMM) {
	if (m_msp->show_code == SHOW_CODE_ID) {
#ifdef SHOWSIM
	  fprintf(fp," %%_id  %%_sm  alen");
#else
	  fprintf(fp," %%_id  alen");
#endif
	}
	else {
#ifndef SHOWSIM
	  fprintf(fp,"\t%%_id  %%_gid %4s  alen  an0  ax0  pn0  px0  an1  ax1 pn1 px1 gapq gapl  fs ",m_msp->f_id1);
#else
	  fprintf(fp,"\t%%_id  %%_sim %4s  alen  an0  ax0  pn0  px0  an1  ax1 pn1 px1 gapq gapl  fs ",m_msp->f_id1);
#endif	/* SHOWSIM */
	}
      }
      if (m_msp->show_code == SHOW_CODE_ALIGN) { fprintf(fp," aln_code"); }
      fprintf(fp,"\n");
    }
  }	/* !(m_msp->markx & MX_M8OUT) */

  istart = 0;
l1:
  istop = min(nshow, nbest);

  for (ib=istart; ib<istop; ib++) {
    bbp = bptr[ib];
    if (ppst->do_rep) {
      bbp->repeat_thresh = 
	min(E1_to_s(ppst->e_cut_r, m_msp->n0, bbp->seq->n1,ppst->zdb_size, m_msp->pstat_void),
	    bbp->rst.score[ppst->score_ix]);
    }

#ifdef DEBUG
    if (bbp->seq->n1 != bbp->n1 ) {
      fprintf(stderr, " *** lib len error [%d!=%d] *** %s score %d\n",
	      bbp->seq->n1,bbp->n1, bbp->mseq->libstr, bbp->rst.score[0]);
    }
#endif

    /* this gets us a valid bline[] and the library for searching if necessary
       do not read if we have a long enough bline or we don't need a sequence 
    */
    if (bbp->mseq->bline != NULL && bbp->mseq->bline_max >= m_msp->aln.llen) {
      ranlib_done = 0;

      /* copy m_msp->aln.llen, not llen-r_margin, because the r_margin
	 will be set later, possibly after the gi|12345 is removed */
      strncpy(bline,bbp->mseq->bline,m_msp->aln.llen);
      bline[m_msp->aln.llen]='\0';
    }
    else {
      if ((m_fptr=re_openlib(bbp->mseq->m_file_p,!m_msp->quiet))==NULL) {
	fprintf(stderr,"*** cannot re-open %s\n",bbp->mseq->m_file_p->lb_name);
	exit(1);
      }
      RANLIB(bline,m_msp->aln.llen,bbp->mseq->lseek,bbp->mseq->libstr,m_fptr);
      ranlib_done = 1;
    }

    /* get a valid cur_ares_p chain and put it in bbp->ares */
    if (!m_msp->align_done && (m_msp->stages>1 || (m_msp->markx & MX_M9SUMM))) {	/* we need a sequence */
      if (bbp->seq->aa1b == NULL || (m_msp->ann_flg==1 && bbp->seq->annot_p==NULL)) {
	if (!ranlib_done) {	/* we didn't open the library already */
	  if ((m_fptr=re_openlib(bbp->mseq->m_file_p,!m_msp->quiet))==NULL) {
	    fprintf(stderr,"*** cannot re-open %s\n",bbp->mseq->m_file_p->lb_name);
	    exit(1);
	  }
	  RANLIB(bline,m_msp->aln.llen,bbp->mseq->lseek,bbp->mseq->libstr,m_fptr);
	  ranlib_done = 1;
	}
	n1 = re_getlib(aa1save,
		       (m_msp->ann_flg==1) ? &(bbp->seq->annot_p) : NULL, 
		       maxn,m_msp->ldb_info.maxt3,
		       m_msp->ldb_info.l_overlap,bbp->mseq->cont,m_msp->ldb_info.term_code,
		       &bbp->seq->l_offset,&bbp->seq->l_off,bbp->mseq->m_file_p);

	aa1 = aa1save;

	if (m_msp->ann_flg==2 && bbp->seq->annot_p==NULL ) {
	  /* get information about this sequence from bline */
	  if (get_annot(m_msp->annot1_sname, m_msp, bline, bbp->seq->n1, &(bbp->seq->annot_p), 1, ppst->debug_lib) > 0) {
	    /* do something with annotation */
	    s_annot_to_aa1a(bbp->n1, bbp->seq->annot_p, m_msp->ann_arr);
	  }
	}
      }
      else {
	n1 = bbp->seq->n1;
	aa1 = bbp->seq->aa1b;
      }

      if (n1 != bbp->n1) {
	fprintf(stderr," *** sequence length conflict %d != %d: %s\n", n1, bbp->n1, bline);
	continue;
      }

      if ( m_msp->stages > 1 && bbp->rst.score[2] == -BIGNUM) { 
	/* this is not typically done unless m_msp->stages > 1 */
	do_opt (aa0[bbp->frame], m_msp->n0, aa1, n1, bbp->frame, ppst, f_str[bbp->frame], &rst);
	bbp->rst.score[2]=rst.score[2];
      }

      if (!bbp->have_ares & 0x1) {
	bbp->a_res = build_ares_code(aa0[bbp->frame], m_msp->n0, aa1, bbp->seq,
				     bbp->frame, &bbp->have_ares,
				     bbp->repeat_thresh, m_msp, ppst, f_str[bbp->frame] );
	best_align_done = 1;
      }
    }	/* end stages > 1 || MX_M9SUMM9 */

    n1tot = (bbp->mseq->n1tot_p) ? *bbp->mseq->n1tot_p : bbp->seq->n1;

    bline_p = bline;
    if (!(m_msp->markx & (MX_M8OUT)) && !strncmp(bline,"gi|",3)) {
      bline_p = strchr(bline+4,'|')+1;
      *(bline_p-1) = 0;
      gi_num = atoi(bline+3);
    }

  /* l_name is used to build an HTML link from the bestscore line to
     the alignment.  It can also be used to discriminate multiple hits
     from the same long sequence.  This requires that fast_pan use -m 6.

     (6-April-2013) Add ability to specify additional alignments with
     link_name;
  */

    SAFE_STRNCPY(l_name,bline_p,sizeof(l_name)); /* get rid of text after second "|" */
    if ((bp=strchr(l_name,' '))!=NULL) *bp=0;
    if ((bp=strchr(&l_name[6],'|'))!=NULL) *bp='\0'; 	/* increase to [6] from [3] to allow longer db names "ref", "unk", */
    if (m_msp->nframe > 2) sprintf(&l_name[strlen(l_name)],"_%d",bbp->frame+1);
    else if (m_msp->nframe > 0 && bbp->frame == 1)
      SAFE_STRNCAT(l_name,"_r",sizeof(l_name));
    if (bbp->mseq->cont-1 > 0) {
      sprintf(tmp_str,":%d",bbp->mseq->cont-1);
      SAFE_STRNCAT(l_name,tmp_str,sizeof(l_name));
    }

    if (m_msp->markx & MX_M8OUT) {
      if ((bp=strchr(bline_p,' '))!=NULL) *bp = '\0';
    }
    else {
      bline_p[m_msp->aln.llen-r_margin]='\0';
      /* check for translated frame info */
      if (m_msp->nframe > -1) bline_p[m_msp->aln.llen-(r_margin+4)]='\0';
    }
    /* now its time to report the summary numbers for all the alignments */

    /* in the next loop, cur_ares_p could be NULL if we haven't done do_walign() */
    cur_ares_p = bbp->a_res;

    first_line = 1;
    do {
      /* if cur_res_p != NULL, then we get rst from a_res->rst
	 Otherwise, it comes from bbp->rst
      */

      if ((!first_line || (have_lalign && !ppst->show_ident)) && cur_ares_p ) {
	rst_p = &cur_ares_p->rst;
      }
      else {
	rst_p = &bbp->rst;
      }

      n1 = bbp->seq->n1;
      l_score0 = rst_p->score[ppst->score_ix];
      lzscore = find_z(l_score0, rst_p->escore, n1, rst_p->comp, m_msp->pstat_void);
      if (ppst->zsflag > 20) {
	lzscore2 = find_z(l_score0, rst_p->escore, n1, rst_p->comp, m_msp->pstat_void2);
      }
      lbits = zs_to_bit(lzscore, m_msp->n0, n1);

      /* *********************************** */
      /* standard "The best scores are" here */
      /* *********************************** */

      if (!(m_msp->markx & (MX_M8OUT + MX_MBLAST2))) {
	if (first_line) {
	  first_line = 0;
	  fprintf (fp, fmt,bline_p,n1tot);
	  if (m_msp->nframe > 2) fprintf (fp, " [%d]", bbp->frame+1);
	  else if (m_msp->nframe >= 0) fprintf(fp," [%c]",(bbp->frame > 0 ?'r':'f'));
	}
	else {
	  fprintf (fp, fmt2,"\n+-");
	}

	if (m_msp->srelv == 1) fprintf (fp, " %4d", rst_p->score[ppst->score_ix]);
	else {
	  if (m_msp->srelv-1 > 0) fprintf (fp, " %4d", rst_p->score[0]);
	  if (m_msp->srelv-1 > 1 || m_msp->stages>1)
	    fprintf (fp, " %4d", rst_p->score[1]);
	  fprintf (fp, " %4d", rst_p->score[ppst->score_ix]);
	}

	if (ppst->zsflag>=0) { 
	  if (m_msp->z_bits==1) {
	    fprintf (fp, " %.1f %s%7.2g%s",lbits,html_pre_E,
		     zs_to_E(lzscore, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db),
		     html_post_E);
	    if (ppst->zsflag > 20) {
	      fprintf (fp, " %7.2g",zs_to_E(lzscore2, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db));
	    }
	  }
	  else {
	    fprintf (fp, " %.1f %s%7.2g%s",lzscore,html_pre_E,
		     zs_to_E(lzscore, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db),
		     html_post_E);
	    if (ppst->zsflag > 20) {
	      fprintf (fp, " %7.2g",zs_to_E(lzscore2, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db));
	    }
	  }
	}
	show_aux(fp,bbp);
      }
      else if (m_msp->markx & MX_M8OUT) {	/* MX_M8OUT -- provide query, library */
	if (first_line) {first_line = 0;}
	fprintf (fp,"%s\t%s",m_msp->qtitle,bline_p);
      }
      else if (m_msp->markx & MX_MBLAST2) {	/* blast "Sequences producing" */ 
	if (first_line) {first_line = 0;}
	fprintf (fp,"%-67s %6.1f    %.1g", bline_p, lbits,
		    zs_to_E(lzscore,n1,ppst->dnaseq,ppst->zdb_size,m_msp->db));
      }

      if (m_msp->markx & MX_M9SUMM || m_msp->markx & MX_M8OUT) {
	loffset = bbp->seq->l_offset;
	l_off = bbp->seq->l_off;
	aln_p = &cur_ares_p->aln;
	seq_code = cur_ares_p->aln_code;
	seq_code_len = cur_ares_p->aln_code_n;
	ann_code = cur_ares_p->ann_code;
	ann_code_len = cur_ares_p->ann_code_n;

        percent = calc_fpercent_id(100.0,aln_p->nident,aln_p->lc, m_msp->tot_ident, -100.0);

	ngap = cur_ares_p->aln.ngap_q + cur_ares_p->aln.ngap_l;
#ifndef SHOWSIM
	gpercent = calc_fpercent_id(100.0, aln_p->nident, aln_p->lc-ngap, m_msp->tot_ident, -100.0);
#else
	gpercent = calc_fpercent_id(100.0, cur_ares_p->aln.nsim, aln_p->lc, m_msp->tot_ident, -100.0);
#endif	/* SHOWSIM */

	if (m_msp->show_code != SHOW_CODE_ID) {	/* show more complete info than just identity */

	  /*  	calc_astruct(aln_p, cur_ares_p); -- this function
		should not be used after calc_code or any other
		alignment that calculates amax0/amax1 */

	  /* we need the coordinates for annotated SHOW_CODE_ALIGN */
	  calc_coord(m_msp->n0,bbp->seq->n1,
		     m_msp->q_offset + (m_msp->q_off-1) + (m_msp->sq0off-1),
		     loffset + (l_off-1) + (m_msp->sq1off-1),
		     aln_p);

	  /* if (m_msp->markx & MX_HTML) fprintf(fp,"<!-- "); */
	  /*            %_id  %_sim s-w alen an0  ax0  pn0  px0  an1  ax1  pn1  px1 gapq gapl fs  */
	  /*                    alignment    min  max            min  max */
	  /*                    sequence coordinate    min  max            min  max */
	  if (!(m_msp->markx & MX_M8OUT)) {
	    fprintf(fp,"\t%5.3f %5.3f %4d %4d %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %3d %3d %3d",
		    percent/100.0,gpercent/100.0, 
		    cur_ares_p->sw_score,
		    aln_p->lc,
		    aln_p->d_start0,aln_p->d_stop0,
		    aln_p->q_start_off, aln_p->q_end_off,
		    aln_p->d_start1,aln_p->d_stop1,
		    aln_p->l_start_off, aln_p->l_end_off,
		    aln_p->ngap_q,aln_p->ngap_l,aln_p->nfs);
	    if ((m_msp->show_code & SHOW_CODE_ALIGN) == SHOW_CODE_ALIGN
		&& seq_code_len > 0 && seq_code != NULL) {
	      fprintf(fp,"\t%s",seq_code);
	      if (ann_code_len > 0 && ann_code != NULL) {
		fprintf(fp,"\t%s",ann_code);
	      }
	    }
	  }
	  else {	/* MX_M8OUT -- blast order, tab separated */
	    fprintf(fp,"\t%.2f\t%d\t%d\t%d\t%ld\t%ld\t%ld\t%ld\t%.2g\t%.1f",
		    percent,aln_p->lc,aln_p->nmismatch,
		    aln_p->ngap_q + aln_p->ngap_l+aln_p->nfs,
		    aln_p->d_start0, aln_p->d_stop0,
		    aln_p->d_start1, aln_p->d_stop1,
		    zs_to_E(lzscore,n1,ppst->dnaseq,ppst->zdb_size,m_msp->db),
		    lbits);
	    if ((m_msp->show_code & SHOW_CODE_ALIGN) == SHOW_CODE_ALIGN && seq_code_len > 0 && seq_code != NULL) {
	      fprintf(fp,"\t%s",seq_code);
	      if (ann_code_len > 0 && ann_code != NULL) {
		fprintf(fp,"\t%s",ann_code);
	      }
	    }
	    fprintf(fp,"\n");
	  }
	}
	else {	/* !SHOW_CODE */
#ifdef SHOWSIM
	  fprintf(fp," %5.3f %5.3f %4d", 
		  percent/100.0,
		  (float)aln_p->nsim/(float)aln_p->lc,aln_p->lc);
#else
	  fprintf(fp," %5.3f %4d", percent/100.0,aln_p->lc);
#endif
	  if (m_msp->markx & MX_HTML) {
	    if (cur_ares_p->index > 0) {
	      sprintf(link_name,"%s_%d",l_name, cur_ares_p->index);
	    }
	    else {
	      SAFE_STRNCPY(link_name, l_name, sizeof(l_name));
	    }
	    fprintf(fp," <a href=\"#%s\">align</a>",link_name);
	    link_shown = 1;
	  }
	  if (cur_ares_p->annot_var_s) {
	    fprintf(fp," |Var: %s",cur_ares_p->annot_var_s);
	  }
	  else { link_shown = 0;}
	}
      }
    } while ( cur_ares_p && (cur_ares_p = cur_ares_p->next));

    /*    if ((m_msp->markx & MX_HTML) && !link_shown) fprintf(fp," <a href=\"#%s\">align</a>",l_name); */
    if (!(m_msp->markx & MX_M8OUT)) fprintf(fp, "\n");
    fflush(fp);
  }

  if (quiet==0) {
    printf(" More scores? [0] ");
    fflush(stdout);
    if (fgets(rline,20,stdin)==NULL) exit(0);
    ntmp = 0;
    if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&ntmp);
    if (ntmp<=0) ntmp = 0;
    if (ntmp>0) {
      istart = istop;
      nshow = min(nshow+ntmp, nbest);
      goto l1;
    }
  }	/* end of for (ib) loop */

  if (m_msp->markx & MX_MBLAST2) {fprintf(fp, "\n\n");}

  m_msp->nshow = nshow;	/* save the number of hits displayed for showalign */

  if (best_align_done) { m_msp->align_done = 1;}	/* note that alignments are done */

  if (m_msp->markx & MX_HTML) fprintf(fp,"</pre><hr>\n");
}
Example #6
0
/*
 * Allocate a DEHT instance and open its key and data files.
 *
 * prefix    common file-name prefix; KEY_FILE_EXT / DATA_FILE_EXT are
 *           appended to form the two file names
 * fileMode  fopen() mode for both files.  A mode starting with 'c'
 *           means "create": neither file may exist yet, and the files
 *           are then opened with the 'c' replaced by 'w'.
 * hashfun   stored in ht->hashFunc
 * validfun  stored in ht->comparisonHashFunc
 *
 * Returns the new instance, or NULL on any failure.  On failure the
 * partially built instance is released with DEHT_freeResources(); the
 * files are only asked to be deleted if this call was about to create
 * them.  NOTE(review): CHECK()/CHECK_MSG() are defined elsewhere and
 * presumably jump to LBL_ERROR when their condition is false -- confirm
 * against the macro definitions.
 */
static DEHT * DEHT_initInstance (const char * prefix, char * fileMode, 
			   hashKeyIntoTableFunctionPtr hashfun, hashKeyforEfficientComparisonFunctionPtr validfun)
{
	bool_t filesAlreadyExist = FALSE;
	bool_t errorState = TRUE;
	bool_t deleteFilesOnError = FALSE;

	DEHT * ht = NULL;

	char tempFileMode[MAX_FILE_MODE_LEN] = {0};

	TRACE_FUNC_ENTRY();

	/* sanity */
	CHECK(NULL != prefix);
	CHECK(NULL != fileMode);
	CHECK(NULL != hashfun);
	CHECK(NULL != validfun);

	ht = malloc(sizeof(DEHT));
	CHECK_MSG("malloc", (NULL != ht));

	memset(ht, 0, sizeof(DEHT));

	SAFE_STRNCPY(ht->sKeyfileName, prefix, sizeof(ht->sKeyfileName));
	SAFE_STRNCAT(ht->sKeyfileName, KEY_FILE_EXT, sizeof(ht->sKeyfileName));

	/* each buffer is bounded by its own size (the data-file name used to
	   be bounded by the size of the key-file name) */
	SAFE_STRNCPY(ht->sDatafileName, prefix, sizeof(ht->sDatafileName));
	SAFE_STRNCAT(ht->sDatafileName, DATA_FILE_EXT, sizeof(ht->sDatafileName));

	/* Open key file. If file mode begins with 'c', first check that the file does not exist */
	SAFE_STRNCPY(tempFileMode, fileMode, sizeof(tempFileMode));
	if ('c' == tempFileMode[0]) {
		/* we were asked to make sure the files weren't already present first */
		ht->keyFP = fopen(ht->sKeyfileName, "rb");
		if (NULL != ht->keyFP) {
			/* a pre-existing file is not ours to delete */
			deleteFilesOnError = FALSE;
			filesAlreadyExist = TRUE;
			fprintf(stderr, "Error: File \"%s\" already exist\n", ht->sKeyfileName);
		}

		ht->dataFP = fopen(ht->sDatafileName, "rb");
		if (NULL != ht->dataFP) {
			deleteFilesOnError = FALSE;
			filesAlreadyExist = TRUE;
			fprintf(stderr, "Error: File \"%s\" already exist\n", ht->sDatafileName);
		}

		/* fail if files already exist */
		CHECK(!filesAlreadyExist);

		/* that check passed. Now modify the file mode back to a standard one */
		tempFileMode[0] = 'w';
		/* From now on, if we fail, we'd like to clean up the files */
		deleteFilesOnError = TRUE;
	}

	/* Open key file */
	ht->keyFP = fopen(ht->sKeyfileName, tempFileMode);
	CHECK_MSG(ht->sKeyfileName, (NULL != ht->keyFP));

	/* Open data file */
	ht->dataFP = fopen(ht->sDatafileName, tempFileMode);
	CHECK_MSG(ht->sDatafileName, (NULL != ht->dataFP));
	
	ht->hashTableOfPointersImageInMemory = NULL;
	ht->hashPointersForLastBlockImageInMemory = NULL;

	ht->hashFunc = hashfun;
	ht->comparisonHashFunc = validfun;

	errorState = FALSE;
	goto LBL_CLEANUP;


LBL_ERROR:
	errorState = TRUE;
	TRACE_FUNC_ERROR();

LBL_CLEANUP:
	if (errorState) {
		if (NULL != ht) {
			DEHT_freeResources(ht, deleteFilesOnError);
		}
		ht = NULL;
	}

	TRACE_FUNC_EXIT();
	return ht;
}