/* display_domains() -- build one text line per domain annotation.

   target      : single character written in front of "Domain:" on
                 every line (do_url1() passes 'q' or 'l')
   annot_arr_p : array of n_annots annotation entries; only entries
                 whose label is '-' are reported
   n_annots    : number of entries in annot_arr_p[]

   Each reported entry produces "<target>Domain:\t<pos+1>-<end+1>\t<comment>\n".

   Returns a heap-allocated, NUL-terminated string (possibly empty) that
   the caller must free(), or NULL if memory could not be allocated.
*/
char *display_domains(char target, struct annot_entry **annot_arr_p, int n_annots) {
  char *domain_s, *new_domain_s;
  char line[MAX_STR];
  int i, n_line;
  /* since (currently) annot_var_s is MAX_LSTR, do the same for domain_s */
  size_t n_domain_s = MAX_LSTR;
  size_t n_used = 0;	/* characters stored in domain_s, excluding the NUL */
  size_t line_len, n_new;

  if ((domain_s = calloc(n_domain_s, sizeof(char)))==NULL) {
    fprintf(stderr,"*** error [%s:%d] *** cannot allocate domain_s[%d]\n",__FILE__, __LINE__,(int)n_domain_s);
    return NULL;
  }

  for (i=0; i < n_annots; i++) {
    /* annot_arr_p[] has both domains and non domains, but n_domains
       only counts domains */
    if (annot_arr_p[i]->label != '-') continue;

    /* bounded formatting: a long comment can no longer overrun line[] */
    n_line = snprintf(line, sizeof(line), "%cDomain:\t%ld-%ld\t%s\n",
		      target, annot_arr_p[i]->pos+1, annot_arr_p[i]->end+1,
		      annot_arr_p[i]->comment);
    if (n_line < 0) continue;	/* formatting error, nothing to append */

    if ((size_t)n_line >= sizeof(line)) {
      /* output was truncated; keep the record newline-terminated */
      line_len = sizeof(line)-1;
      if (line_len > 0) line[line_len-1] = '\n';
    }
    else {
      line_len = (size_t)n_line;
    }

    /* grow (by roughly 1.5x steps) until the new line and its NUL fit */
    if (n_used + line_len + 1 > n_domain_s) {
      n_new = n_domain_s;
      while (n_used + line_len + 1 > n_new) { n_new += n_new/2 + 1; }

      /* keep the old pointer until realloc() is known to have worked */
      if ((new_domain_s = realloc(domain_s, n_new))==NULL) {
	fprintf(stderr,"*** error [%s:%d] *** cannot reallocate domain_s[%d]\n",__FILE__, __LINE__,(int)n_new);
	free(domain_s);
	return NULL;
      }
      domain_s = new_domain_s;
      n_domain_s = n_new;
    }

    /* copy the line together with its terminating NUL */
    memcpy(domain_s + n_used, line, line_len + 1);
    n_used += line_len;
  }

  /* trim the buffer to the string length; if the shrink fails the
     original (larger) buffer is still valid and is returned as is */
  if ((new_domain_s = realloc(domain_s, n_used + 1)) != NULL) {
    domain_s = new_domain_s;
  }
  domain_s[n_used]='\0';

  return domain_s;
}
/*
 * Fill in ht->sKeyfileName and ht->sDatafileName as
 * <prefix><KEY_FILE_EXT> and <prefix><DATA_FILE_EXT>.
 *
 * ht     - hash table instance whose file-name fields are written
 * prefix - common file-name prefix
 *
 * Nothing is returned. The CHECK() macro is defined elsewhere;
 * presumably it jumps to LBL_ERROR when its condition is false, in
 * which case the names are left untouched and only the error trace
 * is emitted.
 */
static void DEHT_formatFilenames(DEHT * ht, const char * prefix)
{
	TRACE_FUNC_ENTRY();

	CHECK(NULL != ht);
	CHECK(NULL != prefix);

	SAFE_STRNCPY(ht->sKeyfileName, prefix, sizeof(ht->sKeyfileName));
	SAFE_STRNCAT(ht->sKeyfileName, KEY_FILE_EXT, sizeof(ht->sKeyfileName));

	/* bound the data-file name by its OWN buffer size; the original
	   used sizeof(ht->sKeyfileName) here, which is only correct while
	   both fields happen to have the same length */
	SAFE_STRNCPY(ht->sDatafileName, prefix, sizeof(ht->sDatafileName));
	SAFE_STRNCAT(ht->sDatafileName, DATA_FILE_EXT, sizeof(ht->sDatafileName));

	goto LBL_CLEANUP;

LBL_ERROR:
	TRACE_FUNC_ERROR();
	/* Fail silently. Our caller will be able to handle this. */

LBL_CLEANUP:
	TRACE_FUNC_EXIT();
	return;
}
int initpam (char *mfname, struct pstruct *ppst) { char line[512], *lp; int i, j, iaa, pval, p_i, p_j; int l_nsq; unsigned char l_sq[MAXSQ+1]; int ess_tmp, max_val, min_val; int have_es = 0; FILE *fmat; pam_opts(mfname, ppst); if ((fmat = fopen (mfname, "r")) == NULL) { printf ("***WARNING*** cannot open scoring matrix file %s\n", mfname); fprintf (stderr,"***WARNING*** cannot open scoring matrix file %s\n", mfname); return 0; } /* removed because redundant, and causes crash under MacOSX -- because copying on top of itself */ /* SAFE_STRNCPY (ppst->pamfile, mfname, MAX_FN); */ SAFE_STRNCPY(ppst->pam_name, ppst->pamfile, MAX_FN); if (ppst->pam_ms) { SAFE_STRNCAT(ppst->pam_name,"-MS",MAX_FN-strlen(ppst->pam_name)); } /* the size of the alphabet is determined in advance */ ppst->nt_align = (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA); /* look for alphabet line, skipping the comments, alphabet ends up in line[] */ while (fgets (line, sizeof(line), fmat) != NULL && line[0]=='#'); /* transfer the residue line into l_sq[] */ l_nsq = 1; l_sq[0] = '\0'; for (i=0; i<strlen(line); i++) { if (isalpha(line[i]) || line[i] == '*') { l_sq[l_nsq++] = line[i]; } } /* if we have a DNA matrix, various defaults must be updated, particularly pascii, which is used to map the residue ordering in the matrix file to the residue ordering used by the program */ if (l_nsq < 20) { if (ppst->dnaseq <= SEQT_PROT) { ppst->dnaseq = SEQT_DNA; } ppst->nt_align=1; pascii = nascii; /* use correct DNA mapping, NCBIstdaa by default */ } /* we no-longer re-initialize sascii[], we either use NCBIstdaa mapping for protein, or nascii for DNA */ /* 11-July-2014 -- need to check that alphabet is consistent with pascii */ /* for (i=0; i < l_nsq; i++) { } */ /* check for 2D pam - if not found, allocate it */ if (!ppst->have_pam2) { alloc_pam (MAXSQ+1, MAXSQ+1, ppst); ppst->have_pam2 = 1; } max_val = -1; min_val = 1; ppst->pam2[0][0][0] = -BIGNUM; /* make certain the [0] boundaries are -BIGNUM */ for 
(j=1; j < l_nsq; j++) { p_j = pascii[l_sq[j]]; ppst->pam2[0][0][p_j] = ppst->pam2[0][p_j][0] = -BIGNUM; } /* read the scoring matrix values */ for (iaa = 1; iaa < l_nsq; iaa++) { /* read pam value line */ p_i = pascii[l_sq[iaa]]; if (p_i > MAXSQ) { fprintf(stderr,"*** error [%s:%d] - residue character %c out of range %d\n", __FILE__, __LINE__, l_sq[iaa], p_i); p_i = pascii['X']; } if (fgets(line,sizeof(line),fmat)==NULL) { fprintf (stderr," error reading pam line: %s\n",line); exit (1); } /* fprintf(stderr,"%d/%d %s",iaa,nsq,line); */ strtok(line," \t\n"); /* skip the letter (residue) */ for (j = 1; j < l_nsq; j++) { p_j = pascii[l_sq[j]]; lp=strtok(NULL," \t\n"); /* get the number string */ pval=ppst->pam2[0][p_i][p_j]=atoi(lp); /* convert to integer */ if (pval > max_val) max_val = pval; if (pval < min_val) min_val = pval; } } ppst->pam_h = max_val; ppst->pam_l = min_val; if (ppst->dnaseq==0) { pam_sq = apam_sq; pam_sq_n = apam_sq_n; init_altpam(ppst); } else { pam_sq = npam_sq; pam_sq_n = npam_sq_n; } /* is protein but do not have '*' in alphabet*/ p_i = pascii['*']; p_j = pascii['X']; if (!ppst->nt_align && strchr((char *)l_sq,'*')==NULL) { /* add it */ for (i=0; i< l_nsq; i++) { ppst->pam2[0][p_i][i] = ppst->pam2[0][p_j][i]; ppst->pam2[0][i][p_i] = ppst->pam2[0][i][p_j]; } } /* make sure that X:X is < 0 if -S */ if (ppst->ext_sq_set && ppst->pam2[0][p_j][p_j] >= 0) { ppst->pam2[0][p_j][p_j] = -1; } fclose (fmat); return 1; }
void do_url1(FILE *fp, const struct mngmsg *m_msp, const struct pstruct *ppst, char *l_name, int n1, const struct a_struct *aln_p, const char *annot_var_s, const struct annot_str *q_annot_p, const struct annot_str *l_annot_p ) { char my_q_name[200], my_l_name[200], json_l_name[200]; char *db, *bp; char pgm[10], o_pgm[10], lib[MAX_LSTR]; char *tmp_annot_s, *q_domain_s, *l_domain_s, *tmp_domain_s, *etmp_domain_s; int n_tmp_annot_s, n_tmp_domain; long q_offset, l_offset; char *ref_url, *lbp=NULL; char *srch_url, *srch_url1, *dom_url; /* set the database */ if (m_msp->ldb_info.ldnaseq==SEQT_DNA) db="nucleotide"; else db="Protein"; /* set the program type */ if (strncmp(m_msp->f_id0,"rss",3)==0) { strncpy(pgm,"fa",sizeof(pgm)); } else if (strncmp(m_msp->f_id0,"rfx",3)==0) { strncpy(pgm,"fx",sizeof(pgm)); } else { strncpy(pgm,m_msp->f_id0,sizeof(pgm)); } SAFE_STRNCPY(o_pgm, pgm, sizeof(o_pgm)); /* get a library name (probably does not work for %, + abbreviations */ if (m_msp->lname[0]!='%') { SAFE_STRNCPY(lib,m_msp->lname,sizeof(lib)); } else { SAFE_STRNCPY(lib,"%25",sizeof(lib)); SAFE_STRNCAT(lib,&m_msp->lname[1],sizeof(lib)); } lib[sizeof(lib)-1]='\0'; if ((lbp = strchr(l_name,'|'))==NULL) { lbp = l_name; } else { lbp++; } SAFE_STRNCPY(my_q_name,m_msp->qtitle,sizeof(my_q_name)); if ((bp=strchr(my_q_name,' '))!=NULL) *bp='\0'; SAFE_STRNCPY(my_l_name,lbp,sizeof(my_l_name)); if (pgm[0]=='t' || !strcmp(pgm,"fx") || !strcmp(pgm,"fy")==0 ) { if ((lbp=strchr(my_l_name,':'))!=NULL) *lbp='\0'; lbp = &my_l_name[strlen(my_l_name)-2]; if ( *lbp == '_' ) *lbp = '\0'; } /* change the program name for fastx, tfastx, tfasta */ /* fastx returns proteins */ if (strcmp(pgm,"fx")==0 || strcmp(pgm,"fy")==0) {SAFE_STRNCPY(pgm,"fa",sizeof(pgm));} else if (strcmp(pgm,"ff")==0) {SAFE_STRNCPY(pgm,"fa",sizeof(pgm));} else if (pgm[0]=='t') { SAFE_STRNCPY(pgm,"fx",sizeof(pgm)); SAFE_STRNCPY(lib,DEF_PROT_LIB,sizeof(lib)); } fflush(fp); q_offset = aln_p->q_offset; l_offset = aln_p->l_offset; /* set 
up ref_url, srch_url, srch_url1, dom_url */ fflush(fp); ref_url = getenv("REF_URL"); srch_url = getenv("SRCH_URL"); srch_url1 = getenv("SRCH_URL1"); dom_url = NULL; dom_url = getenv("DOMAIN_PLOT_URL"); if (ref_url || srch_url || srch_url1 || dom_url) { fprintf(fp,"<!-- LINK_START %s -->",l_name); /* REF_URL should provide */ /* "<A HREF=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=%s&fcmd=Search&doptcmd1=DocSum&term=%s\">Entrez lookup</A> " */ if (ref_url != NULL) {fprintf(fp,ref_url,db,my_l_name);} /* SRCH_URL should provide */ /* "<A HREF=\"http://localhost/fasta_www2/searchfa.cgi?query=%s&db=fasta_www.cgi&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d&o_pgm=%s\">Re-search database</A> " */ if (srch_url != NULL) { fprintf(fp,srch_url,my_l_name,db,lib,pgm, l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1,m_msp->f_id0); } /* SRCH_URL1 should provide: */ /* "<A HREF=\"http://localhost/fasta_www2/searchxf.cgi?query=%s&db=%s&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d&o_pgm=%s\">General re-search</A>\n" */ if (srch_url1 != NULL) { fprintf(fp,srch_url1,my_l_name,db,lib,pgm, l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1,m_msp->f_id0); } if (dom_url!=NULL) { if (annot_var_s && annot_var_s[0]) { tmp_annot_s = web_encode(annot_var_s); } else tmp_annot_s = ""; q_domain_s = l_domain_s = NULL; if (q_annot_p && q_annot_p->n_domains > 0 && (q_domain_s = display_domains('q',q_annot_p->s_annot_arr_p, q_annot_p->n_annot))!=NULL) { } if (l_annot_p && l_annot_p->n_domains > 0 && (l_domain_s = display_domains('l',l_annot_p->s_annot_arr_p, l_annot_p->n_annot))!=NULL) { } /* combine domain strings */ n_tmp_domain = 0; if (q_domain_s) n_tmp_domain += strlen(q_domain_s)+1; if (l_domain_s) n_tmp_domain += strlen(l_domain_s)+1; etmp_domain_s = ""; if (n_tmp_domain > 0) { if ((tmp_domain_s=(char *)calloc(n_tmp_domain,sizeof(char)))==NULL) { fprintf(stderr,"*** error [%s:%d] *** cannot allocate tmp_domain_s[%d]\n", __FILE__, __LINE__,n_tmp_domain); } else { tmp_domain_s[0] = '\0'; if 
(q_domain_s) SAFE_STRNCAT(tmp_domain_s, q_domain_s, n_tmp_domain); if (l_domain_s) SAFE_STRNCAT(tmp_domain_s, l_domain_s, n_tmp_domain); etmp_domain_s = web_encode(tmp_domain_s); } } /* appropriate format string: */ /* pgm=%s -- program abbrev that created alignment q_name=%s -- query info q_cstart=%ld q_cstop=%ld q_astart=%ld q_astop=%ld l_name=%s -- library info l_cstart=%ld l_cstop=%ld l_astart=%ld l_astop=%ld region=%s -- aligned domain and variant information doms=%s DOMAIN_PLOT_URL = "pgm=%s;q_name=%s;q_cstart=%ld;q_cstop=%ld&q_astart=%ld&q_astop=%ld&l_name=%s&l_cstart=%ld&l_cstop=%ld&l_astart=%ld&l_astop=%ld®ions=%s&doms=%s" */ /* think about the alternative of running a script rather than embedding it */ fprintf(fp,dom_url,o_pgm, my_q_name, q_offset+seq_pos(1,aln_p->qlrev,2),q_offset+seq_pos(m_msp->n0,aln_p->qlrev,2), q_offset+seq_pos(aln_p->amin0+1,aln_p->qlrev,1), q_offset+seq_pos(aln_p->amax0, aln_p->qlrev,2), my_l_name, l_offset+seq_pos(1,aln_p->llrev,2), l_offset+seq_pos(n1,aln_p->llrev,2), l_offset+seq_pos(aln_p->amin1+1,aln_p->llrev,1),l_offset+seq_pos(aln_p->amax1,aln_p->llrev,2), tmp_annot_s, etmp_domain_s); if (n_tmp_domain>0 && tmp_domain_s) { free(tmp_domain_s); free(etmp_domain_s); } if (l_annot_p && l_annot_p->n_domains && l_domain_s) { free(l_domain_s); } if (q_annot_p && q_annot_p->n_domains && q_domain_s) { free(q_domain_s); } if (annot_var_s && annot_var_s[0] && tmp_annot_s) free(tmp_annot_s); } fprintf(fp,"\n<!-- LINK_STOP -->"); fflush(fp); } /* if ((srch_url2 = getenv("SRCH_URL2"))==NULL) fprintf(fp,"<A HREF=\"http://fasta.bioch.virginia.edu/fasta/cgi/lalignx.cgi?seq1=\"%s\"&in_seq1=\"FASTA\"&seq2=\"%s\"&in_seq2=\"Accession\"&ssr2=%ld:%ld\">lalign</A>\n<p>\n",my_l_name,db,lib,pgm,l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1); else fprintf(fp,srch_url1,my_l_name,db,lib,pgm, l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1); */ if (getenv("JSON_HTML")) { /* replace '|' with '_' */ SAFE_STRNCPY(json_l_name, l_name, 
sizeof(json_l_name)); for (bp=strchr(json_l_name,'|'); bp; bp=strchr(bp+1,'|')) { *bp = '_'; } /* replace '.' with '_' */ for (bp=strchr(json_l_name,'.'); bp; bp=strchr(bp+1,'.')) { *bp = '_'; } fprintf(fp,"\n<script type=\"text/javascript\">\n//<![CDATA[\n var json_%s = {\n",json_l_name); encode_json_str(fp, "db", db, 1); encode_json_str(fp, "l_acc", l_name, 0); encode_json_str(fp, "acc", my_l_name, 0); encode_json_str(fp, "lib", lib, 0); encode_json_str(fp, "pgm", pgm, 0); encode_json_str(fp, "o_pgm", m_msp->f_id0, 0); encode_json_aln(fp, aln_p, q_offset, l_offset, 0); if (annot_var_s && annot_var_s[0]) { encode_json_lines(fp, "annot", annot_var_s, 0); } if (q_annot_p && q_annot_p->n_domains > 0) { encode_json_domains(fp, "q_domains", q_annot_p, 0); } if (l_annot_p && l_annot_p->n_domains > 0) { encode_json_domains(fp, "l_domains", l_annot_p, 0); } fprintf(fp, "\n}\n//]]>\n</script>"); fflush(fp); } }
void showbest (FILE *fp, unsigned char **aa0, unsigned char *aa1save, int maxn, struct beststr **bptr,int nbest, int qlib, struct mngmsg *m_msp, struct pstruct *ppst, struct db_str db, char **info_gstring2 ,void **f_str ) { unsigned char *aa1; int best_align_done = 0; int ntmp = 0; char bline[MAX_BLINE], fmt[40], pad[MAX_BLINE], fmt2[40], rline[40]; char l_name[128], link_name[140]; int istart = 0, istop, ib; int nshow; /* number of sequences shown before prompt, and ultimately displayed */ int first_line, link_shown; int quiet; int r_margin; struct beststr *bbp; int n1tot; char *bp, *bline_p; char rel_label[12]; char score_label[120]; char tmp_str[20], *seq_code, *ann_code; int seq_code_len, ann_code_len; long loffset; /* loffset is offset from beginning of real sequence */ long l_off; /* l_off is the the virtual coordinate of residue 1 */ int n1, ranlib_done; struct rstruct rst; int l_score0, ngap; double lzscore, lzscore2, lbits; float percent, gpercent; struct a_struct *aln_p; struct a_res_str *cur_ares_p; struct rstruct *rst_p; int gi_num; char html_pre_E[120], html_post_E[120]; int have_lalign = 0; struct lmf_str *m_fptr; /* for lalign alignments, only show stuff when -m != 11 */ if (m_msp->markx & MX_M11OUT) return; if (strcmp(m_msp->label,"ls-w")==0) { have_lalign = 1; if ((m_msp->markx & MX_M9SUMM) == 0) return; } rel_label[0]='\0'; SAFE_STRNCPY(score_label,"scores", sizeof(score_label)); quiet = m_msp->quiet; if (m_msp->aln.llen > MAX_BLINE) m_msp->aln.llen = MAX_BLINE; if (ppst->zsflag < 0) r_margin = 10; else if (ppst->zsflag>=0 && m_msp->srelv > 1 ) r_margin = 19; else r_margin = 10; if (m_msp->markx & MX_M9SUMM && m_msp->show_code == SHOW_CODE_ID) { #ifdef SHOWSIM r_margin += 15; #else r_margin += 10; #endif } else if (m_msp->markx & MX_MBLAST2) { r_margin -= 10; } else if (m_msp->markx & (MX_M9SUMM + MX_M8OUT)) { r_margin = 0; } if (m_msp->markx & MX_HTML) { strncpy(html_pre_E,"<font color=\"darkred\">",sizeof(html_pre_E)); 
strncpy(html_post_E,"</font>",sizeof(html_post_E)); } else { html_pre_E[0] = html_post_E[0] = '\0'; } if (m_msp->nframe < 0) { sprintf(fmt,"%%-%ds (%%4d)",m_msp->aln.llen-r_margin); } else { sprintf(fmt,"%%-%ds (%%4d)",m_msp->aln.llen-(r_margin+4)); } sprintf(fmt2,"%%-%ds",m_msp->aln.llen-r_margin+8); memset(pad,' ',m_msp->aln.llen-(r_margin+6)); pad[m_msp->aln.llen-(r_margin+12)]='\0'; if (have_lalign) { if (ppst->show_ident) { SAFE_STRNCPY(score_label,"alignments", sizeof(score_label)); pad[m_msp->aln.llen-(r_margin+16)]='\0'; } else { SAFE_STRNCPY(score_label,"non-identical alignments", sizeof(score_label)); pad[m_msp->aln.llen-(r_margin+30)]='\0'; } } nshow = min(m_msp->nshow,nbest); if ((bp = strchr (m_msp->qtitle, '\n')) != NULL) *bp = '\0'; if (m_msp->markx & MX_M8OUT) { if ((bp = strchr (m_msp->qtitle, ' ')) != NULL) *bp = '\0'; } /* fprintf (fp, "%3d %s\n", qlib,m_msp->qtitle); */ if (m_msp->markx & MX_HTML) fprintf(fp,"<pre>"); /* **************************************************************** */ /* done with display format */ /* **************************************************************** */ /* **************************************************************** */ /* prompt for number of best scores if quiet == 0 */ /* **************************************************************** */ if (quiet == 0) { /* interactive */ nshow = min(m_msp->nshow, nbest); printf(" How many scores would you like to see? [%d] ",nshow); fflush(stdout); if (fgets(rline,20,stdin)==NULL) exit(0); if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&nshow); if (nshow > nbest) nshow=nbest; if (nshow<=0) nshow = min(20,nbest); } /* display number of hits for -m 8C (Blast Tab-commented format) */ if (m_msp->markx & MX_M8COMMENT) { /* line below copied from BLAST+ output */ fprintf(fp,"# Fields: query id, subject id, %% identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score"); if (m_msp->show_code == SHOW_CODE_ALIGN || m_msp->show_code == SHOW_CODE_CIGAR) { fprintf(fp," aln_code");} fprintf(fp,"\n"); fprintf(fp,"# %d hits found\n",nshow); } /* **************************************************************** */ /* have number of scores in interactive or quiet mode */ /* display "The best scores are" */ /* **************************************************************** */ if (m_msp->markx & MX_MBLAST2) { fprintf(fp, "%81s\n"," Score E"); fprintf(fp, "Sequences producing significant alignments: (Bits) Value\n\n"); } else if (!(m_msp->markx & MX_M8OUT)) { if (ppst->zsflag >= 0) { if (m_msp->z_bits==1) {/* show bit score */ fprintf(fp,"\nThe best%s %s are:%s%s bits %sE(%ld)%s", rel_label,score_label,pad,m_msp->label,html_pre_E,ppst->zdb_size,html_post_E); if (ppst->zsflag > 20) { fprintf(fp," E2()"); } } else {/* show z-score */ fprintf(fp,"\nThe best%s %s are:%s%s z-sc %sE(%ld)%s", rel_label,score_label,pad,m_msp->label,html_pre_E,ppst->zdb_size,html_post_E); if (ppst->zsflag > 20) { fprintf(fp," E2()"); } } header_aux(fp); if (m_msp->markx & MX_M9SUMM) { if (m_msp->show_code == SHOW_CODE_ID) { #ifdef SHOWSIM fprintf(fp," %%_id %%_sim alen"); #else fprintf(fp," %%_id alen"); #endif } else { if (m_msp->markx & MX_HTML && m_msp->show_code !=1) { fprintf(fp,"<!-- ");} #ifndef SHOWSIM fprintf(fp,"\t%%_id %%_gid %4s alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs ",m_msp->f_id1); #else fprintf(fp,"\t%%_id %%_sim %4s alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs ",m_msp->f_id1); #endif } if (m_msp->show_code == SHOW_CODE_ALIGN) { fprintf(fp," aln_code"); } if (m_msp->markx & MX_HTML && m_msp->show_code!=1) { fprintf(fp," -->");} } fprintf(fp,"\n"); } else { fprintf(fp,"\nThe best%s %s are:%s%s",rel_label,score_label,pad,m_msp->label); header_aux(fp); if (m_msp->markx & MX_M9SUMM) { if (m_msp->show_code == SHOW_CODE_ID) { #ifdef SHOWSIM fprintf(fp," %%_id %%_sm alen"); #else fprintf(fp," %%_id alen"); #endif } else 
{ #ifndef SHOWSIM fprintf(fp,"\t%%_id %%_gid %4s alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs ",m_msp->f_id1); #else fprintf(fp,"\t%%_id %%_sim %4s alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs ",m_msp->f_id1); #endif /* SHOWSIM */ } } if (m_msp->show_code == SHOW_CODE_ALIGN) { fprintf(fp," aln_code"); } fprintf(fp,"\n"); } } /* !(m_msp->markx & MX_M8OUT) */ istart = 0; l1: istop = min(nshow, nbest); for (ib=istart; ib<istop; ib++) { bbp = bptr[ib]; if (ppst->do_rep) { bbp->repeat_thresh = min(E1_to_s(ppst->e_cut_r, m_msp->n0, bbp->seq->n1,ppst->zdb_size, m_msp->pstat_void), bbp->rst.score[ppst->score_ix]); } #ifdef DEBUG if (bbp->seq->n1 != bbp->n1 ) { fprintf(stderr, " *** lib len error [%d!=%d] *** %s score %d\n", bbp->seq->n1,bbp->n1, bbp->mseq->libstr, bbp->rst.score[0]); } #endif /* this gets us a valid bline[] and the library for searching if necessary do not read if we have a long enough bline or we don't need a sequence */ if (bbp->mseq->bline != NULL && bbp->mseq->bline_max >= m_msp->aln.llen) { ranlib_done = 0; /* copy m_msp->aln.llen, not llen-r_margin, because the r_margin will be set later, possibly after the gi|12345 is removed */ strncpy(bline,bbp->mseq->bline,m_msp->aln.llen); bline[m_msp->aln.llen]='\0'; } else { if ((m_fptr=re_openlib(bbp->mseq->m_file_p,!m_msp->quiet))==NULL) { fprintf(stderr,"*** cannot re-open %s\n",bbp->mseq->m_file_p->lb_name); exit(1); } RANLIB(bline,m_msp->aln.llen,bbp->mseq->lseek,bbp->mseq->libstr,m_fptr); ranlib_done = 1; } /* get a valid cur_ares_p chain and put it in bbp->ares */ if (!m_msp->align_done && (m_msp->stages>1 || (m_msp->markx & MX_M9SUMM))) { /* we need a sequence */ if (bbp->seq->aa1b == NULL || (m_msp->ann_flg==1 && bbp->seq->annot_p==NULL)) { if (!ranlib_done) { /* we didn't open the library already */ if ((m_fptr=re_openlib(bbp->mseq->m_file_p,!m_msp->quiet))==NULL) { fprintf(stderr,"*** cannot re-open %s\n",bbp->mseq->m_file_p->lb_name); exit(1); } 
RANLIB(bline,m_msp->aln.llen,bbp->mseq->lseek,bbp->mseq->libstr,m_fptr); ranlib_done = 1; } n1 = re_getlib(aa1save, (m_msp->ann_flg==1) ? &(bbp->seq->annot_p) : NULL, maxn,m_msp->ldb_info.maxt3, m_msp->ldb_info.l_overlap,bbp->mseq->cont,m_msp->ldb_info.term_code, &bbp->seq->l_offset,&bbp->seq->l_off,bbp->mseq->m_file_p); aa1 = aa1save; if (m_msp->ann_flg==2 && bbp->seq->annot_p==NULL ) { /* get information about this sequence from bline */ if (get_annot(m_msp->annot1_sname, m_msp, bline, bbp->seq->n1, &(bbp->seq->annot_p), 1, ppst->debug_lib) > 0) { /* do something with annotation */ s_annot_to_aa1a(bbp->n1, bbp->seq->annot_p, m_msp->ann_arr); } } } else { n1 = bbp->seq->n1; aa1 = bbp->seq->aa1b; } if (n1 != bbp->n1) { fprintf(stderr," *** sequence length conflict %d != %d: %s\n", n1, bbp->n1, bline); continue; } if ( m_msp->stages > 1 && bbp->rst.score[2] == -BIGNUM) { /* this is not typically done unless m_msp->stages > 1 */ do_opt (aa0[bbp->frame], m_msp->n0, aa1, n1, bbp->frame, ppst, f_str[bbp->frame], &rst); bbp->rst.score[2]=rst.score[2]; } if (!bbp->have_ares & 0x1) { bbp->a_res = build_ares_code(aa0[bbp->frame], m_msp->n0, aa1, bbp->seq, bbp->frame, &bbp->have_ares, bbp->repeat_thresh, m_msp, ppst, f_str[bbp->frame] ); best_align_done = 1; } } /* end stages > 1 || MX_M9SUMM9 */ n1tot = (bbp->mseq->n1tot_p) ? *bbp->mseq->n1tot_p : bbp->seq->n1; bline_p = bline; if (!(m_msp->markx & (MX_M8OUT)) && !strncmp(bline,"gi|",3)) { bline_p = strchr(bline+4,'|')+1; *(bline_p-1) = 0; gi_num = atoi(bline+3); } /* l_name is used to build an HTML link from the bestscore line to the alignment. It can also be used to discriminate multiple hits from the same long sequence. This requires that fast_pan use -m 6. 
(6-April-2013) Add ability to specify additional alignments with link_name; */ SAFE_STRNCPY(l_name,bline_p,sizeof(l_name)); /* get rid of text after second "|" */ if ((bp=strchr(l_name,' '))!=NULL) *bp=0; if ((bp=strchr(&l_name[6],'|'))!=NULL) *bp='\0'; /* increase to [6] from [3] to allow longer db names "ref", "unk", */ if (m_msp->nframe > 2) sprintf(&l_name[strlen(l_name)],"_%d",bbp->frame+1); else if (m_msp->nframe > 0 && bbp->frame == 1) SAFE_STRNCAT(l_name,"_r",sizeof(l_name)); if (bbp->mseq->cont-1 > 0) { sprintf(tmp_str,":%d",bbp->mseq->cont-1); SAFE_STRNCAT(l_name,tmp_str,sizeof(l_name)); } if (m_msp->markx & MX_M8OUT) { if ((bp=strchr(bline_p,' '))!=NULL) *bp = '\0'; } else { bline_p[m_msp->aln.llen-r_margin]='\0'; /* check for translated frame info */ if (m_msp->nframe > -1) bline_p[m_msp->aln.llen-(r_margin+4)]='\0'; } /* now its time to report the summary numbers for all the alignments */ /* in the next loop, cur_ares_p could be NULL if we haven't done do_walign() */ cur_ares_p = bbp->a_res; first_line = 1; do { /* if cur_res_p != NULL, then we get rst from a_res->rst Otherwise, it comes from bbp->rst */ if ((!first_line || (have_lalign && !ppst->show_ident)) && cur_ares_p ) { rst_p = &cur_ares_p->rst; } else { rst_p = &bbp->rst; } n1 = bbp->seq->n1; l_score0 = rst_p->score[ppst->score_ix]; lzscore = find_z(l_score0, rst_p->escore, n1, rst_p->comp, m_msp->pstat_void); if (ppst->zsflag > 20) { lzscore2 = find_z(l_score0, rst_p->escore, n1, rst_p->comp, m_msp->pstat_void2); } lbits = zs_to_bit(lzscore, m_msp->n0, n1); /* *********************************** */ /* standard "The best scores are" here */ /* *********************************** */ if (!(m_msp->markx & (MX_M8OUT + MX_MBLAST2))) { if (first_line) { first_line = 0; fprintf (fp, fmt,bline_p,n1tot); if (m_msp->nframe > 2) fprintf (fp, " [%d]", bbp->frame+1); else if (m_msp->nframe >= 0) fprintf(fp," [%c]",(bbp->frame > 0 ?'r':'f')); } else { fprintf (fp, fmt2,"\n+-"); } if (m_msp->srelv == 1) 
fprintf (fp, " %4d", rst_p->score[ppst->score_ix]); else { if (m_msp->srelv-1 > 0) fprintf (fp, " %4d", rst_p->score[0]); if (m_msp->srelv-1 > 1 || m_msp->stages>1) fprintf (fp, " %4d", rst_p->score[1]); fprintf (fp, " %4d", rst_p->score[ppst->score_ix]); } if (ppst->zsflag>=0) { if (m_msp->z_bits==1) { fprintf (fp, " %.1f %s%7.2g%s",lbits,html_pre_E, zs_to_E(lzscore, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db), html_post_E); if (ppst->zsflag > 20) { fprintf (fp, " %7.2g",zs_to_E(lzscore2, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db)); } } else { fprintf (fp, " %.1f %s%7.2g%s",lzscore,html_pre_E, zs_to_E(lzscore, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db), html_post_E); if (ppst->zsflag > 20) { fprintf (fp, " %7.2g",zs_to_E(lzscore2, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db)); } } } show_aux(fp,bbp); } else if (m_msp->markx & MX_M8OUT) { /* MX_M8OUT -- provide query, library */ if (first_line) {first_line = 0;} fprintf (fp,"%s\t%s",m_msp->qtitle,bline_p); } else if (m_msp->markx & MX_MBLAST2) { /* blast "Sequences producing" */ if (first_line) {first_line = 0;} fprintf (fp,"%-67s %6.1f %.1g", bline_p, lbits, zs_to_E(lzscore,n1,ppst->dnaseq,ppst->zdb_size,m_msp->db)); } if (m_msp->markx & MX_M9SUMM || m_msp->markx & MX_M8OUT) { loffset = bbp->seq->l_offset; l_off = bbp->seq->l_off; aln_p = &cur_ares_p->aln; seq_code = cur_ares_p->aln_code; seq_code_len = cur_ares_p->aln_code_n; ann_code = cur_ares_p->ann_code; ann_code_len = cur_ares_p->ann_code_n; percent = calc_fpercent_id(100.0,aln_p->nident,aln_p->lc, m_msp->tot_ident, -100.0); ngap = cur_ares_p->aln.ngap_q + cur_ares_p->aln.ngap_l; #ifndef SHOWSIM gpercent = calc_fpercent_id(100.0, aln_p->nident, aln_p->lc-ngap, m_msp->tot_ident, -100.0); #else gpercent = calc_fpercent_id(100.0, cur_ares_p->aln.nsim, aln_p->lc, m_msp->tot_ident, -100.0); #endif /* SHOWSIM */ if (m_msp->show_code != SHOW_CODE_ID) { /* show more complete info than just identity */ /* calc_astruct(aln_p, cur_ares_p); -- this function should 
not be used after calc_code or any other alignment that calculates amax0/amax1 */ /* we need the coordinates for annotated SHOW_CODE_ALIGN */ calc_coord(m_msp->n0,bbp->seq->n1, m_msp->q_offset + (m_msp->q_off-1) + (m_msp->sq0off-1), loffset + (l_off-1) + (m_msp->sq1off-1), aln_p); /* if (m_msp->markx & MX_HTML) fprintf(fp,"<!-- "); */ /* %_id %_sim s-w alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs */ /* alignment min max min max */ /* sequence coordinate min max min max */ if (!(m_msp->markx & MX_M8OUT)) { fprintf(fp,"\t%5.3f %5.3f %4d %4d %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %3d %3d %3d", percent/100.0,gpercent/100.0, cur_ares_p->sw_score, aln_p->lc, aln_p->d_start0,aln_p->d_stop0, aln_p->q_start_off, aln_p->q_end_off, aln_p->d_start1,aln_p->d_stop1, aln_p->l_start_off, aln_p->l_end_off, aln_p->ngap_q,aln_p->ngap_l,aln_p->nfs); if ((m_msp->show_code & SHOW_CODE_ALIGN) == SHOW_CODE_ALIGN && seq_code_len > 0 && seq_code != NULL) { fprintf(fp,"\t%s",seq_code); if (ann_code_len > 0 && ann_code != NULL) { fprintf(fp,"\t%s",ann_code); } } } else { /* MX_M8OUT -- blast order, tab separated */ fprintf(fp,"\t%.2f\t%d\t%d\t%d\t%ld\t%ld\t%ld\t%ld\t%.2g\t%.1f", percent,aln_p->lc,aln_p->nmismatch, aln_p->ngap_q + aln_p->ngap_l+aln_p->nfs, aln_p->d_start0, aln_p->d_stop0, aln_p->d_start1, aln_p->d_stop1, zs_to_E(lzscore,n1,ppst->dnaseq,ppst->zdb_size,m_msp->db), lbits); if ((m_msp->show_code & SHOW_CODE_ALIGN) == SHOW_CODE_ALIGN && seq_code_len > 0 && seq_code != NULL) { fprintf(fp,"\t%s",seq_code); if (ann_code_len > 0 && ann_code != NULL) { fprintf(fp,"\t%s",ann_code); } } fprintf(fp,"\n"); } } else { /* !SHOW_CODE */ #ifdef SHOWSIM fprintf(fp," %5.3f %5.3f %4d", percent/100.0, (float)aln_p->nsim/(float)aln_p->lc,aln_p->lc); #else fprintf(fp," %5.3f %4d", percent/100.0,aln_p->lc); #endif if (m_msp->markx & MX_HTML) { if (cur_ares_p->index > 0) { sprintf(link_name,"%s_%d",l_name, cur_ares_p->index); } else { SAFE_STRNCPY(link_name, l_name, sizeof(l_name)); } 
fprintf(fp," <a href=\"#%s\">align</a>",link_name); link_shown = 1; } if (cur_ares_p->annot_var_s) { fprintf(fp," |Var: %s",cur_ares_p->annot_var_s); } else { link_shown = 0;} } } } while ( cur_ares_p && (cur_ares_p = cur_ares_p->next)); /* if ((m_msp->markx & MX_HTML) && !link_shown) fprintf(fp," <a href=\"#%s\">align</a>",l_name); */ if (!(m_msp->markx & MX_M8OUT)) fprintf(fp, "\n"); fflush(fp); } if (quiet==0) { printf(" More scores? [0] "); fflush(stdout); if (fgets(rline,20,stdin)==NULL) exit(0); ntmp = 0; if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&ntmp); if (ntmp<=0) ntmp = 0; if (ntmp>0) { istart = istop; nshow = min(nshow+ntmp, nbest); goto l1; } } /* end of for (ib) loop */ if (m_msp->markx & MX_MBLAST2) {fprintf(fp, "\n\n");} m_msp->nshow = nshow; /* save the number of hits displayed for showalign */ if (best_align_done) { m_msp->align_done = 1;} /* note that alignments are done */ if (m_msp->markx & MX_HTML) fprintf(fp,"</pre><hr>\n"); }
/*
 * Allocate a DEHT instance, derive its key/data file names from
 * prefix, and open both files.
 *
 * prefix   - common file-name prefix for the key and data files
 * fileMode - fopen() mode string.  A leading 'c' means "create": both
 *            files must not already exist, and the mode actually
 *            passed to fopen() has its first character replaced by 'w'
 * hashfun  - stored in ht->hashFunc
 * validfun - stored in ht->comparisonHashFunc
 *
 * Returns the new instance, or NULL on any failure.  On failure the
 * partially built instance is handed to DEHT_freeResources(); its
 * second argument is TRUE only once this call has passed the
 * "files do not exist yet" check in create mode.
 *
 * CHECK()/CHECK_MSG() are defined elsewhere; presumably they jump to
 * LBL_ERROR when their condition is false.
 */
static DEHT * DEHT_initInstance (const char * prefix, char * fileMode,
				 hashKeyIntoTableFunctionPtr hashfun,
				 hashKeyforEfficientComparisonFunctionPtr validfun)
{
	bool_t filesAlreadyExist = FALSE;
	bool_t errorState = TRUE;
	bool_t deleteFilesOnError = FALSE;

	DEHT * ht = NULL;
	char tempFileMode[MAX_FILE_MODE_LEN] = {0};

	TRACE_FUNC_ENTRY();

	/* sanity */
	CHECK(NULL != prefix);
	CHECK(NULL != fileMode);
	CHECK(NULL != hashfun);
	CHECK(NULL != validfun);

	ht = malloc(sizeof(DEHT));
	CHECK_MSG("malloc", (NULL != ht));

	memset(ht, 0, sizeof(DEHT));

	/* build the two file names; each copy is bounded by the size of
	   its own destination field (the data-file lines originally used
	   sizeof(ht->sKeyfileName)) */
	SAFE_STRNCPY(ht->sKeyfileName, prefix, sizeof(ht->sKeyfileName));
	SAFE_STRNCAT(ht->sKeyfileName, KEY_FILE_EXT, sizeof(ht->sKeyfileName));

	SAFE_STRNCPY(ht->sDatafileName, prefix, sizeof(ht->sDatafileName));
	SAFE_STRNCAT(ht->sDatafileName, DATA_FILE_EXT, sizeof(ht->sDatafileName));

	/* Open key file. If file mode begins with 'c', first check that the file does not exist */
	SAFE_STRNCPY(tempFileMode, fileMode, sizeof(tempFileMode));
	if ('c' == tempFileMode[0]) {
		/* we were asked to make sure the files weren't already present first */
		ht->keyFP = fopen(ht->sKeyfileName, "rb");
		if (NULL != ht->keyFP) {
			deleteFilesOnError = FALSE;
			filesAlreadyExist = TRUE;
			fprintf(stderr, "Error: File \"%s\" already exist\n", ht->sKeyfileName);
		}

		ht->dataFP = fopen(ht->sDatafileName, "rb");
		if (NULL != ht->dataFP) {
			deleteFilesOnError = FALSE;
			filesAlreadyExist = TRUE;
			fprintf(stderr, "Error: File \"%s\" already exist\n", ht->sDatafileName);
		}

		/* fail if files already exist */
		CHECK(!filesAlreadyExist);

		/* that check passed. Now modify the file mode back to a standard one */
		tempFileMode[0] = 'w';

		/* From now on, if we fail, we'd like to clean up the files */
		deleteFilesOnError = TRUE;
	}

	/* Open key file */
	ht->keyFP = fopen(ht->sKeyfileName, tempFileMode);
	CHECK_MSG(ht->sKeyfileName, (NULL != ht->keyFP));

	/* Open data file */
	ht->dataFP = fopen(ht->sDatafileName, tempFileMode);
	CHECK_MSG(ht->sDatafileName, (NULL != ht->dataFP));

	ht->hashTableOfPointersImageInMemory = NULL;
	ht->hashPointersForLastBlockImageInMemory = NULL;

	ht->hashFunc = hashfun;
	ht->comparisonHashFunc = validfun;

	errorState = FALSE;
	goto LBL_CLEANUP;

LBL_ERROR:
	errorState = TRUE;
	TRACE_FUNC_ERROR();

LBL_CLEANUP:
	if (errorState) {
		if (NULL != ht) {
			DEHT_freeResources(ht, deleteFilesOnError);
		}
		ht = NULL;
	}

	TRACE_FUNC_EXIT();
	return ht;
}