main(int argc, char **argv) { char *aa0; char libstr[MAX_FN]; char qname[MAX_FN]; int sq0off; int i, n0; FILE *fp; struct pstruct pst, *ppst; /* stuff from initfa.c/h_init() */ memcpy(qascii,aascii,sizeof(qascii)); /* initialize a pam matrix */ ppst = &pst; strncpy(ppst->pamfile,"BL50",MAX_FN); standard_pam(ppst->pamfile,ppst,0,0); /* this is always protein by default */ ppst->nsq = naa; ppst->nsqx = naax; for (i=0; i<=ppst->nsqx; i++) { ppst->sq[i] = aa[i]; ppst->hsq[i] = haa[i]; ppst->sqx[i]=aax[i]; /* sq = aa */ ppst->hsqx[i]=haax[i]; /* hsq = haa */ } ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0'; if ((aa0 = calloc(MAXTST,sizeof(char)))==NULL) { fprintf(stderr,"Cannot allocate aa0\n"); exit(1); } initenv(argc, argv, &pst, qname); alloc_pam(pst.nsq+1,pst.nsq+1, &pst); initpam2(&pst); n0 = getseq (qname, qascii, aa0, MAXTST, libstr,&sq0off); if (!pst.pam_pssm) { fprintf(stderr," ** ERROR ** No -P PSSM provided\n"); } else { ppst->pam2p[0] = alloc_pam2p(n0,pst.nsq); ppst->pam2p[1] = alloc_pam2p(n0,pst.nsq); if ((fp = fopen(pst.pgpfile,"rb"))!=NULL) { read_pssm(aa0, n0, pst.nsq, pst.pamscale,fp,ppst); } } }
int initpam (char *mfname, struct pstruct *ppst) { char line[512], *lp; int i, j, iaa, pval, p_i, p_j; int l_nsq; unsigned char l_sq[MAXSQ+1]; int ess_tmp, max_val, min_val; int have_es = 0; FILE *fmat; pam_opts(mfname, ppst); if ((fmat = fopen (mfname, "r")) == NULL) { printf ("***WARNING*** cannot open scoring matrix file %s\n", mfname); fprintf (stderr,"***WARNING*** cannot open scoring matrix file %s\n", mfname); return 0; } /* removed because redundant, and causes crash under MacOSX -- because copying on top of itself */ /* SAFE_STRNCPY (ppst->pamfile, mfname, MAX_FN); */ SAFE_STRNCPY(ppst->pam_name, ppst->pamfile, MAX_FN); if (ppst->pam_ms) { SAFE_STRNCAT(ppst->pam_name,"-MS",MAX_FN-strlen(ppst->pam_name)); } /* the size of the alphabet is determined in advance */ ppst->nt_align = (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA); /* look for alphabet line, skipping the comments, alphabet ends up in line[] */ while (fgets (line, sizeof(line), fmat) != NULL && line[0]=='#'); /* transfer the residue line into l_sq[] */ l_nsq = 1; l_sq[0] = '\0'; for (i=0; i<strlen(line); i++) { if (isalpha(line[i]) || line[i] == '*') { l_sq[l_nsq++] = line[i]; } } /* if we have a DNA matrix, various defaults must be updated, particularly pascii, which is used to map the residue ordering in the matrix file to the residue ordering used by the program */ if (l_nsq < 20) { if (ppst->dnaseq <= SEQT_PROT) { ppst->dnaseq = SEQT_DNA; } ppst->nt_align=1; pascii = nascii; /* use correct DNA mapping, NCBIstdaa by default */ } /* we no-longer re-initialize sascii[], we either use NCBIstdaa mapping for protein, or nascii for DNA */ /* 11-July-2014 -- need to check that alphabet is consistent with pascii */ /* for (i=0; i < l_nsq; i++) { } */ /* check for 2D pam - if not found, allocate it */ if (!ppst->have_pam2) { alloc_pam (MAXSQ+1, MAXSQ+1, ppst); ppst->have_pam2 = 1; } max_val = -1; min_val = 1; ppst->pam2[0][0][0] = -BIGNUM; /* make certain the [0] boundaries are -BIGNUM */ for (j=1; j < l_nsq; j++) { p_j = pascii[l_sq[j]]; ppst->pam2[0][0][p_j] = ppst->pam2[0][p_j][0] = -BIGNUM; } /* read the scoring matrix values */ for (iaa = 1; iaa < l_nsq; iaa++) { /* read pam value line */ p_i = pascii[l_sq[iaa]]; if (p_i > MAXSQ) { fprintf(stderr,"*** error [%s:%d] - residue character %c out of range %d\n", __FILE__, __LINE__, l_sq[iaa], p_i); p_i = pascii['X']; } if (fgets(line,sizeof(line),fmat)==NULL) { fprintf (stderr," error reading pam line: %s\n",line); exit (1); } /* fprintf(stderr,"%d/%d %s",iaa,nsq,line); */ strtok(line," \t\n"); /* skip the letter (residue) */ for (j = 1; j < l_nsq; j++) { p_j = pascii[l_sq[j]]; lp=strtok(NULL," \t\n"); /* get the number string */ pval=ppst->pam2[0][p_i][p_j]=atoi(lp); /* convert to integer */ if (pval > max_val) max_val = pval; if (pval < min_val) min_val = pval; } } ppst->pam_h = max_val; ppst->pam_l = min_val; if (ppst->dnaseq==0) { pam_sq = apam_sq; pam_sq_n = apam_sq_n; init_altpam(ppst); } else { pam_sq = npam_sq; pam_sq_n = npam_sq_n; } /* is protein but do not have '*' in alphabet*/ p_i = pascii['*']; p_j = pascii['X']; if (!ppst->nt_align && strchr((char *)l_sq,'*')==NULL) { /* add it */ for (i=0; i< l_nsq; i++) { ppst->pam2[0][p_i][i] = ppst->pam2[0][p_j][i]; ppst->pam2[0][i][p_i] = ppst->pam2[0][i][p_j]; } } /* make sure that X:X is < 0 if -S */ if (ppst->ext_sq_set && ppst->pam2[0][p_j][p_j] >= 0) { ppst->pam2[0][p_j][p_j] = -1; } fclose (fmat); return 1; }