Пример #1
0
/*
 * Driver that prepares a pstruct for protein scoring, reads a query
 * sequence and then reads a PSSM for it.
 *
 * Steps, in order:
 *   1. copy aascii into qascii and name the scoring matrix "BL50";
 *   2. copy the aa/haa/aax/haax alphabets into the pstruct;
 *   3. allocate the query buffer aa0 (MAXTST bytes);
 *   4. call initenv() with the command line (it receives &pst and qname;
 *      presumably it fills in qname and the PSSM options -- confirm
 *      against initenv());
 *   5. read the query with getseq() and, if pst.pam_pssm is set, read the
 *      PSSM from pst.pgpfile with read_pssm().
 *
 * Returns 0 when the PSSM file was opened and handed to read_pssm(),
 * 1 when no PSSM was requested or the PSSM file could not be opened.
 * Exits with status 1 if aa0 cannot be allocated.
 */
int
main(int argc, char **argv) {

  char *aa0;
  char libstr[MAX_FN];
  char qname[MAX_FN];
  int sq0off;
  int i, n0;
  int status = 1;		/* pessimistic until the PSSM has been read */
  FILE *fp;
  struct pstruct pst, *ppst;

  /* stuff from initfa.c/h_init() */

  memcpy(qascii,aascii,sizeof(qascii));

  /* initialize a pam matrix */
  ppst = &pst;
  strncpy(ppst->pamfile,"BL50",MAX_FN);
  standard_pam(ppst->pamfile,ppst,0,0);

  /* this is always protein by default */
  ppst->nsq = naa;
  ppst->nsqx = naax;
  for (i=0; i<=ppst->nsqx; i++) {
    ppst->sq[i] = aa[i];
    ppst->hsq[i] = haa[i];
    ppst->sqx[i]=aax[i];	/* sq = aa */
    ppst->hsqx[i]=haax[i];	/* hsq = haa */
  }
  /* terminate both alphabets one past the last copied residue */
  ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0';

  if ((aa0 = calloc(MAXTST,sizeof(char)))==NULL) {
    fprintf(stderr,"Cannot allocate aa0\n");
    exit(1);
  }

  initenv(argc, argv, &pst, qname);
  alloc_pam(pst.nsq+1,pst.nsq+1, &pst);
  initpam2(&pst);

  n0 = getseq (qname, qascii, aa0, MAXTST, libstr,&sq0off);

  if (!pst.pam_pssm) {
    fprintf(stderr," ** ERROR ** No -P PSSM provided\n");
  }
  else {
    /* two position-specific matrices, each sized by the query length */
    ppst->pam2p[0] = alloc_pam2p(n0,pst.nsq);
    ppst->pam2p[1] = alloc_pam2p(n0,pst.nsq);
    if ((fp = fopen(pst.pgpfile,"rb"))!=NULL) {
      read_pssm(aa0, n0, pst.nsq, pst.pamscale,fp,ppst);
      fclose(fp);		/* the stream is not needed after the read */
      status = 0;
    }
    else {
      /* previously a missing PSSM file was ignored without any message */
      fprintf(stderr," ** ERROR ** cannot open PSSM file %s\n",pst.pgpfile);
    }
  }

  free(aa0);
  return status;
}
Пример #2
0
int
initpam (char *mfname, struct pstruct *ppst)
{
    char    line[512], *lp;
    int     i, j, iaa, pval, p_i, p_j;
    int l_nsq;
    unsigned char l_sq[MAXSQ+1];
    int ess_tmp, max_val, min_val;
    int have_es = 0;
    FILE   *fmat;

    pam_opts(mfname, ppst);

    if ((fmat = fopen (mfname, "r")) == NULL)
    {
        printf ("***WARNING*** cannot open scoring matrix file %s\n", mfname);
        fprintf (stderr,"***WARNING*** cannot open scoring matrix file %s\n", mfname);
        return 0;
    }

    /* removed because redundant, and causes crash under MacOSX -- because copying on top of itself */
    /*
       SAFE_STRNCPY (ppst->pamfile, mfname, MAX_FN);
    */
    SAFE_STRNCPY(ppst->pam_name, ppst->pamfile, MAX_FN);

    if (ppst->pam_ms) {
        SAFE_STRNCAT(ppst->pam_name,"-MS",MAX_FN-strlen(ppst->pam_name));
    }

    /*
       the size of the alphabet is determined in advance
    */
    ppst->nt_align = (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA);

    /*
      look for alphabet line, skipping the comments, alphabet ends up in line[]
    */
    while (fgets (line, sizeof(line), fmat) != NULL && line[0]=='#');

    /* transfer the residue line into l_sq[] */
    l_nsq = 1;
    l_sq[0] = '\0';
    for (i=0; i<strlen(line); i++) {
        if (isalpha(line[i]) || line[i] == '*') {
            l_sq[l_nsq++] = line[i];
        }
    }

    /* if we have a DNA matrix, various defaults must be updated,
       particularly pascii, which is used to map the residue ordering
       in the matrix file to the residue ordering used by the
       program */

    if (l_nsq < 20) {
        if (ppst->dnaseq <= SEQT_PROT) {
            ppst->dnaseq = SEQT_DNA;
        }
        ppst->nt_align=1;
        pascii = nascii;	/* use correct DNA mapping, NCBIstdaa by default */
    }

    /* we no-longer re-initialize sascii[], we either use NCBIstdaa
       mapping for protein, or nascii for DNA */

    /* 11-July-2014 -- need to check that alphabet is consistent with pascii */
    /*
    for (i=0; i < l_nsq; i++) {
    }
    */

    /* check for 2D pam  - if not found, allocate it */
    if (!ppst->have_pam2) {
        alloc_pam (MAXSQ+1, MAXSQ+1, ppst);
        ppst->have_pam2 = 1;
    }

    max_val = -1;
    min_val =  1;
    ppst->pam2[0][0][0] = -BIGNUM;
    /* make certain the [0] boundaries are -BIGNUM */
    for (j=1; j < l_nsq; j++) {
        p_j = pascii[l_sq[j]];
        ppst->pam2[0][0][p_j] = ppst->pam2[0][p_j][0] = -BIGNUM;
    }

    /*  read the scoring matrix values */
    for (iaa = 1; iaa < l_nsq; iaa++) {	/* read pam value line */
        p_i = pascii[l_sq[iaa]];
        if (p_i > MAXSQ) {
            fprintf(stderr,"*** error [%s:%d] - residue character %c out of range %d\n",
                    __FILE__, __LINE__, l_sq[iaa], p_i);
            p_i = pascii['X'];
        }
        if (fgets(line,sizeof(line),fmat)==NULL) {
            fprintf (stderr," error reading pam line: %s\n",line);
            exit (1);
        }
        /*     fprintf(stderr,"%d/%d %s",iaa,nsq,line); */
        strtok(line," \t\n");		/* skip the letter (residue) */

        for (j = 1; j < l_nsq; j++) {
            p_j = pascii[l_sq[j]];
            lp=strtok(NULL," \t\n");		/* get the number string */
            pval=ppst->pam2[0][p_i][p_j]=atoi(lp);	/* convert to integer */
            if (pval > max_val) max_val = pval;
            if (pval < min_val) min_val = pval;
        }
    }
    ppst->pam_h = max_val;
    ppst->pam_l = min_val;

    if (ppst->dnaseq==0) {
        pam_sq = apam_sq;
        pam_sq_n = apam_sq_n;
        init_altpam(ppst);
    }
    else {
        pam_sq = npam_sq;
        pam_sq_n = npam_sq_n;
    }

    /* is protein but do not have '*' in alphabet*/
    p_i = pascii['*'];
    p_j = pascii['X'];
    if (!ppst->nt_align && strchr((char *)l_sq,'*')==NULL) {
        /* add it */
        for (i=0; i< l_nsq; i++) {
            ppst->pam2[0][p_i][i] = ppst->pam2[0][p_j][i];
            ppst->pam2[0][i][p_i] = ppst->pam2[0][i][p_j];
        }
    }

    /* make sure that X:X is < 0 if -S */
    if (ppst->ext_sq_set && ppst->pam2[0][p_j][p_j] >= 0) {
        ppst->pam2[0][p_j][p_j] = -1;
    }

    fclose (fmat);
    return 1;
}