コード例 #1
0
ファイル: Word.c プロジェクト: ACRMGroup/bioplib
/*>void blWord(char *string1, char *string2)
   -----------------------------------------
*//**

   \param[in]     *string1       A string
   \param[out]    *string2       A new string

   Removes leading spaces and extracts a space/tab delimited word. string2
   must have the same amount of space as string1.

-  20.03.91 Original
-  28.05.92 ANSIed
-  22.06.92 Added tab check.
-  29.03.01 Changed to use KILLLEADSPACES macro
-  07.07.14 Use bl prefix for functions By: CTP
*/
void blWord(char  *string1,
            char  *string2)
{
   char   *start;
   size_t wordLen;

   /* Step over the leading white space of the input; start is left
      pointing at the first character of the word (or at the terminator
      if there is no word).
   */
   KILLLEADSPACES(start, string1);

   /* The word runs up to the first space or tab, or to the end of the
      string if neither is present. strcspn() finds this in one pass,
      replacing a loop that re-evaluated strlen() on every iteration.
   */
   wordLen = strcspn(start, " \t");

   /* Copy only the word itself and terminate it. memmove() is used
      rather than strcpy() so the call stays well-defined when string2
      overlaps string1 (e.g. extracting the word in place). No more than
      strlen(string1)+1 bytes are ever written, so the documented size
      requirement on string2 is still sufficient.
   */
   memmove(string2, start, wordLen);
   string2[wordLen] = '\0';
}
コード例 #2
0
ファイル: ParseRes.c プロジェクト: SavOK/bioplib
/*>BOOL blDoParseResSpec(char *inSpec, char *chain, int *resnum, 
                         char *insert, BOOL uppercaseresspec)
   -------------------------------------------------------------
*//**

   \param[in]     *inSpec              Residue specification
   \param[out]    *chain               Chain label
   \param[out]    *resnum              Residue number
   \param[out]    *insert              Insert label
   \param[in]     uppercaseresspec     Convert spec to upper case.
   \return                             Success?

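   Note that chain and insert must be arrays, not character pointers.
   insert needs at least 2 characters; chain must be large enough to
   hold the longest (possibly multi-letter) chain label plus its
   terminating character.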

   Splits up a residue specification of the form 
         [c][.]num[i]
   into chain, resnum and insert. Chain and insert are optional and will
   be set to spaces if not specified. If uppercaseresspec equals TRUE,
   the spec is upper cased before processing
   
   Multi-letter chain IDs can be parsed. Additionally, chain IDs with 
   numerical characters can be parsed if a period is used to separate the 
   chain from the residue number.
   
-  21.07.93 Original    By: ACRM
-  17.07.95 Added BOOL return
-  18.03.98 Added option to include a . to separate chain and residue
            number so numeric chain names can be used
-  29.09.05 Moved this code from ParseResSpec() to DoParseResSpec()
            and made that function just call this new function.
            This move is to allow this underlying function to have an
            extra parameter to specify whether or not the residue
            specification should be upper cased (without affecting code
            that calls the old function). By: TL
-  12.10.12 insert is now a properly terminated string when there is
            no insert
-  28.08.12 chain  is now a properly terminated string
            The input specification is now copied so that actual strings
            can be passed into the routine as opposed to string delimited
            variables. This also removes the need for restoring the 
            string which has now been removed
-  26.02.14 Parsing handles multi-letter chains and numerical chain IDs.
            The "Extract chain from spec" section was re-written.
            If the period separator between the chain id and the residue
            number is absent then the chain id is set from any non-numeric
            lead characters. By: CTP
-  07.07.14 Use bl prefix for functions By: CTP
*/
BOOL blDoParseResSpec(char *inSpec, char *chain, int *resnum,
                      char *insert, BOOL uppercaseresspec)
{
   char  *ptr,
         *ptr2,
         spec[64];
   BOOL  retval      = TRUE,
         chain_found = FALSE;
   int   i;

   /* Work on a private copy so the caller's string is never modified.
      strncpy() does not terminate the destination when the source is
      as long as (or longer than) the count, so copy one character fewer
      than the buffer holds and terminate explicitly. Specifications
      longer than the buffer are truncated.
   */
   strncpy(spec, inSpec, sizeof(spec) - 1);
   spec[sizeof(spec) - 1] = '\0';

   /* 11.10.99 Default resnum of 0                                      */
   *resnum = 0;

   /* Upper case the residue specification if it has been requested     */
   if (uppercaseresspec == TRUE)
   {
      UPPER(spec);
   }
   KILLLEADSPACES(ptr, spec);

   /* Extract chain from spec (dot format): everything before the first
      '.' is the chain label; a leading '.' gives a blank chain.
      NOTE(review): chain receives i characters plus a terminator and
      its size is not known here - the caller must supply an array big
      enough for the longest chain label it may pass in.
   */
   for(ptr2=ptr,i=0;*ptr2;ptr2++,i++)
   {
      if(*ptr2 == '.')
      {
         if(i > 0)
         {
            strncpy(chain,ptr,i);
            chain[i] = '\0';
         }
         else
         {
            strcpy(chain," ");
         }

         chain_found = TRUE;
         ptr = ptr2 + 1; /* residue number starts after the '.'         */
         break;
      }
   }

   /* Extract chain from spec (no dot): the chain label is the run of
      leading characters that are neither digits nor '-'. The casts to
      unsigned char keep isdigit() defined for characters with negative
      values.
   */
   if(chain_found == FALSE)
   {
      for(ptr2=ptr,i=0;*ptr2;ptr2++,i++)
      {
         if(!isdigit((unsigned char)*ptr2) && (*ptr2 != '-'))
         {
            chain[i]    = *ptr2;
            chain[i+1]  = '\0';
            chain_found = TRUE;
            ptr = ptr2 + 1; /* residue number starts after the label    */
         }
         else
         {
            break;
         }
      }
   }

   /* No chain label present: report it as a single space               */
   if(chain_found == FALSE)
   {
      strcpy(chain," ");
   }

   /* Extract insert from spec. The default is a single space; the first
      character after the chain that is not a digit becomes the insert
      code and the working copy is cut there so that only the number is
      left at ptr. A '-' is accepted as the very first character so that
      negative residue numbers can be read (11.10.99).
   */
   insert[0] = ' ';
   insert[1] = '\0';  /* Added 12.10.12                                 */

   for(ptr2 = ptr; *ptr2; ptr2++)
   {
      if(!isdigit((unsigned char)*ptr2) &&
         ((ptr2!=ptr)||(*ptr2 != '-')))
      {
         insert[0] = *ptr2;
         insert[1] = '\0';
         *ptr2     = '\0';
         break;
      }
   }

   /* Extract residue number from spec. sscanf() returns 0 when there is
      text that is not a number, which is reported as failure. If nothing
      at all remains sscanf() returns EOF instead, so the call still
      succeeds and resnum keeps its default of 0.
   */
   if(sscanf(ptr,"%d",resnum) == 0)
      retval = FALSE;

   return(retval);
}
コード例 #3
0
ファイル: align.c プロジェクト: frederic-mahe/bioplib
/*>BOOL blReadMDM(char *mdmfile)
   -----------------------------
*//**

   \param[in]     *mdmfile    Mutation data matrix filename
   \return                      Success?
   
   Read mutation data matrix into static global arrays. The matrix may
   have comments at the start introduced with a ! in the first column.
   The matrix must be complete (i.e. a triangular matrix will not
   work). A line describing the residue types must appear, and may
   be placed before or after the matrix itself

-  07.10.92 Original
-  18.03.94 getc() -> fgetc()
-  24.11.94 Automatically looks in DATAENV if not found in current 
            directory
-  28.02.95 Modified to read any size MDM and allow comments
            Also allows the list of aa types before or after the actual
            matrix
-  26.07.95 Removed unused variables
-  06.02.03 Fixed for new version of GetWord()
-  07.04.09 Completely re-written to allow it to read BLAST style matrix
            files as well as the ones used previously
            Allow comments introduced with # as well as !
            Uses MAXWORD rather than hardcoded 16
-  07.07.14 Use bl prefix for functions By: CTP
*/
BOOL blReadMDM(char *mdmfile)
{
   FILE *mdm = NULL;
   int  i, j, k, row, size, tmpStoreSize;
   char buffer[MAXBUFF],
        word[MAXWORD],
        *p,
        **tmpStore;
   BOOL noenv;

   if((mdm=blOpenFile(mdmfile, DATAENV, "r", &noenv))==NULL)
   {
      return(FALSE);
   }

   /* First pass: find the first non-blank, non-comment line containing
      at least one numeric field. The number of numeric fields on that
      line is taken as the dimension of the (square) matrix.
   */
   size = 0;
   while(fgets(buffer,MAXBUFF,mdm))
   {
      TERMINATE(buffer);
      KILLLEADSPACES(p,buffer);

      if(strlen(p) && p[0] != '!' && p[0] != '#')
      {
         size = 0;
         for(p = buffer; p!=NULL;)
         {
            p = blGetWord(p, word, MAXWORD);
            /* Count the field if it is numeric (optionally negative)   */
            if(isdigit((unsigned char)word[0]) ||
               ((word[0] == '-')&&(isdigit((unsigned char)word[1]))))
               size++;
         }
         if(size)
            break;
      }
   }

   /* No numeric row anywhere in the file: there is no matrix to read.
      Fail here, leaving the static matrix data untouched, rather than
      going on to allocate with a zero or stale dimension.
   */
   if(size == 0)
   {
      fclose(mdm);
      return(FALSE);
   }
   sMDMSize = size;

   /* Allocate memory for the MDM and the AA List. Every failure path
      closes the file and resets the static data it has released, so no
      file handle is leaked and no dangling pointer is left behind.
      NOTE(review): any matrix loaded by an earlier call is overwritten
      here without being freed, as in the original code.
   */
   if((sMDMScore = (int **)blArray2D(sizeof(int),sMDMSize,sMDMSize))==NULL)
   {
      sMDMSize = 0;
      fclose(mdm);
      return(FALSE);
   }
   if((sMDM_AAList = (char *)malloc((sMDMSize+1)*sizeof(char)))==NULL)
   {
      blFreeArray2D((char **)sMDMScore, sMDMSize, sMDMSize);
      sMDMScore = NULL;
      sMDMSize  = 0;
      fclose(mdm);
      return(FALSE);
   }
   /* Clear the list so it is always a terminated string, even if the
      file has no amino acid line (the extra byte allocated above is
      presumably intended for the terminator)
   */
   memset(sMDM_AAList, 0, (sMDMSize+1)*sizeof(char));

   /* Allocate temporary storage for the fields of one input line. Twice
      the matrix dimension leaves room for non-numeric fields (such as
      row labels) mixed in with the scores.
   */
   tmpStoreSize = 2*sMDMSize;
   if((tmpStore = (char **)blArray2D(sizeof(char), tmpStoreSize, MAXWORD))
      ==NULL)
   {
      free(sMDM_AAList);
      sMDM_AAList = NULL;
      blFreeArray2D((char **)sMDMScore, sMDMSize, sMDMSize);
      sMDMScore = NULL;
      sMDMSize  = 0;
      fclose(mdm);
      return(FALSE);
   }

   /* Fill the matrix with zeros                                        */
   for(i=0; i<sMDMSize; i++)
   {
      for(j=0; j<sMDMSize; j++)
      {
         sMDMScore[i][j] = 0;
      }
   }

   /* Second pass: rewind the file and read the actual data             */
   rewind(mdm);
   row = 0;
   while(fgets(buffer,MAXBUFF,mdm))
   {
      int Numeric;

      TERMINATE(buffer);
      KILLLEADSPACES(p,buffer);

      /* Check line is non-blank and non-comment                        */
      if(strlen(p) && p[0] != '!' && p[0] != '#')
      {
         /* Split the line into fields, counting the numeric ones       */
         Numeric = 0;
         for(p = buffer, i = 0; p!=NULL && i<tmpStoreSize; i++)
         {
            p = blGetWord(p, tmpStore[i], MAXWORD);
            if(isdigit((unsigned char)tmpStore[i][0]) ||
               ((tmpStore[i][0] == '-')&&
                (isdigit((unsigned char)tmpStore[i][1]))))
            {
               Numeric++;
            }
         }

         if(Numeric == 0)
         {
            /* No numeric fields so it is the amino acid names: keep the
               first character of each field
            */
            for(j = 0; j<i && j<sMDMSize; j++)
            {
               sMDM_AAList[j] = tmpStore[j][0];
            }
         }
         else if(row < sMDMSize)
         {
            /* There were numeric fields, so copy them into the matrix,
               skipping any non-numeric fields
               j counts the input fields
               k counts the fields in sMDMScore
               row counts the row in sMDMScore
               Rows beyond the matrix dimension are ignored; storing
               them would write past the end of the allocated matrix.
            */
            for(j=0, k=0; j<i && k<sMDMSize; j++)
            {
               if(isdigit((unsigned char)tmpStore[j][0]) ||
                  ((tmpStore[j][0] == '-')&&
                   (isdigit((unsigned char)tmpStore[j][1]))))
               {
                  sscanf(tmpStore[j],"%d",&(sMDMScore[row][k]));
                  k++;
               }
            }

            row++;
         }
      }
   }
   fclose(mdm);
   blFreeArray2D((char **)tmpStore, tmpStoreSize, MAXWORD);

   return(TRUE);
}
コード例 #4
0
ファイル: ReadPIR.c プロジェクト: tjacobs2/scripts
/*>int ReadPIR(FILE *fp, BOOL DoInsert, char **seqs, int maxchain, 
               SEQINFO *seqinfo, BOOL *punct, BOOL *error)
   ------------------------------------------------------------------
   Input:   FILE    *fp      File pointer
            BOOL    DoInsert TRUE Read - characters into the sequence
                             FALSE Skip - characters
            int     maxchain Max number of chains to read. This is the
                             dimension of the seqs array.
                             N.B. THIS SHOULD BE AT LEAST 1 MORE THAN
                             THE EXPECTED MAXIMUM NUMBER OF SEQUENCES
   Output:  char    **seqs   Array of character pointers which will
                             be filled in with sequence information.
                             Memory will be allocated for any sequence
                             length.
            SEQINFO *seqinfo This structure will be filled in with
                             extra information about the sequence.
                             Header & title information and details
                             of any punctuation.
            BOOL    *punct   TRUE if any punctuation found.
            BOOL    *error   TRUE if an error occurred (e.g. memory
                             allocation)
   Returns: int              Number of chains in this sequence.
                             0 if file ended, or no valid sequence
                             entries found.

   This is an all-singing, all-dancing PIR reader which should handle
   all legal PIR files and some (slightly) incorrect ones. The only
   requirements of the code are that the PIR file should have 2 title
   lines per entry, the first line starting with a > sign.

   The routine will handle multiple sequence files. Successive calls
   will return information on the next entry. The routine will return
   0 when there are no more entries.

   Header line: Must start with >. Will handle files which don't have
   the proper P1; or F1; parts of the header as well as those which
   do.

   Title line: Will read the name and source fields if correctly
   separated by a -, otherwise copies all information into the name.

   Sequence: May contain allowed punctuation. This will set the punct
   flag and information on the types found will be placed in seqinfo.
   White space and line breaks are ignored. Each chain should end with
   a *, but the routine will accept the last chain of an entry with no
   *. While the standard requires upper case text, this routine will
   handle lower case and convert it to upper case. While the routine
   does pretty well at last chains not terminated with a *, a last
   chain ending with a / not followed by a * but followed by a text
   line will be identified as incomplete rather than truncated.
   If the DoInsert flag is set, - signs in the sequence will be
   read as part of the sequence, otherwise they will be skipped. This
   is an addition to the PIR standard.

   Text lines: Text lines after an entry (beginning with R;, C;, A;, 
   N; or F;) are ignored.

   02.03.94 Original    By: ACRM
   03.03.94 Added / and = handling, upcasing, strcpy()->strncpy(),
            header lines without semi-colon, title lines without -
   07.03.94 Added sequence insertion handling and DoInsert parameter.
   11.05.94 buffer is now 504 characters (V38.0 spec allows 500 chars)
            Removes leading spaces from entry code and terminates at
            first space (V39.0 spec allows comments after the code).
   28.02.95 Added check that buffer doesn't overflow. Check on nseq
            changed to >=
   06.02.96 Removes trailing spaces from comment line
*/
int ReadPIR(FILE *fp, BOOL DoInsert, char **seqs, int maxchain, 
            SEQINFO *seqinfo, BOOL *punct, BOOL *error)
{
   int  ch,
        i,
        chpos,
        nseq = 0,
        ArraySize,
        SeqPos;
   char buffer[504],
        *ptr,
        *grown;
   BOOL InParen,
        HasType;

   /* Initialise error and punct outputs                                */
   *error = FALSE;
   *punct = FALSE;

   /* Initialise seqinfo structure                                      */
   if(seqinfo != NULL)
   {
      seqinfo->code[0]    = '\0';
      seqinfo->name[0]    = '\0';
      seqinfo->source[0]  = '\0';
      seqinfo->fragment   = FALSE;
      seqinfo->paren      = FALSE;
      seqinfo->DotInParen = FALSE;
      seqinfo->NonExpJoin = FALSE;
      seqinfo->UnknownPos = FALSE;
      seqinfo->Incomplete = FALSE;
      seqinfo->Juxtapose  = FALSE;
      seqinfo->Truncated  = FALSE;
   }
   
   /* Skip over any characters until the first > sign                   */
   while((ch=fgetc(fp)) != EOF && ch != '>') ;

   /* Check for end of file                                             */
   if(ch==EOF) return(0);

   /* Read the rest of this line into a buffer                          */
   i = 0;
   while((ch=fgetc(fp)) != EOF && ch != '\n' && i<503)
      buffer[i++] = (char)ch;
   buffer[i] = '\0';

   /* If the header line was too long for the buffer, discard the rest
      of it so that it is not mistaken for the title line
   */
   while(ch != EOF && ch != '\n')
      ch = fgetc(fp);

   /* Check for end of file                                             */
   if(ch==EOF) return(0);

   /* Set information in the seqinfo structure                          */
   if(seqinfo != NULL)
   {
      /* A full header is of the form "P1;code" or "F1;code". buffer[2]
         is only examined when the line is long enough to contain it;
         for shorter lines it has never been written.
      */
      HasType = ((i > 2) && (buffer[2] == ';')) ? TRUE : FALSE;

      /* Fragment flag                                                  */
      if(HasType && buffer[0] == 'F')
         seqinfo->fragment = TRUE;
      else 
         seqinfo->fragment = FALSE;

      /* Entry code                                                     */
      if(HasType)
      {
         KILLLEADSPACES(ptr,(buffer+3));
      }
      else
      {
         KILLLEADSPACES(ptr,buffer);
      }
      
      strncpy(seqinfo->code, ptr, 16);
      seqinfo->code[15] = '\0';

      /* Terminate entry code at first space since comments are allowed
         after the entry code (V39.0 spec)
      */
      for(i=0; seqinfo->code[i]; i++)
      {
         if(seqinfo->code[i] == ' ' || seqinfo->code[i] == '\t')
         {
            seqinfo->code[i] = '\0';
            break;
         }
      }
   }

   /* Now read the title line                                           */
   if(!fgets(buffer,240,fp))
      return(0);

   /* If the title line did not fit, discard the rest of it so that it
      is not read as sequence data
   */
   if(strchr(buffer, '\n') == NULL)
   {
      while((ch = fgetc(fp)) != EOF && ch != '\n') ;
   }

   /* 06.02.96 Remove any trailing spaces
      NOTE(review): this runs before TERMINATE() below, so it may have
      no effect while the newline is still in place - confirm against
      the macro definitions.
   */
   KILLTRAILSPACES(buffer);

   /* Set information in the seqinfo structure                          */
   if(seqinfo)
   {
      TERMINATE(buffer);
      /* If it's a fully legal PIR file, there will be a - in the middle
         of the title line to separate name from source. If we don't
         find one, we copy the whole line into the name
      */
      if((ptr = strstr(buffer," - ")) != NULL)
      {
         *ptr = '\0';
         strncpy(seqinfo->source, ptr+3, 160);
         seqinfo->source[159] = '\0';
      }
      strncpy(seqinfo->name,   buffer, 160);
      seqinfo->name[159] = '\0';
      /* 06.02.96 Remove any trailing spaces                            */
      KILLTRAILSPACES(seqinfo->name);
   }

   /* Read the actual sequence info. Each pass of this loop reads one
      chain. The loop is only left (other than by returning) when a new
      entry or the end of the file is reached, i.e. with a chain that
      was not terminated by a *.
   */
   chpos = 0;
   for(;;)
   {
      InParen = FALSE;

      /* Allocate some space for the sequence                           */
      ArraySize = ALLOCSIZE;
      if((seqs[nseq] = (char *)malloc(ArraySize * sizeof(char)))==NULL)
      {
         /* 0 is returned, so the caller cannot tell how many chains
            had been stored: release them here rather than leak them
         */
         for(i=0; i<nseq; i++)
         {
            free(seqs[i]);
            seqs[i] = NULL;
         }
         *error = TRUE;
         return(0);
      }

      SeqPos    = 0;
      
      /* Read characters, storing sequence and handling any 
         punctuation
      */
      while((ch = fgetc(fp)) != EOF && ch != '*' && ch != '>')
      {
         chpos++;
         
         if(isalpha(ch) || (ch == '-' && DoInsert))
         {
            /* This is a sequence entry (probably!). Store it in upper
               case; toupper() leaves '-' and upper case letters alone
            */
            seqs[nseq][SeqPos++] = (char)toupper(ch);

            /* If necessary, expand the sequence array. The result of
               realloc() goes into a temporary because the old block is
               still allocated if the call fails.
            */
            if(SeqPos >= ArraySize)
            {
               ArraySize += ALLOCSIZE;
               grown = (char *)realloc((void *)(seqs[nseq]), ArraySize);
               if(grown == NULL)
               {
                  for(i=0; i<=nseq; i++)
                  {
                     free(seqs[i]);
                     seqs[i] = NULL;
                  }
                  *error = TRUE;
                  return(0);
               }
               seqs[nseq] = grown;
            }
         }
         else if(ch == '/')
         {
            /* Sequence is incomplete or truncated                      */
            *punct = TRUE;
            
            if(seqinfo != NULL)
            {
               if(SeqPos == 0)   /* It's the first character in a chain */
               {
                  seqinfo->Truncated = TRUE;
               }
               else              /* Not first, is it last?              */
               {
                  /* Skip spaces and newlines till we get the next real
                     character
                  */
                  while((ch = fgetc(fp)) != EOF && 
                        (ch == ' ' || ch == '\t' || ch == '\n')) ;
                  /* Replace the character in the input stream          */
                  ungetc(ch,fp);
                  
                  if(ch == '*' || 
                     ch == EOF ||
                     ch == '>')               /* End of chain           */
                     seqinfo->Truncated  = TRUE;
                  else                        /* Middle of chain        */
                     seqinfo->Incomplete = TRUE;
               }
            }
         }
         else if(ch == '=')
         {
            /* Parts of the sequence may be juxtaposed                  */
            *punct = TRUE;
            if(seqinfo != NULL) seqinfo->Juxtapose = TRUE;
         }
         else if(ch == '(')
         {
            /* Start of a region in parentheses                         */
            InParen = TRUE;
            *punct = TRUE;
            if(seqinfo != NULL) seqinfo->paren = TRUE;
         }
         else if(ch == ')')
         {
            /* End of region in parentheses                             */
            InParen = FALSE;
            *punct = TRUE;
            if(seqinfo != NULL) seqinfo->paren = TRUE;
         }
         else if(ch == '.')
         {
            *punct = TRUE;

            if(InParen)
            {
               /* Previous aa >90% certain in position                  */
               if(seqinfo != NULL) seqinfo->DotInParen = TRUE;
            }
            else
            {
               /* Join in sequence not known experimentally but is clear
                  from sequence homology.
               */
               if(seqinfo != NULL) seqinfo->NonExpJoin = TRUE;
            }
         }
         else if(ch == ',')
         {
            /* Position of previous aa not known with confidence. This
               is punctuation like the cases above, so flag it in punct
               as well as in seqinfo
            */
            *punct = TRUE;
            if(seqinfo != NULL) seqinfo->UnknownPos = TRUE;
         }
         else if(ch == '\n')
         {
            /* Start of new line, relevant to check on ;                */
            chpos = 0;
         }
         else if(ch == ';' && chpos == 2)
         {
            /* This is a text line, so the previous character wasn't
               a sequence item. Only step back if something has been
               stored, so that SeqPos can never become negative.
            */
            if(SeqPos > 0)
               SeqPos--;

            /* Ignore the rest of this line and reset chpos             */
            while((ch = fgetc(fp))!=EOF && ch != '\n') ;
            chpos = 0;
         }
      }  /* Reading this sequence                                       */

      /* Test the exit conditions from the read character loop          */
      if(ch == '*')
      {
         /* End of chain                                                */
         seqs[nseq][SeqPos] = '\0';
         if(++nseq >= maxchain)
         {
            *error = TRUE;
            return(nseq);
         }
      }
      else
      {
         /* Start of a new entry (put the > back for the next call) or
            end of file: either way this entry is finished
         */
         if(ch == '>')
            ungetc(ch,fp);
         break;
      }
   }  /* Loop on with this sequence (next chain)                        */


   /* Now tidy up the unfinished chain: keep it if anything was read,
      otherwise release its buffer and clear the pointer
   */
   seqs[nseq][SeqPos] = '\0';
   if(seqs[nseq][0] == '\0')
   {
      free(seqs[nseq]);
      seqs[nseq] = NULL;
   }
   else
   {
      nseq++;
   }

   return(nseq);
}
コード例 #5
0
ファイル: GetWord.c プロジェクト: frederic-mahe/bioplib
/*>static char *doGetWord(char *buffer, char *word, int maxlen,
                          BOOL comma)
   ------------------------------------------------------------
*//**

   \param[in]     *buffer     Input buffer to read words from
   \param[in]     maxlen      Max length of output word
   \param[in]     comma       Treat commas like white space?
   \param[out]    *word       Word read from buffer
   \return                        Pointer to start of next word in buffer
                                or NULL

   This code is designed to be called from GetWord() or GetWordNC()

   Reads a whitespace-delimited word out of buffer into word. If comma is
   TRUE, then commas are treated just like white space, otherwise they
   are treated like normal characters.

   Words containing white space may be wrapped in double inverted commas.
   A \ is used as an escape character and may be used to escape *any*
   following character. In particular:
      "\\" -> '\'     To get a backslash
      "\ " -> ' '     To get a hard whitespace (alternatively wrap the
                      string in double inverted commas)
      "\"" -> '"'     To get a double inverted comma

-  10.06.99 Original   By: ACRM (based on code from Bioplib)
-  03.08.14 Made static By: CTP
*/
static char *doGetWord(char *buffer, char *word, int maxlen, BOOL comma)
{
   char *pos;
   int  nStored  = 0;
   BOOL inQuotes = FALSE,
        escaped  = FALSE;

   /* Keep one slot of the output free for the terminator               */
   maxlen--;

   /* Without an output buffer there is nothing we can do; without an
      input buffer the result is an empty word
   */
   if(word==NULL)
      return(NULL);

   word[0] = '\0';
   if(buffer==NULL)
      return(NULL);

   KILLLEADSPACES(pos, buffer);

   /* Walk along the input one character at a time                      */
   for( ; *pos; pos++)
   {
      /* Space and tab always separate words; a comma does so only when
         the comma flag is set
      */
      int isSeparator = (*pos == ' ' || *pos == '\t' ||
                         (*pos == ',' && comma));

      /* An unescaped backslash escapes whatever follows it and is not
         itself stored
      */
      if(*pos == '\\' && !escaped)
      {
         escaped = TRUE;
         continue;
      }

      /* An unescaped double inverted comma opens or closes a quoted
         region and is not itself stored
      */
      if(*pos == '\"' && !escaped)
      {
         TOGGLE(inQuotes);
         continue;
      }

      /* A separator outside quotes, and not escaped, ends the word.
         Terminate the output, move on past any white space (and past
         one comma if commas count as separators) and hand back the
         start of the next word, or NULL if nothing is left.
      */
      if(isSeparator && !inQuotes && !escaped)
      {
         word[nStored] = '\0';
         KILLLEADSPACES(pos, pos);
         if(comma)
         {
            if(*pos == ',') pos++;
         }
         if(*pos == '\0') pos = NULL;
         return(pos);
      }

      /* Anything else (including escaped or quoted special characters)
         is part of the word. Characters that do not fit are dropped.
      */
      if(nStored<maxlen)
         word[nStored++] = *pos;
      escaped = FALSE;
   }

   /* Reached the end of the input: this was the last word              */
   word[nStored] = '\0';
   return(NULL);
}
コード例 #6
0
ファイル: main.c プロジェクト: ACRMGroup/bioplib
/*>void testWriteAsPDBML(FILE *fp, PDB *pdb)
   -----------------------------------------
*//**

   \param[in]     *fp   PDB file pointer to be written
   \param[in]     *pdb  PDB linked list to write

   Write a PDB linked list in PDBML format.
   
   This test function is based on the bioplib function blWriteAsPDBML(). 
   The function calls blAddTagVariablesNodes() which writes additional 
   user-defined tags for each atom.

   Tags are written if gPDBTagWrite is TRUE.

-  25.08.14 Original. By: CTP
-  28.08.14 Use gNPDBTagFunctions to control output of user-defined tags.
            By: CTP

*/
void testWriteAsPDBML(FILE *fp, PDB  *pdb)
{
   /* PDBML format supported */
   PDB         *p;
   xmlDocPtr   doc         = NULL;
   xmlNodePtr  root_node   = NULL, 
               sites_node  = NULL, 
               atom_node   = NULL, 
               node        = NULL;
   xmlNsPtr    pdbx        = NULL,
               xsi         = NULL;
   /* Scratch buffer for formatted values. Every write into it below is
      bounded with snprintf(), so an unexpectedly large number or long
      field is truncated instead of overrunning the buffer.
   */
   char        buffer[32], 
               *buffer_ptr;
   
   /* Create doc */
   if((doc = xmlNewDoc((xmlChar *) "1.0")) == NULL)
   {
      return;
   }
   doc->encoding = xmlStrdup((xmlChar *) "UTF-8");
   
   /* Root node */
   if((root_node = xmlNewNode(NULL, (xmlChar *) "datablock")) == NULL)
   {
      xmlFreeDoc(doc);
      return;
   }
   xmlDocSetRootElement(doc, root_node);
   pdbx = xmlNewNs(root_node, (xmlChar *) "null", (xmlChar *) "PDBx");
   xsi  = xmlNewNs(root_node, (xmlChar *) "null", (xmlChar *) "xsi");
   xmlSetNs(root_node,pdbx);
   
   /* Atom_sites node */
   sites_node = xmlNewChild(root_node, NULL,
                            (xmlChar *) "atom_siteCategory", NULL);
   if(sites_node == NULL)
   {
      xmlFreeDoc(doc);
      return;
   }
   
   /* Atom nodes */
   for(p = pdb ; p ; NEXT(p))
   {
      /* skip TER */
      if(!strncmp("TER",p->resnam,3))
      {
         continue;
      }

      /* Add atom node; stop adding atoms if the node cannot be
         created
      */
      atom_node = xmlNewChild(sites_node, NULL,
                              (xmlChar *) "atom_site", NULL);
      if(atom_node == NULL)
      {
         break;
      }
      snprintf(buffer, sizeof(buffer), "%d", p->atnum);
      xmlNewProp(atom_node, (xmlChar *) "id", (xmlChar *) buffer);
      
      /* Add atom data nodes */
      /* B value */
      snprintf(buffer, sizeof(buffer), "%.2f", p->bval);
      xmlNewChild(atom_node, NULL, (xmlChar *) "B_iso_or_equiv",
                  (xmlChar *) buffer);

      /* coordinates */
      snprintf(buffer, sizeof(buffer), "%.3f", p->x);
      xmlNewChild(atom_node, NULL, (xmlChar *) "Cartn_x",
                  (xmlChar *) buffer);

      snprintf(buffer, sizeof(buffer), "%.3f", p->y);
      xmlNewChild(atom_node, NULL, (xmlChar *) "Cartn_y",
                  (xmlChar *) buffer);

      snprintf(buffer, sizeof(buffer), "%.3f", p->z);
      xmlNewChild(atom_node, NULL, (xmlChar *) "Cartn_z",
                  (xmlChar *) buffer);

      /* author atom site labels */
      xmlNewChild(atom_node, NULL, (xmlChar *) "auth_asym_id",
                  (xmlChar *) p->chain);

      snprintf(buffer, sizeof(buffer), "%s", p->atnam);
      KILLTRAILSPACES(buffer);
      xmlNewChild(atom_node, NULL, (xmlChar *) "auth_atom_id",
                  (xmlChar *) buffer);

      snprintf(buffer, sizeof(buffer), "%s", p->resnam);
      KILLTRAILSPACES(buffer);
      KILLLEADSPACES(buffer_ptr,buffer);
      xmlNewChild(atom_node, NULL, (xmlChar *) "auth_comp_id",
                  (xmlChar *) buffer_ptr);
      
      snprintf(buffer, sizeof(buffer), "%d", p->resnum);
      xmlNewChild(atom_node, NULL, (xmlChar *) "auth_seq_id",
                  (xmlChar *) buffer);

      /* record type atom/hetatm */
      snprintf(buffer, sizeof(buffer), "%s", p->record_type);
      KILLTRAILSPACES(buffer);
      xmlNewChild(atom_node, NULL, (xmlChar *) "group_PDB",
                  (xmlChar *) buffer);

      /* atom site labels: the alternate position is written as the
         node content, or the node is given a "nil" attribute with the
         value "true" when the position is blank
      */
      node = xmlNewChild(atom_node, NULL, (xmlChar *) "label_alt_id",
                         NULL);
      if(node != NULL)
      {
         if(p->altpos == ' ')
         {
            xmlNewNsProp(node, xsi, (xmlChar *) "nil",
                         (xmlChar *) "true");
         }
         else
         {
            buffer[0] = p->altpos;
            buffer[1] = '\0';
            xmlNodeSetContent(node, (xmlChar *) buffer);
         }
      }
      
      xmlNewChild(atom_node, NULL, (xmlChar *) "label_asym_id",
                  (xmlChar *) p->chain);

      snprintf(buffer, sizeof(buffer), "%s", p->atnam);
      KILLTRAILSPACES(buffer);
      xmlNewChild(atom_node, NULL, (xmlChar *) "label_atom_id",
                  (xmlChar *) buffer);

      snprintf(buffer, sizeof(buffer), "%s", p->resnam);
      KILLTRAILSPACES(buffer);
      KILLLEADSPACES(buffer_ptr,buffer);
      xmlNewChild(atom_node, NULL, (xmlChar *) "label_comp_id",
                  (xmlChar *) buffer_ptr);

      /* Note: Entity ID is not stored in PDB data structure. 
               Value set to 1 */
      xmlNewChild(atom_node, NULL, (xmlChar *) "label_entity_id",
                  (xmlChar *) "1");
      
      snprintf(buffer, sizeof(buffer), "%d", p->resnum);
      xmlNewChild(atom_node, NULL, (xmlChar *) "label_seq_id",
                  (xmlChar *) buffer);

      /* occupancy */
      snprintf(buffer, sizeof(buffer), "%.2f", p->occ);
      xmlNewChild(atom_node, NULL, (xmlChar *) "occupancy",
                  (xmlChar *) buffer);
                         
      /* insertion code */
      /* Note: Insertion code node only included for residues with 
               insertion codes */
      if(strcmp(p->insert," "))
      {
         snprintf(buffer, sizeof(buffer), "%s", p->insert);
         xmlNewChild(atom_node, NULL, (xmlChar *) "pdbx_PDB_ins_code",
                     (xmlChar *) buffer);
      }

      /* model number */
      /* Note: Model number is not stored in PDB data structure.
               Value set to 1 */
      xmlNewChild(atom_node, NULL, (xmlChar *) "pdbx_PDB_model_num",
                  (xmlChar *) "1");

      /* formal charge */
      /* Note: Formal charge node not included for neutral atoms */
      if(p->formal_charge != 0)
      {
         snprintf(buffer, sizeof(buffer), "%d", p->formal_charge);
         xmlNewChild(atom_node, NULL, (xmlChar *) "pdbx_formal_charge",
                     (xmlChar *) buffer);
      }

      /* atom symbol */
      /* Note: If the atomic symbol is not set in PDB data structure then
               the value set is based on columns 13-14 of pdb-formated
               text file.  */
      snprintf(buffer, sizeof(buffer), "%s", p->element);
      KILLLEADSPACES(buffer_ptr,buffer);
      if(strlen(buffer_ptr))
      {
         xmlNewChild(atom_node, NULL, (xmlChar *) "type_symbol",
                     (xmlChar *) buffer_ptr);
      }
      else
      {
         blSetElementSymbolFromAtomName(buffer,p->atnam_raw);
         xmlNewChild(atom_node, NULL, (xmlChar *) "type_symbol",
                     (xmlChar *) buffer);
      }

      /* user-defined tags: only written when tag functions have been
         registered (gNPDBTagFunctions is non-zero)
      */
      if(gNPDBTagFunctions)
      {
         blAddTagVariablesNodes(p,atom_node);
      }
   }

   /* Write to doc file pointer */
   xmlDocFormatDump(fp,doc,1);

   /* Free Memory
      NOTE(review): xmlCleanupParser() releases libxml2's global state
      and is intended to be called once, when the program has finished
      with the library. It is kept here to preserve existing behaviour;
      confirm that nothing uses libxml2 after this function returns.
   */
   xmlFreeDoc(doc);
   xmlCleanupParser();

   return;
}