Exemplo n.º 1
0
/*>BOOL blGetHeaderWholePDB(WHOLEPDB *wpdb, 
                            char *header,  int maxheader,
                            char *date,    int maxdate,
                            char *pdbcode, int maxcode)
   ---------------------------------------------------
*//**

   \param[in]     *wpdb        WHOLEPDB structure pointer
   \param[out]    *header      String containing header text
   \param[in]     maxheader    Size of the header buffer (all maxheader
                               bytes are written, so this includes the
                               terminating NUL)
   \param[out]    *date        Date string
   \param[in]     maxdate      Size of the date buffer (including NUL)
   \param[out]    *pdbcode     PDB code
   \param[in]     maxcode      Size of the PDB code buffer (including 
                               NUL)
   \return                     TRUE:  Found HEADER
                               FALSE: Didn't find HEADER

   Obtains information from the PDB HEADER record. Only the first 
   HEADER record in wpdb->header is examined.

   The three output buffers are always zero-filled first, so they are
   empty strings if no HEADER record is found or if the record is too 
   short to contain a given field. At most (size-1) characters are 
   copied into each buffer so the result is always NUL-terminated; 
   text that does not fit is truncated.

-  26.03.15  Original   By: ACRM
*/
BOOL blGetHeaderWholePDB(WHOLEPDB *wpdb, 
                         char *header,  int maxheader,
                         char *date,    int maxdate,
                         char *pdbcode, int maxcode)
{
   STRINGLIST *s;
   int        i;
   BOOL       retval = FALSE;

   /* Blank all the strings. Because every byte is zeroed, copying at
      most (size-1) characters below guarantees termination
   */
   for(i=0; i<maxheader; i++) header[i]  = '\0';
   for(i=0; i<maxdate;   i++) date[i]    = '\0';
   for(i=0; i<maxcode;   i++) pdbcode[i] = '\0';

   for(s=wpdb->header; s!=NULL; NEXT(s))
   {
      if(!strncmp(s->string, "HEADER", 6))
      {
         /* Length of this record, used to ensure we never read a field
            that starts beyond the end of a short line
         */
         size_t len = strlen(s->string);

         retval = TRUE;

         /* Header text: up to 40 characters from offset 10             */
         if((len > 10) && (maxheader > 1))
         {
            strncpy(header, s->string+10, MIN(40, maxheader-1));
            KILLTRAILSPACES(header);
         }

         /* Date: up to 9 characters from offset 50                     */
         if((len > 50) && (maxdate > 1))
         {
            strncpy(date, s->string+50, MIN(9, maxdate-1));
         }

         /* PDB code: up to 4 characters from offset 62                 */
         if((len > 62) && (maxcode > 1))
         {
            strncpy(pdbcode, s->string+62, MIN(4, maxcode-1));
         }

         /* Only the first HEADER record is used                        */
         break;
      }
   }

   return(retval);
}
Exemplo n.º 2
0
/*>static BOOL ExtractField(STRINGLIST *molidStart, 
                            STRINGLIST *molidStop, char *data,
                            char *type, char *field)
   ------------------------------------------------------------
*//**
   \param[in]   *molidStart    Start of a set of header records 
   \param[in]   *molidStop     Start of next set of headers (or NULL)
   \param[out]  *data          Storage for extracted string
   \param[in]   *type          Record type (COMPND or SOURCE)
   \param[in]   *field         Sub-record field of interest
   \return                     Success

   Extracts data for a field from a COMPND or SOURCE record. The field 
   data follows the field specification and is terminated by a ;

   Records are scanned from molidStart up to (but not including) 
   molidStop, stopping early at the end of the list or at the first 
   record whose first 6 characters do not match type. Once the field 
   has been found, following records with a digit in column 10 are 
   treated as continuation lines and their text from column 11 onwards 
   is appended.

   Returns FALSE if field not found, or if no terminating ; is seen
   before the records run out. In the latter case data may contain
   partial text.

   NOTE(review): data is assumed to have room for MAXPDBANNOTATION
   characters since that value is passed to blStrncat() - confirm
   against the callers.

-  28.04.15  Original   By: ACRM
*/
static BOOL ExtractField(STRINGLIST *molidStart, STRINGLIST *molidStop,
                         char *data, char *type, char *field)
{
   STRINGLIST *s;
   BOOL       GotField = FALSE;
   char       *chp,
              buffer[MAXPDBANNOTATION];

   data[0] = '\0';

   /* Also stop on NULL so that a molidStop which is not reachable from
      molidStart cannot cause a NULL dereference
   */
   for(s=molidStart; (s!=NULL) && (s!=molidStop); NEXT(s))
   {
      /* Left the block of records of the requested type                */
      if(strncmp(s->string, type, 6))
         break;

      chp = NULL;

      /* The length check ensures that column 10 exists before it is
         examined; the cast keeps isdigit() defined for any char value
      */
      if(GotField && 
         (strlen(s->string) > 9) &&
         isdigit((unsigned char)s->string[9]))
      {
         /* We have found the field already on previous line and this is
            marked as a continuation line            
         */
         chp = s->string+10;
      }
      else
      {
         /* Look for this field                                         */
         if((chp=strstr(s->string, field))!=NULL)
         {
            GotField = TRUE;
            /* Step over the field name                                 */
            chp += strlen(field);
            if(*chp == ' ')
               chp++;
         }
      }
      
      if(GotField && (chp != NULL))
      {
         /* Copy into the buffer, always leaving it NUL-terminated      */
         strncpy(buffer, chp, MAXPDBANNOTATION-1);
         buffer[MAXPDBANNOTATION-1] = '\0';
         /* Remove newline and trailing spaces                          */
         TERMINATE(buffer);
         KILLTRAILSPACES(buffer);
         /* Add to output data                                          */
         blStrncat(data, buffer, MAXPDBANNOTATION);

         /* Exit if the string contains a ;                             */
         if((chp=strchr(data, ';'))!=NULL)
         {
            *chp = '\0';
            return(TRUE);
         }
      }
   }
   return(FALSE);
}
Exemplo n.º 3
0
/*>char *blGetTitleWholePDB(WHOLEPDB *wpdb)
   ----------------------------------------
*//**
   \param[in]    *wpdb    WHOLEPDB structure
   \return                Title from PDB file (malloc()'d), or NULL if 
                          there is no TITLE text or an allocation 
                          fails

   Extracts the title from a PDB file malloc()ing a string in which to
   store the data. This must be freed by user code.

   The text from column 11 onwards of every TITLE record in the first
   contiguous run of TITLE records is concatenated, runs of spaces are 
   collapsed with blCollapseSpaces() and trailing spaces are removed.
   TITLE records too short to contain any text in column 11 onwards
   contribute nothing.

-  28.04.15 Original   By: ACRM
-  11.05.15 Return NULL if TITLE line absent. By: CTP
-  09.06.15 Add columns 11 to 80 to title string for both start and 
            continuation lines. By: CTP
*/
char *blGetTitleWholePDB(WHOLEPDB *wpdb)
{
   char       *title = NULL,
              *cleanTitle = NULL;
   STRINGLIST *s;
   BOOL       inTitle = FALSE;

   for(s=wpdb->header; s!=NULL; NEXT(s))
   {
      if(!strncmp(s->string, "TITLE ", 6))
      {
         char buffer[MAXPDBANNOTATION];

         /* Record that the TITLE block has started so that we can stop
            as soon as it ends
         */
         inTitle = TRUE;

         /* Bounded copy so an over-long record cannot overflow buffer  */
         strncpy(buffer, s->string, MAXPDBANNOTATION-1);
         buffer[MAXPDBANNOTATION-1] = '\0';
         TERMINATE(buffer);

         /* append cols 11-80 to title string, but only if the record
            is long enough to have anything there
         */
         if(strlen(buffer) > 10)
         {
            title = blStrcatalloc(title, buffer+10);

            if(title == NULL)
               return(NULL);
         }
      }
      else if(inTitle)
      {
         /* First non-TITLE record after the TITLE block                */
         break;
      }
   }

   /* title line not found                                              */
   if(title == NULL)
      return(NULL);

   cleanTitle = blCollapseSpaces(title);
   free(title);

   /* NOTE(review): blCollapseSpaces() presumably returns NULL if it 
      cannot allocate - guard so we never modify a NULL pointer
   */
   if(cleanTitle == NULL)
      return(NULL);

   KILLTRAILSPACES(cleanTitle);
   
   return(cleanTitle);
}
Exemplo n.º 4
0
/*>int ReadPIR(FILE *fp, BOOL DoInsert, char **seqs, int maxchain, 
               SEQINFO *seqinfo, BOOL *punct, BOOL *error)
   ------------------------------------------------------------------
   Input:   FILE    *fp      File pointer
            BOOL    DoInsert TRUE Read - characters into the sequence
                             FALSE Skip - characters
            int     maxchain Max number of chains to read. This is the
                             dimension of the seqs array.
                             N.B. THIS SHOULD BE AT LEAST 1 MORE THAN
                             THE EXPECTED MAXIMUM NUMBER OF SEQUENCES
   Output:  char    **seqs   Array of character pointers which will
                             be filled in with sequence information.
                             Memory will be allocated for any sequence
                             length.
            SEQINFO *seqinfo This structure will be filled in with
                             extra information about the sequence.
                             Header & title information and details
                             of any punctuation. May be NULL.
            BOOL    *punct   TRUE if any punctuation found.
            BOOL    *error   TRUE if an error occurred (e.g. memory
                             allocation, or maxchain reached)
   Returns: int              Number of chains in this sequence.
                             0 if file ended, or no valid sequence
                             entries found.

   This is an all-singing, all-dancing PIR reader which should handle
   all legal PIR files and some (slightly) incorrect ones. The only
   requirements of the code are that the PIR file should have 2 title
   lines per entry, the first line starting with a > sign.

   The routine will handle multiple sequence files. Successive calls
   will return information on the next entry. The routine will return
   0 when there are no more entries.

   Header line: Must start with >. Will handle files which don't have
   the proper P1; or F1; parts of the header as well as those which
   do.

   Title line: Will read the name and source fields if correctly
   separated by a -, otherwise copies all information into the name.

   Sequence: May contain allowed punctuation. This will set the punct
   flag and information on the types found will be placed in seqinfo.
   White space and line breaks are ignored. Each chain should end with
   a *, but the routine will accept the last chain of an entry with no
   *. While the standard requires upper case text, this routine will
   handle lower case and convert it to upper case. While the routine
   does pretty well at last chains not terminated with a *, a last
   chain ending with a / not followed by a * but followed by a text
   line will be identified as incomplete rather than truncated.
   If the DoInsert flag is set, - signs in the sequence will be
   read as part of the sequence, otherwise they will be skipped. This
   is an addition to the PIR standard.

   Text lines: Text lines after an entry (beginning with R;, C;, A;, 
   N; or F;) are ignored.

   Memory: if growing a sequence buffer fails, that buffer is freed,
   its entry in seqs is set to NULL, *error is set and 0 is returned.
   An empty trailing chain buffer is freed and its entry in seqs is 
   set to NULL.

   02.03.94 Original    By: ACRM
   03.03.94 Added / and = handling, upcasing, strcpy()->strncpy(),
            header lines without semi-colon, title lines without -
   07.03.94 Added sequence insertion handling and DoInsert parameter.
   11.05.94 buffer is now 504 characters (V38.0 spec allows 500 chars)
            Removes leading spaces from entry code and terminates at
            first space (V39.0 spec allows comments after the code).
   28.02.95 Added check that buffer doesn't overflow. Check on nseq
            changed to >=
   06.02.96 Removes trailing spaces from comment line
*/
int ReadPIR(FILE *fp, BOOL DoInsert, char **seqs, int maxchain, 
            SEQINFO *seqinfo, BOOL *punct, BOOL *error)
{
   int  ch,
        i,
        chpos,
        nseq = 0,
        ArraySize,
        SeqPos;
   char buffer[504],
        *ptr,
        *newseq;
   BOOL InParen,
        GotStar,
        HasType;

   /* Initialise error and punct outputs                                */
   *error = FALSE;
   *punct = FALSE;

   /* Initialise seqinfo structure                                      */
   if(seqinfo != NULL)
   {
      seqinfo->code[0]    = '\0';
      seqinfo->name[0]    = '\0';
      seqinfo->source[0]  = '\0';
      seqinfo->fragment   = FALSE;
      seqinfo->paren      = FALSE;
      seqinfo->DotInParen = FALSE;
      seqinfo->NonExpJoin = FALSE;
      seqinfo->UnknownPos = FALSE;
      seqinfo->Incomplete = FALSE;
      seqinfo->Juxtapose  = FALSE;
      seqinfo->Truncated  = FALSE;
   }
   
   /* Skip over any characters until the first > sign                   */
   while((ch=fgetc(fp)) != EOF && ch != '>') ;

   /* Check for end of file                                             */
   if(ch==EOF) return(0);

   /* Read the rest of this line into a buffer                          */
   i = 0;
   while((ch=fgetc(fp)) != EOF && ch != '\n' && i<503)
      buffer[i++] = (char)ch;
   buffer[i] = '\0';

   /* Check for end of file                                             */
   if(ch==EOF) return(0);

   /* i is the number of characters stored in buffer. Only look at 
      buffer[2] if the line is long enough to contain it; on a shorter
      line that element has not been written
   */
   HasType = ((i > 2) && (buffer[2] == ';')) ? TRUE : FALSE;

   /* Set information in the seqinfo structure                          */
   if(seqinfo != NULL)
   {
      /* Fragment flag                                                  */
      if(HasType && buffer[0] == 'F')
         seqinfo->fragment = TRUE;
      else 
         seqinfo->fragment = FALSE;

      /* Entry code                                                     */
      if(HasType)
      {
         KILLLEADSPACES(ptr,(buffer+3));
      }
      else
      {
         KILLLEADSPACES(ptr,buffer);
      }
      
      strncpy(seqinfo->code, ptr, 16);
      seqinfo->code[15] = '\0';

      /* Terminate entry code at first space since comments are allowed
         after the entry code (V39.0 spec)
      */
      for(i=0; seqinfo->code[i]; i++)
      {
         if(seqinfo->code[i] == ' ' || seqinfo->code[i] == '\t')
         {
            seqinfo->code[i] = '\0';
            break;
         }
      }
   }

   /* Now read the title line                                           */
   if(!fgets(buffer,240,fp))
      return(0);
   buffer[240] = '\0';

   /* 06.02.96 Remove any trailing spaces                               */
   KILLTRAILSPACES(buffer);

   /* Set information in the seqinfo structure                          */
   if(seqinfo)
   {
      TERMINATE(buffer);
      /* If it's a fully legal PIR file, there will be a - in the middle
         of the title line to separate name from source. If we don't
         find one, we copy the whole line into the name
      */
      if((ptr = strstr(buffer," - ")) != NULL)
      {
         *ptr = '\0';
         strncpy(seqinfo->source, ptr+3, 160);
         seqinfo->source[159] = '\0';
      }
      strncpy(seqinfo->name,   buffer, 160);
      seqinfo->name[159] = '\0';
      /* 06.02.96 Remove any trailing spaces                            */
      KILLTRAILSPACES(seqinfo->name);
   }

   /* Read the actual sequence info.                                    */
   chpos = 0;
   for(;;)
   {
      GotStar = FALSE;
      InParen = FALSE;

      /* Allocate some space for the sequence                           */
      ArraySize = ALLOCSIZE;
      if((seqs[nseq] = (char *)malloc(ArraySize * sizeof(char)))==NULL)
      {
         *error = TRUE;
         return(0);
      }

      SeqPos    = 0;
      
      /* Read characters, storing sequence and handling any 
         punctuation
      */
      while((ch = fgetc(fp)) != EOF && ch != '*' && ch != '>')
      {
         chpos++;
         
         if(isalpha(ch) || (ch == '-' && DoInsert))
         {
            /* This is a sequence entry (probably!)                     */
            seqs[nseq][SeqPos++] = (isupper(ch) ? ch : toupper(ch));

            /* If necessary, expand the sequence array. The result of
               realloc() goes into a temporary so that the original
               block can still be freed if the call fails
            */
            if(SeqPos >= ArraySize)
            {
               ArraySize += ALLOCSIZE;
               newseq = (char *)realloc((void *)(seqs[nseq]), 
                                        ArraySize);
               if(newseq == NULL)
               {
                  free(seqs[nseq]);
                  seqs[nseq] = NULL;
                  *error = TRUE;
                  return(0);
               }
               seqs[nseq] = newseq;
            }
         }
         else if(ch == '/')
         {
            /* Sequence is incomplete or truncated                      */
            *punct = TRUE;
            
            if(seqinfo != NULL)
            {
               if(SeqPos == 0)   /* It's the first character in a chain */
               {
                  seqinfo->Truncated = TRUE;
               }
               else              /* Not first, is it last?              */
               {
                  /* Skip spaces and newlines till we get the next real
                     character
                  */
                  while((ch = fgetc(fp)) != EOF && 
                        (ch == ' ' || ch == '\t' || ch == '\n')) ;
                  /* Replace the character in the input stream          */
                  ungetc(ch,fp);
                  
                  if(ch == '*' || 
                     ch == EOF ||
                     ch == '>')               /* End of chain           */
                     seqinfo->Truncated  = TRUE;
                  else                        /* Middle of chain        */
                     seqinfo->Incomplete = TRUE;
               }
            }
         }
         else if(ch == '=')
         {
            /* Parts of the sequence may be juxtaposed                  */
            *punct = TRUE;
            if(seqinfo != NULL) seqinfo->Juxtapose = TRUE;
         }
         else if(ch == '(')
         {
            /* Start of a region in parentheses                         */
            InParen = TRUE;
            *punct = TRUE;
            if(seqinfo != NULL) seqinfo->paren = TRUE;
         }
         else if(ch == ')')
         {
            /* End of region in parentheses                             */
            InParen = FALSE;
            *punct = TRUE;
            if(seqinfo != NULL) seqinfo->paren = TRUE;
         }
         else if(ch == '.')
         {
            *punct = TRUE;

            if(InParen)
            {
               /* Previous aa >90% certain in position                  */
               if(seqinfo != NULL) seqinfo->DotInParen = TRUE;
            }
            else
            {
               /* Join in sequence not known experimentally but is clear
                  from sequence homology.
               */
               if(seqinfo != NULL) seqinfo->NonExpJoin = TRUE;
            }
         }
         else if(ch == ',')
         {
            /* Position of previous aa not known with confidence.
               NOTE(review): unlike the other punctuation characters
               this does not set *punct - confirm whether intended
            */
            if(seqinfo != NULL) seqinfo->UnknownPos = TRUE;
         }
         else if(ch == '\n')
         {
            /* Start of new line, relevant to check on ;                */
            chpos = 0;
         }
         else if(ch == ';' && chpos == 2)
         {
            /* This is a text line, so the previous character wasn't
               a sequence item. Never step back beyond the start of
               the array (e.g. if the previous character was not 
               stored as part of the sequence)
            */
            if(SeqPos > 0)
               SeqPos--;

            /* Ignore the rest of this line and reset chpos             */
            while((ch = fgetc(fp))!=EOF && ch != '\n') ;
            chpos = 0;
         }
      }  /* Reading this sequence                                       */

      /* Test the exit conditions from the read character loop          */
      if(ch == '*')
      {
         /* End of chain                                                */
         seqs[nseq][SeqPos] = '\0';
         GotStar = TRUE;
         if(++nseq >= maxchain)
         {
            *error = TRUE;
            return(nseq);
         }
      }
      else if(ch == '>')
      {
         /* Start of new entry                                          */
         ungetc(ch,fp);
         break;          /* Out of read for this sequence               */
      }
      else if(ch == EOF)
      {
         /* End of file                                                 */
         break;          /* Out of read for this sequence               */
      }
   }  /* Loop on with this sequence (next chain)                        */


   /* Now tidy up if we have an unfinished sequence                     */
   if(!GotStar) 
   {
      seqs[nseq][SeqPos] = '\0';
      if(!strlen(seqs[nseq]))
      {
         /* Nothing was read into this chain: release it and don't 
            leave a dangling pointer in the caller's array
         */
         free(seqs[nseq]);
         seqs[nseq] = NULL;
      }
      else
      {
         nseq++;
      }
   }

   return(nseq);
}
Exemplo n.º 5
0
/*>void testWriteAsPDBML(FILE *fp, PDB *pdb)
   -----------------------------------------
*//**

   \param[in]     *fp   PDB file pointer to be written
   \param[in]     *pdb  PDB linked list to write

   Write a PDB linked list in PDBML format.
   
   This test function is based on the bioplib function blWriteAsPDBML(). 
   The function calls blAddTagVariablesNodes() which writes additional 
   user-defined tags for each atom.

   Tags are written if gNPDBTagFunctions is non-zero.

   Nothing is written if the XML document, its root node or the 
   atom_siteCategory node cannot be created. All numeric and string
   fields are formatted with snprintf() into a fixed-size buffer, so 
   over-long values are truncated rather than overflowing it.

-  25.08.14 Original. By: CTP
-  28.08.14 Use gNPDBTagFunctions to control output of user-defined tags.
            By: CTP

*/
void testWriteAsPDBML(FILE *fp, PDB  *pdb)
{
   /* PDBML format supported */
   PDB         *p;
   xmlDocPtr   doc         = NULL;
   xmlNodePtr  root_node   = NULL, 
               sites_node  = NULL, 
               atom_node   = NULL, 
               node        = NULL;
   xmlNsPtr    pdbx        = NULL,
               xsi         = NULL;
   char        buffer[16], 
               *buffer_ptr;
   
   /* Create doc */
   doc = xmlNewDoc((xmlChar *) "1.0");
   if(doc == NULL)
   {
      return;
   }
   doc->encoding = xmlStrdup((xmlChar *) "UTF-8");
   
   /* Root node */
   root_node = xmlNewNode(NULL, (xmlChar *) "datablock");
   if(root_node == NULL)
   {
      xmlFreeDoc(doc);
      return;
   }
   xmlDocSetRootElement(doc, root_node);
   pdbx = xmlNewNs(root_node, (xmlChar *) "null", (xmlChar *) "PDBx");
   xsi  = xmlNewNs(root_node, (xmlChar *) "null", (xmlChar *) "xsi");
   xmlSetNs(root_node,pdbx);
   
   
   /* Atom_sites node */
   sites_node = xmlNewChild(root_node, NULL,
                            (xmlChar *) "atom_siteCategory", NULL);
   if(sites_node == NULL)
   {
      xmlFreeDoc(doc);
      return;
   }
   
   /* Atom nodes */
   for(p = pdb ; p ; NEXT(p))
   {
      /* skip TER */
      if(!strncmp("TER",p->resnam,3))
      {
         continue;
      }

      /* Add atom node */
      atom_node = xmlNewChild(sites_node, NULL,
                              (xmlChar *) "atom_site", NULL);
      snprintf(buffer, sizeof(buffer), "%d", p->atnum);
      xmlNewProp(atom_node, (xmlChar *) "id", (xmlChar *) buffer);
      
      /* Add atom data nodes */
      /* B value */
      snprintf(buffer, sizeof(buffer), "%.2f", p->bval);
      node = xmlNewChild(atom_node, NULL, 
                         (xmlChar *) "B_iso_or_equiv",
                         (xmlChar *) buffer);

      /* coordinates */
      snprintf(buffer, sizeof(buffer), "%.3f", p->x);
      node = xmlNewChild(atom_node, NULL, (xmlChar *) "Cartn_x",
                         (xmlChar *) buffer);

      snprintf(buffer, sizeof(buffer), "%.3f", p->y);
      node = xmlNewChild(atom_node, NULL, (xmlChar *) "Cartn_y",
                         (xmlChar *) buffer);

      snprintf(buffer, sizeof(buffer), "%.3f", p->z);
      node = xmlNewChild(atom_node, NULL, (xmlChar *) "Cartn_z",
                         (xmlChar *) buffer);

      /* author atom site labels */
      node = xmlNewChild(atom_node, NULL, (xmlChar *) "auth_asym_id",
                         (xmlChar *) p->chain);

      snprintf(buffer, sizeof(buffer), "%s", p->atnam);
      KILLTRAILSPACES(buffer);
      node = xmlNewChild(atom_node, NULL, (xmlChar *) "auth_atom_id",
                         (xmlChar *) buffer);

      /* Residue name with both leading and trailing spaces removed;
         buffer_ptr points into buffer past any leading spaces
      */
      snprintf(buffer, sizeof(buffer), "%s", p->resnam);
      KILLTRAILSPACES(buffer);
      KILLLEADSPACES(buffer_ptr,buffer);
      node = xmlNewChild(atom_node, NULL, (xmlChar *) "auth_comp_id",
                         (xmlChar *) buffer_ptr);
      
      snprintf(buffer, sizeof(buffer), "%d", p->resnum);
      node = xmlNewChild(atom_node, NULL, (xmlChar *) "auth_seq_id",
                         (xmlChar *) buffer);

      /* record type atom/hetatm */
      snprintf(buffer, sizeof(buffer), "%s", p->record_type);
      KILLTRAILSPACES(buffer);
      node = xmlNewChild(atom_node, NULL, (xmlChar *) "group_PDB",
                         (xmlChar *) buffer);

      /* atom site labels */
      /* A blank alternate position is written as an empty element 
         carrying the attribute nil="true" in the xsi namespace
      */
      node = xmlNewChild(atom_node, NULL, (xmlChar *) "label_alt_id",
                         NULL);
      if(p->altpos == ' ')
      {
         xmlNewNsProp(node, xsi, (xmlChar *) "nil", (xmlChar *) "true");
      }
      else
      {
         buffer[0] = p->altpos;
         buffer[1] = '\0';
         xmlNodeSetContent(node, (xmlChar *) buffer);
      }
      
      node = xmlNewChild(atom_node, NULL, 
                         (xmlChar *) "label_asym_id",
                         (xmlChar *) p->chain);

      snprintf(buffer, sizeof(buffer), "%s", p->atnam);
      KILLTRAILSPACES(buffer);
      node = xmlNewChild(atom_node, NULL, 
                         (xmlChar *) "label_atom_id",
                         (xmlChar *) buffer);

      snprintf(buffer, sizeof(buffer), "%s", p->resnam);
      KILLTRAILSPACES(buffer);
      KILLLEADSPACES(buffer_ptr,buffer);
      node = xmlNewChild(atom_node, NULL, 
                         (xmlChar *) "label_comp_id",
                         (xmlChar *) buffer_ptr);

      /* Note: Entity ID is not stored in PDB data structure. 
               Value set to 1 */
      node = xmlNewChild(atom_node, NULL,
                         (xmlChar *) "label_entity_id",
                         (xmlChar *) "1");
      
      snprintf(buffer, sizeof(buffer), "%d", p->resnum);
      node = xmlNewChild(atom_node, NULL, (xmlChar *) "label_seq_id",
                         (xmlChar *) buffer);

      /* occupancy */
      snprintf(buffer, sizeof(buffer), "%.2f", p->occ);
      node = xmlNewChild(atom_node, NULL, (xmlChar *) "occupancy",
                         (xmlChar *) buffer);
                         
      /* insertion code */
      /* Note: Insertion code node only included for residues with 
               insertion codes */
      if(strcmp(p->insert," "))
      {
         snprintf(buffer, sizeof(buffer), "%s", p->insert);
         node = xmlNewChild(atom_node, NULL, 
                            (xmlChar *) "pdbx_PDB_ins_code",
                            (xmlChar *) buffer);
      }

      /* model number */
      /* Note: Model number is not stored in PDB data structure.
               Value set to 1 */
      node = xmlNewChild(atom_node, NULL,
                         (xmlChar *) "pdbx_PDB_model_num",
                         (xmlChar *) "1");

      /* formal charge */
      /* Note: Formal charge node not included for neutral atoms */
      if(p->formal_charge != 0)
      {
         snprintf(buffer, sizeof(buffer), "%d", p->formal_charge);
         node = xmlNewChild(atom_node, NULL, 
                            (xmlChar *) "pdbx_formal_charge",
                            (xmlChar *) buffer);
      }

      /* atom symbol */
      /* Note: If the atomic symbol is not set in PDB data structure then
               the value set is based on columns 13-14 of PDB-formatted
               text file.  */
      snprintf(buffer, sizeof(buffer), "%s", p->element);
      KILLLEADSPACES(buffer_ptr,buffer);
      if(strlen(buffer_ptr))
      {
         node = xmlNewChild(atom_node, NULL, (xmlChar *) "type_symbol",
                            (xmlChar *) buffer_ptr);
      }
      else
      {
         blSetElementSymbolFromAtomName(buffer,p->atnam_raw);
         node = xmlNewChild(atom_node, NULL, (xmlChar *) "type_symbol",
                            (xmlChar *) buffer);
      }

      /* NEW CODE */
      /* user-defined tags */
      if(gNPDBTagFunctions)
      {
         blAddTagVariablesNodes(p,atom_node);
      }
   }

   /* Write to doc file pointer */
   xmlDocFormatDump(fp,doc,1);

   /* Free Memory */
   xmlFreeDoc(doc);
   /* NOTE(review): xmlCleanupParser() releases library-wide state, not
      just this document - confirm that no other libxml2 use follows 
      a call to this function
   */
   xmlCleanupParser();

   return;
}