/*>void blWord(char *string1, char *string2)
   -----------------------------------------
*//**
   \param[in]     *string1     A string
   \param[out]    *string2     A new string

   Skips the leading whitespace of string1 (using the KILLLEADSPACES
   macro) and copies the first space/tab delimited word into string2.

   The whole remainder of string1 is copied before being cut at the
   first delimiter, so string2 must have the same amount of space as
   string1.

-  20.03.91 Original
-  28.05.92 ANSIed
-  22.06.92 Added tab check.
-  29.03.01 Changed to use KILLLEADSPACES macro
-  07.07.14 Use bl prefix for functions By: CTP
*/
void blWord(char *string1, char *string2)
{
   char *str;

   KILLLEADSPACES(str, string1);
   strcpy(string2, str);

   /* strcspn() gives the index of the first space or tab, or the string
      length when there is none. Writing the terminator there either
      cuts the word at the delimiter or rewrites the existing NUL, and
      the string is scanned only once.
   */
   string2[strcspn(string2, " \t")] = '\0';
}
/*>BOOL blDoParseResSpec(char *inSpec, char *chain, int *resnum, 
                         char *insert, BOOL uppercaseresspec)
   -------------------------------------------------------------
*//**
   \param[in]     *inSpec             Residue specification
   \param[out]    *chain              Chain label
   \param[out]    *resnum             Residue number
   \param[out]    *insert             Insert label
   \param[in]     uppercaseresspec    Convert spec to upper case.
   \return                            Success?

   Note that chain and insert must be arrays of at least 2 characters,
   not character pointers. No bound is applied when a multi-letter chain
   is copied, so chain must be large enough for the longest chain label
   that may appear in the specification.

   Splits up a residue specification of the form 
         [c][.]num[i]
   into chain, resnum and insert. Chain and insert are optional and will
   be set to spaces if not specified.

   If uppercaseresspec equals TRUE, the spec is upper cased before
   processing.

   Multi-letter chain IDs can be parsed. Additionally, chain IDs with
   numerical characters can be parsed if a period is used to separate the
   chain from the residue number.

   The specification is copied into a 64 character local buffer before
   parsing; anything beyond the first 63 characters is ignored. The copy
   is modified while parsing, inSpec itself is never written to.

   FALSE is returned only when sscanf() reports zero conversions for the
   residue number.
   NOTE(review): when nothing is left to read for the number (e.g. a
   chain-only specification) sscanf() returns EOF rather than 0, so TRUE
   is returned with resnum left at 0 - confirm whether callers rely on
   this before tightening the check.

-  21.07.93 Original    By: ACRM
-  17.07.95 Added BOOL return
-  18.03.98 Added option to include a . to separate chain and residue
            number so numeric chain names can be used
-  29.09.05 Moved this code to from ParseResSpec() to DoParseResSpec()
            and made that function just call this new function.
            This move is to allow this underlying function to have an
            extra parameter to specify whether or not the residue
            specification should be upper cased (without affecting code
            that calls the old function). By: TL
-  12.10.12 insert is now a properly terminated string when there is
            no insert
-  28.08.12 chain is now a properly terminated string
            The input specification is now copied so that actual strings
            can be passed into the routine as opposed to string delimited
            variables. This also removes the need for restoring the 
            string which has now been removed
-  26.02.14 Parsing handles multi-letter chains and numerical chain IDs.
            The "Extract chain from spec" section was re-written.
            If the period separator between the chain id and the residue
            number is absent then the chain id is set from any 
            non-numeric lead characters. By: CTP
-  07.07.14 Use bl prefix for functions By: CTP
*/
BOOL blDoParseResSpec(char *inSpec, char *chain, int *resnum, char *insert, 
                      BOOL uppercaseresspec)
{
   char  *ptr,
         *ptr2,
         spec[64];
   BOOL  retval      = TRUE,
         chain_found = FALSE;
   int   i;

   /* strncpy() does not terminate the destination when the source fills
      it, so copy at most one character less than the buffer holds and
      terminate explicitly
   */
   strncpy(spec, inSpec, sizeof(spec) - 1);
   spec[sizeof(spec) - 1] = '\0';

   /* 11.10.99 Default resnum of 0                                      */
   *resnum = 0;

   /* Upper case the residue specification if it has been requested     */
   if(uppercaseresspec == TRUE)
   {
      UPPER(spec);
   }

   KILLLEADSPACES(ptr, spec);

   /* Extract chain from spec (dot format): everything before the first
      '.' is the chain label
   */
   for(ptr2=ptr,i=0; *ptr2; ptr2++,i++)
   {
      if(*ptr2 == '.')
      {
         if(i > 0)
         {
            strncpy(chain,ptr,i);
            chain[i] = '\0';
         }
         else
         {
            /* Nothing before the dot: blank chain                      */
            strcpy(chain," ");
         }

         chain_found = TRUE;
         ptr = ptr2 + 1;           /* Parsing resumes after the dot     */
         break;
      }
   }

   /* Extract chain from spec (non-numeric lead characters)             */
   if(chain_found == FALSE)
   {
      for(ptr2=ptr,i=0; *ptr2; ptr2++,i++)
      {
         /* The cast keeps the isdigit() argument in the range required
            by <ctype.h> when char is signed
         */
         if(!isdigit((unsigned char)*ptr2) && (*ptr2 != '-'))
         {
            chain[i]    = *ptr2;
            chain[i+1]  = '\0';
            chain_found = TRUE;
            ptr         = ptr2 + 1; /* Number starts after the chain    */
         }
         else
         {
            break;
         }
      }
   }

   /* Extract chain from spec (set chain to space)                      */
   if(chain_found == FALSE)
   {
      strcpy(chain," ");
   }

   /* Extract insert from spec. Defaults to a single space.             */
   insert[0] = ' ';
   insert[1] = '\0';
   for(ptr2 = ptr; *ptr2; ptr2++)
   {
      /* The first character that is not a digit (a leading '-' is
         allowed as the sign of the number) is the insert code. The copy
         of the spec is cut there so that only the number remains.
      */
      if(!isdigit((unsigned char)*ptr2) &&
         ((ptr2!=ptr)||(*ptr2 != '-')))
      {
         insert[0] = *ptr2;
         insert[1] = '\0';
         *ptr2     = '\0';
         break;
      }
   }

   /* Extract residue number from spec                                  */
   if(sscanf(ptr,"%d",resnum) == 0)
      retval = FALSE;

   return(retval);
}
/*>BOOL blReadMDM(char *mdmfile) ----------------------------- *//** \param[in] *mdmfile Mutation data matrix filename \return Success? Read mutation data matrix into static global arrays. The matrix may have comments at the start introduced with a ! in the first column. The matrix must be complete (i.e. a triangular matrix will not work). A line describing the residue types must appear, and may be placed before or after the matrix itself - 07.10.92 Original - 18.03.94 getc() -> fgetc() - 24.11.94 Automatically looks in DATAENV if not found in current directory - 28.02.95 Modified to read any size MDM and allow comments Also allows the list of aa types before or after the actual matrix - 26.07.95 Removed unused variables - 06.02.03 Fixed for new version of GetWord() - 07.04.09 Completely re-written to allow it to read BLAST style matrix files as well as the ones used previously Allow comments introduced with # as well as ! Uses MAXWORD rather than hardcoded 16 - 07.07.14 Use bl prefix for functions By: CTP */ BOOL blReadMDM(char *mdmfile) { FILE *mdm = NULL; int i, j, k, row, tmpStoreSize; char buffer[MAXBUFF], word[MAXWORD], *p, **tmpStore; BOOL noenv; if((mdm=blOpenFile(mdmfile, DATAENV, "r", &noenv))==NULL) { return(FALSE); } /* First read the file to determine the dimensions */ while(fgets(buffer,MAXBUFF,mdm)) { TERMINATE(buffer); KILLLEADSPACES(p,buffer); /* First line which is non-blank and non-comment */ if(strlen(p) && p[0] != '!' 
&& p[0] != '#') { sMDMSize = 0; for(p = buffer; p!=NULL;) { p = blGetWord(p, word, MAXWORD); /* Increment counter if this is numeric */ if(isdigit(word[0]) || ((word[0] == '-')&&(isdigit(word[1])))) sMDMSize++; } if(sMDMSize) break; } } /* Allocate memory for the MDM and the AA List */ if((sMDMScore = (int **)blArray2D(sizeof(int),sMDMSize,sMDMSize))==NULL) return(FALSE); if((sMDM_AAList = (char *)malloc((sMDMSize+1)*sizeof(char)))==NULL) { blFreeArray2D((char **)sMDMScore, sMDMSize, sMDMSize); return(FALSE); } /* Allocate temporary storage for a row from the matrix */ tmpStoreSize = 2*sMDMSize; if((tmpStore = (char **)blArray2D(sizeof(char), tmpStoreSize, MAXWORD)) ==NULL) { free(sMDM_AAList); blFreeArray2D((char **)sMDMScore, sMDMSize, sMDMSize); return(FALSE); } /* Fill the matrix with zeros */ for(i=0; i<sMDMSize; i++) { for(j=0; j<sMDMSize; j++) { sMDMScore[i][j] = 0; } } /* Rewind the file and read the actual data */ rewind(mdm); row = 0; while(fgets(buffer,MAXBUFF,mdm)) { int Numeric; TERMINATE(buffer); KILLLEADSPACES(p,buffer); /* Check line is non-blank and non-comment */ if(strlen(p) && p[0] != '!' 
&& p[0] != '#') { Numeric = 0; for(p = buffer, i = 0; p!=NULL && i<tmpStoreSize; i++) { p = blGetWord(p, tmpStore[i], MAXWORD); /* Incremement Numeric counter if it's a numeric field */ if(isdigit(tmpStore[i][0]) || ((tmpStore[i][0] == '-')&&(isdigit(tmpStore[i][1])))) { Numeric++; } } /* No numeric fields so it is the amino acid names */ if(Numeric == 0) { for(j = 0; j<i && j<sMDMSize; j++) { sMDM_AAList[j] = tmpStore[j][0]; } } else { /* There were numeric fields, so copy them into the matrix, skipping any non-numeric fields j counts the input fields k counts the fields in sMDMScore row counts the row in sMDMScore */ for(j=0, k=0; j<i && k<sMDMSize; j++) { if(isdigit(tmpStore[j][0]) || ((tmpStore[j][0] == '-')&&(isdigit(tmpStore[j][1])))) { sscanf(tmpStore[j],"%d",&(sMDMScore[row][k])); k++; } } row++; } } } fclose(mdm); blFreeArray2D((char **)tmpStore, tmpStoreSize, MAXWORD); return(TRUE); }
/*>int ReadPIR(FILE *fp, BOOL DoInsert, char **seqs, int maxchain, SEQINFO *seqinfo, BOOL *punct, BOOL *error) ------------------------------------------------------------------ Input: FILE *fp File pointer BOOL DoInsert TRUE Read - characters into the sequence FALSE Skip - characters int maxchain Max number of chains to read. This is the dimension of the seqs array. N.B. THIS SHOULD BE AT LEAST 1 MORE THAN THE EXPECTED MAXIMUM NUMBER OF SEQUENCES Output: char **seqs Array of character pointers which will be filled in with sequence information. Memory will be allocated for any sequence length. SEQINFO *seqinfo This structure will be filled in with extra information about the sequence. Header & title information and details of any punctuation. BOOL *punct TRUE if any punctuation found. BOOL *error TRUE if an error occured (e.g. memory allocation) Returns: int Number of chains in this sequence. 0 if file ended, or no valid sequence entries found. This is an all-singing, all-dancing PIR reader which should handle all legal PIR files and some (slightly) incorrect ones. The only requirements of the code are that the PIR file should have 2 title lines per entry, the first line starting with a > sign. The routine will handle multiple sequence files. Successive calls will return information on the next entry. The routine will return 0 when there are no more entries. Header line: Must start with >. Will handle files which don't have the proper P1; or F1; parts of the header as well as those which do. Title line: Will read the name and source fields if correctly separated by a -, otherwise copies all information into the name. Sequence: May contain allowed puctuation. This will set the punct flag and information on the types found will be placed in seqinfo. White space and line breaks are ignored. Each chain should end with a *, but the routine will accept the last chain of an entry with no *. 
While the standard requires upper case text, this routine will handle lower case and convert it to upper case. While the routine does pretty well at last chains not terminated with a *, a last chain ending with a / not followed by a * but followed by a text line will be identified as incomplete rather than truncated. If the DoInsert flag is set, - signs in the sequence will be read as part of the sequence, otherwise they will be skipped. This is an addition to the PIR standard. Text lines: Text lines after an entry (beginning with R;, C;, A;, N; or F;) are ignored. 02.03.94 Original By: ACRM 03.03.94 Added / and = handling, upcasing, strcpy()->strncpy(), header lines without semi-colon, title lines without - 07.03.94 Added sequence insertion handling and DoInsert parameter. 11.05.94 buffer is now 504 characters (V38.0 spec allows 500 chars) Removes leading spaces from entry code and terminates at first space (V39.0 spec allows comments after the code). 28.02.95 Added check that buffer doesn't overflow. 
Check on nseq changed to >= 06.02.96 Removes trailing spaces from comment line */ int ReadPIR(FILE *fp, BOOL DoInsert, char **seqs, int maxchain, SEQINFO *seqinfo, BOOL *punct, BOOL *error) { int ch, i, chpos, nseq = 0, ArraySize, SeqPos; char buffer[504], *ptr; BOOL InParen, GotStar; /* Initialise error and punct outputs */ *error = FALSE; *punct = FALSE; /* Initialise seqinfo structure */ if(seqinfo != NULL) { seqinfo->code[0] = '\0'; seqinfo->name[0] = '\0'; seqinfo->source[0] = '\0'; seqinfo->fragment = FALSE; seqinfo->paren = FALSE; seqinfo->DotInParen = FALSE; seqinfo->NonExpJoin = FALSE; seqinfo->UnknownPos = FALSE; seqinfo->Incomplete = FALSE; seqinfo->Juxtapose = FALSE; seqinfo->Truncated = FALSE; } /* Skip over any characters until the first > sign */ while((ch=fgetc(fp)) != EOF && ch != '>') ; /* Check for end of file */ if(ch==EOF) return(0); /* Read the rest of this line into a buffer */ i = 0; while((ch=fgetc(fp)) != EOF && ch != '\n' && i<503) buffer[i++] = (char)ch; buffer[i] = '\0'; /* Check for end of file */ if(ch==EOF) return(0); /* Set information in the seqinfo structure */ if(seqinfo != NULL) { /* Fragment flag */ if(buffer[2] == ';' && buffer[0] == 'F') seqinfo->fragment = TRUE; else seqinfo->fragment = FALSE; /* Entry code */ if(buffer[2] == ';') { KILLLEADSPACES(ptr,(buffer+3)); } else { KILLLEADSPACES(ptr,buffer); } strncpy(seqinfo->code, ptr, 16); seqinfo->code[15] = '\0'; /* Terminate entry code at first space since comments are allowed after the entry code (V39.0 spec) */ for(i=0; seqinfo->code[i]; i++) { if(seqinfo->code[i] == ' ' || seqinfo->code[i] == '\t') { seqinfo->code[i] = '\0'; break; } } } /* Now read the title line */ if(!fgets(buffer,240,fp)) return(0); buffer[240] = '\0'; /* 06.02.96 Remove any trailing spaces */ KILLTRAILSPACES(buffer); /* Set information in the seqinfo structure */ if(seqinfo) { TERMINATE(buffer); /* If it's a fully legal PIR file, there will be a - in the midle of the title line to separate name from 
source. If we don't find one, we copy the whole line into the name */ if((ptr = strstr(buffer," - ")) != NULL) { *ptr = '\0'; strncpy(seqinfo->source, ptr+3, 160); seqinfo->source[159] = '\0'; } strncpy(seqinfo->name, buffer, 160); seqinfo->name[159] = '\0'; /* 06.02.96 Remove any trailing spaces */ KILLTRAILSPACES(seqinfo->name); } /* Read the actual sequence info. */ chpos = 0; for(;;) { GotStar = FALSE; InParen = FALSE; /* Allocate some space for the sequence */ ArraySize = ALLOCSIZE; if((seqs[nseq] = (char *)malloc(ArraySize * sizeof(char)))==NULL) { *error = TRUE; return(0); } SeqPos = 0; /* Read characters, storing sequence and handling any punctuation */ while((ch = fgetc(fp)) != EOF && ch != '*' && ch != '>') { chpos++; if(isalpha(ch) || (ch == '-' && DoInsert)) { /* This is a sequence entry (probably!) */ seqs[nseq][SeqPos++] = (isupper(ch) ? ch : toupper(ch)); /* If necessary, expand the sequence array */ if(SeqPos >= ArraySize) { ArraySize += ALLOCSIZE; seqs[nseq] = (char *)realloc((void *)(seqs[nseq]), ArraySize); if(seqs[nseq] == NULL) { *error = TRUE; return(0); } } } else if(ch == '/') { /* Sequence is incomplete or truncated */ *punct = TRUE; if(seqinfo != NULL) { if(SeqPos == 0) /* It's the first character in a chain */ { seqinfo->Truncated = TRUE; } else /* Not first, is it last? 
*/ { /* Skip spaces and newlines till we get the next real character */ while((ch = fgetc(fp)) != EOF && (ch == ' ' || ch == '\t' || ch == '\n')) ; /* Replace the character in the input stream */ ungetc(ch,fp); if(ch == '*' || ch == EOF || ch == '>') /* End of chain */ seqinfo->Truncated = TRUE; else /* Middle of chain */ seqinfo->Incomplete = TRUE; } } } else if(ch == '=') { /* Parts of the sequence may be juxtaposed */ *punct = TRUE; if(seqinfo != NULL) seqinfo->Juxtapose = TRUE; } else if(ch == '(') { /* Start of a region in parentheses */ InParen = TRUE; *punct = TRUE; if(seqinfo != NULL) seqinfo->paren = TRUE; } else if(ch == ')') { /* End of region in parentheses */ InParen = FALSE; *punct = TRUE; if(seqinfo != NULL) seqinfo->paren = TRUE; } else if(ch == '.') { *punct = TRUE; if(InParen) { /* Previous aa >90% certain in position */ if(seqinfo != NULL) seqinfo->DotInParen = TRUE; } else { /* Join in sequence not known experimentally but is clear from sequence homology. */ if(seqinfo != NULL) seqinfo->NonExpJoin = TRUE; } } else if(ch == ',') { /* Position of previous aa not known with confidence */ if(seqinfo != NULL) seqinfo->UnknownPos = TRUE; } else if(ch == '\n') { /* Start of new line, relevant to check on ; */ chpos = 0; } else if(ch == ';' && chpos == 2) { /* This is a text line, so the previous character wasn't a sequence item */ SeqPos--; /* Ignore the rest of this line and reset chpos */ while((ch = fgetc(fp))!=EOF && ch != '\n') ; chpos = 0; } } /* Reading this sequence */ /* Test the exit conditions from the read character loop */ if(ch == '*') { /* End of chain */ seqs[nseq][SeqPos] = '\0'; GotStar = TRUE; if(++nseq >= maxchain) { *error = TRUE; return(nseq); } } else if(ch == '>') { /* Start of new entry */ ungetc(ch,fp); break; /* Out of read for this sequence */ } else if(ch == EOF) { /* End of file */ break; /* Out of read for this sequence */ } } /* Loop on with this sequence (next chain) */ /* Now tidy up if we have an unfinished sequence 
*/ if(!GotStar) { seqs[nseq][SeqPos] = '\0'; if(!strlen(seqs[nseq])) free(seqs[nseq]); else nseq++; } return(nseq); }
/*>static char *doGetWord(char *buffer, char *word, int maxlen, 
                          BOOL comma)
   ------------------------------------------------------------
*//**
   \param[in]     *buffer     Input buffer to read words from
   \param[in]     maxlen      Max length of output word
   \param[in]     comma       Treat commas like white space?
   \param[out]    *word       Word read from buffer
   \return                    Pointer to start of next word in buffer
                              or NULL

   This code is designed to be called from GetWord() or GetWordNC()

   Reads a whitespace delimited word out of buffer into word. If comma
   is TRUE, then commas are treated just like white space, otherwise
   they are treated like normal characters.

   Words containing white space may be wrapped in double inverted commas.
   A backslash is used as an escape character and may be used to escape
   *any* following character. In particular:
      a doubled backslash          gives a single backslash
      backslash then a space       gives a hard whitespace (alternatively
                                   wrap the string in double inverted
                                   commas)
      backslash then a "           gives a double inverted comma

   At most maxlen-1 characters are stored in word; further characters of
   an over-long word are read but dropped. NULL is returned if either
   pointer is NULL, or when the word read was the last one in the buffer.

-  10.06.99 Original   By: ACRM (based on code from Bioplib)
-  03.08.14 Made static By: CTP
*/
static char *doGetWord(char *buffer, char *word, int maxlen, BOOL comma)
{
   int  in,
        out      = 0;
   BOOL inQuotes = FALSE,
        escaped  = FALSE;
   char *start;

   /* Decrement maxlen so we can terminate correctly                    */
   maxlen--;

   /* Check validity of passed pointers                                 */
   if(word==NULL)
      return(NULL);

   word[0] = '\0';
   if(buffer==NULL)
      return(NULL);

   KILLLEADSPACES(start, buffer);

   /* Run through each character in the input buffer                    */
   for(in=0; start[in]; in++)
   {
      char current = start[in];

      if(!escaped)
      {
         /* An unescaped backslash escapes the next character           */
         if(current == '\\')
         {
            escaped = TRUE;
            continue;
         }

         /* An unescaped double inverted comma opens or closes a region
            in which white space is kept
         */
         if(current == '\"')
         {
            TOGGLE(inQuotes);
            continue;
         }
      }

      /* White space (and a comma, if commas are being treated as white
         space) ends the word unless it is escaped or inside double
         inverted commas
      */
      if(!inQuotes && !escaped &&
         (current == ' ' || current == '\t' || (current == ',' && comma)))
      {
         word[out] = '\0';

         /* Move on to the start of the next word                       */
         start += in;
         KILLLEADSPACES(start, start);

         /* If we are handling commas as whitespace, then kill the comma
            if found
         */
         if(comma && (*start == ','))
            start++;

         return((*start == '\0') ? NULL : start);
      }

      /* Anything else is copied across, space permitting               */
      if(out<maxlen)
         word[out++] = current;
      escaped = FALSE;
   }

   /* Ran off the end of the buffer: this was the last word             */
   word[out] = '\0';
   return(NULL);
}
/*>void testWriteAsPDBML(FILE *fp, PDB *pdb)
   -----------------------------------------
*//**
   \param[in]     *fp   PDB file pointer to be written
   \param[in]     *pdb  PDB linked list to write

   Write a PDB linked list in PDBML format.

   This test function is based on the bioplib function blWriteAsPDBML().
   The function calls blAddTagVariablesNodes() which writes additional
   user-defined tags for each atom. Tags are written if
   gNPDBTagFunctions is non-zero.

   Records whose residue name starts with TER are skipped. The entity ID
   and model number are not taken from the PDB structure and are always
   written as 1.

   Nothing is written if the XML document or its root node cannot be
   created. All values are formatted with snprintf() so that an
   unexpectedly long value is truncated rather than overrunning the
   local buffer.

-  25.08.14 Original. By: CTP
-  28.08.14 Use gNPDBTagFunctions to control output of user-defined tags.
            By: CTP
*/
void testWriteAsPDBML(FILE *fp, PDB *pdb)
{
   PDB         *p;
   xmlDocPtr   doc         = NULL;
   xmlNodePtr  root_node   = NULL,
               sites_node  = NULL,
               atom_node   = NULL,
               node        = NULL;
   xmlNsPtr    pdbx        = NULL,
               xsi         = NULL;
   char        buffer[64],
               *buffer_ptr;

   /* Create doc                                                        */
   if((doc = xmlNewDoc((xmlChar *) "1.0")) == NULL)
   {
      return;
   }
   doc->encoding = xmlStrdup((xmlChar *) "UTF-8");

   /* Root node                                                         */
   if((root_node = xmlNewNode(NULL, (xmlChar *) "datablock")) == NULL)
   {
      xmlFreeDoc(doc);
      return;
   }
   xmlDocSetRootElement(doc, root_node);
   pdbx = xmlNewNs(root_node, (xmlChar *) "null", (xmlChar *) "PDBx");
   xsi  = xmlNewNs(root_node, (xmlChar *) "null", (xmlChar *) "xsi");
   xmlSetNs(root_node,pdbx);

   /* Atom_sites node                                                   */
   sites_node = xmlNewChild(root_node, NULL,
                            (xmlChar *) "atom_siteCategory", NULL);

   /* Atom nodes                                                        */
   for(p = pdb ; p ; NEXT(p))
   {
      /* skip TER                                                       */
      if(!strncmp("TER",p->resnam,3))
      {
         continue;
      }

      /* Add atom node                                                  */
      atom_node = xmlNewChild(sites_node, NULL,
                              (xmlChar *) "atom_site", NULL);
      snprintf(buffer, sizeof(buffer), "%d", p->atnum);
      xmlNewProp(atom_node, (xmlChar *) "id", (xmlChar *) buffer);

      /* Add atom data nodes                                            */
      /* B value                                                        */
      snprintf(buffer, sizeof(buffer), "%.2f", p->bval);
      xmlNewChild(atom_node, NULL, (xmlChar *) "B_iso_or_equiv",
                  (xmlChar *) buffer);

      /* coordinates                                                    */
      snprintf(buffer, sizeof(buffer), "%.3f", p->x);
      xmlNewChild(atom_node, NULL, (xmlChar *) "Cartn_x",
                  (xmlChar *) buffer);

      snprintf(buffer, sizeof(buffer), "%.3f", p->y);
      xmlNewChild(atom_node, NULL, (xmlChar *) "Cartn_y",
                  (xmlChar *) buffer);

      snprintf(buffer, sizeof(buffer), "%.3f", p->z);
      xmlNewChild(atom_node, NULL, (xmlChar *) "Cartn_z",
                  (xmlChar *) buffer);

      /* author atom site labels                                        */
      xmlNewChild(atom_node, NULL, (xmlChar *) "auth_asym_id",
                  (xmlChar *) p->chain);

      snprintf(buffer, sizeof(buffer), "%s", p->atnam);
      KILLTRAILSPACES(buffer);
      xmlNewChild(atom_node, NULL, (xmlChar *) "auth_atom_id",
                  (xmlChar *) buffer);

      snprintf(buffer, sizeof(buffer), "%s", p->resnam);
      KILLTRAILSPACES(buffer);
      KILLLEADSPACES(buffer_ptr,buffer);
      xmlNewChild(atom_node, NULL, (xmlChar *) "auth_comp_id",
                  (xmlChar *) buffer_ptr);

      snprintf(buffer, sizeof(buffer), "%d", p->resnum);
      xmlNewChild(atom_node, NULL, (xmlChar *) "auth_seq_id",
                  (xmlChar *) buffer);

      /* record type atom/hetatm                                        */
      snprintf(buffer, sizeof(buffer), "%s", p->record_type);
      KILLTRAILSPACES(buffer);
      xmlNewChild(atom_node, NULL, (xmlChar *) "group_PDB",
                  (xmlChar *) buffer);

      /* atom site labels                                               */
      /* A blank alternate position is written as an empty node marked
         nil; otherwise the single character is the node content
      */
      node = xmlNewChild(atom_node, NULL, (xmlChar *) "label_alt_id",
                         NULL);
      if(p->altpos == ' ')
      {
         xmlNewNsProp(node, xsi, (xmlChar *) "nil", (xmlChar *) "true");
      }
      else
      {
         buffer[0] = p->altpos;
         buffer[1] = '\0';
         xmlNodeSetContent(node, (xmlChar *) buffer);
      }

      xmlNewChild(atom_node, NULL, (xmlChar *) "label_asym_id",
                  (xmlChar *) p->chain);

      snprintf(buffer, sizeof(buffer), "%s", p->atnam);
      KILLTRAILSPACES(buffer);
      xmlNewChild(atom_node, NULL, (xmlChar *) "label_atom_id",
                  (xmlChar *) buffer);

      snprintf(buffer, sizeof(buffer), "%s", p->resnam);
      KILLTRAILSPACES(buffer);
      KILLLEADSPACES(buffer_ptr,buffer);
      xmlNewChild(atom_node, NULL, (xmlChar *) "label_comp_id",
                  (xmlChar *) buffer_ptr);

      /* Note: Entity ID is not stored in PDB data structure. Value set
               to 1
      */
      xmlNewChild(atom_node, NULL, (xmlChar *) "label_entity_id",
                  (xmlChar *) "1");

      snprintf(buffer, sizeof(buffer), "%d", p->resnum);
      xmlNewChild(atom_node, NULL, (xmlChar *) "label_seq_id",
                  (xmlChar *) buffer);

      /* occupancy                                                      */
      snprintf(buffer, sizeof(buffer), "%.2f", p->occ);
      xmlNewChild(atom_node, NULL, (xmlChar *) "occupancy",
                  (xmlChar *) buffer);

      /* insertion code                                                 */
      /* Note: Insertion code node only included for residues with
               insertion codes
      */
      if(strcmp(p->insert," "))
      {
         snprintf(buffer, sizeof(buffer), "%s", p->insert);
         xmlNewChild(atom_node, NULL, (xmlChar *) "pdbx_PDB_ins_code",
                     (xmlChar *) buffer);
      }

      /* model number                                                   */
      /* Note: Model number is not stored in PDB data structure. Value
               set to 1
      */
      xmlNewChild(atom_node, NULL, (xmlChar *) "pdbx_PDB_model_num",
                  (xmlChar *) "1");

      /* formal charge                                                  */
      /* Note: Formal charge node not included for neutral atoms        */
      if(p->formal_charge != 0)
      {
         snprintf(buffer, sizeof(buffer), "%d", p->formal_charge);
         xmlNewChild(atom_node, NULL, (xmlChar *) "pdbx_formal_charge",
                     (xmlChar *) buffer);
      }

      /* atom symbol                                                    */
      /* Note: If the atomic symbol is not set in PDB data structure then
               the value set is based on columns 13-14 of pdb-formated
               text file.
      */
      snprintf(buffer, sizeof(buffer), "%s", p->element);
      KILLLEADSPACES(buffer_ptr,buffer);
      if(strlen(buffer_ptr))
      {
         xmlNewChild(atom_node, NULL, (xmlChar *) "type_symbol",
                     (xmlChar *) buffer_ptr);
      }
      else
      {
         blSetElementSymbolFromAtomName(buffer,p->atnam_raw);
         xmlNewChild(atom_node, NULL, (xmlChar *) "type_symbol",
                     (xmlChar *) buffer);
      }

      /* user-defined tags                                              */
      if(gNPDBTagFunctions)
      {
         blAddTagVariablesNodes(p,atom_node);
      }
   }

   /* Write to doc file pointer                                         */
   xmlDocFormatDump(fp,doc,1);

   /* Free Memory                                                       */
   xmlFreeDoc(doc);
   xmlCleanupParser();

   return;
}