void print_cs(int p) { str_number t= cs_text(p); if(p<hash_base){ if(p==null_cs){ tprint_esc("csname"); tprint_esc("endcsname"); print_char(' '); }else{ tprint_esc("IMPOSSIBLE."); } }else if((p>=undefined_control_sequence)&& ((p<=eqtb_size)||p> eqtb_size+hash_extra)){ tprint_esc("IMPOSSIBLE."); }else if(t>=str_ptr){ tprint_esc("NONEXISTENT."); }else{ if(is_active_cs(t)){ print(active_cs_value(t)); }else{ print_esc(t); if(single_letter(t)){ if(get_cat_code(cat_code_table_par,pool_to_unichar(str_string(t)))==letter_cmd) print_char(' '); }else{ print_char(' '); } } } }
/*
 * read_pdb: read the residues of one chain from a PDB-format text file.
 *
 *   pdbname  path of the file to open for reading
 *   protein  output; on success protein->sequence points to the newly
 *            allocated Residue array and protein->length holds the number
 *            of residues counted; it is left untouched on failure
 *   chain    chain identifier to extract; when 0 no chain filtering is
 *            done and reading stops at the first TER/END record
 *
 * The file is scanned twice: once to count residues (a new residue starts
 * whenever the residue-number field, including the insertion code,
 * changes) and once to fill in residue and atom data.  Hydrogen atoms are
 * skipped in the second pass.
 *
 * Returns 0 on success and 1 on any failure (file cannot be opened,
 * allocation failed, unknown residue type, too many atoms in a residue,
 * or inconsistent residue count).  The file is closed on every path.
 */
int read_pdb ( char * pdbname, Protein * protein, char chain) {

    Residue * sequence = NULL;
    FILE * fptr = NULL;
    char line[BUFFLEN];
    char oldresno[PDB_ATOM_RES_NO_LEN+2]; /* res name: 4 digits + insertion code + \0 */
    char oldrestype [PDB_ATOM_RES_NAME_LEN+2];
    char tmp[PDB_ATOM_X_LEN+1];
    char atom_name[PDB_ATOM_ATOM_NAME_LEN+1];
    char *auxptr;
    int atomctr, resctr, no_res, ctr, nonblank;
    int retval = 1; /* pessimistic default; set to 0 only at the very end */
    char single_letter ( char code[]);

    /* open file */
    fptr = fopen ( pdbname, "r");
    if ( !fptr ) {
        fprintf (stderr, "Cno %s.\n", pdbname);
        return 1;
    }

    /* warn if no chain given */
    if ( !chain) fprintf ( stderr,"No chain specified. Using the first one.\n");

    /* first pass: count residues */
    memset (line, 0, BUFFLEN);
    memset (oldresno, 0, PDB_ATOM_RES_NO_LEN+2);
    resctr = 0;
    while ( fgets (line, BUFFLEN, fptr) != NULL ) {
        if ( chain && line[PDB_ATOM_CHAINID] != chain ) continue;
        if ( ! strncmp(line,"TER", 3) || ! strncmp(line,"END", 3) ) break;
        if ( strncmp(line,"ATOM", 4) && strncmp(line,"HETATM", 6) ) continue;

        if ( strncmp (line+PDB_ATOM_RES_NO, oldresno, PDB_ATOM_RES_NO_LEN+1) ) {
            strncpy (oldresno, line+PDB_ATOM_RES_NO, PDB_ATOM_RES_NO_LEN+1);
            oldresno[PDB_ATOM_RES_NO_LEN+1] = '\0';
            resctr ++;
        }
    }
    no_res = resctr;

    /* allocate space */
    sequence = emalloc ( no_res*sizeof (Residue));
    /* a NULL result is only an error when something was actually requested */
    if ( !sequence && no_res > 0 ) goto done;

    /* second pass: read in the atoms */
    rewind ( fptr);
    memset (line, 0, BUFFLEN);
    memset (oldresno, 0, PDB_ATOM_RES_NO_LEN+2); /* +2: insertion code and terminator */
    memset (oldrestype, 0, PDB_ATOM_RES_NAME_LEN+2);
    resctr = -1;
    atomctr = 0;
    while ( fgets (line, BUFFLEN, fptr) != NULL ) {
        if ( chain && line[PDB_ATOM_CHAINID] != chain ) continue;
        if ( ! strncmp(line,"TER", 3) || ! strncmp(line,"END", 3) ) break;
        if ( strncmp(line,"ATOM", 4) && strncmp(line,"HETATM", 6) ) continue;

        /* if it's a hydrogen - skip */
        if ( line[PDB_ATOM_ATOM_NAME] == 'H' || line[PDB_ATOM_ATOM_NAME+1] == 'H') continue;

        /* adjust the counters */
        if ( strncmp (line+PDB_ATOM_RES_NO, oldresno, PDB_ATOM_RES_NO_LEN+1) ) {
            /* new residue */
            strncpy (oldresno, line+PDB_ATOM_RES_NO, PDB_ATOM_RES_NO_LEN+1);
            strncpy (oldrestype, line+PDB_ATOM_RES_NAME, PDB_ATOM_RES_NAME_LEN);
            oldresno[PDB_ATOM_RES_NO_LEN+1] = '\0';
            oldrestype[PDB_ATOM_RES_NAME_LEN] = '\0';
            resctr ++;
            if ( resctr >= no_res ) {
                /* the two passes disagree; never write past the allocation */
                fprintf (stderr, "Error reading pdb: resctr:%d no res: %d\n", resctr, no_res);
                goto done;
            }
            atomctr = 0;

            sequence[resctr].no_atoms = 1;
            strncpy ( sequence[resctr].pdb_id, oldresno, PDB_ATOM_RES_NO_LEN+2);
            sequence[resctr].pdb_id[PDB_ATOM_RES_NO_LEN+1] = '\0';

            strncpy ( sequence[resctr].res_type, oldrestype, PDB_ATOM_RES_NAME_LEN+1);
            sequence[resctr].res_type[PDB_ATOM_RES_NAME_LEN] = '\0';
            sequence[resctr].res_type_short = single_letter ( sequence[resctr].res_type );
            if ( !sequence[resctr].res_type_short ) goto done;

        } else {
            /*
             * A record whose residue-number field compares equal to the
             * initial all-zero marker (e.g. a truncated first line) would
             * land here before any residue exists; the counting pass did
             * not count it either, so ignore it.
             */
            if ( resctr < 0 ) continue;

            atomctr ++;
            sequence[resctr].no_atoms = atomctr + 1;
            if ( atomctr >= MAX_NO_ATOMS ) {
                fprintf ( stderr, "Error: I thought every aa has < %d atoms.\n",
                          MAX_NO_ATOMS );
                goto done;
            }
        }

        /* read in atom info */
        auxptr = line + PDB_ATOM_ATOM_NAME;
        memset ( atom_name, 0, sizeof atom_name);
        /* skip initial non-letters; test the bound before touching the char */
        ctr = 0;
        while ( ctr < PDB_ATOM_ATOM_NAME_LEN && !isalpha ((unsigned char) auxptr[ctr]) ) ctr++;
        /* copy the leading run of letters, staying inside the name field */
        nonblank = 0;
        while ( ctr < PDB_ATOM_ATOM_NAME_LEN && isalpha ((unsigned char) auxptr[ctr]) ) {
            atom_name[nonblank] = auxptr[ctr];
            nonblank ++;
            ctr++;
        }

        strncpy ( sequence[resctr].atom[atomctr].type, atom_name, PDB_ATOM_ATOM_NAME_LEN );

        /* is this a backbone atom? */
        sequence[resctr].atom[atomctr].backbone = 0;
        if ( nonblank == 1) {
            sequence[resctr].atom[atomctr].backbone =
                !( strcmp ( atom_name, "N") && strcmp ( atom_name, "C") && strcmp ( atom_name, "O") );
        } else if ( nonblank == 2) {
            sequence[resctr].atom[atomctr].backbone = ! strcmp ( atom_name, "CA" );
        }

        /* coordinates: copy each fixed-width field, terminate, convert */
        strncpy ( tmp, line+PDB_ATOM_X, PDB_ATOM_X_LEN);
        tmp[PDB_ATOM_X_LEN] = '\0';
        sequence[resctr].atom[atomctr].x = atof (tmp);
        strncpy ( tmp, line+PDB_ATOM_Y, PDB_ATOM_Y_LEN);
        tmp[PDB_ATOM_Y_LEN] = '\0';
        sequence[resctr].atom[atomctr].y = atof (tmp);
        strncpy ( tmp, line+PDB_ATOM_Z, PDB_ATOM_Z_LEN);
        tmp[PDB_ATOM_Z_LEN] = '\0';
        sequence[resctr].atom[atomctr].z = atof (tmp);
    }

    /* clean PDB id tags from spaces */
    for (resctr=0; resctr < no_res; resctr ++ ) {
        string_clean (sequence[resctr].pdb_id, PDB_ATOM_RES_NO_LEN+2);
    }

    /* return values: */
    protein->sequence = sequence;
    protein->length   = no_res;
    retval = 0;

 done:
    /*
     * NOTE(review): on the failure paths `sequence` is not handed to the
     * caller and is not released here, because the deallocator matching
     * emalloc() is not visible from this function - confirm and release it.
     */
    fclose (fptr);
    return retval;
}
/*
 * fill_protein_info: read the ATOM records of one chain from an already
 * opened PDB-format stream into `protein`.
 *
 *   fptr     open stream; residues are counted from its current position,
 *            then the stream is rewound and the data pass starts at the
 *            beginning of the file
 *   chain    chain identifier to extract; when 0 the chain of the first
 *            residue encountered is used
 *   protein  output; zeroed on entry, and on success protein->sequence /
 *            protein->length describe the newly allocated Residue array
 *
 * Within a residue each atom name is stored only once: a later record
 * with an already seen name (e.g. another alternate location) is skipped
 * and does not occupy an atom slot.  Hydrogens are skipped as well.
 *
 * Returns
 *   0                         success
 *   -1                        no residue found ("end of the read")
 *   1                         allocation failed
 *   ERR_NO_FILE_OR_CHAIN      a chain was requested but no line matched it
 *   ERR_NONSENSE              passes disagree on the residue count, or a
 *                             residue id is empty after cleaning
 *   ERR_MAX_ATOMS             a residue has MAX_NO_ATOMS or more atoms
 *   ERR_SSE_NONE|ERR_CA_TRACE every stored atom is named "CA"
 * On every nonzero return `protein` stays zeroed.
 *
 * TODO for the moment we rely on PDB annotation to extract structural
 * elements - that should be changed.
 *
 * NOTE(review): on the error returns taken after allocation the Residue
 * array is neither handed to the caller nor released, because the
 * deallocator matching emalloc() is not visible from this function -
 * confirm and release it.
 */
int fill_protein_info ( FILE * fptr, char chain, Protein * protein) {

    Residue * sequence = NULL;
    char line[BUFFLEN];
    char oldresno[PDB_ATOM_RES_NO_LEN+2]; /* res name: 4 digits + insertion code + \0 */
    char oldrestype [PDB_ATOM_RES_NAME_LEN+2];
    char tmp[BUFFLEN], *auxptr;
    char atom_key[PDB_ATOM_ATOM_NAME_LEN+3]; /* '_' + name + '_' + \0 */
    char atomtypes_read_in[BUFFLEN];
    char old_chain;
    int atomctr, resctr, no_res, ctr, nonblank;
    int retval;
    int chain_found;
    int ca_trace;
    char single_letter ( char code[]);

    /* cleanup */
    memset (protein, 0, sizeof(Protein) );

    /********************************************/
    /* count residues */
    memset (line, 0, BUFFLEN);
    memset (oldresno, 0, PDB_ATOM_RES_NO_LEN+2);
    resctr = 0;
    chain_found = 0;
    old_chain = '\0';
    while ( fgets (line, BUFFLEN, fptr) != NULL ) {

        /* once something was read, stop at END or when the chain changes */
        if (resctr) {
            if ( ! strncmp(line,"END", 3) ||
                 (chain && line[PDB_ATOM_CHAINID] != old_chain) ) break;
        }
        if (chain && line[PDB_ATOM_CHAINID] != chain) continue;
        chain_found = 1;

        if ( ! strncmp(line,"ATOM", 4) ) {
            if ( strncmp (line+PDB_ATOM_RES_NO, oldresno, PDB_ATOM_RES_NO_LEN+1) ) {
                strncpy (oldresno, line+PDB_ATOM_RES_NO, PDB_ATOM_RES_NO_LEN+1);
                oldresno[PDB_ATOM_RES_NO_LEN+1] = '\0';
                /* handling the case when the chain is not given,
                   meaning: "take the first chain" */
                old_chain = line[PDB_ATOM_CHAINID];
                resctr ++;
            }
        }
    }

    /* sanity: */
    if ( chain && ! chain_found) {
        fprintf (stderr, "Chain %c not found.\n", chain);
        return ERR_NO_FILE_OR_CHAIN;
    }

    no_res = resctr;
    if ( !no_res ) return -1; /* take it as the end of the read */

    /* allocate space */
    sequence = emalloc ( no_res*sizeof (Residue));
    if ( ! sequence ) return 1;

    /*********************************************/
    /* read in residue numbers and atom coords */
    rewind ( fptr);
    memset (line, 0, BUFFLEN);
    old_chain = '\0';
    memset (oldresno, 0, PDB_ATOM_RES_NO_LEN+2);
    memset (oldrestype, 0, PDB_ATOM_RES_NAME_LEN+2);
    memset (atomtypes_read_in, 0, BUFFLEN*sizeof(char));
    resctr = -1;
    atomctr = 0;
    ca_trace = 1;
    while ( fgets (line, BUFFLEN, fptr) != NULL ) {

        if ( resctr > -1) {
            if ( ! strncmp(line,"END", 3) ||
                 (chain && line[PDB_ATOM_CHAINID] != old_chain) ) break;
        }
        if ( chain && line[PDB_ATOM_CHAINID] != chain ) continue;
        if ( strncmp(line,"ATOM", 4) ) continue;

        /* if it's a hydrogen - skip */
        if ( line[PDB_ATOM_ATOM_NAME] == 'H' || line[PDB_ATOM_ATOM_NAME+1] == 'H') continue;

        /* adjust the counters */
        if ( strncmp (line+PDB_ATOM_RES_NO, oldresno, PDB_ATOM_RES_NO_LEN+1) ) {
            /* +1 in PDB_ATOM_RES_NO_LEN+1 means the insertion code is
               included in the identifier */
            strncpy (oldresno, line+PDB_ATOM_RES_NO, PDB_ATOM_RES_NO_LEN+1);
            strncpy (oldrestype, line+PDB_ATOM_RES_NAME, PDB_ATOM_RES_NAME_LEN);
            oldresno[PDB_ATOM_RES_NO_LEN+1] = '\0';
            oldrestype[PDB_ATOM_RES_NAME_LEN] = '\0';

            /* handling the case when the chain is not given,
               meaning: "take the first chain" */
            old_chain = line[PDB_ATOM_CHAINID];

            resctr ++;
            if ( resctr >= no_res ) {
                fprintf (stderr, "Error reading pdb: resctr:%d no res: %d\n",
                         resctr, no_res);
                return ERR_NONSENSE;
            }
            atomctr = 0;

            /* keep track of atom types we have read in; every entry is
               stored as "name_" so the list always reads "_n1_n2_..." */
            memset (atomtypes_read_in, 0, BUFFLEN*sizeof(char));
            atomtypes_read_in[0] = '_';

            sequence[resctr].no_atoms = 1;
            strncpy ( sequence[resctr].pdb_id, oldresno, PDB_ATOM_RES_NO_LEN+2);
            sequence[resctr].pdb_id[PDB_ATOM_RES_NO_LEN+1] = '\0';

            strncpy ( sequence[resctr].res_type, oldrestype, PDB_ATOM_RES_NAME_LEN+1);
            sequence[resctr].res_type[PDB_ATOM_RES_NAME_LEN] = '\0';
            /* modified residues are ok for the purposes here: an
               unrecognized res_type_short is deliberately not an error */
            sequence[resctr].res_type_short = single_letter ( sequence[resctr].res_type );

        } else {
            /*
             * A record whose residue-number field compares equal to the
             * initial all-zero marker (e.g. a truncated first line) would
             * land here before any residue exists; the counting pass did
             * not count it either, so ignore it.
             */
            if ( resctr < 0 ) continue;

            atomctr ++;
            sequence[resctr].no_atoms = atomctr + 1;
            if ( atomctr >= MAX_NO_ATOMS ) {
                fprintf ( stderr, "Error parsing pdb: I thought every aa has < %d atoms.\n",
                          MAX_NO_ATOMS );
                return ERR_MAX_ATOMS;
            }
        }

        /* read in atom info */
        auxptr = line + PDB_ATOM_ATOM_NAME;
        memset ( tmp, 0, PDB_ATOM_ATOM_NAME_LEN+1);
        /* skip initial non-letters; test the bound before touching the char */
        ctr = 0;
        while ( ctr < PDB_ATOM_ATOM_NAME_LEN && !isalpha ((unsigned char) auxptr[ctr]) ) ctr++;
        /* copy alphanum info, staying inside the name field so that a
           following alternate-location letter is not glued to the name */
        nonblank = 0;
        while ( ctr < PDB_ATOM_ATOM_NAME_LEN && isalnum ((unsigned char) auxptr[ctr]) ) {
            tmp[nonblank] = auxptr[ctr];
            nonblank ++;
            ctr++;
        }
        tmp[nonblank] = '\0';

        /* have we already seen this atom type by any chance?
           Search for "_name_" so that a name cannot match the tail of a
           longer one. An empty name is skipped, as it always was. */
        atom_key[0] = '_';
        memcpy (atom_key+1, tmp, (size_t) nonblank);
        atom_key[nonblank+1] = '_';
        atom_key[nonblank+2] = '\0';
        if ( nonblank == 0 || strstr (atomtypes_read_in, atom_key) ) {
            /* give back the slot reserved above, so that the skipped
               record does not leave an empty, counted atom behind */
            atomctr --;
            sequence[resctr].no_atoms = atomctr + 1;
            continue;
        }
        /* append "name_"; bounded, and without overlapping source and
           destination */
        strncat (atomtypes_read_in, atom_key+1,
                 BUFFLEN - strlen (atomtypes_read_in) - 1);

        strncpy ( sequence[resctr].atom[atomctr].type, tmp, PDB_ATOM_ATOM_NAME_LEN );

        /* is this a backbone atom? */
        sequence[resctr].atom[atomctr].backbone = 0;
        if ( nonblank == 1) {
            sequence[resctr].atom[atomctr].backbone =
                !( strcmp ( tmp, "N") && strcmp ( tmp, "C") && strcmp ( tmp, "O") );
        } else if ( nonblank == 2) {
            if ( ! strcmp ( tmp, "CA" )) {
                sequence[resctr].atom[atomctr].backbone = 1;
                sequence[resctr].Ca = sequence[resctr].atom + atomctr;
            } else {
                sequence[resctr].atom[atomctr].backbone = 0;
            }
        }
        /* check if this is Ca trace: any stored non-CA atom clears the flag */
        if ( strcmp ( tmp, "CA" ) ) ca_trace = 0;

        /* coordinates: copy each fixed-width field, terminate, convert */
        strncpy ( tmp, line+PDB_ATOM_X, PDB_ATOM_X_LEN);
        tmp[PDB_ATOM_X_LEN] = '\0';
        sequence[resctr].atom[atomctr].x = atof (tmp);
        strncpy ( tmp, line+PDB_ATOM_Y, PDB_ATOM_Y_LEN);
        tmp[PDB_ATOM_Y_LEN] = '\0';
        sequence[resctr].atom[atomctr].y = atof (tmp);
        strncpy ( tmp, line+PDB_ATOM_Z, PDB_ATOM_Z_LEN);
        tmp[PDB_ATOM_Z_LEN] = '\0';
        sequence[resctr].atom[atomctr].z = atof (tmp);
    }

    if ( ca_trace) return ERR_SSE_NONE|ERR_CA_TRACE;

    /* clean PDB id tags from spaces */
    for (resctr=0; resctr < no_res; resctr ++ ) {
        retval = string_clean (sequence[resctr].pdb_id, PDB_ATOM_RES_NO_LEN+1);
        if ( retval ) {
            fprintf (stderr, "Error in read_pdb(): empty id string for residue with sequential no %d.\n", resctr);
            return ERR_NONSENSE;
        }
    }

    /* store the sequence and its length */
    protein->sequence = sequence;
    protein->length   = no_res;

    return 0;
}