/* @func ajTimeSetS ***********************************************************
**
** Sets a time object from a string of the form "yyyy-mm-dd hh:mm:ss".
**
** The date part (year, month and day) must be present; time-of-day fields
** that cannot be read stay at zero. The object is left untouched when the
** string is rejected.
**
** @param [w] thys [AjPTime] Time object to fill in
** @param [r] timestr [const AjPStr] Date/time text
**
** @return [AjBool] ajTrue on success, ajFalse if thys is NULL, the string
**                  is empty or no complete date could be read
******************************************************************************/
AjBool ajTimeSetS(AjPTime thys, const AjPStr timestr)
{
    ajint year = 0;
    ajint mon  = 0;
    ajint mday = 0;
    ajint hour = 0;
    ajint min  = 0;
    ajint sec  = 0;
    ajint nfields = 0;

    if(!thys)
        return ajFalse;

    if(!ajStrGetLen(timestr))
        return ajFalse;

    nfields = ajFmtScanS(timestr, "%4d-%2d-%2d %2d:%2d:%2d",
                         &year, &mon, &mday, &hour, &min, &sec);

    /*
    ** Without a month and a day the fields below would become
    ** tm_mon = -1 and tm_mday = 0, which mktime silently normalises
    ** into an unrelated date. Insist on the three date fields.
    */
    if(nfields < 3)
        return ajFalse;

    /* struct tm counts years from 1900; smaller values are taken as is */
    if(year > 1899)
        year = year-1900;

    thys->time.tm_year  = year;
    thys->time.tm_mon   = mon-1;
    thys->time.tm_mday  = mday;
    thys->time.tm_hour  = hour;
    thys->time.tm_min   = min;
    thys->time.tm_sec   = sec;
    thys->time.tm_isdst = -1;      /* let mktime decide about DST */

    /* Called for its side effect of normalising the tm fields */
    mktime(&thys->time);

    return ajTrue;
}
/* @funcstatic dbxflat_ParseEmbl **********************************************
**
** Reads lines of one EMBL-format entry until a line starting with "//" has
** been read. The "ID" line sets the entry's position and identifier; other
** line types are handed to the extractor of each index field whose
** file-level field pointer is non-NULL.
**
** @param [u] entry [EmbPBtreeEntry] Entry receiving position and id
** @param [u] inf [AjPFile] Input file
**
** @return [AjBool] ajTrue once the "//" line has been read, ajFalse if the
**                  input runs out first
******************************************************************************/
static AjBool dbxflat_ParseEmbl(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr rdline   = ajStrNewC("");
    ajlong startpos = 0L;
    AjBool complete = ajTrue;

    while(!ajStrPrefixC(rdline, "//"))
    {
        /* Value noted before the read so it refers to the line's start */
        startpos = ajFileResetPos(inf);

        if(!ajReadlineTrim(inf, &rdline))
        {
            complete = ajFalse;
            break;
        }

        if(ajStrPrefixC(rdline, "ID"))
        {
            entry->fpos = startpos;
            ajFmtScanS(rdline, "%*S%S", &entry->id);
            ajStrTrimEndC(&entry->id, ";");

            if(svfield)
                embBtreeEmblSV(rdline, svfield->data);
        }

        /* "IV" and "PA" are the emblcds database format equivalents */
        if(svfield &&
           (ajStrPrefixC(rdline, "SV") || ajStrPrefixC(rdline, "IV")))
            embBtreeEmblAC(rdline, svfield->data);

        if(accfield &&
           (ajStrPrefixC(rdline, "AC") || ajStrPrefixC(rdline, "PA")))
            embBtreeEmblAC(rdline, accfield->data);

        if(keyfield && ajStrPrefixC(rdline, "KW"))
            embBtreeEmblKW(rdline, keyfield->data, keyfield->len);

        if(desfield && ajStrPrefixC(rdline, "DE"))
            embBtreeEmblDE(rdline, desfield->data, desfield->len);

        if(orgfield &&
           (ajStrPrefixC(rdline, "OC") || ajStrPrefixC(rdline, "OS")))
            embBtreeEmblTX(rdline, orgfield->data, orgfield->len);
    }

    ajStrDel(&rdline);

    return complete;
}
/* @funcstatic dbxflat_ParseEmbl **********************************************
**
** Reads lines of one EMBL-format entry until a line starting with "//" has
** been read. The "ID" line sets the entry's position and identifier; other
** line types are handed to the extractor of each index field that is
** switched on in the entry (do_sv, do_accession, do_keyword,
** do_description, do_taxonomy).
**
** @param [u] entry [EmbPBtreeEntry] Entry holding the field switches and
**                                   receiving the extracted data
** @param [u] inf [AjPFile] Input file
**
** @return [AjBool] ajTrue once the "//" line has been read, ajFalse if the
**                  input runs out first
******************************************************************************/
static AjBool dbxflat_ParseEmbl(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr rdline   = ajStrNewC("");
    ajlong startpos = 0L;
    AjBool complete = ajTrue;

    while(!ajStrPrefixC(rdline, "//"))
    {
        /* Value noted before the read so it refers to the line's start */
        startpos = ajFileResetPos(inf);

        if(!ajReadlineTrim(inf, &rdline))
        {
            complete = ajFalse;
            break;
        }

        if(ajStrPrefixC(rdline, "ID"))
        {
            entry->fpos = startpos;
            ajFmtScanS(rdline, "%*S%S", &entry->id);
            ajStrTrimEndC(&entry->id, ";");

            if(entry->do_sv)
                embBtreeEmblSV(rdline, entry->sv);
        }

        /* "IV" and "PA" are the emblcds database format equivalents */
        if(entry->do_sv &&
           (ajStrPrefixC(rdline, "SV") || ajStrPrefixC(rdline, "IV")))
            embBtreeEmblAC(rdline, entry->sv);

        if(entry->do_accession &&
           (ajStrPrefixC(rdline, "AC") || ajStrPrefixC(rdline, "PA")))
            embBtreeEmblAC(rdline, entry->ac);

        if(entry->do_keyword && ajStrPrefixC(rdline, "KW"))
            embBtreeEmblKW(rdline, entry->kw, entry->kwlen);

        if(entry->do_description && ajStrPrefixC(rdline, "DE"))
            embBtreeEmblDE(rdline, entry->de, entry->delen);

        if(entry->do_taxonomy &&
           (ajStrPrefixC(rdline, "OC") || ajStrPrefixC(rdline, "OS")))
            embBtreeEmblTX(rdline, entry->tx, entry->txlen);
    }

    ajStrDel(&rdline);

    return complete;
}
/* @funcstatic jaspextract_readmatrixlist *************************************
**
** Loads the matrix list file (MATRIXFILE) found in a JASPAR directory into
** a table. Each line that yields a first token is stored whole as the
** value, keyed by that token. Key and value strings are allocated here and
** are not freed by this function.
**
** Stops with ajFatal if the EMBOSS data directory cannot be determined,
** or the list file is missing or cannot be opened.
**
** @param [u] mtable [AjPTable] Table receiving the key/line pairs
** @param [r] directory [const AjPStr] JASPAR directory; the file name is
**                                     appended to it directly
**
** @return [void]
******************************************************************************/
static void jaspextract_readmatrixlist(AjPTable mtable,
                                       const AjPStr directory)
{
    AjPStr listname        = ajStrNew();
    const AjPStr datapath  = ajDatafileValuePath();
    AjPFile listfile       = NULL;
    AjPStr rdline          = NULL;
    AjPStr id              = NULL;
    AjPStr record          = NULL;

    if(!datapath)
        ajFatal("jaspextract: Cannot determine the EMBOSS data directory");

    ajFmtPrintS(&listname, "%S%s", directory, MATRIXFILE);

    if(!ajFilenameExistsRead(listname))
        ajFatal("jaspextract: Directory (%S) doesn't appear to be a JASPAR "
                "one\nNo matrix_list.txt file found", directory);

    listfile = ajFileNewInNameS(listname);

    if(!listfile)
        ajFatal("Cannot open input file: %S", listname);

    while(ajReadline(listfile, &rdline))
    {
        id = ajStrNew();

        if(ajFmtScanS(rdline, "%S", &id) == 1)
        {
            record = ajStrNew();
            ajStrAssignS(&record, rdline);
            ajTablePut(mtable, (void *)id, (void *)record);
        }
        else
        {
            /* No token on this line: nothing to store */
            ajStrDel(&id);
        }
    }

    ajFileClose(&listfile);
    ajStrDel(&listname);
    ajStrDel(&rdline);

    return;
}
/* @funcstatic seqwords_keysearch *********************************************
**
** Searches a swissprot-format file with the keywords of a terms structure
** and appends one hit to the hitlist for every match.
**
** An entry matches when a keyword occurs in a DE or KW line (the whole
** sequence becomes the hit) or in an "FT DOMAIN" line (the sequence range
** read from that line becomes the hit). Domain hits take priority over
** whole-sequence hits for the same entry.
**
** @param [u] inf [AjPFile] File pointer to swissprot database
** @param [r] terms [AjPTerms] Terms object pointer
** @param [w] hits [EmbPHitlist*] Hitlist object pointer; the hitlist must
**                                already exist
**
** @return [AjBool] True on success, false if an argument is missing
** @@
******************************************************************************/
static AjBool seqwords_keysearch(AjPFile inf, AjPTerms terms,
                                 EmbPHitlist *hits)
{
    AjPStr line  = NULL;         /* Line of text.                        */
    AjPStr id    = NULL;         /* Accession number of current entry.   */
    AjPStr temp  = NULL;         /* Sequence of current entry.           */
    ajint  s     = 0;            /* Temp. start of hit value.            */
    ajint  e     = 0;            /* Temp. end of hit value.              */
    AjPInt start = NULL;         /* Array of start of hit(s).            */
    AjPInt end   = NULL;         /* Array of end of hit(s).              */
    ajint  nhits = 0;            /* Number of domain hits in the entry.  */
    ajint  x     = 0;
    ajint  last  = 0;            /* Index of the hit just appended.      */
    AjBool foundkw = ajFalse;
    AjBool foundft = ajFalse;
    EmbPHitlist hitlist = NULL;  /* Shorthand for *hits.                 */


    /* Check for valid args; all of them are dereferenced below. */
    if(!inf || !terms || !hits || !*hits)
        return ajFalse;

    hitlist = *hits;


    /* Allocate strings and arrays. */
    line  = ajStrNew();
    id    = ajStrNew();
    start = ajIntNew();
    end   = ajIntNew();


    /* Start of main loop. */
    while(ajReadlineTrim(inf,&line))
    {
        /* Parse the AC line. */
        if(ajStrPrefixC(line,"AC"))
        {
            /* Copy accession number and remove the ';' from the end. */
            ajFmtScanS(line, "%*s %S", &id);
            ajStrExchangeCC(&id, ";", "\0");

            /* Reset flags & no. hits. */
            foundkw = ajFalse;
            foundft = ajFalse;
            nhits   = 0;
        }
        /* Search the description and keyword lines with search terms. */
        else if(ajStrPrefixC(line,"DE") || ajStrPrefixC(line,"KW"))
        {
            /*
            ** Search terms have a leading and trailing space to prevent
            ** them being found as substrings within other words. To
            ** catch cases where a DE or KW line begins with a search
            ** term, we must add a leading and trailing space to line.
            ** We must first remove punctuation from the line to be parsed.
            ** The replacement set holds one space per punctuation
            ** character so that every character has a partner.
            */
            ajStrExchangeSetCC(&line, ".,;:", "    ");
            ajStrAppendK(&line, ' ');
            ajStrInsertC(&line, 0, " ");

            for(x = 0; x < terms->N; x++)
                if(ajStrFindCaseS(line, terms->Keywords[x]) != -1)
                {
                    /* Search term is found. */
                    foundkw = ajTrue;
                    break;
                }
        }
        /* Search the feature table line with search terms. */
        else if(ajStrPrefixC(line,"FT DOMAIN"))
        {
            /*
            ** To catch cases where a FT line ends with a search term,
            ** we must add a trailing space to line, after removing
            ** punctuation as above.
            */
            ajStrExchangeSetCC(&line, ".,;:", "    ");
            ajStrAppendK(&line, ' ');

            for(x = 0; x < terms->N; x++)
                if(ajStrFindCaseS(line, terms->Keywords[x]) != -1)
                {
                    /* Search term is found. */
                    foundft = ajTrue;
                    nhits++;

                    /* Assign start and end of hit. */
                    ajFmtScanS(line, "%*s %*s %d %d", &s, &e);
                    ajIntPut(&start, nhits-1, s);
                    ajIntPut(&end, nhits-1, e);
                    break;
                }
        }
        /* Parse the sequence. */
        else if(ajStrPrefixC(line,"SQ") && (foundkw || foundft))
        {
            /* Allocate memory for temp. sequence. */
            temp = ajStrNew();

            /*
            ** Read the sequence lines up to the "//" terminator. The
            ** first three characters of each line are skipped; shorter
            ** lines hold no sequence and must not be indexed past
            ** their end.
            */
            while(ajReadlineTrim(inf,&line) && !ajStrPrefixC(line,"//"))
                if(ajStrGetLen(line) > 3)
                    ajStrAppendC(&temp, ajStrGetPtr(line)+3);

            /* Clean up temp. sequence. */
            ajStrRemoveWhite(&temp);

            /* Priority is given to domain (rather than full length)
               sequence. */
            if(foundft)
            {
                for(x = 0; x < nhits; x++)
                {
                    /* Grow the array of hits by one. */
                    hitlist->N++;
                    AJCRESIZE(hitlist->hits, hitlist->N);
                    last = hitlist->N - 1;

                    hitlist->hits[last] = embHitNew();
                    ajStrAssignC(&hitlist->hits[last]->Model, "KEYWORD");

                    /* Assign start and end of hit. */
                    hitlist->hits[last]->Start = ajIntGet(start, x);
                    hitlist->hits[last]->End   = ajIntGet(end, x);

                    /* Extract sequence within specified range. */
                    ajStrAssignSubS(&hitlist->hits[last]->Seq, temp,
                                    hitlist->hits[last]->Start - 1,
                                    hitlist->hits[last]->End - 1);

                    /* Put id into structure. */
                    ajStrAssignRef(&hitlist->hits[last]->Acc, id);
                }
            }
            else
            {
                /* Grow the array of hits by one. */
                hitlist->N++;
                AJCRESIZE(hitlist->hits, hitlist->N);
                last = hitlist->N - 1;

                hitlist->hits[last] = embHitNew();
                ajStrAssignC(&hitlist->hits[last]->Model, "KEYWORD");

                /* Extract whole sequence. */
                ajStrAssignRef(&hitlist->hits[last]->Seq, temp);
                hitlist->hits[last]->Start = 1;
                hitlist->hits[last]->End   =
                    ajStrGetLen(hitlist->hits[last]->Seq);

                /* Put id into structure. */
                ajStrAssignRef(&hitlist->hits[last]->Acc, id);
            }

            /* Free temp. sequence. */
            ajStrDel(&temp);
        }
    }


    /* Clean up. */
    ajStrDel(&line);
    ajStrDel(&id);
    ajIntDel(&start);
    ajIntDel(&end);

    return ajTrue;
}
/* @funcstatic seqwords_TermsRead ********************************************* ** ** Read the next Terms object from a file in embl-like format. The search ** terms are modified with a leading and trailing space. ** ** @param [r] inf [AjPFile] Input file stream ** @param [w] thys [AjPTerms*] Terms object ** ** @return [AjBool] True on succcess ** @@ *****************************************************************************/ static AjBool seqwords_TermsRead(AjPFile inf, AjPTerms *thys) { AjPStr line =NULL; /* Line of text. */ AjPStr temp =NULL; AjPList list_terms =NULL; /* List of keywords for a scop node*/ AjBool ok =ajFalse; AjPStr type = NULL; /* Memory management */ (*thys)=seqwords_TermsNew(); list_terms = ajListstrNew(); line = ajStrNew(); type = ajStrNew(); /* Read first line. */ ok = ajReadlineTrim(inf,&line); while(ok && !ajStrPrefixC(line,"//")) { if(ajStrPrefixC(line,"XX")) { ok = ajReadlineTrim(inf,&line); continue; } else if(ajStrPrefixC(line,"TY")) { ajFmtScanS(line, "%*s %S", &type); if(ajStrMatchC(type, "SCOP")) (*thys)->Type = ajSCOP; else if(ajStrMatchC(type, "CATH")) (*thys)->Type = ajCATH; } else if(ajStrPrefixC(line,"CL")) { ajStrAssignC(&(*thys)->Class,ajStrGetPtr(line)+3); ajStrRemoveWhiteExcess(&(*thys)->Class); } else if(ajStrPrefixC(line,"AR")) { ajStrAssignC(&(*thys)->Architecture,ajStrGetPtr(line)+3); ajStrRemoveWhiteExcess(&(*thys)->Architecture); } else if(ajStrPrefixC(line,"TP")) { ajStrAssignC(&(*thys)->Topology,ajStrGetPtr(line)+3); ajStrRemoveWhiteExcess(&(*thys)->Topology); } else if(ajStrPrefixC(line,"FO")) { ajStrAssignC(&(*thys)->Fold,ajStrGetPtr(line)+3); while(ajReadlineTrim(inf,&line)) { if(ajStrPrefixC(line,"XX")) break; ajStrAppendC(&(*thys)->Fold,ajStrGetPtr(line)+3); } ajStrRemoveWhiteExcess(&(*thys)->Fold); } else if(ajStrPrefixC(line,"SF")) { ajStrAssignC(&(*thys)->Superfamily,ajStrGetPtr(line)+3); while(ajReadlineTrim(inf,&line)) { if(ajStrPrefixC(line,"XX")) break; 
ajStrAppendC(&(*thys)->Superfamily,ajStrGetPtr(line)+3); } ajStrRemoveWhiteExcess(&(*thys)->Superfamily); } else if(ajStrPrefixC(line,"FA")) { ajStrAssignC(&(*thys)->Family,ajStrGetPtr(line)+3); while(ajReadlineTrim(inf,&line)) { if(ajStrPrefixC(line,"XX")) break; ajStrAppendC(&(*thys)->Family,ajStrGetPtr(line)+3); } ajStrRemoveWhiteExcess(&(*thys)->Family); } else if(ajStrPrefixC(line,"TE")) { /* Copy and clean up term. */ temp = ajStrNew(); ajStrAssignC(&temp,ajStrGetPtr(line)+3); ajStrRemoveWhiteExcess(&temp); /* Append a leading and trailing space to search term*/ ajStrAppendK(&temp, ' '); ajStrInsertC(&temp, 0, " "); /* Add the current term to the list. */ ajListstrPush(list_terms,temp); } ok = ajReadlineTrim(inf,&line); } if(!ok) { /* Clean up. */ ajListstrFree(&list_terms); ajStrDel(&line); /* Return. */ return ajFalse; } /* Convert the AjPList of terms to array of AjPSeq's. */ if(!((*thys)->N=ajListstrToarray((AjPList)list_terms,&(*thys)->Keywords))) ajWarn("Zero sized list of terms passed into seqwords_TermsRead"); /* Clean up. Free the list (not the nodes!). */ ajListstrFree(&list_terms); ajStrDel(&line); ajStrDel(&type); return ajTrue; }
int main(int argc, char **argv) { AjPSeqall seqall; AjPSeq seq = NULL; AjPReport outf = NULL; AjPFile inf = NULL; ajint begin; ajint end; AjPList l = NULL; AjPStr strand = NULL; AjPStr substr = NULL; AjPStr line = NULL; AjPStr name = NULL; AjPStr acc = NULL; AjPStr bf = NULL; AjPStr menu; AjPStr pattern = NULL; AjPStr opattern = NULL; AjPStr pname = NULL; AjPStr key = NULL; AjPStr value = NULL; AjPTable atable = NULL; AjPTable btable = NULL; ajint mismatch; ajint minlength; ajint sum; ajint v; char cp; const char *p; embInit("tfscan", argc, argv); seqall = ajAcdGetSeqall("sequence"); outf = ajAcdGetReport("outfile"); mismatch = ajAcdGetInt("mismatch"); minlength = ajAcdGetInt("minlength"); menu = ajAcdGetListSingle("menu"); pname = ajStrNew(); cp=ajStrGetCharFirst(menu); if(cp=='F') ajStrAssignC(&pname,"tffungi"); else if(cp=='I') ajStrAssignC(&pname,"tfinsect"); else if(cp=='O') ajStrAssignC(&pname,"tfother"); else if(cp=='P') ajStrAssignC(&pname,"tfplant"); else if(cp=='V') ajStrAssignC(&pname,"tfvertebrate"); else if(cp=='C') inf = ajAcdGetDatafile("custom"); if(cp!='C') { inf = ajDatafileNewInNameS(pname); if(!inf) ajFatal("Either EMBOSS_DATA undefined or TFEXTRACT needs running"); } name = ajStrNew(); acc = ajStrNew(); bf = ajStrNewC(""); substr = ajStrNew(); line = ajStrNew(); pattern = ajStrNewC("AA"); opattern = ajStrNew(); while(ajSeqallNext(seqall, &seq)) { begin=ajSeqallGetseqBegin(seqall); end=ajSeqallGetseqEnd(seqall); ajStrAssignC(&name,ajSeqGetNameC(seq)); strand=ajSeqGetSeqCopyS(seq); ajStrAssignSubC(&substr,ajStrGetPtr(strand),begin-1,end-1); ajStrFmtUpper(&substr); l=ajListNew(); atable = ajTablestrNew(1000); btable = ajTablestrNew(1000); sum=0; while(ajReadlineTrim(inf,&line)) { p = ajStrGetPtr(line); if(!*p || *p=='#' || *p=='\n' || *p=='!') continue; ajFmtScanS(line,"%S%S%S",&pname,&pattern,&acc); p += ajStrGetLen(pname); while(*p && *p==' ') ++p; p += ajStrGetLen(pattern); while(*p && *p==' ') ++p; p += ajStrGetLen(acc); while(*p && *p==' ') 
++p; ajStrAssignS(&opattern,pattern); ajStrAssignC(&bf,p); /* rest of line */ v = embPatVariablePattern(pattern,substr,pname,l,0, mismatch,begin); if(v) { key = ajStrNewS(pname); value = ajStrNewS(acc); ajTablePut(atable,(void *)key,(void *)value); key = ajStrNewS(pname); value = ajStrNewS(bf); ajTablePut(btable,(void *)key,(void *)value); } sum += v; } if(sum) tfscan_print_hits(&l,sum,outf,atable,seq,minlength, btable); ajFileSeek(inf,0L,0); ajListFree(&l); ajTablestrFree(&atable); ajTablestrFree(&btable); ajStrDel(&strand); } ajStrDel(&line); ajStrDel(&name); ajStrDel(&acc); ajStrDel(&pname); ajStrDel(&opattern); ajStrDel(&bf); ajStrDel(&pattern); ajStrDel(&substr); ajSeqDel(&seq); ajFileClose(&inf); ajReportClose(outf); ajReportDel(&outf); ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&menu); embExit(); return 0; }
/* @prog ssematch *********************************************************** ** ** Searches a DCF file (domain classification file) for secondary structure ** matches. ** ****************************************************************************/ int main(int argc, char **argv) { /* Variables declarations */ AjPFile dcfin = NULL; /* Domain classification file */ AjPFile ssin = NULL; /* Secondary structure input file*/ AjPMatrixf matrix = NULL; /* Substitution matrix */ AjPFile out_ss = NULL; /* For ss top matches*/ AjPFile out_se = NULL; /* For se top matches*/ AjPFile outfile = NULL; /* Output file*/ AjPFile logf = NULL; /* Log file */ float gapopen_sss = 0.0; /* Gap insertion penalty */ float gapopen_sse = 0.0; float gapopen = 0.0; float gapextend_sss = 0.0; /* Gap extension penalty */ float gapextend_sse = 0.0; float gapextend = 0.0; ajint max_hits = 0; /* number of top alignments to display*/ ajint mode = 0; ajint x = 0; AjPScop temp_scop = NULL; /* scop object pointer*/ AjPList scop_list = NULL; /* list of scop objects for entire domain classification file */ AjIList iter = NULL; AjPStr msg = NULL; /* Pointer to String used for messages */ AjPStr line = NULL; AjPStr qse = NULL; /* query secondary structure elements*/ AjPStr qss = NULL; /* query secondary structure (by residue)*/ AjPSeq q3se = NULL; /* query secondary structure elements, 3-letter code*/ AjPSeq q3ss = NULL; /* query secondary structure (by residue), 3-letter code*/ AjPSeq query = NULL; /* Read data from acd */ embInitPV("ssematch",argc,argv,"DOMAINATRIX",VERSION); dcfin = ajAcdGetInfile("dcfinfile"); ssin = ajAcdGetInfile("ssinfile"); max_hits = ajAcdGetInt("maxhits"); matrix = ajAcdGetMatrixf("datafile"); gapopen_sss = ajAcdGetFloat("rgapopen"); gapextend_sss = ajAcdGetFloat("rgapextend"); gapopen_sse = ajAcdGetFloat("egapopen"); gapextend_sse = ajAcdGetFloat("egapextend"); out_ss = ajAcdGetOutfile("outssfile"); out_se = ajAcdGetOutfile("outsefile"); logf = ajAcdGetOutfile("logfile"); /* 
Create list of scop objects for entire input domain classification file. */ scop_list = ajListNew(); while((temp_scop = (ajScopReadCNew(dcfin, "*")))) ajListPushAppend(scop_list,temp_scop); /* Error handing if domain classification file was empty. */ if(!(ajListGetLength(scop_list))) { ajWarn("Empty list from scop input file\n"); ajFileClose(&dcfin); ajFileClose(&ssin); ajMatrixfDel(&matrix); ajFileClose(&out_ss); ajFileClose(&out_se); ajFileClose(&logf); while(ajListPop(scop_list, (void *) &temp_scop)) ajScopDel(&temp_scop); ajListFree(&scop_list); ajListIterDel(&iter); ajExit(); return 1; } /* Error handling in case of empty query file. */ if(ssin == NULL) { ajWarn("Empty secondary structure query file\n"); ajFileClose(&dcfin); ajFileClose(&ssin); ajMatrixfDel(&matrix); ajFileClose(&out_ss); ajFileClose(&out_se); ajFileClose(&logf); while(ajListPop(scop_list, (void *) &temp_scop)) ajScopDel(&temp_scop); ajListFree(&scop_list); ajListIterDel(&iter); ajExit(); return 1; } /* Assign sequences in query file to sequence objects. */ qse = ajStrNew(); qss = ajStrNew(); while(ajReadlineTrim(ssin,&line)) { /* SE string */ if(ajStrPrefixC(line,"SE")) { ajFmtScanS(line, "%*s %S", &qse); /* Convert this string to 3-letter code & then convert to AjPSeq object. */ q3se = ssematch_convertbases(qse); } /* SS string */ else if(ajStrPrefixC(line,"SS")) { while((ajReadlineTrim(ssin,&line)) && !ajStrPrefixC(line,"XX")) ajStrAppendS(&qss,line); ajStrRemoveWhite(&qss); /* Convert this string to 3-letter code & then to AjPSeq object. */ q3ss = ssematch_convertbases(qss); } } /* For se & then for ss, modes 0 & 1. */ for(mode = 0; mode <= 1; mode++) { /* Assign arguments for alignment function. */ if (mode == 0) { query = q3se; gapopen = gapopen_sse; gapextend = gapextend_sse; outfile = out_se; } else if(mode == 1) { query = q3ss; gapopen = gapopen_sss; gapextend = gapextend_sss; outfile = out_ss; } /* Iterate through list of scop objects & calculate alignment scores. 
*/ iter=ajListIterNew(scop_list); while((temp_scop=(AjPScop)ajListIterGet(iter))) { /* The function extracts the se (mode 0) or ss (mode 1) subject sequences from the scop object, performs a Needleman-Wunsch global alignment with the query sequence & allocates the score to the Score element of the scop object*/ if(!(ssematch_NWScore(temp_scop , query, mode, matrix, gapopen, gapextend))) { ajFmtPrintF(logf, "%-15s\n", "ALIGNMENT"); ajFmtPrintF(logf, "Could not align sequence in scop domain %S\n ", temp_scop->Entry); ajFmtPrintS(&msg, "Could not align sequence in scop domain %S\n ", temp_scop->Entry); ajWarn(ajStrGetPtr(msg)); continue; } } ajListIterDel(&iter); temp_scop = NULL; /* Sort list of Scop objects by Score */ ajListSort(scop_list, ssematch_CompScoreInv); iter=ajListIterNew(scop_list); /* Write top-scoring hits to outfile. */ for(x=0; x < max_hits; x++ ) { temp_scop=(AjPScop)ajListIterGet(iter); /* Print score to output file. */ ajFmtPrintF(outfile, "XX ALIGNMENT SCORE %.3f\nXX\n", temp_scop->Score); /* Could also write alignment - later modification. */ if(!ajScopWrite(outfile, temp_scop)) ajFatal("Could not write output file %S\n", outfile); } ajListIterDel(&iter); temp_scop = NULL; } /* Memoryt management. */ ajFileClose(&dcfin); ajFileClose(&ssin); ajMatrixfDel(&matrix); ajFileClose(&out_ss); ajFileClose(&out_se); ajFileClose(&logf); while(ajListPop(scop_list, (void *) &temp_scop)) ajScopDel(&temp_scop); ajListFree(&scop_list); ajStrDel(&msg); ajStrDel(&line); ajStrDel(&qse); ajStrDel(&qss); ajSeqDel(&q3se); ajSeqDel(&q3ss); ajExit(); return 0; }
/* @funcstatic domainalign_ProcessStampFile ***********************************
**
** Rewrites a STAMP output file as an annotated alignment file.
**
** Input is skipped up to the first line whose character at position 1 is
** NUL (presumably an empty or one-character line). If such a line exists,
** the output file is created, the classification records of the domain
** are written and the remaining input is processed in repeating groups
** of three blocks separated by such lines: block 1 (numbering and
** sequences) is reformatted, block 2 (secondary structure) is dropped
** and from block 3 only the "Post" similarity lines are kept.
** If no such line exists a warning is issued and no output file is made.
**
** @param [r] in [AjPStr] Name of STAMP file to read
** @param [r] out [AjPStr] Name of alignment file to write
** @param [r] domain [AjPDomain] Domain being aligned
** @param [r] noden [ajint] Node level: 1-4 for SCOP, 5-9 for CATH
** @param [u] logf [AjPFile] Log file
**
** @return [void]
******************************************************************************/
static void domainalign_ProcessStampFile(AjPStr in, AjPStr out,
                                         AjPDomain domain, ajint noden,
                                         AjPFile logf)
{
    AjPFile alignf  = NULL;          /* Output file.                     */
    AjPFile stampf  = NULL;          /* Input file.                      */
    AjPStr  rdline  = ajStrNew();    /* Current input line.              */
    AjPStr  token   = ajStrNew();    /* First word of a sequence line.   */
    AjPStr  domid   = ajStrNew();    /* Domain identifier code.          */
    AjPStr  seqtext = ajStrNew();    /* Sequence part of a line.         */
    ajint   block   = 1;             /* Block of the input being read.   */
    AjBool  started = ajFalse;       /* Start of the data was found.     */


    if(!(stampf = ajFileNewInNameS(in)))
        ajFatal("Could not open input file in domainalign_ProcessStampFile");


    /* Skip everything before the data. */
    while(!started && ajReadlineTrim(stampf, &rdline))
        if(ajStrGetCharPos(rdline, 1) == '\0')
            started = ajTrue;


    if(!started)
    {
        ajWarn("\n***********************************************\n"
               "* STAMP was called but output file was EMPTY! *\n"
               "* NO OUTPUT FILE GENERATED FOR THIS NODE. *\n"
               "***********************************************\n");
        ajFmtPrintF(logf, "STAMP called but output file empty. "
                    "No output file for this node!");
    }
    else
    {
        if(!(alignf = ajFileNewOutNameS(out)))
            ajFatal("Could not open output file in domainalign_ProcessStampFile");


        /* Classification records followed by the node identifier. */
        if(domain->Type == ajSCOP)
        {
            ajFmtPrintF(alignf, "# TY SCOP\n# XX\n");
            ajFmtPrintF(alignf, "# CL %S", domain->Scop->Class);
            ajFmtPrintSplit(alignf, domain->Scop->Fold,
                            "\n# XX\n# FO ", 75, " \t\n\r");
            ajFmtPrintSplit(alignf, domain->Scop->Superfamily,
                            "# XX\n# SF ", 75, " \t\n\r");
            ajFmtPrintSplit(alignf, domain->Scop->Family,
                            "# XX\n# FA ", 75, " \t\n\r");
            ajFmtPrintF(alignf, "# XX\n");

            switch(noden)
            {
                case 1:
                    ajFmtPrintF(alignf, "# SI %d\n# XX",
                                domain->Scop->Sunid_Class);
                    break;
                case 2:
                    ajFmtPrintF(alignf, "# SI %d\n# XX",
                                domain->Scop->Sunid_Fold);
                    break;
                case 3:
                    ajFmtPrintF(alignf, "# SI %d\n# XX",
                                domain->Scop->Sunid_Superfamily);
                    break;
                case 4:
                    ajFmtPrintF(alignf, "# SI %d\n# XX",
                                domain->Scop->Sunid_Family);
                    break;
                default:
                    ajFatal("Node number error in domainalign_ProcessStampFile");
                    break;
            }
        }
        else
        {
            ajFmtPrintF(alignf, "# TY CATH\n# XX\n");
            ajFmtPrintF(alignf, "# CL %S", domain->Cath->Class);
            ajFmtPrintSplit(alignf, domain->Cath->Architecture,
                            "\n# XX\n# AR ", 75, " \t\n\r");
            ajFmtPrintSplit(alignf, domain->Cath->Topology,
                            "# XX\n# TP ", 75, " \t\n\r");
            ajFmtPrintSplit(alignf, domain->Cath->Superfamily,
                            "# XX\n# SF ", 75, " \t\n\r");
            ajFmtPrintF(alignf, "# XX\n");

            switch(noden)
            {
                case 5:
                    ajFmtPrintF(alignf, "# SI %d\n# XX",
                                domain->Cath->Class_Id);
                    break;
                case 6:
                    ajFmtPrintF(alignf, "# SI %d\n# XX",
                                domain->Cath->Arch_Id);
                    break;
                case 7:
                    ajFmtPrintF(alignf, "# SI %d\n# XX",
                                domain->Cath->Topology_Id);
                    break;
                case 8:
                    ajFmtPrintF(alignf, "# SI %d\n# XX",
                                domain->Cath->Superfamily_Id);
                    break;
                case 9:
                    ajFmtPrintF(alignf, "# SI %d\n# XX",
                                domain->Cath->Family_Id);
                    break;
                default:
                    ajFatal("Node number error in domainalign_ProcessStampFile");
                    break;
            }
        }


        /* Remainder of the input, block by block. */
        while(ajReadlineTrim(stampf, &rdline))
        {
            /* A separator line moves on to the next block: 1, 2, 3, 1... */
            if(ajStrGetCharPos(rdline, 1) == '\0')
            {
                block = (block == 3) ? 1 : block + 1;
                continue;
            }

            switch(block)
            {
                case 1:
                    /* Numbering line and protein sequences. */
                    if(ajStrPrefixC(rdline, "Number"))
                    {
                        /* The number line is printed as it is. */
                        ajFmtPrintF(alignf, "\n# %7s %S\n", " ", rdline);
                        break;
                    }

                    /* Only the first 7 characters of the first word are
                       used as the domain identifier code. */
                    ajFmtScanS(rdline, "%S", &token);
                    ajStrAssignSubS(&domid, token, 0, 6);

                    /* Sequence text, with blanks turned into 'X'. */
                    ajStrAssignSubS(&seqtext, rdline, 13, 69);
                    ajStrExchangeSetCC(&seqtext, " ", "X");
                    ajFmtPrintF(logf, "Replaced ' ' in STAMP alignment "
                                "with 'X'\n");
                    ajStrFmtUpper(&seqtext);

                    ajFmtPrintF(alignf, "%-15S%7d %S%7d\n",
                                domid, 0, seqtext, 0);
                    break;

                case 2:
                    /* Secondary structure filled with '????' (unwanted). */
                    break;

                default:
                    /* Similarity lines: only "Post" lines are written,
                       Very and Less similar lines are ignored. */
                    if(ajStrPrefixC(rdline, "Post"))
                    {
                        ajStrAssignSubS(&seqtext, rdline, 13, 69);
                        ajFmtPrintF(alignf, "%-15s%7s %S\n",
                                    "# Post_similar", " ", seqtext);
                    }
                    break;
            }
        }
    }


    /* Close files and release strings. */
    ajFileClose(&alignf);
    ajFileClose(&stampf);
    ajStrDel(&rdline);
    ajStrDel(&token);
    ajStrDel(&domid);
    ajStrDel(&seqtext);

    return;
}
/* @funcstatic domainalign_ProcessTcoffeeFile *********************************
**
** Parses tcoffee output and writes it as an annotated alignment file.
**
** The classification records of the domain are written first. Input is
** then skipped up to the first line whose character at position 1 is NUL
** (presumably an empty or one-character line). Of the remaining lines,
** such lines and lines starting with "CLUSTAL" are dropped, lines starting
** with a space produce an empty output line, and every other line is
** written as identifier code plus sequence.
**
** @param [r] in [AjPStr] Name of TCOFFEE input file
** @param [r] align [AjPStr] Name of sequence alignment file for output
** @param [r] domain [AjPDomain] Domain being aligned
** @param [r] noden [ajint] Node-level of alignment: 1-4 for SCOP,
**                          5-9 for CATH
** @param [r] logf [AjPFile] Log file (not used by this function)
**
** @return [void]
** @@
******************************************************************************/
static void domainalign_ProcessTcoffeeFile(AjPStr in, AjPStr align,
                                           AjPDomain domain, ajint noden,
                                           AjPFile logf)
{
    AjPFile outf  = NULL;    /* Output file pointer.                    */
    AjPFile inf   = NULL;    /* Input file pointer.                     */
    AjPStr  temp1 = NULL;    /* First word of a sequence line.          */
    AjPStr  temp2 = NULL;    /* Domain identifier code.                 */
    AjPStr  temp3 = NULL;    /* Sequence part of a line.                */
    AjPStr  line  = NULL;    /* Line of text from input file.           */

    (void) logf;

    /* Initialise strings. */
    line  = ajStrNew();
    temp1 = ajStrNew();
    temp2 = ajStrNew();
    temp3 = ajStrNew();


    /* Open input and output files. */
    if(!(inf=ajFileNewInNameS(in)))
        ajFatal("Could not open input file in domainalign_ProcessTcoffeeFile");

    if(!(outf=ajFileNewOutNameS(align)))
        ajFatal("Could not open output file in domainalign_ProcessTcoffeeFile");


    /* Write DOMAIN classification records and node identifier to file. */
    if(domain->Type == ajSCOP)
    {
        ajFmtPrintF(outf,"# TY SCOP\n# XX\n");
        ajFmtPrintF(outf,"# CL %S",domain->Scop->Class);
        ajFmtPrintSplit(outf,domain->Scop->Fold,
                        "\n# XX\n# FO ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Scop->Superfamily,
                        "# XX\n# SF ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Scop->Family,
                        "# XX\n# FA ",75," \t\n\r");
        ajFmtPrintF(outf,"# XX\n");

        if(noden==1)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Scop->Sunid_Class);
        else if(noden==2)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Scop->Sunid_Fold);
        else if(noden==3)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Scop->Sunid_Superfamily);
        else if(noden==4)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Scop->Sunid_Family);
        else
            ajFatal("Node number error in domainalign_ProcessTcoffeeFile");
    }
    else
    {
        ajFmtPrintF(outf,"# TY CATH\n# XX\n");
        ajFmtPrintF(outf,"# CL %S",domain->Cath->Class);
        ajFmtPrintSplit(outf,domain->Cath->Architecture,
                        "\n# XX\n# AR ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Cath->Topology,
                        "# XX\n# TP ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Cath->Superfamily,
                        "# XX\n# SF ",75," \t\n\r");
        ajFmtPrintF(outf,"# XX\n");

        if(noden==5)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Cath->Class_Id);
        else if(noden==6)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Cath->Arch_Id);
        else if(noden==7)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Cath->Topology_Id);
        else if(noden==8)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Cath->Superfamily_Id);
        else if(noden==9)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Cath->Family_Id);
        else
            ajFatal("Node number error in domainalign_ProcessTcoffeeFile");
    }


    /* Start of code for reading input file.
       Ignore everything up to and including the first separator line. */
    while(ajReadlineTrim(inf,&line))
        if(ajStrGetCharPos(line, 1)=='\0')
            break;


    /* Read rest of input file. */
    while(ajReadlineTrim(inf,&line))
    {
        if(ajStrGetCharPos(line, 1)=='\0')
            continue;
        else if(ajStrPrefixC(line,"CLUSTAL"))
            continue;
        else if(ajStrPrefixC(line," "))
            ajFmtPrintF(outf,"\n");
        else
        {
            /*
            ** A sequence line holds an identifier and a sequence. If
            ** either is missing, temp3 would still hold the sequence
            ** of an earlier line, so the line is not written.
            */
            if(ajFmtScanS(line, "%S %S", &temp1,&temp3) < 2)
            {
                ajWarn("Ignoring incomplete alignment line: %S", line);
                continue;
            }

            /* Only the first 7 characters of the first word are used
               as the domain identifier code. */
            ajStrAssignSubS(&temp2, temp1, 0, 6);

            /* Write domain id code and sequence out. */
            ajFmtPrintF(outf,"%-13S%S\n",temp2, temp3);
        }
    }


    /* Clean up and close input and output files. */
    ajFileClose(&outf);
    ajFileClose(&inf);
    ajStrDel(&line);
    ajStrDel(&temp1);
    ajStrDel(&temp2);
    ajStrDel(&temp3);

    /* All done. */
    return;
}
/* @funcstatic newcoils_read_matrix *******************************************
**
** Reads the matrix and stores in a hept_pref structure
**
** Lines starting with '%' are skipped. Lines starting with "uw " or "w "
** describe a window (unweighted or weighted) and add one entry to the
** window table. Lines starting with a capital letter give the heptad
** values of the residue with that letter; values that are not positive
** are stored as -1. Any other line is reported and ignored, as is any
** line from which too few values can be read.
**
** @param [u] inf [AjPFile] matrix input file
** @return [struct hept_pref*] Matrix data for heptad preference
******************************************************************************/
static struct hept_pref* newcoils_read_matrix(AjPFile inf)
{
    ajint i;
    ajint j;
    ajint pt;
    ajint aa_len;
    ajint win    = 0;

    float m_g    = 0.0f;
    float sd_g   = 0.0f;
    float m_cc   = 0.0f;
    float sd_cc  = 0.0f;
    float sc     = 0.0f;
    float hept[NCHEPTAD];

    AjPStr buff;
    const char *pbuff;

    struct hept_pref *h;


    buff = ajStrNew();

    aa_len = (ajint) strlen(NCAAs);

    AJNEW(h);

    /* One row per residue code, every position marked as unset (-1) */
    AJCNEW(h->m,aa_len);

    for(i=0; i<aa_len; ++i)
    {
        AJCNEW(h->m[i],NCHEPTAD);

        for(j=0; j<NCHEPTAD; ++j)
            h->m[i][j] = -1;
    }

    AJNEW(h->f);

    h->n = 0;
    h->smallest = 1.0;

    while(ajReadlineTrim(inf,&buff))
    {
        pbuff = ajStrGetPtr(buff);

        if(*pbuff == '%')
            continue;

        if((strncmp(pbuff,"uw ",3)==0) || (strncmp(pbuff,"w ",2)==0))
        {
            /* All six window values are needed; without this check
               the ones not read would be used uninitialised. */
            if(ajFmtScanS(buff,"%*s %d %f %f %f %f %f",&win,&m_cc,
                          &sd_cc,&m_g,&sd_g,&sc) < 6)
            {
                ajWarn("incomplete window parms in matrix file\n");
                ajWarn("Ignoring line: %S\n",buff);
                continue;
            }

            i = h->n;

            h->f[i].w     = (strncmp(pbuff,"uw ",3)==0) ? 0 : 1;
            h->f[i].win   = win;
            h->f[i].m_cc  = m_cc;
            h->f[i].sd_cc = sd_cc;
            h->f[i].m_g   = m_g;
            h->f[i].sd_g  = sd_g;
            h->f[i].sc    = sc;
            h->n++;

            /* Keep room for one more window entry */
            AJCRESIZE(h->f,(h->n)+1);

            if((h->n)>=9)
                ajFatal("Too many window parms in matrix file\n");
        }
        else if(*pbuff>='A' && *pbuff<='Z')
        {
            /* AA data */
            pt = (int)(pbuff[0]-'A');

            /* The letter selects the row; it must lie in the table */
            if(pt >= aa_len)
            {
                ajWarn("unknown AA %c in matrix file\n", *pbuff);
                ajWarn("Ignoring line: %S\n",buff);
                continue;
            }

            if(h->m[pt][0]==-1)
            {
                for(j=0; j<NCHEPTAD; ++j)
                    hept[j] = 0.0f;

                /* The format reads seven values, one per position */
                if(ajFmtScanS(buff,"%*s%f%f%f%f%f%f%f",&hept[0],
                              &hept[1],&hept[2],&hept[3],&hept[4],
                              &hept[5],&hept[6]) < 7)
                {
                    ajWarn("incomplete entry for AA %c in matrix file\n",
                           *pbuff);
                    ajWarn("Ignoring line: %S\n",buff);
                    continue;
                }

                for(i=0; i<NCHEPTAD; ++i)
                {
                    h->m[pt][i] = hept[i];

                    if(h->m[pt][i]>0)
                    {
                        if(h->m[pt][i]<h->smallest)
                            h->smallest = h->m[pt][i];
                    }
                    else
                        h->m[pt][i]=-1; /* Don't permit zero values */
                }
            }
            else
                ajWarn("multiple entries for AA %c in matrix file\n",
                       *pbuff);
        }
        else
        {
            ajWarn("strange characters in matrix file\n");
            ajWarn("Ignoring line: %S\n",buff);
        }
    }

    ajStrDel(&buff);

    return h;
}
int main(int argc, char **argv) { AjPList sigin = NULL; /* Signature input file names. */ AjPStr signame = NULL; /* Name of signature file. */ AjPFile sigf = NULL; /* Signature input file. */ EmbPSignature sig = NULL; /* Signature. */ AjPList siglist = NULL; /* List of signatures. */ AjIList sigiter = NULL; /* Iterator for siglist. */ AjBool sigok = ajFalse; /* True if signature processed ok. */ EmbPHit hit = NULL; /* Hit to store signature-sequence match. */ AjPList hits = NULL; /* List of hits */ AjPList ligands = NULL; /* List of top-scoring ligands. */ AjPSeqall database=NULL; /* Protein sequences to match signature against. */ AjPSeq seq = NULL; /* Current sequence. */ AjPMatrixf sub =NULL; /* Residue substitution matrix. */ float gapo =0.0; /* Gap insertion penalty. */ float gape =0.0; /* Gap extension penalty. */ AjPStr nterm=NULL; /* Holds N-terminal matching options from acd. */ ajint ntermi=0; /* N-terminal option as int. */ AjPFile hitsf =NULL; /* Hits output file. sequence matches. */ AjPDirout hitsdir=NULL; /* Directory of hits files (output). */ AjPFile alignf =NULL; /* Alignment output file. */ AjPDirout aligndir=NULL; /* Directory of alignment files (output). */ AjPFile resultsf =NULL; /* Results file (output). */ AjPDirout resultsdir=NULL; /* Directory of results files (output). */ AjPStr mode = NULL; /* Mode, 1: Patch score mode, 2: Site score mode. */ ajint modei = 0; /* Selected mode as integer. */ SigPLighit lighit = NULL; embInitPV("sigscanlig", argc, argv, "SIGNATURE",VERSION); /* GET VALUES FROM ACD */ sigin = ajAcdGetDirlist("siginfilesdir"); database = ajAcdGetSeqall("dbseqall"); sub = ajAcdGetMatrixf("sub"); gapo = ajAcdGetFloat("gapo"); gape = ajAcdGetFloat("gape"); nterm = ajAcdGetListSingle("nterm"); hitsdir = ajAcdGetOutdir("hitsoutdir"); aligndir = ajAcdGetOutdir("alignoutdir"); resultsdir = ajAcdGetOutdir("resultsoutdir"); mode = ajAcdGetListSingle("mode"); /*Assign N-terminal matching option etc. 
*/ ajFmtScanS(nterm, "%d", &ntermi); modei = (ajint) ajStrGetCharFirst(mode)-48; /* READ & PROCESS SIGNATURES */ siglist = ajListNew(); while(ajListPop(sigin, (void **) &signame)) { /* Read signature files, compile signatures and populate list. */ sigok = ajFalse; if((sigf = ajFileNewInNameS(signame))) if((sig = embSignatureReadNew(sigf))) if(embSignatureCompile(&sig, gapo, gape, sub)) { sigok=ajTrue; ajListPushAppend(siglist, sig); /* ajFmtPrint("Id: %S\nDomid: %S\nLigid: %S\nns: %d\n" "sn: %d\nnp: %d\npn: %d\nminpatch: %d\n" "maxgap: %d\n", sig->Id, sig->Domid, sig->Ligid, sig->ns, sig->sn, sig->np, sig->pn, sig->minpatch, sig->maxgap); */ } if(!sigok) { ajWarn("Could not process %S", signame); embSignatureDel(&sig); ajFileClose(&sigf); ajStrDel(&signame); continue; } ajFileClose(&sigf); ajStrDel(&signame); } ajListFree(&sigin); /* ALIGN EACH QUERY SEQUENCE TO LIST OF SIGNATURE */ while(ajSeqallNext(database, &seq)) { /* Do sequence-signature alignment and save results */ hits = ajListNew(); sigiter = ajListIterNew(siglist); while((sig = (EmbPSignature) ajListIterGet(sigiter))) { if(embSignatureAlignSeq(sig, seq, &hit, ntermi)) { hit->Sig = sig; ajListPushAppend(hits, hit); hit=NULL; /* To force reallocation by embSignatureAlignSeq */ } /* There has to be a hit for each signature for correct generation of the LHF by sigscanlig_WriteFasta. So push an empty hit if necessary. 'hit'=NULL forces reallocation by embSignatureAlignSeq. 
*/ /* else { hit = embHitNew(); ajListPushAppend(hits, hit); hit=NULL; } */ } ajListIterDel(&sigiter); /* Rank-order the list of hits by score */ ajListSort(hits, embMatchinvScore); /* Write ligand hits & alignment files (output) */ hitsf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), hitsdir); alignf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), aligndir); resultsf = ajFileNewOutNameDirS(ajSeqGetNameS(seq), resultsdir); /* if((!sigscanlig_WriteFasta(hitsf, siglist, hits))) ajFatal("Bad args to sigscanlig_WriteFasta"); */ if((!sigscanlig_WriteFasta(hitsf, hits))) ajFatal("Bad args to sigscanlig_WriteFasta"); if((!sigscanlig_SignatureAlignWriteBlock(alignf, hits))) ajFatal("Bad args to sigscanlig_SignatureAlignWriteBlock"); /* if((!sigscanlig_SignatureAlignWriteBlock(alignf, siglist, hits))) ajFatal("Bad args to sigscanlig_SignatureAlignWriteBlock"); */ /* Sort list of hits by ligand type and site number. Process list of ligands and print out. */ ajListSortTwo(hits, embMatchLigid, embMatchSN); if(modei==1) ligands = sigscanlig_score_ligands_patch(hits); else if(modei==2) ligands = sigscanlig_score_ligands_site(hits); else ajFatal("Unrecognised mode"); sigscanlig_WriteResults(ligands, resultsf); ajFileClose(&hitsf); ajFileClose(&alignf); ajFileClose(&resultsf); /* Memory management */ while(ajListPop(hits, (void **) &hit)) embHitDel(&hit); ajListFree(&hits); while(ajListPop(ligands, (void **) &lighit)) sigscanlig_LigHitDel(&lighit); ajListFree(&ligands); } /* MEMORY MANAGEMENT */ while(ajListPop(siglist, (void **) &sig)) embSignatureDel(&sig); ajListFree(&siglist); ajSeqallDel(&database); ajMatrixfDel(&sub); ajStrDel(&nterm); ajDiroutDel(&hitsdir); ajDiroutDel(&aligndir); ajDiroutDel(&resultsdir); ajStrDel(&mode); embExit(); return 0; }
/* @funcstatic dbxflat_ParseGenbank *******************************************
**
** Reads one GenBank-style entry, up to its "//" terminator line, and passes
** the fields enabled in the entry object to the matching embBtreeGenBank
** helper. The file position saved just before the LOCUS line was read is
** stored in entry->fpos and the second word of that line in entry->id.
**
** KEYWORDS and DEFINITION text is gathered together with the following
** lines that start with a space. For SOURCE only the following lines that
** start with a space are gathered; the SOURCE line itself is not included.
**
** @param [u] entry [EmbPBtreeEntry] Entry object receiving the parsed data
** @param [u] inf [AjPFile] Input file positioned at the start of an entry
** @return [AjBool] ajFalse if the end of file was reached while reading
******************************************************************************/

static AjBool dbxflat_ParseGenbank(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line    = NULL;
    ajlong pos     = 0L;
    AjBool ret     = ajTrue;
    AjPStr sumline = NULL;

    line    = ajStrNewC("");
    sumline = ajStrNew();

    while(!ajStrPrefixC(line,"//") && ret)
    {
        if(ajStrPrefixC(line,"LOCUS"))
        {
            entry->fpos = pos;
            ajFmtScanS(line,"%*S%S",&entry->id);
        }

        if(entry->do_sv)
            if(ajStrPrefixC(line,"VERSION"))
                embBtreeGenBankAC(line,entry->sv);

        if(entry->do_accession)
            if(ajStrPrefixC(line,"ACCESSION"))
                embBtreeGenBankAC(line,entry->ac);

        if(entry->do_keyword)
            if(ajStrPrefixC(line,"KEYWORDS"))
            {
                ajStrAssignS(&sumline,line);
                ret = ajReadlineTrim(inf,&line);

                while(ret && *MAJSTRGETPTR(line)==' ')
                {
                    ajStrAppendS(&sumline,line);
                    ret = ajReadlineTrim(inf,&line);
                }

                ajStrRemoveWhiteExcess(&sumline);
                embBtreeGenBankKW(sumline,entry->kw,entry->kwlen);

                /* 'line' already holds the next unprocessed line */
                continue;
            }

        if(entry->do_description)
            if(ajStrPrefixC(line,"DEFINITION"))
            {
                ajStrAssignS(&sumline,line);
                ret = ajReadlineTrim(inf,&line);

                while(ret && *MAJSTRGETPTR(line)==' ')
                {
                    ajStrAppendS(&sumline,line);
                    ret = ajReadlineTrim(inf,&line);
                }

                ajStrRemoveWhiteExcess(&sumline);
                embBtreeGenBankDE(sumline,entry->de,entry->delen);

                /* 'line' already holds the next unprocessed line */
                continue;
            }

        if(entry->do_taxonomy)
            if(ajStrPrefixC(line,"SOURCE"))
            {
                /* Start from an empty buffer: without this, text gathered
                   for KEYWORDS or DEFINITION earlier in the same entry
                   would be passed on as taxonomy. */
                ajStrAssignC(&sumline,"");

                ret = ajReadlineTrim(inf,&line);

                /* Terminate the first continuation line with ';' */
                ajStrAppendC(&line,";");

                while(ret && *MAJSTRGETPTR(line)==' ')
                {
                    ajStrAppendS(&sumline,line);
                    ret = ajReadlineTrim(inf,&line);
                }

                ajStrRemoveWhiteExcess(&sumline);
                embBtreeGenBankTX(sumline,entry->tx,entry->txlen);

                /* 'line' already holds the next unprocessed line */
                continue;
            }

        /* Remember where the next line starts, then read it */
        pos = ajFileResetPos(inf);

        if(!ajReadlineTrim(inf,&line))
            ret = ajFalse;
    }

    ajStrDel(&line);
    ajStrDel(&sumline);

    return ret;
}
/* @func embPropEaminoRead ****************************************************
**
** Reads an amino acid property data file into a newly allocated array of
** EMBPROPSIZE property records.
**
** Empty lines and lines starting with '#' or '!' are skipped. The first
** remaining line must start with "aa". Every later line must start with a
** single character, which selects the record to fill through
** ajBasecodeToInt, followed by ten property values.
**
** @param [u] mfptr [AjPFile] Amino acid property data file
** @return [EmbPPropAmino*] Array of amino acid property records
******************************************************************************/

EmbPPropAmino* embPropEaminoRead(AjPFile mfptr)
{
    AjPStr buf            = NULL;
    AjPStr code           = NULL;
    AjBool header_pending = ajTrue;
    const char *cp        = NULL;
    ajuint idx            = 0;
    ajint nread           = 0;
    EmbPPropAmino *props  = NULL;
    EmbPPropAmino rec     = NULL;

    buf  = ajStrNew();
    code = ajStrNew();

    /* One zeroed record per residue code */
    AJCNEW0(props,EMBPROPSIZE);

    for(idx = 0; idx < EMBPROPSIZE; ++idx)
        AJNEW0(props[idx]);

    while(ajReadline(mfptr, &buf))
    {
        ajStrRemoveWhiteExcess(&buf);
        cp = ajStrGetPtr(buf);

        /* Comment and empty lines carry no data */
        switch(*cp)
        {
            case '#':
            case '!':
            case '\0':
                continue;
            default:
                break;
        }

        /* The first data line is the column heading */
        if(header_pending)
        {
            if(!ajStrPrefixC(buf,"aa"))
                ajFatal("Incorrect (old?) format amino data file");

            header_pending = ajFalse;
            continue;
        }

        /* The first word is the one-letter residue code */
        ajFmtScanS(buf,"%S",&code);
        ajStrFmtUpper(&code);

        if(ajStrGetLen(code) != 1)
            ajFatal("Amino file line doesn't begin with a single character");

        idx = ajBasecodeToInt((ajint) *ajStrGetPtr(code));

        if(idx == 27)
            ajFatal("Amino file line doesn't begin with a single A->Z (%S)",
                    buf);

        rec = props[idx];

        nread = ajFmtScanS(buf,"%*s%d%d%d%d%d%d%f%d%d%d",
                           &rec->tiny,
                           &rec->sm_all,
                           &rec->aliphatic,
                           &rec->aromatic,
                           &rec->nonpolar,
                           &rec->polar,
                           &rec->charge,
                           &rec->pve,
                           &rec->nve,
                           &rec->extcoeff);

        /* Ten values plus the leading code make eleven columns */
        if(nread != 10)
            ajFatal("Only %d columns in amino file - expected %d",nread+1,11);
    }

    ajStrDel(&buf);
    ajStrDel(&code);

    return props;
}
/* @func embPropEmolwtRead ****************************************************
**
** Reads a molecular weight data file into a newly allocated array of
** EMBPROPSIZE+2 records, each holding an average and a monoisotopic value.
**
** Empty lines and lines starting with '#' or '!' are skipped. The first
** remaining line must start with "Mol". Lines whose first word is
** HYDROGEN, OXYGEN or WATER fill the records at EMBPROPHINDEX,
** EMBPROPOINDEX and EMBPROPWINDEX. Every other line must start with a
** single character, which selects the record through ajBasecodeToInt.
**
** @param [u] mfptr [AjPFile] Molecular weight data file
** @return [EmbPPropMolwt*] Array of molecular weight records
******************************************************************************/

EmbPPropMolwt* embPropEmolwtRead(AjPFile mfptr)
{
    AjPStr line  = NULL;
    AjPStr token = NULL;
    AjBool firstline;
    const char *p;
    ajuint i;
    ajint n;
    EmbPPropMolwt *ret;

    line  = ajStrNew();
    token = ajStrNew();
    firstline = ajTrue;

    AJCNEW0(ret,EMBPROPSIZE+2);

    for(i=0; i < EMBPROPSIZE+2; ++i)
        AJNEW0(ret[i]);

    while(ajReadline(mfptr, &line))
    {
        ajStrRemoveWhiteExcess(&line);
        p = ajStrGetPtr(line);

        /* Skip comment and empty lines */
        if(*p=='#' || *p=='!' || !*p)
            continue;

        /* The first data line is the column heading */
        if(firstline)
        {
            if(!ajStrPrefixC(line,"Mol"))
                ajFatal("Incorrect format molwt file: '%S'", line);

            firstline = ajFalse;
            continue;
        }

        ajFmtScanS(line,"%S",&token);
        ajStrFmtUpper(&token);

        /* A multi-character first word must be one of the named entries */
        if(ajStrGetLen(token) != 1)
        {
            if(ajStrPrefixC(token,"HYDROGEN"))
            {
                if(ajFmtScanS(line,"%*s%lf%lf",
                              &ret[EMBPROPHINDEX]->average,
                              &ret[EMBPROPHINDEX]->mono) != 2)
                    ajFatal("Bad format hydrogen data line");
            }
            else if(ajStrPrefixC(token,"OXYGEN"))
            {
                if(ajFmtScanS(line,"%*s%lf%lf",
                              &ret[EMBPROPOINDEX]->average,
                              &ret[EMBPROPOINDEX]->mono) != 2)
                    ajFatal("Bad format oxygen data line");
            }
            else if(ajStrPrefixC(token,"WATER"))
            {
                if(ajFmtScanS(line,"%*s%lf%lf",
                              &ret[EMBPROPWINDEX]->average,
                              &ret[EMBPROPWINDEX]->mono) != 2)
                    ajFatal("Bad format water data line");
            }
            else
                ajFatal("Unknown molwt token %S",token);

            continue;
        }

        i = ajBasecodeToInt((ajint) *ajStrGetPtr(token));

        if(i == 27)
            ajFatal("Molwt file line doesn't begin with a single A->Z (%S)",
                    line);

        n = ajFmtScanS(line,"%*s%lf%lf",
                       &ret[i]->average,
                       &ret[i]->mono);

        /* Two values plus the leading code make three columns, so the
           number of columns actually found is n+1. */
        if(n != 2)
            ajFatal("Only %d columns in molwt file - expected %d",n+1,3);
    }

    ajStrDel(&line);
    ajStrDel(&token);

    return ret;
}
/* @funcstatic acdrelations_procacdfile ***************************************
**
** Copies an ACD file to a new ACD file, writing a fresh relations: line
** at the end of every data definition and dropping the relations: lines
** that were already there.
**
** Application and section definitions (up to their closing "]"), variable
** and endsection lines, "#" comment lines and blank lines are copied
** unchanged.
**
** @param [r] inf [AjPFile] ACD input file
** @param [r] outf [AjPFile] ACD output file
** @param [r] P [PEdam] edam object
** @param [r] T [PKtype] ktype object
** @return [void]
** @@
******************************************************************************/

static void acdrelations_procacdfile
            (AjPFile inf,
             AjPFile outf,
             PEdam   P,
             PKtype  T)
{
    AjPStr  buf      = ajStrNew();     /* Current input line               */
    AjPStr  word     = ajStrNew();     /* First word of the current line   */
    AjPStr  datatype = ajStrNew();     /* Datatype of the open definition  */
    AjPStr  attr     = NULL;           /* Whitespace-free copy of a line   */
    AjPList attrs    = ajListstrNew(); /* Lines of the open definition     */
    AjPStr *attrarr  = NULL;
    ajint   nattr    = 0;

    while(ajReadline(inf,&buf))
    {
        ajFmtScanS(buf, "%S", &word);

        /* Application or section definition: copy through to its "]" */
        if(ajStrMatchC(word, "application:") ||
           ajStrMatchC(word, "section:"))
        {
            ajFmtPrintF(outf, "%S", buf);

            while(ajReadline(inf,&buf))
            {
                ajFmtPrintF(outf, "%S", buf);
                ajFmtScanS(buf, "%S", &word);

                if(ajStrMatchC(word, "]"))
                    break;
            }

            continue;
        }

        /* Variables, endsection definitions and comments: copy as-is */
        if(ajStrMatchC(word, "variable:")   ||
           ajStrMatchC(word, "endsection:") ||
           ajStrMatchC(word, "#"))
        {
            ajFmtPrintF(outf, "%S", buf);
            continue;
        }

        /* Blank lines: copy as-is */
        if(!ajFmtScanS(buf, "%S", &word))
        {
            ajFmtPrintF(outf, "%S", buf);
            continue;
        }

        /* Anything else opens a data definition; its first line names
           the datatype */
        ajFmtPrintF(outf, "%S", buf);
        ajFmtScanS(buf, "%S", &datatype);
        ajStrRemoveSetC(&datatype, ":");

        /* Attribute lines, up to and including the closing "]" */
        while(ajReadline(inf,&buf))
        {
            attr = ajStrNew();
            ajStrAssignS(&attr, buf);
            ajStrRemoveWhite(&attr);
            ajListstrPushAppend(attrs, attr);

            ajFmtScanS(buf, "%S", &word);

            if(ajStrMatchC(word, "]"))
            {
                /* End of definition: write the relations: line first,
                   then the closing line itself */
                nattr = ajListstrToarray(attrs, &attrarr);

                acdrelations_writerelations(outf, datatype, attrarr,
                                            nattr, P, T);

                AJFREE(attrarr);
                ajListstrFreeData(&attrs);
                attrs = ajListstrNew();

                ajFmtPrintF(outf, "%S", buf);
                break;
            }

            /* Existing relations: lines are not copied to the output */
            if(ajStrMatchC(word, "relations:"))
                continue;

            ajFmtPrintF(outf, "%S", buf);
        }
    }

    /* Free memory */
    ajStrDel(&buf);
    ajStrDel(&word);
    ajStrDel(&datatype);
    ajListstrFreeData(&attrs);

    return;
}
/* @prog pdbplus **************************************************************
**
** For every clean coordinate file (CCF) in the input list, runs STRIDE
** and/or NACCESS on the corresponding PDB file, copies the values parsed
** from their output into the matching residues and writes the updated
** coordinate data to the output directory.
**
** STRIDE is run unless the mode string starts with '2'; NACCESS is run
** unless it starts with '1'.
**
** STRIDE ASG lines and NACCESS RES lines that do not supply every expected
** field are reported and skipped, so values left over from an earlier line
** are never written to a residue.
**
** @param [r] argc [ajint] Number of command-line arguments
** @param [r] argv [char**] Command-line arguments
** @return [int] Always 0
******************************************************************************/

int main(ajint argc, char **argv)
{
    AjPList ccfin       = NULL; /* List of CCF (input) files.              */
    AjPDir pdbin        = NULL; /* Path of pdb input files.                */
    AjPStr pdbprefix    = NULL; /* Prefix of pdb input files.              */
    AjPStr pdb_name     = NULL; /* Full name (path/name/extension) of
                                   pdb format input file.                  */
    AjPDirout ccfout    = NULL; /* Path of coordinate output file.         */
    AjPStr randomname   = NULL; /* Name for temp file tempf.               */
    AjPStr ccf_this     = NULL;
    AjPStr exec         = NULL;
    AjPStr naccess_str  = NULL;
    AjPStr line         = NULL;
    AjPStr syscmd       = NULL; /* Command line arguments.                 */
    AjPStr *mode        = NULL; /* Mode of operation from acd.             */

    AjPFile errf        = NULL; /* pdbplus error file pointer.             */
    AjPFile serrf       = NULL; /* stride error file pointer.              */
    AjPFile nerrf       = NULL; /* naccess error file pointer.             */
    AjPFile tempf       = NULL; /* Temp file for holding STRIDE or
                                   NACCESS output.                         */
    AjPFile ccf_inf     = NULL; /* Protein coordinate input file.          */
    AjPFile ccf_outf    = NULL; /* Protein coordinate output file.         */

    AjIList iter        = NULL;

    AjBool done_naccess = ajFalse;
    AjBool done_stride  = ajFalse;
    AjBool found        = ajFalse;
    AjPResidue temp_res = NULL; /* Pointer to Residue object.              */
    AjPPdb pdb_old      = NULL; /* Pointer to PDB object - without new
                                   stride elements.                        */
    AjPPdb pdb          = NULL; /* Pointer to PDB object.                  */
    ajint idn           = 0;    /* Chain identifier as a number (1,2,...)  */
    ajint chain_num     = 0;    /* Chain identifier index (0,1,...).       */
    ajint tS            = 0;    /* User-defined threshold size for SSEs.   */
    ajint nostride      = 0;    /* No. times stride failed                 */
    ajint nonaccess     = 0;    /* No. times naccess failed                */
    ajint nofile        = 0;    /* No. times of file error                 */

    /* Variables for each item that will be parsed from the ASG line. */
    AjPStr res          = NULL; /* Residue id from STRIDE ASG line
                                   (ALA etc).                              */
    AjPStr res_num      = NULL; /* PDB residue number from STRIDE ASG
                                   line.                                   */
    char pcid           = ' ';  /* Protein chain identifier from STRIDE
                                   or NACCESS output (A,B, etc).           */
    char ss             = ' ';  /* One-letter secondary structure code
                                   from STRIDE ASG line.                   */
    float ph            = 0.0;  /* Phi angle from STRIDE ASG line.         */
    float ps            = 0.0;  /* Psi angle from STRIDE ASG line.         */
    float sa            = 0.0;  /* Residue solvent accessible area from
                                   STRIDE ASG line.                        */

    /* The ten values parsed from a NACCESS RES line */
    float f1  = 0;
    float f2  = 0;
    float f3  = 0;
    float f4  = 0;
    float f5  = 0;
    float f6  = 0;
    float f7  = 0;
    float f8  = 0;
    float f9  = 0;
    float f10 = 0;

    /* Allocate strings; this section is used for variables that are
       allocated once only. */
    pdb_name    = ajStrNew();
    res         = ajStrNew();
    res_num     = ajStrNew();
    randomname  = ajStrNew();
    syscmd      = ajStrNew();
    line        = ajStrNew();
    naccess_str = ajStrNew();
    exec        = ajStrNew();

    /* Read data from acd. */
    embInitPV("pdbplus",argc,argv,"STRUCTURE",VERSION);

    ccfin     = ajAcdGetDirlist("ccfinpath");
    pdbin     = ajAcdGetDirectory("pdbindir");
    pdbprefix = ajAcdGetString("pdbprefix");
    ccfout    = ajAcdGetOutdir("ccfoutdir");
    mode      = ajAcdGetList("mode");
    errf      = ajAcdGetOutfile("logfile");

    if(ajStrGetCharFirst(*mode) != '2')
        serrf = ajAcdGetOutfile("slogfile");

    if(ajStrGetCharFirst(*mode) != '1')
        nerrf = ajAcdGetOutfile("nlogfile");

    tS = ajAcdGetInt("thresholdsize");

    ajRandomSeed();
    ajFilenameSetTempname(&randomname);

    /*
    ** Start of main application loop.
    ** Process each PDB/ protein coordinate file (EMBL format) in turn.
    */
    while(ajListPop(ccfin,(void **)&ccf_this))
    {
        /* Open protein coordinate file. If it cannot be opened, write a
           message to the error file, delete ccf_this and continue. */
        if((ccf_inf = ajFileNewInNameS(ccf_this)) == NULL)
        {
            ajWarn("%s%S\n//\n",
                   "clean coordinate file not found: ", ccf_this);
            ajFmtPrintF(errf, "%s%S\n//\n",
                        "clean coordinate file not found: ", ccf_this);
            ajStrDel(&ccf_this);
            nofile++;
            continue;
        }

        ajFmtPrint("Processing %S\n", ccf_this);
        fflush(stdout);

        /* Parse protein coordinate data (from clean format file) into
           AjPPdb object. ajPdbReadAllModelsNew will create the AjPPdb
           object. */
        if(!(pdb_old=ajPdbReadAllModelsNew(ccf_inf)))
        {
            ajWarn("ERROR Clean coordinate file read "
                   "error: %S\n//\n", ccf_this);
            ajFmtPrintF(errf, "ERROR Clean coordinate file read "
                        "error: %S\n//\n", ccf_this);
            ajFileClose(&ccf_inf);
            ajStrDel(&ccf_this);
            nofile++;
            continue;
        }

        ajFileClose(&ccf_inf);
        ajPdbCopy(&pdb, pdb_old);
        ajPdbDel(&pdb_old);

        /* Construct name of corresponding PDB file.
           NACCESS does *not* generate an output file if the path is './'
           e.g. naccess ./1rbp.ent , therefore replace './' with null. */
        ajStrAssignS(&pdb_name, ajDirGetPath(pdbin));

        if(ajStrMatchC(pdb_name, "./") || ajStrMatchC(pdb_name, "."))
            ajStrAssignC(&pdb_name, "");

        ajStrAppendS(&pdb_name, pdbprefix);
        ajStrFmtLower(&pdb->Pdb);
        ajStrAppendS(&pdb_name, pdb->Pdb);
        ajStrAppendC(&pdb_name, ".");
        ajStrAppendS(&pdb_name, ajDirGetExt(pdbin));

        /* Check corresponding PDB file exists for reading. */
        if(!(ajFilenameExistsRead(pdb_name)))
        {
            ajFmtPrintF(errf, "%s%S\n//\n", "PDB file not found: ",
                        pdb_name);
            ajWarn("%s%S\n//\n", "PDB file not found: ", pdb_name);
            ajStrDel(&ccf_this);
            ajPdbDel(&pdb);
            nofile++;
            continue;
        }

        if(ajStrGetCharFirst(*mode) != '2')
        {
            /*
            ** Create a string containing the STRIDE command line (it needs
            ** PDB file name & name of temp output file).
            ** Run STRIDE through the shell.
            */
            ajFmtPrintS(&syscmd, "%S %S -f%S >> %s 2>&1",
                        ajAcdGetpathC("stride"),
                        pdb_name, randomname, ajFileGetNameC(serrf));
            ajFmtPrint("%S %S -f%S >> %s 2>&1\n",
                       ajAcdGetpathC("stride"),
                       pdb_name, randomname,ajFileGetNameC(serrf));
            system(ajStrGetPtr(syscmd));

            /* Open the stride output file */
            if (((tempf = ajFileNewInNameS(randomname)) == NULL))
            {
                ajWarn("%s%S\n//\n",
                       "no stride output for: ", pdb_name);
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "no stride output for: ", pdb_name);
                nostride++;
                ajStrDel(&ccf_this);
                ajPdbDel(&pdb);
                continue;
            }
            else
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "stride output for: ", pdb_name);

            done_stride = ajFalse;

            /* Parse STRIDE output from temp output file a line at a
               time. */
            while(ajReadlineTrim(tempf,&line))
            {
                if(ajStrPrefixC(line,"ASG"))
                {
                    /* All seven stored fields must be present, otherwise
                       values from an earlier line would be reused. */
                    if(ajFmtScanS(line,
                                  "%*S %S %c %S %*d %c %*S %f %f %f %*S",
                                  &res, &pcid, &res_num, &ss,
                                  &ph, &ps, &sa) != 7)
                    {
                        ajWarn("Could not parse STRIDE line for pdb "
                               "file %S: %S\n//\n", pdb_name, line);
                        ajFmtPrintF(errf, "Could not parse STRIDE line "
                                    "for pdb file %S: %S\n//\n",
                                    pdb_name, line);
                        continue;
                    }

                    /*
                    ** Populate pdbplus object with the data from this
                    ** parsed line. This means first identifying the
                    ** chain, then finding the residue.
                    */

                    /* Determine the chain number: a '-' chain identifier
                       is changed to '.' before the lookup. */
                    if (pcid == '-')
                        pcid = '.';

                    /* Get chain number from the chain identifier. */
                    if(!ajPdbChnidToNum(pcid, pdb, &idn))
                    {
                        ajWarn("Could not convert chain id %c to chain"
                               " number in pdb file %S\n//\n",
                               pcid, pdb_name);
                        ajFmtPrintF(errf, "Could not convert chain id %c "
                                    "to chain number in pdb file %S\n//\n",
                                    pcid, pdb_name);
                        continue;
                    }

                    /*
                    ** The chain number that will get written starts at 1,
                    ** but we want an index into an array which must start
                    ** at 0, so subtract 1 from the chain number to get
                    ** the index.
                    */
                    chain_num = idn-1;

                    /*
                    ** Iterate through the list of residues in the Pdb
                    ** object; found switches to true when the first
                    ** residue corresponding to the line is found.
                    */
                    iter = ajListIterNewread(
                               pdb->Chains[chain_num]->Residues);
                    found = ajFalse;

                    while((temp_res = (AjPResidue)ajListIterGet(iter)))
                    {
                        /* If we have found the residue we want */
                        if((ajStrMatchS(res_num, temp_res->Pdb) &&
                            ajStrMatchS(res, temp_res->Id3)))
                        {
                            done_stride = ajTrue;
                            found = ajTrue;
                            temp_res->eStrideType = ss;
                            temp_res->Phi  = ph;
                            temp_res->Psi  = ps;
                            temp_res->Area = sa;
                        }
                        /* If the matching residue has been processed
                           move on to next ASG line, next residue. */
                        else if(found == ajTrue)
                            break;
                        else
                            /* Matching residue not found yet. */
                            continue;
                    }

                    ajListIterDel(&iter);
                } /* End of if ASG loop. */
            } /* End of while line loop. */

            if(done_stride)
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "stride data for: ", pdb_name);
            else
            {
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "no stride data for: ", pdb_name);
                ajWarn("%s%S\n//\n", "no stride data for: ", pdb_name);
                nostride++;
            }

            /* Close STRIDE temp file. & tidy up. */
            ajFileClose(&tempf);

            /* Remove temporary file (stride output file). */
            ajFmtPrintS(&exec, "rm %S", randomname);
            ajSysSystem(exec);

            /*
            ** Calculate element serial numbers (eStrideNum) & amend
            ** residue objects, count no's of elements and amend chain
            ** object (numHelices, num Strands).
            */
            pdbplus_sort(pdb, tS);
        }

        if(ajStrGetCharFirst(*mode) != '1')
        {
            /*
            ** Create a string containing the NACCESS command line (it
            ** needs PDB file name & name of temp output file) & call
            ** NACCESS.
            ** If e.g. /data/structure/pdbfred.ent was parsed and the
            ** program was run from /stuff, then /stuff/fred.asa and
            ** /stuff/fred.rsa would be written. These must be deleted
            ** once parsed (only use the .rsa file here).
            */
            ajFmtPrintS(&syscmd, "%S %S >> %s 2>&1",
                        ajAcdGetpathC("naccess"), pdb_name,
                        ajFileGetNameC(nerrf));
            ajFmtPrint("%S %S >> %s 2>&1\n",
                       ajAcdGetpathC("naccess"), pdb_name,
                       ajFileGetNameC(nerrf));
            system(ajStrGetPtr(syscmd));

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".rsa");

            /* Open the NACCESS output file. */
            if (((tempf = ajFileNewInNameS(naccess_str)) == NULL))
            {
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "no naccess output for: ", pdb_name);
                ajWarn("%s%S\n//\n", "no naccess output for: ", pdb_name);
                nonaccess++;
                ajStrDel(&ccf_this);
                ajPdbDel(&pdb);
                continue;
            }
            else
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "naccess output for: ", pdb_name);

            done_naccess = ajFalse;

            /* Parse NACCESS output from temp output file a line at a
               time. */
            while(ajReadlineTrim(tempf,&line))
            {
                if(ajStrPrefixC(line,"RES"))
                {
                    /* Read data from lines. The chain identifier is the
                       ninth character; it is fetched through the string
                       accessor so that a short line is never read past
                       its end. A blank identifier means the line has no
                       chain column to skip. */
                    pcid = ajStrGetCharPos(line, 8);

                    if(pcid == ' ')
                        found = (ajFmtScanS(line, "%*S %S %S %f %f %f "
                                            "%f %f %f %f %f %f %f",
                                            &res, &res_num,
                                            &f1, &f2, &f3, &f4, &f5,
                                            &f6, &f7, &f8, &f9, &f10)
                                 == 12);
                    else
                        found = (ajFmtScanS(line, "%*S %S %*c %S %f %f "
                                            "%f %f %f %f %f %f %f %f",
                                            &res, &res_num,
                                            &f1, &f2, &f3, &f4, &f5,
                                            &f6, &f7, &f8, &f9, &f10)
                                 == 12);

                    /* All twelve stored fields must be present, otherwise
                       values from an earlier line would be reused. */
                    if(!found)
                    {
                        ajWarn("Could not parse NACCESS line for pdb "
                               "file %S: %S\n//\n", pdb_name, line);
                        ajFmtPrintF(errf, "Could not parse NACCESS line "
                                    "for pdb file %S: %S\n//\n",
                                    pdb_name, line);
                        continue;
                    }

                    /* Identify the chain, then find all the residues
                       corresponding to the line. */

                    /* Get the chain number from the chain identifier. */
                    if(!ajPdbChnidToNum(pcid, pdb, &idn))
                    {
                        ajWarn("Could not convert chain id %c to chain"
                               " number in pdb file %S\n//\n",
                               pcid, pdb_name);
                        ajFmtPrintF(errf, "Could not convert chain id"
                                    " %c to chain number in pdb file "
                                    "%S\n//\n",
                                    pcid, pdb_name);
                        continue;
                    }

                    /*
                    ** Chain number will start at 1, but we want an index
                    ** into an array which must start at 0, so subtract 1
                    ** from the chain number to get the index.
                    */
                    chain_num = idn-1;

                    /*
                    ** Iterate through the list of residues in the Pdb
                    ** object; temp_res is an AjPResidue used to point to
                    ** the current residue.
                    ** found switches to true when the first residue
                    ** corresponding to the line is found.
                    */
                    iter = ajListIterNewread(
                               pdb->Chains[chain_num]->Residues);
                    found = ajFalse;

                    while((temp_res = (AjPResidue)ajListIterGet(iter)))
                    {
                        /* If we have found the residue we want, write
                           the residue object. */
                        if((ajStrMatchS(res_num, temp_res->Pdb) &&
                            ajStrMatchS(res, temp_res->Id3)))
                        {
                            found = ajTrue;
                            done_naccess = ajTrue;
                            temp_res->all_abs  = f1;
                            temp_res->all_rel  = f2;
                            temp_res->side_abs = f3;
                            temp_res->side_rel = f4;
                            temp_res->main_abs = f5;
                            temp_res->main_rel = f6;
                            temp_res->npol_abs = f7;
                            temp_res->npol_rel = f8;
                            temp_res->pol_abs  = f9;
                            temp_res->pol_rel  = f10;
                        }
                        /* If the matching residues have all been
                           processed move on to next RES line. */
                        else if(found == ajTrue)
                            break;
                        else
                            /* Matching residues not found yet, move on
                               to next residue. */
                            continue;
                    }

                    ajListIterDel(&iter);
                }
            }

            if(done_naccess)
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "naccess data for: ", pdb_name);
            else
            {
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "no naccess data for: ", pdb_name);
                ajWarn("%s%S\n//\n", "no naccess data for: ", pdb_name);
                nonaccess++;
            }

            /* Remove temporary files (naccess output files): the .rsa
               file just parsed, then the .asa and .log files. */
            ajFileClose(&tempf);

            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".asa");
            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".log");
            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);
        }

        /* Open CCF (output) file. */
        ccf_outf = ajFileNewOutNameDirS(pdb->Pdb, ccfout);

        /* Write AjPPdb object to the output file in clean format. */
        if(!ajPdbWriteAll(ccf_outf, pdb))
        {
            ajWarn("%s%S\n//\n","Could not write results file for: ",
                   pdb->Pdb);
            ajFmtPrintF(errf,"%s%S\n//\n",
                        "Could not write results file for ", pdb->Pdb);
        }

        ajFileClose(&ccf_outf);
        ajPdbDel(&pdb);
        ajStrDel(&ccf_this);
    } /* End of main application loop. */

    ajFmtPrint("STRIDE  failures: %d\n", nostride);
    ajFmtPrint("NACCESS failures: %d\n", nonaccess);
    ajFmtPrintF(errf, "\n\nSTRIDE  failures: %d\nNACCESS failures: %d\n",
                nostride, nonaccess);

    ajListFree(&ccfin);
    ajDirDel(&pdbin);
    ajStrDel(&pdbprefix);
    ajStrDel(&pdb_name);
    ajDiroutDel(&ccfout);
    ajStrDel(&res);
    ajStrDel(&res_num);
    ajStrDel(&randomname);
    ajStrDel(&line);
    ajStrDel(&naccess_str);
    ajStrDel(&exec);
    ajStrDel(&syscmd);

    ajFileClose(&errf);

    /* The log files were only opened for the programs that were run */
    if(ajStrGetCharFirst(*mode) != '2')
        ajFileClose(&serrf);

    if(ajStrGetCharFirst(*mode) != '1')
        ajFileClose(&nerrf);

    ajStrDel(&mode[0]);
    AJFREE(mode);

    ajExit();

    return 0;
}
static AjPTable jaspscan_ReadFamList(const AjPStr jaspdir) { AjPTable ret = NULL; AjPStr lfile = NULL; AjPStr line = NULL; AjPStr key = NULL; AjPStr str = NULL; PJspmat info = NULL; AjPFile inf = NULL; const char *p = NULL; const char *q = NULL; lfile = ajStrNew(); line = ajStrNew(); str = ajStrNew(); ajFmtPrintS(&lfile,"%S%s%s",jaspdir,SLASH_STRING,J_LIST); inf = ajDatafileNewInNameS(lfile); if(!inf) ajFatal("Matrix list file %S not found",lfile); ret = ajTablestrNewLen(JASPTAB_GUESS); while(ajReadlineTrim(inf,&line)) { info = jaspscan_infonew(); ajFmtScanS(line,"%S%S",&info->id,&info->num); p = ajStrGetPtr(line); while(*p !='\t') ++p; ++p; while(*p != '\t') ++p; ++p; q = p; while(*q != '\t') ++q; ajStrAssignSubC(&info->name,p,0,q-p-1); ++q; p = q; while(*q != ';') ++q; ajStrAssignSubC(&info->klass,p,0,q-p-1); ajStrRemoveWhiteExcess(&info->klass); p = q+1; while(*p) { q = p; while(* q && *q != ';') ++q; ajStrAssignSubC(&str,p,0,q-p-1); ajStrRemoveWhiteExcess(&str); jaspscan_coretoken(info, str); if(!*q) p = q; else p = q +1; } key = ajStrNew(); ajStrAssignS(&key,info->id); ajTablePut(ret,(void *)key,(void *) info); } ajFileClose(&inf); ajStrDel(&lfile); ajStrDel(&line); ajStrDel(&str); return ret; }
/* @funcstatic dbxflat_ParseGenbank *******************************************
**
** Reads one GenBank-style entry, up to its "//" terminator line, and passes
** the data for each index field that is set (svfield, accfield, keyfield,
** desfield, orgfield) to the matching embBtreeGenBank helper. The file
** position saved just before the LOCUS line was read is stored in
** entry->fpos and the second word of that line in entry->id.
**
** KEYWORDS and DEFINITION text is gathered together with the following
** lines that start with a space. For SOURCE only the following lines that
** start with a space are gathered; the SOURCE line itself is not included.
**
** @param [u] entry [EmbPBtreeEntry] Entry object receiving id and position
** @param [u] inf [AjPFile] Input file positioned at the start of an entry
** @return [AjBool] ajFalse if the end of file was reached while reading
******************************************************************************/

static AjBool dbxflat_ParseGenbank(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line    = NULL;
    ajlong pos     = 0L;
    AjBool ret     = ajTrue;
    AjPStr sumline = NULL;

    line    = ajStrNewC("");
    sumline = ajStrNew();

    while(!ajStrPrefixC(line,"//") && ret)
    {
        if(ajStrPrefixC(line,"LOCUS"))
        {
            entry->fpos = pos;
            ajFmtScanS(line,"%*S%S",&entry->id);
        }

        if(svfield)
            if(ajStrPrefixC(line,"VERSION"))
                embBtreeGenBankAC(line,svfield->data);

        if(accfield)
            if(ajStrPrefixC(line,"ACCESSION"))
                embBtreeGenBankAC(line,accfield->data);

        if(keyfield)
            if(ajStrPrefixC(line,"KEYWORDS"))
            {
                ajStrAssignS(&sumline,line);
                ret = ajReadlineTrim(inf,&line);

                while(ret && *MAJSTRGETPTR(line)==' ')
                {
                    ajStrAppendS(&sumline,line);
                    ret = ajReadlineTrim(inf,&line);
                }

                ajStrRemoveWhiteExcess(&sumline);
                embBtreeGenBankKW(sumline,keyfield->data,keyfield->len);

                /* 'line' already holds the next unprocessed line */
                continue;
            }

        if(desfield)
            if(ajStrPrefixC(line,"DEFINITION"))
            {
                ajStrAssignS(&sumline,line);
                ret = ajReadlineTrim(inf,&line);

                while(ret && *MAJSTRGETPTR(line)==' ')
                {
                    ajStrAppendS(&sumline,line);
                    ret = ajReadlineTrim(inf,&line);
                }

                ajStrRemoveWhiteExcess(&sumline);
                embBtreeGenBankDE(sumline,desfield->data,desfield->len);

                /* 'line' already holds the next unprocessed line */
                continue;
            }

        if(orgfield)
            if(ajStrPrefixC(line,"SOURCE"))
            {
                /* Start from an empty buffer: without this, text gathered
                   for KEYWORDS or DEFINITION earlier in the same entry
                   would be passed on as taxonomy. */
                ajStrAssignC(&sumline,"");

                ret = ajReadlineTrim(inf,&line);

                /* Terminate the first continuation line with ';' */
                ajStrAppendC(&line,";");

                while(ret && *MAJSTRGETPTR(line)==' ')
                {
                    ajStrAppendS(&sumline,line);
                    ret = ajReadlineTrim(inf,&line);
                }

                ajStrRemoveWhiteExcess(&sumline);
                embBtreeGenBankTX(sumline,orgfield->data,orgfield->len);

                /* 'line' already holds the next unprocessed line */
                continue;
            }

        /* Remember where the next line starts, then read it */
        pos = ajFileResetPos(inf);

        if(!ajReadlineTrim(inf,&line))
            ret = ajFalse;
    }

    ajStrDel(&line);
    ajStrDel(&sumline);

    return ret;
}