/* @funcstatic dbxflat_ParseEmbl **********************************************
**
** Reads lines from an EMBL-style flat file until a line starting with "//"
** has been consumed, passing the recognised line types to the B-tree
** field extractors selected by the do_* flags of the entry.
**
** @param [u] entry [EmbPBtreeEntry] Entry receiving fpos, id and the
**                                   optional sv/ac/kw/de/tx data
** @param [u] inf [AjPFile] Input file
**
** @return [AjBool] ajTrue once the "//" line was read, ajFalse if a read
**                  failed before one was seen
** @@
******************************************************************************/

static AjBool dbxflat_ParseEmbl(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr rdline = ajStrNewC("");
    ajlong offset = 0L;

    /* The buffer starts empty, so the first pass is unconditional. */
    do
    {
        /* Value recorded before the read; stored as fpos for ID lines. */
        offset = ajFileResetPos(inf);

        if(!ajReadlineTrim(inf, &rdline))
        {
            ajStrDel(&rdline);

            return ajFalse;
        }

        if(ajStrPrefixC(rdline, "ID"))
        {
            entry->fpos = offset;

            /* Second token of the ID line, minus any trailing ';'. */
            ajFmtScanS(rdline, "%*S%S", &entry->id);
            ajStrTrimEndC(&entry->id, ";");

            if(entry->do_sv)
                embBtreeEmblSV(rdline, entry->sv);
        }

        /* "IV" and "PA" are the emblcds database spellings. */
        if(entry->do_sv &&
           (ajStrPrefixC(rdline, "SV") || ajStrPrefixC(rdline, "IV")))
            embBtreeEmblAC(rdline, entry->sv);

        if(entry->do_accession &&
           (ajStrPrefixC(rdline, "AC") || ajStrPrefixC(rdline, "PA")))
            embBtreeEmblAC(rdline, entry->ac);

        if(entry->do_keyword && ajStrPrefixC(rdline, "KW"))
            embBtreeEmblKW(rdline, entry->kw, entry->kwlen);

        if(entry->do_description && ajStrPrefixC(rdline, "DE"))
            embBtreeEmblDE(rdline, entry->de, entry->delen);

        if(entry->do_taxonomy &&
           (ajStrPrefixC(rdline, "OC") || ajStrPrefixC(rdline, "OS")))
            embBtreeEmblTX(rdline, entry->tx, entry->txlen);
    }
    while(!ajStrPrefixC(rdline, "//"));

    ajStrDel(&rdline);

    return ajTrue;
}
/* @funcstatic dbxflat_ParseEmbl **********************************************
**
** Consumes one EMBL-style record (up to and including the "//" line) and
** hands the recognised line types to the B-tree extractors for whichever
** of the file-level field objects (svfield, accfield, keyfield, desfield,
** orgfield) are non-NULL.
**
** @param [u] entry [EmbPBtreeEntry] Entry receiving fpos and id
** @param [u] inf [AjPFile] Input file
**
** @return [AjBool] ajTrue once the "//" line was read, ajFalse if a read
**                  failed before one was seen
** @@
******************************************************************************/

static AjBool dbxflat_ParseEmbl(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr record = ajStrNewC("");
    ajlong recpos = 0L;

    for(;;)
    {
        /* Stop as soon as the previously read line was the terminator. */
        if(ajStrPrefixC(record, "//"))
            break;

        /* Value recorded before the read; stored as fpos for ID lines. */
        recpos = ajFileResetPos(inf);

        if(!ajReadlineTrim(inf, &record))
        {
            ajStrDel(&record);

            return ajFalse;
        }

        if(ajStrPrefixC(record, "ID"))
        {
            entry->fpos = recpos;

            /* Second token of the ID line, minus any trailing ';'. */
            ajFmtScanS(record, "%*S%S", &entry->id);
            ajStrTrimEndC(&entry->id, ";");

            if(svfield)
                embBtreeEmblSV(record, svfield->data);
        }

        /* "IV" and "PA" are the emblcds database spellings. */
        if(svfield &&
           (ajStrPrefixC(record, "SV") || ajStrPrefixC(record, "IV")))
            embBtreeEmblAC(record, svfield->data);

        if(accfield &&
           (ajStrPrefixC(record, "AC") || ajStrPrefixC(record, "PA")))
            embBtreeEmblAC(record, accfield->data);

        if(keyfield && ajStrPrefixC(record, "KW"))
            embBtreeEmblKW(record, keyfield->data, keyfield->len);

        if(desfield && ajStrPrefixC(record, "DE"))
            embBtreeEmblDE(record, desfield->data, desfield->len);

        if(orgfield &&
           (ajStrPrefixC(record, "OC") || ajStrPrefixC(record, "OS")))
            embBtreeEmblTX(record, orgfield->data, orgfield->len);
    }

    ajStrDel(&record);

    return ajTrue;
}
/* @funcstatic acdrelations_readtypefile **************************************
**
** Reads a '|'-delimited known-types file into a PKtype object. Lines
** starting with '#' are ignored. For every other line the first three
** tokens are stored as ktype (with '_' and whitespace removed), acdtype
** and edam of a new PKtypedat, and the collected records are converted to
** the dat array of *T with the count stored in n.
**
** @param [u] inf [AjPFile] Input file
** @param [u] T [PKtype*] Object whose n and dat members are filled in
**
** @return [void]
** @@
******************************************************************************/

static void acdrelations_readtypefile(AjPFile inf, PKtype *T)
{
    AjPStr    textline = NULL;
    PKtypedat record   = NULL;
    AjPList   records  = NULL;

    if(!T)
        ajFatal("Null arg error 1 in acdrelations_readtypefile");

    if(!inf)
        ajFatal("Null arg error 3 in acdrelations_readtypefile");

    records  = ajListNew();
    textline = ajStrNew();

    while(ajReadline(inf, &textline))
    {
        /* Anything that is not a comment line becomes one record. */
        if(!ajStrPrefixC(textline, "#"))
        {
            record = ajKtypedatNew();

            /* Token 1: value of the knowntype: attribute. */
            ajStrAssignS(&record->ktype, ajStrParseC(textline, "|"));
            ajStrRemoveSetC(&record->ktype, "_");
            ajStrRemoveWhite(&record->ktype);

            /* Token 2: ACD datatype. */
            ajStrAssignS(&record->acdtype, ajStrParseC(NULL, "|"));

            /* Token 3: EDAM relations: value. */
            ajStrAssignS(&record->edam, ajStrParseC(NULL, "|"));

            ajListPushAppend(records, record);
        }
    }

    /* Hand the records over to the PKtype structure. */
    (*T)->n = ajListToarray(records, (void***) &((*T)->dat));

    ajStrDel(&textline);
    ajListFree(&records);

    return;
}
int main(int argc, char *argv[]) { embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3"); AjPSeqall seqall; AjPSeq seq = NULL; AjPStr inseq = NULL; AjPStr gene = NULL; AjPStr access = NULL; AjBool accid = ajTrue; AjPStr argument = NULL; AjPFile outfile = NULL; AjPStr seqid = NULL; AjPStr restid = NULL; AjBool valid = ajFalse; AjBool isseq = ajFalse; AjBool isgbk = ajFalse; AjPFilebuff buff = NULL; AjPFile tmpfile = NULL; AjPStr tmpname = NULL; AjPStr regexstr = NULL; AjPStrTok token = NULL; AjPRegexp regex = NULL; AjPStr url = NULL; AjPStr base = NULL; AjPStr head = NULL; AjPStr line = NULL; seqall = ajAcdGetSeqall("sequence"); access = ajAcdGetString("access"); gene = ajAcdGetString("gene"); argument = ajAcdGetString("argument"); accid = ajAcdGetBoolean("accid"); outfile = ajAcdGetOutfile("outfile"); if( ajStrMatchC(access, "translation") || ajStrMatchC(access, "get_exon") || ajStrMatchC(access, "get_exons") || ajStrMatchC(access, "get_cdsseq") || ajStrMatchC(access, "get_gbkseq") || ajStrMatchC(access, "get_geneseq") || ajStrMatchC(access, "get_intron") || ajStrMatchC(access, "getseq") || ajStrMatchC(access, "seq") || ajStrMatchC(access, "around_startcodon") || ajStrMatchC(access, "around_stopcodon") || ajStrMatchC(access, "before_startcodon") || ajStrMatchC(access, "before_stopcodon") || ajStrMatchC(access, "after_startcodon") || ajStrMatchC(access, "after_stopcodon") ) { isseq = ajTrue; } else if(ajStrMatchC(access, "annotate") || ajStrMatchC(access, "output")) { isgbk = ajTrue; } else { ajFmtPrintF(outfile, "gene,%S\n", access); } base = ajStrNewC("rest.g-language.org"); ajStrExchangeCC(&argument, " ", "/"); ajStrExchangeCC(&argument, ",", "/"); ajStrExchangeCC(&argument, "\t", "/"); ajStrExchangeCC(&argument, "\r", "/"); ajStrExchangeCC(&argument, "\n", "/"); if(ajStrMatchC(gene, "*")) { ajStrInsertK(&gene, 0, '.'); } if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::")) { ajStrExchangeCC(&gene, "@", ""); ajStrExchangeCC(&gene, "list::", ""); 
ajStrAssignS(&tmpname, gene); tmpfile = ajFileNewInNameS(tmpname); if(!tmpfile) { ajDie("List file (%S) open error\n", tmpname); } gene = ajStrNew(); while(ajReadline(tmpfile, &line)) { ajStrAppendS(&gene, line); } ajFileClose(&tmpfile); ajStrDel(&tmpname); ajStrDel(&line); } tmpname = ajStrNew(); gAssignUniqueName(&tmpname); while(ajSeqallNext(seqall, &seq)) { inseq = ajStrNew(); if(!accid) { if(gFormatGenbank(seq, &inseq)) { tmpfile = ajFileNewOutNameS(tmpname); if(!tmpfile) { ajDie("Output file (%S) open error\n", tmpname); } ajFmtPrintF(tmpfile, "%S", inseq); ajFileClose(&tmpfile); ajFmtPrintS(&url, "http://%S/upload/upl.pl", base); gFilePostSS(url, tmpname, &restid); ajStrDel(&url); ajSysFileUnlinkS(tmpname); } else { ajWarn("Sequence does not have features\n" "Proceeding with sequence accession ID\n"); accid = ajTrue; } } ajStrAssignS(&seqid, ajSeqGetAccS(seq)); if(ajStrGetLen(seqid) == 0) { ajStrAssignS(&seqid, ajSeqGetNameS(seq)); } if(ajStrGetLen(seqid) == 0) { ajWarn("No valid header information\n"); } if(accid) { ajStrAssignS(&restid, seqid); if(ajStrGetLen(seqid) == 0) { ajDie("Cannot proceed without header with -accid\n"); } if(!gValID(seqid)) { ajDie("Invalid accession ID:%S, exiting\n", seqid); } } url = ajStrNew(); if(isgbk) { ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access); } else { ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument); } if(!gFilebuffURLS(url, &buff)) { ajDie("GET error from %S\n", url); } while(ajBuffreadLine(buff, &line)) { if(isgbk){ ajFmtPrintF(outfile, "%S", line); continue; } ajStrRemoveLastNewline(&line); regex = ajRegCompC("^>"); if(ajRegExec(regex, line)) { head = ajStrNew(); ajStrAssignS(&head, line); ajStrTrimStartC(&head, ">"); valid = ajFalse; token = ajStrTokenNewC(ajStrNewS(gene), " ,\t\r\n"); while(ajStrTokenNextParse(token, ®exstr)) { if(ajStrGetLen(regexstr)) { regex = ajRegComp(regexstr); if(ajRegExec(regex, line)) { valid = ajTrue; if(ajStrIsAlnum(regexstr)) { 
ajStrExchangeSC(&gene, regexstr, ""); } } ajRegFree(®ex); } } } else { if(valid) { if(isseq) { ajStrFmtWrap(&line, 60); ajFmtPrintF(outfile, ">%S\n%S\n", head, line); } else { ajFmtPrintF(outfile, "%S,%S\n", head, line); } valid = ajFalse; } } } ajFileClose(&outfile); ajStrDel(&restid); ajStrDel(&seqid); ajStrDel(&inseq); } ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&access); ajStrDel(&gene); embExit(); }
/* @prog ssematch *********************************************************** ** ** Searches a DCF file (domain classification file) for secondary structure ** matches. ** ****************************************************************************/ int main(int argc, char **argv) { /* Variables declarations */ AjPFile dcfin = NULL; /* Domain classification file */ AjPFile ssin = NULL; /* Secondary structure input file*/ AjPMatrixf matrix = NULL; /* Substitution matrix */ AjPFile out_ss = NULL; /* For ss top matches*/ AjPFile out_se = NULL; /* For se top matches*/ AjPFile outfile = NULL; /* Output file*/ AjPFile logf = NULL; /* Log file */ float gapopen_sss = 0.0; /* Gap insertion penalty */ float gapopen_sse = 0.0; float gapopen = 0.0; float gapextend_sss = 0.0; /* Gap extension penalty */ float gapextend_sse = 0.0; float gapextend = 0.0; ajint max_hits = 0; /* number of top alignments to display*/ ajint mode = 0; ajint x = 0; AjPScop temp_scop = NULL; /* scop object pointer*/ AjPList scop_list = NULL; /* list of scop objects for entire domain classification file */ AjIList iter = NULL; AjPStr msg = NULL; /* Pointer to String used for messages */ AjPStr line = NULL; AjPStr qse = NULL; /* query secondary structure elements*/ AjPStr qss = NULL; /* query secondary structure (by residue)*/ AjPSeq q3se = NULL; /* query secondary structure elements, 3-letter code*/ AjPSeq q3ss = NULL; /* query secondary structure (by residue), 3-letter code*/ AjPSeq query = NULL; /* Read data from acd */ embInitPV("ssematch",argc,argv,"DOMAINATRIX",VERSION); dcfin = ajAcdGetInfile("dcfinfile"); ssin = ajAcdGetInfile("ssinfile"); max_hits = ajAcdGetInt("maxhits"); matrix = ajAcdGetMatrixf("datafile"); gapopen_sss = ajAcdGetFloat("rgapopen"); gapextend_sss = ajAcdGetFloat("rgapextend"); gapopen_sse = ajAcdGetFloat("egapopen"); gapextend_sse = ajAcdGetFloat("egapextend"); out_ss = ajAcdGetOutfile("outssfile"); out_se = ajAcdGetOutfile("outsefile"); logf = ajAcdGetOutfile("logfile"); /* 
Create list of scop objects for entire input domain classification file. */ scop_list = ajListNew(); while((temp_scop = (ajScopReadCNew(dcfin, "*")))) ajListPushAppend(scop_list,temp_scop); /* Error handing if domain classification file was empty. */ if(!(ajListGetLength(scop_list))) { ajWarn("Empty list from scop input file\n"); ajFileClose(&dcfin); ajFileClose(&ssin); ajMatrixfDel(&matrix); ajFileClose(&out_ss); ajFileClose(&out_se); ajFileClose(&logf); while(ajListPop(scop_list, (void *) &temp_scop)) ajScopDel(&temp_scop); ajListFree(&scop_list); ajListIterDel(&iter); ajExit(); return 1; } /* Error handling in case of empty query file. */ if(ssin == NULL) { ajWarn("Empty secondary structure query file\n"); ajFileClose(&dcfin); ajFileClose(&ssin); ajMatrixfDel(&matrix); ajFileClose(&out_ss); ajFileClose(&out_se); ajFileClose(&logf); while(ajListPop(scop_list, (void *) &temp_scop)) ajScopDel(&temp_scop); ajListFree(&scop_list); ajListIterDel(&iter); ajExit(); return 1; } /* Assign sequences in query file to sequence objects. */ qse = ajStrNew(); qss = ajStrNew(); while(ajReadlineTrim(ssin,&line)) { /* SE string */ if(ajStrPrefixC(line,"SE")) { ajFmtScanS(line, "%*s %S", &qse); /* Convert this string to 3-letter code & then convert to AjPSeq object. */ q3se = ssematch_convertbases(qse); } /* SS string */ else if(ajStrPrefixC(line,"SS")) { while((ajReadlineTrim(ssin,&line)) && !ajStrPrefixC(line,"XX")) ajStrAppendS(&qss,line); ajStrRemoveWhite(&qss); /* Convert this string to 3-letter code & then to AjPSeq object. */ q3ss = ssematch_convertbases(qss); } } /* For se & then for ss, modes 0 & 1. */ for(mode = 0; mode <= 1; mode++) { /* Assign arguments for alignment function. */ if (mode == 0) { query = q3se; gapopen = gapopen_sse; gapextend = gapextend_sse; outfile = out_se; } else if(mode == 1) { query = q3ss; gapopen = gapopen_sss; gapextend = gapextend_sss; outfile = out_ss; } /* Iterate through list of scop objects & calculate alignment scores. 
*/ iter=ajListIterNew(scop_list); while((temp_scop=(AjPScop)ajListIterGet(iter))) { /* The function extracts the se (mode 0) or ss (mode 1) subject sequences from the scop object, performs a Needleman-Wunsch global alignment with the query sequence & allocates the score to the Score element of the scop object*/ if(!(ssematch_NWScore(temp_scop , query, mode, matrix, gapopen, gapextend))) { ajFmtPrintF(logf, "%-15s\n", "ALIGNMENT"); ajFmtPrintF(logf, "Could not align sequence in scop domain %S\n ", temp_scop->Entry); ajFmtPrintS(&msg, "Could not align sequence in scop domain %S\n ", temp_scop->Entry); ajWarn(ajStrGetPtr(msg)); continue; } } ajListIterDel(&iter); temp_scop = NULL; /* Sort list of Scop objects by Score */ ajListSort(scop_list, ssematch_CompScoreInv); iter=ajListIterNew(scop_list); /* Write top-scoring hits to outfile. */ for(x=0; x < max_hits; x++ ) { temp_scop=(AjPScop)ajListIterGet(iter); /* Print score to output file. */ ajFmtPrintF(outfile, "XX ALIGNMENT SCORE %.3f\nXX\n", temp_scop->Score); /* Could also write alignment - later modification. */ if(!ajScopWrite(outfile, temp_scop)) ajFatal("Could not write output file %S\n", outfile); } ajListIterDel(&iter); temp_scop = NULL; } /* Memoryt management. */ ajFileClose(&dcfin); ajFileClose(&ssin); ajMatrixfDel(&matrix); ajFileClose(&out_ss); ajFileClose(&out_se); ajFileClose(&logf); while(ajListPop(scop_list, (void *) &temp_scop)) ajScopDel(&temp_scop); ajListFree(&scop_list); ajStrDel(&msg); ajStrDel(&line); ajStrDel(&qse); ajStrDel(&qss); ajSeqDel(&q3se); ajSeqDel(&q3ss); ajExit(); return 0; }
/* @funcstatic domainalign_ProcessStampFile ***********************************
**
** Converts a STAMP alignment file into an annotated alignment file.
**
** Input lines are skipped until the first line for which
** ajStrGetCharPos(line, 1) is '\0' (presumably a blank line). If no such
** line exists a warning is issued, a note is written to the log file and
** no output file is created. Otherwise the domain classification records
** are written, followed by the reformatted alignment blocks.
**
** @param [r] in [AjPStr] Name of STAMP input file
** @param [r] out [AjPStr] Name of alignment file for output
** @param [r] domain [AjPDomain] Domain being aligned
** @param [r] noden [ajint] Node-level of alignment (1-4 for SCOP,
**                          5-9 otherwise)
** @param [u] logf [AjPFile] Log file
**
** @return [void]
** @@
******************************************************************************/

static void domainalign_ProcessStampFile(AjPStr in, AjPStr out,
                                         AjPDomain domain, ajint noden,
                                         AjPFile logf)
{
    AjPFile dst   = NULL;   /* Output file pointer. */
    AjPFile src   = NULL;   /* Input file pointer. */
    AjPStr token  = NULL;   /* First whitespace-delimited token of a line. */
    AjPStr domid  = NULL;   /* Leading 7 characters of that token. */
    AjPStr seqtxt = NULL;   /* Columns 13-69 of a line. */
    AjPStr rdline = NULL;   /* Line of text from input file. */

    /* Position within the repeating 3-part cycle of the input:
       1 = numbering and protein sequences, 2 = secondary structure,
       3 = Very/Less/Post similar records. */
    ajint section   = 1;
    AjBool gotstart = ajFalse;

    rdline = ajStrNew();
    token  = ajStrNew();
    domid  = ajStrNew();
    seqtxt = ajStrNew();

    src = ajFileNewInNameS(in);

    if(!src)
        ajFatal("Could not open input file in domainalign_ProcessStampFile");

    /* Skip the preamble: stop reading at the first separator line. */
    while(!gotstart && ajReadlineTrim(src, &rdline))
        if(ajStrGetCharPos(rdline, 1) == '\0')
            gotstart = ajTrue;

    if(!gotstart)
    {
        ajWarn("\n***********************************************\n"
               "* STAMP was called but output file was EMPTY! *\n"
               "* NO OUTPUT FILE GENERATED FOR THIS NODE. *\n"
               "***********************************************\n");
        ajFmtPrintF(logf, "STAMP called but output file empty. "
                    "No output file for this node!");
    }
    else
    {
        dst = ajFileNewOutNameS(out);

        if(!dst)
            ajFatal("Could not open output file in "
                    "domainalign_ProcessStampFile");

        /* Domain classification records followed by the node identifier. */
        if(domain->Type == ajSCOP)
        {
            ajFmtPrintF(dst, "# TY SCOP\n# XX\n");
            ajFmtPrintF(dst, "# CL %S", domain->Scop->Class);
            ajFmtPrintSplit(dst, domain->Scop->Fold,
                            "\n# XX\n# FO ", 75, " \t\n\r");
            ajFmtPrintSplit(dst, domain->Scop->Superfamily,
                            "# XX\n# SF ", 75, " \t\n\r");
            ajFmtPrintSplit(dst, domain->Scop->Family,
                            "# XX\n# FA ", 75, " \t\n\r");
            ajFmtPrintF(dst, "# XX\n");

            switch(noden)
            {
                case 1:
                    ajFmtPrintF(dst, "# SI %d\n# XX",
                                domain->Scop->Sunid_Class);
                    break;
                case 2:
                    ajFmtPrintF(dst, "# SI %d\n# XX",
                                domain->Scop->Sunid_Fold);
                    break;
                case 3:
                    ajFmtPrintF(dst, "# SI %d\n# XX",
                                domain->Scop->Sunid_Superfamily);
                    break;
                case 4:
                    ajFmtPrintF(dst, "# SI %d\n# XX",
                                domain->Scop->Sunid_Family);
                    break;
                default:
                    ajFatal("Node number error in "
                            "domainalign_ProcessStampFile");
                    break;
            }
        }
        else
        {
            ajFmtPrintF(dst, "# TY CATH\n# XX\n");
            ajFmtPrintF(dst, "# CL %S", domain->Cath->Class);
            ajFmtPrintSplit(dst, domain->Cath->Architecture,
                            "\n# XX\n# AR ", 75, " \t\n\r");
            ajFmtPrintSplit(dst, domain->Cath->Topology,
                            "# XX\n# TP ", 75, " \t\n\r");
            ajFmtPrintSplit(dst, domain->Cath->Superfamily,
                            "# XX\n# SF ", 75, " \t\n\r");
            ajFmtPrintF(dst, "# XX\n");

            switch(noden)
            {
                case 5:
                    ajFmtPrintF(dst, "# SI %d\n# XX",
                                domain->Cath->Class_Id);
                    break;
                case 6:
                    ajFmtPrintF(dst, "# SI %d\n# XX",
                                domain->Cath->Arch_Id);
                    break;
                case 7:
                    ajFmtPrintF(dst, "# SI %d\n# XX",
                                domain->Cath->Topology_Id);
                    break;
                case 8:
                    ajFmtPrintF(dst, "# SI %d\n# XX",
                                domain->Cath->Superfamily_Id);
                    break;
                case 9:
                    ajFmtPrintF(dst, "# SI %d\n# XX",
                                domain->Cath->Family_Id);
                    break;
                default:
                    ajFatal("Node number error in "
                            "domainalign_ProcessStampFile");
                    break;
            }
        }

        /* Remainder of the input file. */
        while(ajReadlineTrim(src, &rdline))
        {
            /* A separator line advances the cycle 1 -> 2 -> 3 -> 1. */
            if(ajStrGetCharPos(rdline, 1) == '\0')
            {
                section = (section % 3) + 1;
                continue;
            }

            switch(section)
            {
                case 1:
                    /* Numbering line and protein sequences. */
                    if(ajStrPrefixC(rdline, "Number"))
                    {
                        /* The number line is copied as it is. */
                        ajFmtPrintF(dst, "\n# %7s %S\n", " ", rdline);
                    }
                    else
                    {
                        /* Only the first 7 characters of the domain
                           identifier code are kept. */
                        ajFmtScanS(rdline, "%S", &token);
                        ajStrAssignSubS(&domid, token, 0, 6);

                        /* Sequence columns; blanks become 'X'. */
                        ajStrAssignSubS(&seqtxt, rdline, 13, 69);
                        ajStrExchangeSetCC(&seqtxt, " ", "X");
                        ajFmtPrintF(logf, "Replaced ' ' in STAMP alignment "
                                    "with 'X'\n");
                        ajStrFmtUpper(&seqtxt);

                        ajFmtPrintF(dst, "%-15S%7d %S%7d\n",
                                    domid, 0, seqtxt, 0);
                    }

                    break;

                case 2:
                    /* Secondary structure filled with '????' (unwanted). */
                    break;

                default:
                    /* Similarity lines: only "Post" lines are kept, the
                       Very and Less similar lines are ignored. */
                    if(ajStrPrefixC(rdline, "Post"))
                    {
                        ajStrAssignSubS(&seqtxt, rdline, 13, 69);
                        ajFmtPrintF(dst, "%-15s%7s %S\n",
                                    "# Post_similar", " ", seqtxt);
                    }

                    break;
            }
        }
    }

    /* Clean up and close input and output files. */
    ajFileClose(&dst);
    ajFileClose(&src);
    ajStrDel(&rdline);
    ajStrDel(&token);
    ajStrDel(&domid);
    ajStrDel(&seqtxt);

    return;
}
/* @funcstatic domainalign_ProcessTcoffeeFile *********************************
**
** Parses tcoffee output.
**
** The domain classification records are written to the output file
** first. Input lines are then skipped until the first line for which
** ajStrGetCharPos(line, 1) is '\0' (presumably a blank line); from there
** on, lines starting with "CLUSTAL" and further such blank lines are
** dropped, lines starting with a space produce an empty output line, and
** every other line is written as a 7-character domain identifier followed
** by its second whitespace-delimited token.
**
** @param [r] in [AjPStr] Name of TCOFFEE input file
** @param [r] align [AjPStr] Name of sequence alignment file for output
** @param [r] domain [AjPDomain] Domain being aligned
** @param [r] noden [ajint] Node-level of alignment (1-4 for SCOP,
**                          5-9 otherwise)
** @param [r] logf [AjPFile] Log file (not used by this function)
**
** @return [void]
** @@
******************************************************************************/

static void domainalign_ProcessTcoffeeFile(AjPStr in, AjPStr align,
                                           AjPDomain domain, ajint noden,
                                           AjPFile logf)
{
    AjPFile outf = NULL;   /* Output file pointer. */
    AjPFile inf  = NULL;   /* Input file pointer. */
    AjPStr temp1 = NULL;   /* First token of a sequence line. */
    AjPStr temp2 = NULL;   /* First 7 characters of temp1. */
    AjPStr temp3 = NULL;   /* Second token of a sequence line. */
    AjPStr line  = NULL;   /* Line of text from input file. */

    /* Kept for interface compatibility; nothing is logged here. */
    (void) logf;

    /* Initialise strings. */
    line  = ajStrNew();
    temp1 = ajStrNew();
    temp2 = ajStrNew();
    temp3 = ajStrNew();

    /* Open input and output files. */
    if(!(inf=ajFileNewInNameS(in)))
        ajFatal("Could not open input file in "
                "domainalign_ProcessTcoffeeFile");

    if(!(outf=ajFileNewOutNameS(align)))
        ajFatal("Could not open output file in "
                "domainalign_ProcessTcoffeeFile");

    /* Write DOMAIN classification records to file. */
    if((domain->Type == ajSCOP))
    {
        ajFmtPrintF(outf,"# TY SCOP\n# XX\n");
        ajFmtPrintF(outf,"# CL %S",domain->Scop->Class);
        ajFmtPrintSplit(outf,domain->Scop->Fold,
                        "\n# XX\n# FO ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Scop->Superfamily,
                        "# XX\n# SF ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Scop->Family,
                        "# XX\n# FA ",75," \t\n\r");
        ajFmtPrintF(outf,"# XX\n");
    }
    else
    {
        ajFmtPrintF(outf,"# TY CATH\n# XX\n");
        ajFmtPrintF(outf,"# CL %S",domain->Cath->Class);
        ajFmtPrintSplit(outf,domain->Cath->Architecture,
                        "\n# XX\n# AR ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Cath->Topology,
                        "# XX\n# TP ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Cath->Superfamily,
                        "# XX\n# SF ",75," \t\n\r");
        ajFmtPrintF(outf,"# XX\n");
    }

    /* Write the identifier of the node being aligned. The fatal messages
       now name this function (they used to name the STAMP parser). */
    if((domain->Type == ajSCOP))
    {
        if(noden==1)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Scop->Sunid_Class);
        else if(noden==2)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Scop->Sunid_Fold);
        else if(noden==3)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Scop->Sunid_Superfamily);
        else if(noden==4)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Scop->Sunid_Family);
        else
            ajFatal("Node number error in "
                    "domainalign_ProcessTcoffeeFile");
    }
    else
    {
        if(noden==5)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Cath->Class_Id);
        else if(noden==6)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Cath->Arch_Id);
        else if(noden==7)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Cath->Topology_Id);
        else if(noden==8)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Cath->Superfamily_Id);
        else if(noden==9)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Cath->Family_Id);
        else
            ajFatal("Node number error in "
                    "domainalign_ProcessTcoffeeFile");
    }

    /* Start of code for reading input file. */

    /* Ignore everything up to and including the first separator line. */
    while((ajReadlineTrim(inf,&line)))
        if((ajStrGetCharPos(line, 1)=='\0'))
            break;

    /* Read rest of input file. */
    while((ajReadlineTrim(inf,&line)))
    {
        if((ajStrGetCharPos(line, 1)=='\0'))
            continue;
        else if(ajStrPrefixC(line,"CLUSTAL"))
            continue;
        /* A line starting with a space ends a block of sequences. */
        else if(ajStrPrefixC(line," "))
            ajFmtPrintF(outf,"\n");
        else
        {
            /* Read the domain identifier and the sequence; only the
               first 7 characters of the identifier are kept.
               NOTE(review): the scan result is not checked, so a line
               with a single token would reuse the previous sequence. */
            ajFmtScanS(line, "%S %S", &temp1,&temp3);
            ajStrAssignSubS(&temp2, temp1, 0, 6);

            /* Write domain id code and sequence out. */
            ajFmtPrintF(outf,"%-13S%S\n",temp2, temp3);
        }
    }

    /* Clean up and close input and output files. */
    ajFileClose(&outf);
    ajFileClose(&inf);
    ajStrDel(&line);
    ajStrDel(&temp1);
    ajStrDel(&temp2);
    ajStrDel(&temp3);

    /* All done. */
    return;
}
/* @prog pdbplus **************************************************************
**
** For each clean coordinate file (CCF) in the input list, locates the
** corresponding PDB file, runs STRIDE and/or NACCESS on it (depending on
** the first character of the "mode" option: anything but '2' runs STRIDE,
** anything but '1' runs NACCESS), copies the parsed per-residue values
** into the Pdb object and writes the object to the output directory.
**
** NOTE(review): the STRIDE / NACCESS command lines and the "rm" clean-up
** commands are assembled from file names and run through the shell; file
** names containing shell metacharacters are not escaped.
**
******************************************************************************/

int main(ajint argc, char **argv)
{
    AjPList   ccfin       = NULL;  /* List of CCF (input) files. */
    AjPDir    pdbin       = NULL;  /* Path of pdb input files. */
    AjPStr    pdbprefix   = NULL;  /* Prefix of pdb input files. */
    AjPStr    pdb_name    = NULL;  /* Full name (path/name/extension) of
                                      pdb format input file. */
    AjPDirout ccfout      = NULL;  /* Path of coordinate output file. */
    AjPStr    randomname  = NULL;  /* Name for temp file tempf. */
    AjPStr    ccf_this    = NULL;  /* Name of the current CCF file. */
    AjPStr    exec        = NULL;  /* Clean-up command line. */
    AjPStr    naccess_str = NULL;  /* Name of a NACCESS output file. */
    AjPStr    line        = NULL;
    AjPStr    syscmd      = NULL;  /* Command line arguments. */
    AjPStr   *mode        = NULL;  /* Mode of operation from acd. */

    AjPFile errf     = NULL;  /* pdbplus error file pointer. */
    AjPFile serrf    = NULL;  /* stride error file pointer. */
    AjPFile nerrf    = NULL;  /* naccess error file pointer. */
    AjPFile tempf    = NULL;  /* Temp file for holding STRIDE output. */
    AjPFile ccf_inf  = NULL;  /* Protein coordinate input file. */
    AjPFile ccf_outf = NULL;  /* Protein coordinate output file. */

    AjIList iter = NULL;

    AjBool done_naccess = ajFalse;
    AjBool done_stride  = ajFalse;
    AjBool found        = ajFalse;

    AjPResidue temp_res = NULL;  /* Pointer to Residue object. */
    AjPPdb pdb_old = NULL;       /* Pointer to PDB object - without new
                                    stride elements. */
    AjPPdb pdb     = NULL;       /* Pointer to PDB object. */

    ajint idn       = 0;  /* Chain identifier as a number (1,2,...) */
    ajint chain_num = 0;  /* Chain identifier index (0,1,...). */
    ajint tS        = 0;  /* User-defined threshold size for SSEs. */
    ajint nostride  = 0;  /* No. times stride failed */
    ajint nonaccess = 0;  /* No. times naccess failed */
    ajint nofile    = 0;  /* No. times of file error */

    /* Variables for each item that will be parsed from the ASG line. */
    AjPStr res     = NULL;  /* Residue id from STRIDE ASG line (ALA etc). */
    AjPStr res_num = NULL;  /* PDB residue number from STRIDE ASG line. */
    char   pcid    = ' ';   /* Protein chain identifier from STRIDE or
                               NACCESS output (A,B, etc). */
    char   ss      = ' ';   /* One-letter secondary structure code from
                               STRIDE ASG line. */
    float  ph      = 0.0;   /* Phi angle from STRIDE ASG line. */
    float  ps      = 0.0;   /* Psi angle from STRIDE ASG line. */
    float  sa      = 0.0;   /* Residue solvent accessible area from STRIDE
                               ASG line. */

    /* The ten numeric columns of a NACCESS "RES" line. */
    float f1  = 0;
    float f2  = 0;
    float f3  = 0;
    float f4  = 0;
    float f5  = 0;
    float f6  = 0;
    float f7  = 0;
    float f8  = 0;
    float f9  = 0;
    float f10 = 0;

    /* Allocate strings; this section is used for variables that are
       allocated once only. */
    pdb_name    = ajStrNew();
    res         = ajStrNew();
    res_num     = ajStrNew();
    randomname  = ajStrNew();
    syscmd      = ajStrNew();
    line        = ajStrNew();
    naccess_str = ajStrNew();
    exec        = ajStrNew();

    /* Read data from acd. */
    embInitPV("pdbplus",argc,argv,"STRUCTURE",VERSION);

    ccfin     = ajAcdGetDirlist("ccfinpath");
    pdbin     = ajAcdGetDirectory("pdbindir");
    pdbprefix = ajAcdGetString("pdbprefix");
    ccfout    = ajAcdGetOutdir("ccfoutdir");
    mode      = ajAcdGetList("mode");
    errf      = ajAcdGetOutfile("logfile");

    if(ajStrGetCharFirst(*mode) != '2')
        serrf = ajAcdGetOutfile("slogfile");

    if(ajStrGetCharFirst(*mode) != '1')
        nerrf = ajAcdGetOutfile("nlogfile");

    tS = ajAcdGetInt("thresholdsize");

    ajRandomSeed();
    ajFilenameSetTempname(&randomname);

    /*
    ** Start of main application loop.
    ** Process each PDB/ protein coordinate file (EMBL format) in turn.
    */
    while(ajListPop(ccfin,(void **)&ccf_this))
    {
        /* Open protein coordinate file. If it cannot be opened, write a
           message to the error file, delete ccf_this and continue. */
        if((ccf_inf = ajFileNewInNameS(ccf_this)) == NULL)
        {
            ajWarn("%s%S\n//\n",
                   "clean coordinate file not found: ", ccf_this);
            ajFmtPrintF(errf, "%s%S\n//\n",
                        "clean coordinate file not found: ", ccf_this);
            ajStrDel(&ccf_this);
            nofile++;
            continue;
        }

        ajFmtPrint("Processing %S\n", ccf_this);
        fflush(stdout);

        /* Parse protein coordinate data (from clean format file) into
           AjPPdb object. ajPdbReadAllModelsNew will create the AjPPdb
           object. */
        if(!(pdb_old=ajPdbReadAllModelsNew(ccf_inf)))
        {
            ajWarn("ERROR Clean coordinate file read "
                   "error: %S\n//\n", ccf_this);
            ajFmtPrintF(errf, "ERROR Clean coordinate file read "
                        "error: %S\n//\n", ccf_this);
            ajFileClose(&ccf_inf);
            ajStrDel(&ccf_this);
            nofile++;
            continue;
        }

        ajFileClose(&ccf_inf);
        ajPdbCopy(&pdb, pdb_old);
        ajPdbDel(&pdb_old);

        /* Construct name of corresponding PDB file.
           NACCESS does *not* generate an output file if the path is './'
           e.g. naccess ./1rbp.ent , therefore replace './' with null. */
        ajStrAssignS(&pdb_name, ajDirGetPath(pdbin));

        if(ajStrMatchC(pdb_name, "./") || ajStrMatchC(pdb_name, "."))
            ajStrAssignC(&pdb_name, "");

        ajStrAppendS(&pdb_name, pdbprefix);
        ajStrFmtLower(&pdb->Pdb);
        ajStrAppendS(&pdb_name, pdb->Pdb);
        ajStrAppendC(&pdb_name, ".");
        ajStrAppendS(&pdb_name, ajDirGetExt(pdbin));

        /* Check corresponding PDB file exists for reading. */
        if(!(ajFilenameExistsRead(pdb_name)))
        {
            ajFmtPrintF(errf, "%s%S\n//\n", "PDB file not found: ",
                        pdb_name);
            ajWarn("%s%S\n//\n", "PDB file not found: ", pdb_name);
            ajStrDel(&ccf_this);
            ajPdbDel(&pdb);
            nofile++;
            continue;
        }

        if(ajStrGetCharFirst(*mode) != '2')
        {
            /*
            ** Create a string containing the STRIDE command line (it
            ** needs PDB file name & name of temp output file).
            ** Call STRIDE through the shell.
            */
            ajFmtPrintS(&syscmd, "%S %S -f%S >> %s 2>&1",
                        ajAcdGetpathC("stride"),
                        pdb_name, randomname, ajFileGetNameC(serrf));
            ajFmtPrint("%S %S -f%S >> %s 2>&1\n",
                       ajAcdGetpathC("stride"),
                       pdb_name, randomname, ajFileGetNameC(serrf));
            system(ajStrGetPtr(syscmd));

            /* Open the stride output file */
            if(((tempf = ajFileNewInNameS(randomname)) == NULL))
            {
                ajWarn("%s%S\n//\n", "no stride output for: ", pdb_name);
                ajFmtPrintF(errf, "%s%S\n//\n", "no stride output for: ",
                            pdb_name);
                nostride++;
                ajStrDel(&ccf_this);
                ajPdbDel(&pdb);
                continue;
            }
            else
                ajFmtPrintF(errf, "%s%S\n//\n", "stride output for: ",
                            pdb_name);

            done_stride = ajFalse;

            /* Parse STRIDE output from temp output file a line at a
               time. */
            while(ajReadlineTrim(tempf,&line))
            {
                if(ajStrPrefixC(line,"ASG"))
                {
                    ajFmtScanS(line,
                               "%*S %S %c %S %*d %c %*S %f %f %f %*S",
                               &res, &pcid, &res_num, &ss, &ph, &ps, &sa);

                    /*
                    ** Populate pdbplus object with the data from this
                    ** parsed line. This means first identifying the
                    ** chain, then finding the residue.
                    */

                    /* Determine the chain number. '-' is not recognised
                       as a chain identifier, so change '-' to '.' */
                    if(pcid == '-')
                        pcid = '.';

                    /* Get chain number from the chain identifier. */
                    if(!ajPdbChnidToNum(pcid, pdb, &idn))
                    {
                        ajWarn("Could not convert chain id %c to chain"
                               " number in pdb file %S\n//\n",
                               pcid, pdb_name);
                        ajFmtPrintF(errf, "Could not convert chain id %c "
                                    "to chain number in pdb file %S\n//\n",
                                    pcid, pdb_name);
                        continue;
                    }

                    /*
                    ** The chain number that will get written starts at 1,
                    ** but we want an index into an array which must start
                    ** at 0, so subtract 1 from the chain number to get
                    ** the index.
                    */
                    chain_num = idn-1;

                    /*
                    ** Iterate through the list of residues in the Pdb
                    ** object; found switches to true when the first
                    ** residue corresponding to the line is found.
                    */
                    iter = ajListIterNewread(
                               pdb->Chains[chain_num]->Residues);
                    found = ajFalse;

                    while((temp_res = (AjPResidue)ajListIterGet(iter)))
                    {
                        /* If we have found the residue we want */
                        if((ajStrMatchS(res_num, temp_res->Pdb) &&
                            ajStrMatchS(res, temp_res->Id3)))
                        {
                            done_stride = ajTrue;
                            found = ajTrue;
                            temp_res->eStrideType = ss;
                            temp_res->Phi  = ph;
                            temp_res->Psi  = ps;
                            temp_res->Area = sa;
                        }
                        /* If the matching residue has been processed
                           move on to next ASG line, next residue. */
                        else if(found == ajTrue)
                            break;
                        else
                            /* Matching residue not found yet. */
                            continue;
                    }

                    ajListIterDel(&iter);
                } /* End of if ASG loop. */
            } /* End of while line loop. */

            if(done_stride)
                ajFmtPrintF(errf, "%s%S\n//\n", "stride data for: ",
                            pdb_name);
            else
            {
                ajFmtPrintF(errf, "%s%S\n//\n", "no stride data for: ",
                            pdb_name);
                ajWarn("%s%S\n//\n", "no stride data for: ", pdb_name);
                nostride++;
            }

            /* Close STRIDE temp file. & tidy up. */
            ajFileClose(&tempf);

            /* Remove temporary file (stride output file). */
            ajFmtPrintS(&exec, "rm %S", randomname);
            ajSysSystem(exec);

            /*
            ** Calculate element serial numbers (eStrideNum) & amend
            ** residue objects, count no's of elements and amend chain
            ** object (numHelices, num Strands).
            */
            pdbplus_sort(pdb, tS);
        }

        if(ajStrGetCharFirst(*mode) != '1')
        {
            /*
            ** Create a string containing the NACCESS command line (it
            ** needs PDB file name & name of temp output file) & call
            ** NACCESS.
            ** If e.g. /data/structure/pdbfred.ent was parsed and the
            ** program was run from /stuff, then /stuff/fred.asa and
            ** /stuff/fred.rsa would be written. These must be deleted
            ** once parsed (only use the .rsa file here).
            */
            ajFmtPrintS(&syscmd, "%S %S >> %s 2>&1",
                        ajAcdGetpathC("naccess"), pdb_name,
                        ajFileGetNameC(nerrf));
            ajFmtPrint("%S %S >> %s 2>&1\n",
                       ajAcdGetpathC("naccess"), pdb_name,
                       ajFileGetNameC(nerrf));
            system(ajStrGetPtr(syscmd));

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".rsa");

            /* Open the NACCESS output file. */
            if(((tempf = ajFileNewInNameS(naccess_str)) == NULL))
            {
                ajFmtPrintF(errf, "%s%S\n//\n", "no naccess output for: ",
                            pdb_name);
                ajWarn("%s%S\n//\n", "no naccess output for: ", pdb_name);
                nonaccess++;
                ajStrDel(&ccf_this);
                ajPdbDel(&pdb);
                continue;
            }
            else
                ajFmtPrintF(errf, "%s%S\n//\n", "naccess output for: ",
                            pdb_name);

            done_naccess = ajFalse;

            /* Parse NACCESS output from temp output file a line at a
               time. */
            while(ajReadlineTrim(tempf,&line))
            {
                if(ajStrPrefixC(line,"RES"))
                {
                    /* Column 8 holds the chain identifier (blank when
                       there is none). The accessor is used instead of
                       indexing the buffer directly so that a truncated
                       line cannot be read out of bounds. */
                    pcid = ajStrGetCharPos(line, 8);

                    /* Read data from lines. */
                    if(pcid == ' ')
                        ajFmtScanS(line, "%*S %S %S %f %f %f "
                                   "%f %f %f %f %f %f %f",
                                   &res, &res_num,
                                   &f1, &f2, &f3, &f4, &f5,
                                   &f6, &f7, &f8, &f9, &f10);
                    else
                        ajFmtScanS(line, "%*S %S %*c %S %f %f "
                                   "%f %f %f %f %f %f %f %f",
                                   &res, &res_num,
                                   &f1, &f2, &f3, &f4, &f5,
                                   &f6, &f7, &f8, &f9, &f10);

                    /* Identify the chain, then find all the residues
                       corresponding to the line. */

                    /* Get the chain number from the chain identifier. */
                    if(!ajPdbChnidToNum(pcid, pdb, &idn))
                    {
                        ajWarn("Could not convert chain id %c to chain"
                               " number in pdb file %S\n//\n",
                               pcid, pdb_name);
                        ajFmtPrintF(errf, "Could not convert chain id"
                                    " %c to chain number in pdb file "
                                    "%S\n//\n",
                                    pcid, pdb_name);
                        continue;
                    }

                    /*
                    ** Chain number will start at 1, but we want an index
                    ** into an array which must start at 0, so subtract 1
                    ** from the chain number to get the index.
                    */
                    chain_num = idn-1;

                    /*
                    ** Iterate through the list of residues in the Pdb
                    ** object, temp_res is an AjPResidue used to point to
                    ** the current residue.
                    ** found switches to true when the first residue
                    ** corresponding to the line is found.
                    */
                    iter = ajListIterNewread(
                               pdb->Chains[chain_num]->Residues);
                    found = ajFalse;

                    while((temp_res = (AjPResidue)ajListIterGet(iter)))
                    {
                        /* If we have found the residue we want, write
                           the residue object. */
                        if((ajStrMatchS(res_num, temp_res->Pdb) &&
                            ajStrMatchS(res, temp_res->Id3)))
                        {
                            found = ajTrue;
                            done_naccess = ajTrue;
                            temp_res->all_abs  = f1;
                            temp_res->all_rel  = f2;
                            temp_res->side_abs = f3;
                            temp_res->side_rel = f4;
                            temp_res->main_abs = f5;
                            temp_res->main_rel = f6;
                            temp_res->npol_abs = f7;
                            temp_res->npol_rel = f8;
                            temp_res->pol_abs  = f9;
                            temp_res->pol_rel  = f10;
                        }
                        /* If the matching residues have all been
                           processed, move on to next RES line. */
                        else if(found == ajTrue)
                            break;
                        else
                            /* Matching residues not found yet, move on
                               to next residue. */
                            continue;
                    }

                    ajListIterDel(&iter);
                }
            }

            if(done_naccess)
                ajFmtPrintF(errf, "%s%S\n//\n", "naccess data for: ",
                            pdb_name);
            else
            {
                ajFmtPrintF(errf, "%s%S\n//\n", "no naccess data for: ",
                            pdb_name);
                ajWarn("%s%S\n//\n", "no naccess data for: ", pdb_name);
                nonaccess++;
            }

            /* Remove temporary files (naccess output files). */
            ajFileClose(&tempf);

            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".asa");
            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".log");
            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);
        }

        /* Open CCF (output) file. */
        ccf_outf = ajFileNewOutNameDirS(pdb->Pdb, ccfout);

        if(!ccf_outf)
        {
            /* Report the failure instead of writing to a NULL file. */
            ajWarn("%s%S\n//\n", "Could not open results file for: ",
                   pdb->Pdb);
            ajFmtPrintF(errf, "%s%S\n//\n",
                        "Could not open results file for: ", pdb->Pdb);
            nofile++;
        }
        /* Write AjPPdb object to the output file in clean format. */
        else if(!ajPdbWriteAll(ccf_outf, pdb))
        {
            ajWarn("%s%S\n//\n","Could not write results file for: ",
                   pdb->Pdb);
            ajFmtPrintF(errf,"%s%S\n//\n",
                        "Could not write results file for ", pdb->Pdb);
        }

        ajFileClose(&ccf_outf);
        ajPdbDel(&pdb);
        ajStrDel(&ccf_this);
    } /* End of main application loop. */

    ajFmtPrint("STRIDE failures: %d\n", nostride);
    ajFmtPrint("NACCESS failures: %d\n", nonaccess);
    ajFmtPrintF(errf, "\n\nSTRIDE failures: %d\nNACCESS failures: %d\n",
                nostride, nonaccess);

    ajListFree(&ccfin);
    ajDirDel(&pdbin);
    ajStrDel(&pdbprefix);
    ajStrDel(&pdb_name);
    ajDiroutDel(&ccfout);
    ajStrDel(&res);
    ajStrDel(&res_num);
    ajStrDel(&randomname);
    ajStrDel(&line);
    ajStrDel(&naccess_str);
    ajStrDel(&exec);
    ajStrDel(&syscmd);

    ajFileClose(&errf);

    /* The log files were only opened for the modes that use them. */
    if(ajStrGetCharFirst(*mode) != '2')
        ajFileClose(&serrf);

    if(ajStrGetCharFirst(*mode) != '1')
        ajFileClose(&nerrf);

    ajStrDel(&mode[0]);
    AJFREE(mode);

    ajExit();

    return 0;
}
/* @funcstatic seqwords_TermsRead *********************************************
**
** Read the next Terms object from a file in embl-like format. The search
** terms are modified with a leading and trailing space.
**
** Lines are consumed until a line starting with "//" is found. "XX" lines
** are skipped. "TY" selects the classification type (SCOP or CATH). "CL",
** "AR" and "TP" are single-line text fields. "FO", "SF" and "FA" are text
** fields that are extended with every following line up to the next "XX"
** line. Each "TE" line adds one search term. Field text is taken from
** offset 3 of the line.
**
** @param [r] inf  [AjPFile]   Input file stream
** @param [w] thys [AjPTerms*] Terms object. A new object is allocated on
**                             every call, including calls that fail.
**
** @return [AjBool] True on success, false if reading failed before the
**                  "//" line was reached
** @@
*****************************************************************************/
static AjBool seqwords_TermsRead(AjPFile inf, AjPTerms *thys)
{
    AjPStr  line       = NULL;     /* Line of text.                       */
    AjPStr  temp       = NULL;     /* Search term currently being built.  */
    AjPList list_terms = NULL;     /* List of keywords for a scop node.   */
    AjBool  ok         = ajFalse;  /* Result of the most recent read.     */
    AjPStr  type       = NULL;     /* Value parsed from the TY line.      */

    /* Memory management */
    (*thys)    = seqwords_TermsNew();
    list_terms = ajListstrNew();
    line       = ajStrNew();
    type       = ajStrNew();

    /* Read first line. */
    ok = ajReadlineTrim(inf,&line);

    while(ok && !ajStrPrefixC(line,"//"))
    {
        if(ajStrPrefixC(line,"XX"))
        {
            ok = ajReadlineTrim(inf,&line);
            continue;
        }
        else if(ajStrPrefixC(line,"TY"))
        {
            ajFmtScanS(line, "%*s %S", &type);

            if(ajStrMatchC(type, "SCOP"))
                (*thys)->Type = ajSCOP;
            else if(ajStrMatchC(type, "CATH"))
                (*thys)->Type = ajCATH;
        }
        else if(ajStrPrefixC(line,"CL"))
        {
            ajStrAssignC(&(*thys)->Class,ajStrGetPtr(line)+3);
            ajStrRemoveWhiteExcess(&(*thys)->Class);
        }
        else if(ajStrPrefixC(line,"AR"))
        {
            ajStrAssignC(&(*thys)->Architecture,ajStrGetPtr(line)+3);
            ajStrRemoveWhiteExcess(&(*thys)->Architecture);
        }
        else if(ajStrPrefixC(line,"TP"))
        {
            ajStrAssignC(&(*thys)->Topology,ajStrGetPtr(line)+3);
            ajStrRemoveWhiteExcess(&(*thys)->Topology);
        }
        else if(ajStrPrefixC(line,"FO"))
        {
            ajStrAssignC(&(*thys)->Fold,ajStrGetPtr(line)+3);

            /* Continuation lines run up to the next XX line. */
            while(ajReadlineTrim(inf,&line))
            {
                if(ajStrPrefixC(line,"XX"))
                    break;

                ajStrAppendC(&(*thys)->Fold,ajStrGetPtr(line)+3);
            }

            ajStrRemoveWhiteExcess(&(*thys)->Fold);
        }
        else if(ajStrPrefixC(line,"SF"))
        {
            ajStrAssignC(&(*thys)->Superfamily,ajStrGetPtr(line)+3);

            while(ajReadlineTrim(inf,&line))
            {
                if(ajStrPrefixC(line,"XX"))
                    break;

                ajStrAppendC(&(*thys)->Superfamily,ajStrGetPtr(line)+3);
            }

            ajStrRemoveWhiteExcess(&(*thys)->Superfamily);
        }
        else if(ajStrPrefixC(line,"FA"))
        {
            ajStrAssignC(&(*thys)->Family,ajStrGetPtr(line)+3);

            while(ajReadlineTrim(inf,&line))
            {
                if(ajStrPrefixC(line,"XX"))
                    break;

                ajStrAppendC(&(*thys)->Family,ajStrGetPtr(line)+3);
            }

            ajStrRemoveWhiteExcess(&(*thys)->Family);
        }
        else if(ajStrPrefixC(line,"TE"))
        {
            /* Copy and clean up term. */
            temp = ajStrNew();
            ajStrAssignC(&temp,ajStrGetPtr(line)+3);
            ajStrRemoveWhiteExcess(&temp);

            /* Append a leading and trailing space to search term. */
            ajStrAppendK(&temp, ' ');
            ajStrInsertC(&temp, 0, " ");

            /* Add the current term to the list. */
            ajListstrPush(list_terms,temp);
        }

        ok = ajReadlineTrim(inf,&line);
    }

    if(!ok)
    {
        /* Clean up, including the TY buffer which was previously leaked. */
        ajListstrFree(&list_terms);
        ajStrDel(&line);
        ajStrDel(&type);

        return ajFalse;
    }

    /* Convert the AjPList of terms to an array of AjPStr's. */
    if(!((*thys)->N=ajListstrToarray((AjPList)list_terms,&(*thys)->Keywords)))
        ajWarn("Zero sized list of terms passed into seqwords_TermsRead");

    /* Clean up. Free the list (not the nodes!). */
    ajListstrFree(&list_terms);
    ajStrDel(&line);
    ajStrDel(&type);

    return ajTrue;
}
AjPPatlistRegex ajPatlistRegexRead (const AjPStr patspec, const AjPStr patname, const AjPStr fmt, ajuint type, AjBool upper, AjBool lower) { AjPPatlistRegex patlist = NULL; AjPStr line = NULL; AjPStr pat = NULL; AjPStr name = NULL; AjPFilebuff infile = NULL; AjPStr patstr = NULL; ajuint ifmt; ajuint npat = 0; AjPStr namestr = NULL; ajStrAssignS(&namestr, patname); ajStrAssignEmptyC(&namestr, "regex"); ajStrAssignS(&patstr, patspec); patlist = ajPatlistRegexNewType(type); ifmt = patternRegexFormat(fmt); if(ajStrGetCharFirst(patspec) == '@') { ajStrCutStart(&patstr, 1); infile = ajFilebuffNewNameS(patstr); if(!infile) { ajErr("Unable to open regular expression file '%S'", patstr); return NULL; } line = ajStrNew(); pat = ajStrNew(); name = ajStrNew(); if(!ifmt) { ajBuffreadLineTrim(infile,&line); if(ajStrPrefixC(line, ">")) ifmt = 2; else ifmt = 1; ajFilebuffReset(infile); } switch(ifmt) { case 1: while (ajBuffreadLineTrim(infile,&line)) { npat++; ajStrAppendS (&pat,line); if(lower) ajStrFmtLower(&pat); if(upper) ajStrFmtUpper(&pat); ajFmtPrintS(&name, "%S%u", namestr, npat); ajPatternRegexNewList(patlist,name,pat); ajStrSetClear(&pat); } break; default: while (ajBuffreadLineTrim(infile,&line)) { if (ajStrFindC(line,">")>-1) { npat++; if (ajStrGetLen(name)) { if(lower) ajStrFmtLower(&pat); if(upper) ajStrFmtUpper(&pat); ajPatternRegexNewList(patlist,name,pat); ajStrSetClear(&name); ajStrSetClear(&pat); } ajStrCutStart(&line,1); ajStrAssignS (&name,line); if(!ajStrGetLen(name)) ajFmtPrintS(&name, "%S%u", namestr, npat); } else ajStrAppendS (&pat,line); } ajStrAssignEmptyS(&name, patname); ajPatternRegexNewList(patlist,name,pat); ajStrSetClear(&pat); break; } ajFilebuffDel(&infile); } else { ajStrAssignS(&pat, patspec); if(lower) ajStrFmtLower(&pat); if(upper) ajStrFmtUpper(&pat); ajStrAssignS(&name, namestr); ajPatternRegexNewList(patlist,name,pat); } ajStrDel(&name); ajStrDel(&namestr); ajStrDel(&patstr); ajStrDel(&line); ajStrDel(&pat); return patlist; }
/* @funcstatic dbxflat_ParseGenbank *******************************************
**
** Reads one GenBank-format entry, up to its "//" line, and hands the
** fields enabled by the do_* flags of entry to the embBtreeGenBank*
** functions. The entry id and file position are taken from the LOCUS line.
**
** @param [u] entry [EmbPBtreeEntry] Entry; fpos and id are set here, and the
**                                   sv, ac, kw, de and tx members are passed
**                                   on for the enabled fields
** @param [u] inf [AjPFile] Input file
** @return [AjBool] ajFalse if a read failed before the "//" line was seen
** @@
******************************************************************************/
static AjBool dbxflat_ParseGenbank(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line    = NULL;
    ajlong pos     = 0L;      /* file position before the last bottom read */
    AjBool ret     = ajTrue;
    AjPStr sumline = NULL;    /* multi-line field joined into one string   */

    line    = ajStrNewC("");
    sumline = ajStrNew();

    while(!ajStrPrefixC(line,"//") && ret)
    {
        if(ajStrPrefixC(line,"LOCUS"))
        {
            entry->fpos = pos;
            ajFmtScanS(line,"%*S%S",&entry->id);
        }

        if(entry->do_sv)
            if(ajStrPrefixC(line,"VERSION"))
                embBtreeGenBankAC(line,entry->sv);

        if(entry->do_accession)
            if(ajStrPrefixC(line,"ACCESSION"))
                embBtreeGenBankAC(line,entry->ac);

        if(entry->do_keyword)
            if(ajStrPrefixC(line,"KEYWORDS"))
            {
                ajStrAssignS(&sumline,line);
                ret = ajReadlineTrim(inf,&line);

                /* continuation lines start with a space */
                while(ret && *MAJSTRGETPTR(line)==' ')
                {
                    ajStrAppendS(&sumline,line);
                    ret = ajReadlineTrim(inf,&line);
                }

                ajStrRemoveWhiteExcess(&sumline);
                embBtreeGenBankKW(sumline,entry->kw,entry->kwlen);

                /* line already holds the next unprocessed line */
                continue;
            }

        if(entry->do_description)
            if(ajStrPrefixC(line,"DEFINITION"))
            {
                ajStrAssignS(&sumline,line);
                ret = ajReadlineTrim(inf,&line);

                while(ret && *MAJSTRGETPTR(line)==' ')
                {
                    ajStrAppendS(&sumline,line);
                    ret = ajReadlineTrim(inf,&line);
                }

                ajStrRemoveWhiteExcess(&sumline);
                embBtreeGenBankDE(sumline,entry->de,entry->delen);
                continue;
            }

        if(entry->do_taxonomy)
            if(ajStrPrefixC(line,"SOURCE"))
            {
                /*
                ** Start from an empty buffer. The SOURCE line itself is not
                ** copied, so without this reset sumline would still hold
                ** the text of an earlier KEYWORDS or DEFINITION field and
                ** that text would be indexed as taxonomy.
                */
                ajStrDel(&sumline);
                sumline = ajStrNew();

                ret = ajReadlineTrim(inf,&line);
                ajStrAppendC(&line,";");

                while(ret && *MAJSTRGETPTR(line)==' ')
                {
                    ajStrAppendS(&sumline,line);
                    ret = ajReadlineTrim(inf,&line);
                }

                ajStrRemoveWhiteExcess(&sumline);
                embBtreeGenBankTX(sumline,entry->tx,entry->txlen);
                continue;
            }

        pos = ajFileResetPos(inf);

        if(!ajReadlineTrim(inf,&line))
            ret = ajFalse;
    }

    ajStrDel(&line);
    ajStrDel(&sumline);

    return ret;
}
/* @func embPropEaminoRead ****************************************************
**
** Reads an amino acid property data file into a newly allocated table of
** EMBPROPSIZE property records.
**
** Blank lines and lines starting with '#' or '!' are ignored. The first
** remaining line must start with "aa". Every later line starts with a
** single letter followed by ten numeric columns; the letter selects the
** table slot through ajBasecodeToInt. Any format violation is reported
** with ajFatal.
**
** @param [u] mfptr [AjPFile] Open amino acid data file
** @return [EmbPPropAmino*] Table of property records
** @@
******************************************************************************/
EmbPPropAmino* embPropEaminoRead(AjPFile mfptr)
{
    EmbPPropAmino *table = NULL;
    AjPStr rdline   = NULL;
    AjPStr firstwd  = NULL;
    AjBool needhdr  = ajTrue;
    const char *cp  = NULL;
    ajuint slot     = 0;
    ajint  nread    = 0;

    rdline  = ajStrNew();
    firstwd = ajStrNew();

    /* One zeroed record per slot. */
    AJCNEW0(table,EMBPROPSIZE);

    slot = 0;

    while(slot < EMBPROPSIZE)
    {
        AJNEW0(table[slot]);
        ++slot;
    }

    while(ajReadline(mfptr, &rdline))
    {
        ajStrRemoveWhiteExcess(&rdline);
        cp = ajStrGetPtr(rdline);

        /* Nothing to parse on empty or comment lines. */
        if(!*cp || *cp=='!' || *cp=='#')
            continue;

        if(needhdr)
        {
            /* The header line identifies the current file layout. */
            if(!ajStrPrefixC(rdline,"aa"))
                ajFatal("Incorrect (old?) format amino data file");

            needhdr = ajFalse;
            continue;
        }

        /* Leading token must be exactly one residue letter. */
        ajFmtScanS(rdline,"%S",&firstwd);
        ajStrFmtUpper(&firstwd);

        if(ajStrGetLen(firstwd) != 1)
            ajFatal("Amino file line doesn't begin with a single character");

        slot = ajBasecodeToInt((ajint) *ajStrGetPtr(firstwd));

        if(slot == 27)
            ajFatal("Amino file line doesn't begin with a single A->Z (%S)",
                    rdline);

        nread = ajFmtScanS(rdline,"%*s%d%d%d%d%d%d%f%d%d%d",
                           &table[slot]->tiny,
                           &table[slot]->sm_all,
                           &table[slot]->aliphatic,
                           &table[slot]->aromatic,
                           &table[slot]->nonpolar,
                           &table[slot]->polar,
                           &table[slot]->charge,
                           &table[slot]->pve,
                           &table[slot]->nve,
                           &table[slot]->extcoeff);

        /* The letter column counts as one, hence nread+1 of 11. */
        if(nread != 10)
            ajFatal("Only %d columns in amino file - expected %d",nread+1,11);
    }

    ajStrDel(&rdline);
    ajStrDel(&firstwd);

    return table;
}
/* @func embPropEmolwtRead ****************************************************
**
** Reads a molecular weight data file into a newly allocated table of
** EMBPROPSIZE+2 records.
**
** Blank lines and lines starting with '#' or '!' are ignored. The first
** remaining line must start with "Mol". Later lines hold a token followed
** by two floating point columns (average and mono). A single-letter token
** selects the table slot through ajBasecodeToInt; the tokens HYDROGEN,
** OXYGEN and WATER go to the EMBPROPHINDEX, EMBPROPOINDEX and
** EMBPROPWINDEX slots. Any format violation is reported with ajFatal.
**
** @param [u] mfptr [AjPFile] Open molecular weight data file
** @return [EmbPPropMolwt*] Table of molecular weight records
** @@
******************************************************************************/
EmbPPropMolwt* embPropEmolwtRead(AjPFile mfptr)
{
    AjPStr line  = NULL;
    AjPStr token = NULL;
    AjBool firstline;
    const char *p;
    ajuint i;
    ajint n;
    EmbPPropMolwt *ret;

    line  = ajStrNew();
    token = ajStrNew();
    firstline = ajTrue;

    AJCNEW0(ret,EMBPROPSIZE+2);

    for(i=0; i < EMBPROPSIZE+2; ++i)
        AJNEW0(ret[i]);

    while(ajReadline(mfptr, &line))
    {
        ajStrRemoveWhiteExcess(&line);
        p = ajStrGetPtr(line);

        /* skip comment and empty lines */
        if(*p=='#' || *p=='!' || !*p)
            continue;

        if(firstline)
        {
            if(!ajStrPrefixC(line,"Mol"))
                ajFatal("Incorrect format molwt file: '%S'", line);

            firstline = ajFalse;
            continue;
        }

        ajFmtScanS(line,"%S",&token);
        ajStrFmtUpper(&token);

        /* multi-character tokens are the named hydrogen/oxygen/water rows */
        if(ajStrGetLen(token) != 1)
        {
            if(ajStrPrefixC(token,"HYDROGEN"))
            {
                if(ajFmtScanS(line,"%*s%lf%lf",
                              &ret[EMBPROPHINDEX]->average,
                              &ret[EMBPROPHINDEX]->mono) != 2)
                    ajFatal("Bad format hydrogen data line");
            }
            else if(ajStrPrefixC(token,"OXYGEN"))
            {
                if(ajFmtScanS(line,"%*s%lf%lf",
                              &ret[EMBPROPOINDEX]->average,
                              &ret[EMBPROPOINDEX]->mono) != 2)
                    ajFatal("Bad format oxygen data line");
            }
            else if(ajStrPrefixC(token,"WATER"))
            {
                if(ajFmtScanS(line,"%*s%lf%lf",
                              &ret[EMBPROPWINDEX]->average,
                              &ret[EMBPROPWINDEX]->mono) != 2)
                    ajFatal("Bad format water data line");
            }
            else
                ajFatal("Unknown molwt token %S",token);

            continue;
        }

        i = ajBasecodeToInt((ajint) *ajStrGetPtr(token));

        if(i == 27)
            ajFatal("Molwt file line doesn't begin with a single A->Z (%S)",
                    line);

        n = ajFmtScanS(line,"%*s%lf%lf",
                       &ret[i]->average,
                       &ret[i]->mono);

        /*
        ** n counts only the numeric columns, so the token column is added
        ** to report columns out of the expected 3 (the message previously
        ** named the amino file and omitted the token column).
        */
        if(n != 2)
            ajFatal("Only %d columns in molwt file - expected %d",n+1,3);
    }

    ajStrDel(&line);
    ajStrDel(&token);

    return ret;
}
/* @funcstatic acdrelations_writerelations ************************************ ** ** Writes relations: attribute for an ACD data definition ** The relations: values given in knowntypes.standard have highest precedence, ** then the values given in edamtoacd.dat ** Attribute values for a given datatype in edamtoacd.dat are in order of ** increasing precedence, i.e. the last line is highest and the relations: ** value will be used if all conditions are met. ** ** @param [r] outf [AjPFile] ACD output file ** @param [r] acdtype [AjPStr ] ACD datatype, e.g. "align" ** @param [r] strarr [AjPStr*] All ACD attribute lines (whitespace removed) ** for the the current ACD data item (of type ** acdtype). One line per array element. ** @param [r] n [ajint] Size of strarr ** @param [r] P [PEdam] edam object to write ** @param [r] T [PKtype] ktype object to read ** @return [void] ** @@ ******************************************************************************/ static void acdrelations_writerelations (AjPFile outf, AjPStr acdtype, AjPStr *strarr, ajint n, PEdam P, PKtype T) { ajint i = 0; ajint j = 0; ajint k = 0; ajint nmatch = 0; AjPStr relations = NULL; AjPStr ktype = NULL; /* Value of knowntype: attribute */ AjBool done = ajFalse; AjBool donetype = ajFalse; AjPStr tmpstr = NULL; if(!outf || !acdtype || !strarr || !n || !P) ajFatal("NULL args passed to acdrelations_writerelations"); /* Memory allocation */ relations = ajStrNew(); ktype = ajStrNew(); tmpstr = ajStrNew(); /* Loop through all lines in edamtoacd.dat */ for(i=0; i<P->n ;i++) { /* Found matching datatype */ if(ajStrMatchS(acdtype, P->dat[i]->acdtype)) { /* Copy first relations: string defined for this datatype (default) */ ajStrAssignS(&relations, P->dat[i]->edam); done = ajTrue; i++; /* Check next line in edamtoacd.dat */ for( ; i<P->n; i++) { /* Datatype still matches */ if(ajStrMatchS(acdtype, P->dat[i]->acdtype)) { /* Loop through all required attributes for this datatype */ for(nmatch=0, j=0; j<P->dat[i]->n; 
j++) { /* Loop through all attribute lines for the data defininition */ for(k=0; k<n; k++) if(ajStrMatchS(P->dat[i]->acdattr[j], strarr[k])) { nmatch++; /* ajFmtPrint("Found match %d: %S:%S\n", nmatch, P->dat[i]->acdattr[j], strarr[k]); */ break; } } /* All attribute values match */ if(nmatch == P->dat[i]->n) ajStrAssignS(&relations, P->dat[i]->edam); /* Should never happen */ else if (nmatch > P->dat[i]->n) ajFatal("Terminal weirdness in acdrelations_writerelations"); } else break; } break; } } /* Check for match of knowntype: attribute against knowntypes.standard. These have higher precedence than the rules defined in edamtoacd.dat */ for(donetype=ajFalse, i=0; i<n; i++) { if(ajStrPrefixC(strarr[i], "knowntype:")) { for(j=0;j<T->n; j++) { /* No check is made on the "Type" column in knowntypes.standard as these are not proper ACD datatype names To check these add if(ajStrMatchS(acdtype, T->dat[j]->acdtype)) */ ajFmtPrintS(&tmpstr, "knowntype:\"%S\"", T->dat[j]->ktype); if(ajStrMatchS(tmpstr, strarr[i])) { ajStrAssignS(&relations, T->dat[j]->edam); donetype=ajTrue; break; } } if(donetype) break; } else continue; } if(!done) ajFatal("No matching datatype (%S) in acdrelations_writerelations", acdtype); /* Write relations: attribute line to file */ ajFmtPrintF(outf, " relations:%S\n", relations); /* Free memory */ ajStrDel(&relations); ajStrDel(&ktype); ajStrDel(&tmpstr); return; }
/* @funcstatic acdrelations_readdatfile ***************************************
**
** Reads a '|'-delimited data file into an edam object. Lines starting with
** '#' are skipped. On each remaining line the first field is the ACD
** datatype, the second the EDAM relations: value and the third a
** ';'-separated block of attribute:value strings. One PEdamdat record is
** built per line and the records are stored, in file order, in (*P)->dat
** with their count in (*P)->n.
**
** @param [u] inf [AjPFile] Input data file
** @param [w] P [PEdam*] edam object to fill
** @return [void]
** @@
******************************************************************************/
static void acdrelations_readdatfile (AjPFile inf, PEdam *P)
{
    AjPStr       rdline   = NULL;
    const AjPStr attrblk  = NULL;   /* third field, not yet split       */
    const AjPStr piece    = NULL;   /* one attribute:value string       */
    AjPStr       attrcopy = NULL;
    AjPList      attrlist = NULL;
    AjPStr       typestr  = NULL;
    AjPStr       edamstr  = NULL;
    PEdamdat     record   = NULL;
    AjPList      records  = NULL;

    if(!P)
        ajFatal("Null arg error 1 in acdrelations_readdatfile");

    if(!inf)
        ajFatal("Null arg error 3 in acdrelations_readdatfile");

    /* Working storage */
    rdline  = ajStrNew();
    typestr = ajStrNew();
    edamstr = ajStrNew();
    records = ajListNew();

    while(ajReadline(inf,&rdline))
    {
        /* Comment lines carry no data */
        if(ajStrPrefixC(rdline,"#"))
            continue;

        /* Fields 1-3, split on '|': datatype, relations value,
           attribute block */
        ajStrAssignS(&typestr, ajStrParseC(rdline, "|"));
        ajStrAssignS(&edamstr, ajStrParseC(NULL, "|"));
        attrblk = ajStrParseC(NULL, "|");

        attrlist = ajListstrNew();

        /* Split the attribute block on ';' and keep a whitespace-free
           copy of every piece */
        for(piece = ajStrParseC(attrblk, ";");
            piece;
            piece = ajStrParseC(NULL, ";"))
        {
            attrcopy = ajStrNew();
            ajStrAssignS(&attrcopy, piece);
            ajStrRemoveWhite(&attrcopy);
            ajListstrPushAppend(attrlist, attrcopy);
        }

        /* Build the record for this line and queue it */
        record = ajEdamdatNew();
        ajStrRemoveWhite(&typestr);
        ajStrAssignS(&record->acdtype, typestr);
        ajStrAssignS(&record->edam, edamstr);
        record->n = ajListstrToarray(attrlist, &record->acdattr);
        ajListPushAppend(records, record);

        /* Clear nodes (but not strings) from string list */
        ajListstrFree(&attrlist);
    }

    /* Hand the queued records over to the edam object */
    ((*P)->n) = ajListToarray(records, (void***) &((*P)->dat));

    /* Release working storage */
    ajStrDel(&rdline);
    ajStrDel(&typestr);
    ajStrDel(&edamstr);
    ajListFree(&records);

    return;
}
/* @funcstatic jaspextract_copyfiles ******************************************
**
** Copies the "*.pfm" files found in a directory into the EMBOSS data
** directory. A file is copied only if its base name starts with one of the
** Jprefix prefixes immediately followed by a digit; it goes into the
** subdirectory given by the matching Jprefix entry. The directory must
** contain the MATRIXFILE file, otherwise the call is fatal.
**
** @param [r] directory [AjPStr] Directory holding the JASPAR files
** @return [void]
** @@
******************************************************************************/
static void jaspextract_copyfiles(AjPStr directory)
{
    AjPStr  listfile   = NULL;
    AjPList found      = NULL;
    AjPStr  pattern    = NULL;
    AjPStr  srcname    = NULL;
    AjPStr  base       = NULL;
    AjPStr  buf        = NULL;
    AjPStr  target     = NULL;
    const AjPStr dataroot = NULL;
    ajuint  plen       = 0;
    ajuint  ip         = 0;
    const char *cp     = NULL;
    AjPFile src        = NULL;
    AjPFile dst        = NULL;

    listfile = ajStrNew();
    found    = ajListNew();
    pattern  = ajStrNewC("*.pfm");
    base     = ajStrNew();
    buf      = ajStrNew();
    target   = ajStrNew();

    dataroot = ajDatafileValuePath();

    if(!dataroot)
        ajFatal("jaspextract: Cannot determine the EMBOSS data directory");

    /* The matrix list file marks a genuine JASPAR directory. */
    ajFmtPrintS(&listfile,"%S%s",directory,MATRIXFILE);

    if(!ajFilenameExistsRead(listfile))
        ajFatal("jaspextract: Directory (%S) doesn't appear to be a JASPAR "
                "one\nNo matrix_list.txt file found",directory);

    ajFilelistAddPathWild(found, directory, pattern);

    while(ajListPop(found,(void **)&srcname))
    {
        ajStrAssignS(&base,srcname);
        ajFilenameTrimPath(&base);

        /* Find the first prefix that is followed by a digit. */
        for(ip = 0; Jprefix[ip].Prefix; ++ip)
        {
            if(ajStrPrefixC(base,Jprefix[ip].Prefix))
            {
                plen = strlen(Jprefix[ip].Prefix);
                cp   = ajStrGetPtr(base);

                if(cp[plen]>='0' && cp[plen]<='9')
                    break;
            }
        }

        /* No prefix matched: not a file we copy. */
        if(!Jprefix[ip].Prefix)
        {
            ajStrDel(&srcname);
            continue;
        }

        ajFmtPrintS(&target,"%S%s%c%S",dataroot,Jprefix[ip].Directory,
                    SLASH_CHAR,base);

        dst = ajFileNewOutNameS(target);

        if(!dst)
            ajFatal("Cannot open output file %S",target);

        /* Avoid UNIX copy for portability */
        src = ajFileNewInNameS(srcname);

        if(!src)
            ajFatal("Cannot open input file: %S",srcname);

        while(ajReadlineTrim(src,&buf))
            ajFmtPrintF(dst,"%S\n",buf);

        ajFileClose(&src);
        ajFileClose(&dst);

        ajStrDel(&srcname);
    }

    ajListFree(&found);

    ajStrDel(&pattern);
    ajStrDel(&target);
    ajStrDel(&buf);
    ajStrDel(&base);
    ajStrDel(&listfile);

    return;
}
AjPPatlistSeq ajPatlistSeqRead (const AjPStr patspec, const AjPStr patname, const AjPStr fmt, AjBool protein, ajuint mismatches) { AjPPatlistSeq patlist = NULL; AjPStr line = NULL; AjPStr name = NULL; AjPFilebuff infile = NULL; AjPRegexp mismreg = NULL; AjPStr patstr = NULL; AjPStr pat = NULL; ajuint mismatch = 0; ajint ifmt = 0; ajuint npat = 0; AjPStr namestr = NULL; ajStrAssignS(&namestr, patname); ajStrAssignEmptyC(&namestr, "pattern"); ajStrAssignS(&patstr, patspec); patlist = ajPatlistSeqNewType(protein); ifmt = patternSeqFormat(fmt); ajDebug("ajPatlistSeqRead patspec: '%S' patname: '%S' " "protein: %B mismatches: %d\n", patspec, patname, protein, mismatches); if(ajStrGetCharFirst(patstr) == '@') { ajStrCutStart(&patstr, 1); infile = ajFilebuffNewNameS(patstr); if(!infile) { ajErr("Unable to open pattern file '%S'", patstr); return NULL; } line = ajStrNew(); name = ajStrNew(); if(!ifmt) { ajBuffreadLineTrim(infile,&line); if(ajStrPrefixC(line, ">")) ifmt = 2; else ifmt = 1; ajFilebuffReset(infile); } switch(ifmt) { case 1: while (ajBuffreadLineTrim(infile,&line)) { npat++; ajStrAppendS (&pat,line); ajFmtPrintS(&name, "%S%u", namestr, npat); ajPatternSeqNewList(patlist,name,pat,mismatches); ajStrSetClear(&pat); } break; default: mismreg = ajRegCompC("<mismatch=(\\d+)>"); while (ajBuffreadLineTrim(infile,&line)) { if (ajStrGetCharFirst(line) == '>') { if (ajStrGetLen(name)) { ajPatternSeqNewList(patlist,name,pat, mismatch); ajStrSetClear(&name); ajStrSetClear(&pat); mismatch=mismatches; } ajStrCutStart(&line,1); if (ajRegExec(mismreg,line)) { ajRegSubI(mismreg,1,&name); ajStrToUint(name,&mismatch); ajStrTruncateLen(&line,ajRegOffset(mismreg)); ajStrTrimWhiteEnd(&line); } ajStrAssignS (&name,line); ajStrAssignEmptyS(&name, patname); } else ajStrAppendS (&pat,line); } ajStrAssignEmptyS(&name, patname); ajPatternSeqNewList(patlist,name,pat,mismatch); ajRegFree(&mismreg); break; } ajFilebuffDel(&infile); } else { ajStrAssignS(&name, namestr); 
ajPatternSeqNewList(patlist,name,patstr,mismatches); } ajStrDel(&name); ajStrDel(&line); ajStrDel(&pat); ajStrDel(&namestr); ajStrDel(&patstr); return patlist; }
/* @funcstatic dbxflat_ParseGenbank *******************************************
**
** Reads one GenBank-format entry, up to its "//" line. The entry id and
** file position are taken from the LOCUS line. For each of the field
** objects svfield, accfield, keyfield, desfield and orgfield that is set,
** the corresponding field text is handed to an embBtreeGenBank* function.
**
** @param [u] entry [EmbPBtreeEntry] Entry; fpos and id are set here
** @param [u] inf [AjPFile] Input file
** @return [AjBool] ajFalse if a read failed before the "//" line was seen
** @@
******************************************************************************/
static AjBool dbxflat_ParseGenbank(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line    = NULL;
    ajlong pos     = 0L;      /* file position before the last bottom read */
    AjBool ret     = ajTrue;
    AjPStr sumline = NULL;    /* multi-line field joined into one string   */

    line    = ajStrNewC("");
    sumline = ajStrNew();

    while(!ajStrPrefixC(line,"//") && ret)
    {
        if(ajStrPrefixC(line,"LOCUS"))
        {
            entry->fpos = pos;
            ajFmtScanS(line,"%*S%S",&entry->id);
        }

        if(svfield)
            if(ajStrPrefixC(line,"VERSION"))
                embBtreeGenBankAC(line,svfield->data);

        if(accfield)
            if(ajStrPrefixC(line,"ACCESSION"))
                embBtreeGenBankAC(line,accfield->data);

        if(keyfield)
            if(ajStrPrefixC(line,"KEYWORDS"))
            {
                ajStrAssignS(&sumline,line);
                ret = ajReadlineTrim(inf,&line);

                /* continuation lines start with a space */
                while(ret && *MAJSTRGETPTR(line)==' ')
                {
                    ajStrAppendS(&sumline,line);
                    ret = ajReadlineTrim(inf,&line);
                }

                ajStrRemoveWhiteExcess(&sumline);
                embBtreeGenBankKW(sumline,keyfield->data,keyfield->len);

                /* line already holds the next unprocessed line */
                continue;
            }

        if(desfield)
            if(ajStrPrefixC(line,"DEFINITION"))
            {
                ajStrAssignS(&sumline,line);
                ret = ajReadlineTrim(inf,&line);

                while(ret && *MAJSTRGETPTR(line)==' ')
                {
                    ajStrAppendS(&sumline,line);
                    ret = ajReadlineTrim(inf,&line);
                }

                ajStrRemoveWhiteExcess(&sumline);
                embBtreeGenBankDE(sumline,desfield->data,desfield->len);
                continue;
            }

        if(orgfield)
            if(ajStrPrefixC(line,"SOURCE"))
            {
                /*
                ** Start from an empty buffer. The SOURCE line itself is not
                ** copied, so without this reset sumline would still hold
                ** the text of an earlier KEYWORDS or DEFINITION field and
                ** that text would be indexed as taxonomy.
                */
                ajStrDel(&sumline);
                sumline = ajStrNew();

                ret = ajReadlineTrim(inf,&line);
                ajStrAppendC(&line,";");

                while(ret && *MAJSTRGETPTR(line)==' ')
                {
                    ajStrAppendS(&sumline,line);
                    ret = ajReadlineTrim(inf,&line);
                }

                ajStrRemoveWhiteExcess(&sumline);
                embBtreeGenBankTX(sumline,orgfield->data,orgfield->len);
                continue;
            }

        pos = ajFileResetPos(inf);

        if(!ajReadlineTrim(inf,&line))
            ret = ajFalse;
    }

    ajStrDel(&line);
    ajStrDel(&sumline);

    return ret;
}
int main(int argc, char **argv) { AjPFile inf = NULL; AjPFile inf2 = NULL; AjPFeattable tab = NULL; AjPReport report = NULL; AjPSeq sequence = NULL; AjPStr redatanew = NULL; AjPStr str = NULL; AjPStr regexp = NULL; AjPStr temp = NULL; AjPStr text = NULL; AjPStr docdata = NULL; AjPStr data = NULL; AjPStr accession = NULL; AjPStr name = NULL; EmbPPatMatch match = NULL; AjPStr savereg = NULL; AjPStr fthit = NULL; AjBool full; AjBool prune; ajint i; ajint number; ajint start; ajint end; ajint length; ajint zstart; ajint zend; const char *p; ajint seqlength; AjPStr tmpstr = NULL; AjPStr tailstr = NULL; AjPFeature gf; embInit("patmatmotifs", argc, argv); ajStrAssignC(&fthit, "SO:0001067"); savereg = ajStrNew(); str = ajStrNew(); regexp = ajStrNew(); temp = ajStrNew(); data = ajStrNew(); accession = ajStrNew(); text = ajStrNew(); name = ajStrNew(); sequence = ajAcdGetSeq("sequence"); report = ajAcdGetReport("outfile"); full = ajAcdGetBoolean("full"); prune = ajAcdGetBoolean("prune"); ajSeqFmtUpper(sequence); /* prosite regexs are all upper case */ tab = ajFeattableNewSeq(sequence); ajStrAssignC(&tailstr, ""); seqlength = ajStrGetLen(str); str = ajSeqGetSeqCopyS(sequence); redatanew = ajStrNewC("PROSITE/prosite.lines"); docdata = ajStrNewC("PROSITE/"); inf = ajDatafileNewInNameS(redatanew); if(!inf) ajFatal("Either EMBOSS_DATA undefined or PROSEXTRACT needs running"); ajFmtPrintAppS(&tmpstr, "Full: %B\n", full); ajFmtPrintAppS(&tmpstr, "Prune: %B\n", prune); ajFmtPrintAppS(&tmpstr, "Data_file: %F\n", inf); ajReportSetHeaderS(report, tmpstr); while(ajReadlineTrim(inf, ®exp)) { p=ajStrGetPtr(regexp); if(*p && *p!=' ' && *p!='^') { p=ajSysFuncStrtok(p," "); ajStrAssignC(&name,p); if(prune) if(ajStrMatchCaseC(name,"myristyl") || ajStrMatchCaseC(name,"asn_glycosylation") || ajStrMatchCaseC(name,"camp_phospho_site") || ajStrMatchCaseC(name,"pkc_phospho_site") || ajStrMatchCaseC(name,"ck2_phospho_site") || ajStrMatchCaseC(name,"tyr_phospho_site")) { for(i=0;i<4;++i) 
ajReadlineTrim(inf, ®exp); continue; } p=ajSysFuncStrtok(NULL," "); ajStrAssignC(&accession,p); } if(ajStrPrefixC(regexp, "^")) { p = ajStrGetPtr(regexp); ajStrAssignC(&temp,p+1); ajStrAssignC(&savereg,p+1); match = embPatMatchFind(temp, str, ajFalse, ajFalse); number = embPatMatchGetNumber(match); for(i=0; i<number; i++) { seqlength = ajStrGetLen(str); start = 1+embPatMatchGetStart(match, i); end = 1+embPatMatchGetEnd(match, i); length = embPatMatchGetLen(match, i); gf = ajFeatNew(tab, NULL, fthit, start, end, (float) length, ' ', 0); ajFmtPrintS(&tmpstr, "*motif %S", name); ajFeatTagAddSS(gf, NULL, tmpstr); if(start-5<0) zstart = 0; else zstart = start-5; if(end+5> seqlength) zend = end; else zend = end+5; ajStrAssignSubS(&temp, str, zstart, zend); } if(full && number) { ajStrAssignC(&redatanew,ajStrGetPtr(docdata)); ajStrAppendC(&redatanew,ajStrGetPtr(accession)); inf2 = ajDatafileNewInNameS(redatanew); if(!inf2) continue; /* ** Insert Prosite documentation from files made by ** prosextract.c */ ajFmtPrintAppS(&tailstr, "Motif: %S\n", name); ajFmtPrintAppS(&tailstr, "Count: %d\n\n", number); while(ajReadlineTrim(inf2, &text)) ajFmtPrintAppS(&tailstr, "%S\n", text); ajFmtPrintAppS(&tailstr, "\n***************\n\n"); ajFileClose(&inf2); } embPatMatchDel(&match); } } ajReportSetTailS(report,tailstr); ajReportWrite(report, tab, sequence); ajReportDel(&report); ajFeattableDel(&tab); ajStrDel(&temp); ajStrDel(®exp); ajStrDel(&savereg); ajStrDel(&str); ajStrDel(&data); ajStrDel(&docdata); ajStrDel(&text); ajStrDel(&redatanew); ajStrDel(&accession); ajSeqDel(&sequence); ajStrDel(&tailstr); ajStrDel(&fthit); ajStrDel(&name); ajStrDel(&tmpstr); ajFeattableDel(&tab); ajFileClose(&inf); embExit(); return 0; }
/* @funcstatic dbxflat_ParseFastq *********************************************
**
** Reads one FASTQ-format entry. The first line must start with '@'; its
** first word becomes the entry id and, when desfield is set, every word of
** the remaining text that matches dbxflat_wrdexp is pushed onto
** desfield->data. Sequence lines are then read up to the line starting
** with '+', and quality lines are read until at least as many characters
** as the sequence held have been consumed or the input ends.
**
** @param [u] entry [EmbPBtreeEntry] Entry; fpos and id are set here
** @param [u] inf [AjPFile] Input file
** @return [AjBool] ajFalse if no line could be read, the first line does
**                  not start with '@', or the input ended before a '+' line
** @@
******************************************************************************/
static AjBool dbxflat_ParseFastq(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line   = NULL;
    ajlong pos    = 0L;
    ajuint seqlen = 0;       /* sequence characters read so far */
    ajuint qlen   = 0;       /* quality characters read so far  */
    AjPStr tmpfd  = NULL;
    AjPStr str    = NULL;
    AjPStr de     = NULL;
    AjBool ok;

    if(!dbxflat_wrdexp)
        dbxflat_wrdexp = ajRegCompC("([A-Za-z0-9.:=]+)");

    line = ajStrNewC("");

    pos = ajFileResetPos(inf);

    if(!ajReadlineTrim(inf,&line))
    {
        ajStrDel(&line);
        return ajFalse;
    }

    /* first line of entry */
    if(!ajStrPrefixC(line,"@"))
    {
        /* line used to be leaked on this path */
        ajStrDel(&line);
        return ajFalse;
    }

    entry->fpos = pos;
    ajStrCutStart(&line, 1);
    ajStrExtractFirst(line, &de, &entry->id);

    if(desfield && ajStrGetLen(de))
    {
        /* each pushed word is a fresh copy; the list receives the pointer */
        while(ajRegExec(dbxflat_wrdexp,de))
        {
            ajRegSubI(dbxflat_wrdexp, 1, &tmpfd);
            str = ajStrNew();
            ajStrAssignS(&str,tmpfd);
            ajListPush(desfield->data,(void *)str);
            ajRegPost(dbxflat_wrdexp, &de);
        }
    }

    /* now read sequence */
    ok = ajReadlineTrim(inf,&line);

    while(ok && !ajStrPrefixC(line, "+"))
    {
        ajStrRemoveWhite(&line);
        seqlen += MAJSTRGETLEN(line);
        ok = ajReadlineTrim(inf,&line);
    }

    if(!ok)
    {
        /* de, tmpfd and line used to be leaked on this path */
        ajStrDel(&de);
        ajStrDel(&tmpfd);
        ajStrDel(&line);
        return ajFalse;
    }

    /* quality lines: stop once they cover the sequence length */
    ok = ajReadlineTrim(inf,&line);

    while(ok)
    {
        qlen += MAJSTRGETLEN(line);

        if(qlen < seqlen)
            ok = ajReadlineTrim(inf,&line);
        else
            ok = ajFalse;
    }

    ajStrDel(&de);
    ajStrDel(&tmpfd);
    ajStrDel(&line);

    return ajTrue;
}
/* @funcstatic seqwords_keysearch ********************************************
**
** Search swissprot with terms structure and writes a hitlist structure
**
** For every entry the DE and KW lines, and the domain feature lines, are
** searched case-insensitively for the search terms. When the SQ line of an
** entry with at least one match is reached, the sequence is read up to the
** "//" line and hits are appended to the hitlist: one hit per matching
** domain feature (sub-sequence between its start and end), or, if only
** DE/KW matched, a single hit holding the whole sequence.
**
** @param [r] inf   [AjPFile]      File pointer to swissprot database
** @param [r] terms [AjPTerms]     Terms object pointer
** @param [w] hits  [EmbPHitlist*] Hitlist object pointer; must point to an
**                                 existing hitlist
**
** @return [AjBool] True on success, false if any argument is NULL
** @@
******************************************************************************/
static AjBool seqwords_keysearch(AjPFile inf, AjPTerms terms,
                                 EmbPHitlist *hits)
{
    AjPStr line    = NULL;     /* Line of text.                    */
    AjPStr id      = NULL;     /* Accession of the current entry.  */
    AjPStr temp    = NULL;     /* Sequence of the current entry.   */
    ajint  s       = 0;        /* Temp. start of hit value.        */
    ajint  e       = 0;        /* Temp. end of hit value.          */
    AjPInt start   = NULL;     /* Array of start of hit(s).        */
    AjPInt end     = NULL;     /* Array of end of hit(s).          */
    ajint  nhits   = 0;        /* Number of hits.                  */
    ajint  x       = 0;
    ajint  last    = 0;        /* Index of the hit just appended.  */
    AjBool foundkw = ajFalse;
    AjBool foundft = ajFalse;

    /*
    ** Check for valid args. terms and *hits are dereferenced below, so
    ** they are validated together with inf before anything is allocated.
    */
    if(!inf || !terms || !hits || !*hits)
        return ajFalse;

    /* Allocate strings and arrays. */
    line  = ajStrNew();
    id    = ajStrNew();
    start = ajIntNew();
    end   = ajIntNew();

    /* Start of main loop. */
    while(ajReadlineTrim(inf,&line))
    {
        /* Parse the AC line. */
        if(ajStrPrefixC(line,"AC"))
        {
            /* Copy accession number and remove the ';' from the end. */
            ajFmtScanS(line, "%*s %S", &id);
            ajStrExchangeCC(&id, ";", "\0");

            /* Reset flags & no. hits. */
            foundkw = ajFalse;
            foundft = ajFalse;
            nhits   = 0;
        }
        /* Search the description and keyword lines with search terms. */
        else if(ajStrPrefixC(line,"DE") || ajStrPrefixC(line,"KW"))
        {
            /*
            ** Search terms have a leading and trailing space to prevent
            ** them being found as substrings within other words. To
            ** catch cases where a DE or KW line begins with a search
            ** term, we must add a leading and trailing space to line.
            ** We must first remove punctuation from the line to be parsed.
            */
            ajStrExchangeSetCC(&line, ".,;:", " ");
            ajStrAppendK(&line, ' ');
            ajStrInsertC(&line, 0, " ");

            for(x = 0; x < terms->N; x++)
                if(ajStrFindCaseS(line, terms->Keywords[x]) != -1)
                {
                    /* Search term is found. */
                    foundkw = ajTrue;
                    break;
                }
        }
        /*
        ** Search the feature table line with search terms.
        ** NOTE(review): SwissProt flat files pad the feature key with
        ** several spaces after "FT"; confirm this prefix literal matches
        ** the input actually being searched.
        */
        else if(ajStrPrefixC(line,"FT DOMAIN"))
        {
            /*
            ** Search terms have a leading and trailing space to prevent
            ** them being found as substrings within other words. To
            ** catch cases where a FT line ends with a search
            ** term, we must add a trailing space to line.
            ** We must first remove punctuation from the line to be parsed.
            */
            ajStrExchangeSetCC(&line, ".,;:", " ");
            ajStrAppendK(&line, ' ');

            for(x = 0; x < terms->N; x++)
                if(ajStrFindCaseS(line, terms->Keywords[x]) != -1)
                {
                    /* Search term is found. */
                    foundft = ajTrue;
                    nhits++;

                    /* Assign start and end of hit. */
                    ajFmtScanS(line, "%*s %*s %d %d", &s, &e);

                    ajIntPut(&start, nhits-1, s);
                    ajIntPut(&end, nhits-1, e);
                    break;
                }
        }
        /* Parse the sequence. */
        else if(ajStrPrefixC(line,"SQ") && (foundkw || foundft))
        {
            /* Allocate memory for temp. sequence. */
            temp = ajStrNew();

            /* Read the sequence lines up to the end of the entry. */
            while(ajReadlineTrim(inf,&line) && !ajStrPrefixC(line,"//"))
                ajStrAppendC(&temp,ajStrGetPtr(line)+3);

            /* Clean up temp. sequence. */
            ajStrRemoveWhite(&temp);

            /* Priority is given to domain (rather than full length)
               sequence. */
            if(foundft)
            {
                for(x = 0; x < nhits; x++)
                {
                    /* Grow the array of hits by one. */
                    (*hits)->N++;
                    AJCRESIZE((*hits)->hits, (*hits)->N);

                    last = (*hits)->N - 1;

                    (*hits)->hits[last] = embHitNew();
                    ajStrAssignC(&(*hits)->hits[last]->Model, "KEYWORD");

                    /* Assign start and end of hit. */
                    (*hits)->hits[last]->Start = ajIntGet(start, x);
                    (*hits)->hits[last]->End   = ajIntGet(end, x);

                    /* Extract sequence within specified range. */
                    ajStrAssignSubS(&(*hits)->hits[last]->Seq, temp,
                                    (*hits)->hits[last]->Start - 1,
                                    (*hits)->hits[last]->End - 1);

                    /* Put id into structure. */
                    ajStrAssignRef(&(*hits)->hits[last]->Acc, id);
                }
            }
            else
            {
                /* Grow the array of hits by one. */
                (*hits)->N++;
                AJCRESIZE((*hits)->hits, (*hits)->N);

                last = (*hits)->N - 1;

                (*hits)->hits[last] = embHitNew();
                ajStrAssignC(&(*hits)->hits[last]->Model, "KEYWORD");

                /* Extract whole sequence. */
                ajStrAssignRef(&(*hits)->hits[last]->Seq, temp);
                (*hits)->hits[last]->Start = 1;
                (*hits)->hits[last]->End =
                    ajStrGetLen((*hits)->hits[last]->Seq);

                /* Put id into structure. */
                ajStrAssignRef(&(*hits)->hits[last]->Acc, id);
            }

            /* Free temp. sequence. */
            ajStrDel(&temp);
        }
    }

    /* Clean up. */
    ajStrDel(&line);
    ajStrDel(&id);
    ajIntDel(&start);
    ajIntDel(&end);

    return ajTrue;
}
int main(int argc, char **argv) { AjPFile infdat = NULL; AjPFile infdoc = NULL; AjPFile outf = NULL; AjPFile outs = NULL; AjBool haspattern; const char *p; AjPStr line = NULL; AjPStr text = NULL; AjPStr dirname = NULL; AjPStr filename = NULL; AjPStr id = NULL; AjPStr ac = NULL; AjPStr de = NULL; AjPStr pa = NULL; AjPStr ps = NULL; AjPStr fn = NULL; AjPStr re = NULL; AjPStr fname = NULL; AjBool flag; AjBool isopen; AjBool goback; ajlong storepos = 0L; embInit("prosextract", argc, argv); dirname = ajAcdGetDirectoryName("prositedir"); line = ajStrNew(); text = ajStrNew(); id = ajStrNew(); ac = ajStrNew(); de = ajStrNew(); pa = ajStrNew(); ps = ajStrNew(); fn=ajStrNew(); ajStrAssignS(&fn,dirname); ajStrAppendC(&fn,"prosite.dat"); if(!(infdat=ajFileNewInNameS(fn))) ajFatal("Cannot open file %S",fn); ajStrDel(&fn); fn=ajStrNewC("PROSITE/prosite.lines"); outf = ajDatafileNewOutNameS(fn); ajStrDel(&fn); haspattern = ajFalse; while(ajReadlineTrim(infdat, &line) ) { if(ajStrPrefixC(line, "ID")) { if(ajStrSuffixC(line,"PATTERN.")) { haspattern = ajTrue; /*save id*/ p = ajStrGetPtr(line); p = ajSysFuncStrtok(p," \t;"); p = ajSysFuncStrtok(NULL," \t;"); ajStrAssignC(&id,p); ajFmtPrintF(outf, "%S ", id); continue; } else { haspattern = ajFalse; continue; } } if(!haspattern) continue; if(ajStrPrefixC(line, "AC") ) { p = ajStrGetPtr(line); p = ajSysFuncStrtok(p, " \t;"); p = ajSysFuncStrtok(NULL, " \t;"); ajStrAssignC(&ac,p); ajFmtPrintF(outf, "%S\n ", ac); continue; } if(ajStrPrefixC(line, "DE") ) { p = ajStrGetPtr(line); p = ajSysFuncStrtok(p, " \t."); p = ajSysFuncStrtok(NULL, " \t."); ajStrAssignC(&de,p); ajFmtPrintF(outf, "%S\n ", de); continue; } if(ajStrPrefixC(line, "PA")) { ajStrAssignC(&pa,""); while(ajStrPrefixC(line,"PA")) { p = ajStrGetPtr(line); p = ajSysFuncStrtok(p, " \t."); p = ajSysFuncStrtok(NULL, " \t."); ajStrAppendC(&pa,p); ajReadlineTrim(infdat, &line); } ajFmtPrintF(outf, "%S\n", pa); re = embPatPrositeToRegExp(pa); ajFmtPrintF(outf, "^%S\n\n", re); 
ajStrDel(&re); continue; } } /* Finished processing prosite.dat so look at prosite.doc */ fn = ajStrNew(); ajStrAssignS(&fn,dirname); ajStrAppendC(&fn,"prosite.doc"); if(!(infdoc=ajFileNewInNameS(fn))) ajFatal("Cannot open file %S",fn); ajStrDel(&fn); fname = ajStrNewC("PROSITE/"); flag = ajFalse; isopen = ajFalse; goback = ajFalse; while(ajReadlineTrim(infdoc, &text)) { if(ajStrPrefixC(text, "{PS") && isopen && !goback) goback = ajTrue; if(ajStrPrefixC(text, "{PS") && !isopen) { storepos = ajFileResetPos(infdoc); /* save out the documentation text to acc numbered outfiles . */ p = ajStrGetPtr(text)+1; p = ajSysFuncStrtok(p, ";"); ajStrAssignS(&filename, fname); ajStrAppendC(&filename, p); outs = ajDatafileNewOutNameS(filename); flag = ajTrue; isopen = ajTrue; continue; } if(ajStrPrefixC(text, "{BEGIN}") && flag) { while(ajReadlineTrim(infdoc, &text)) { if(ajStrPrefixC(text,"{END}")) break; ajFmtPrintF(outs, "%S\n", text); } ajFileClose(&outs); isopen = ajFalse; if(goback) { goback = ajFalse; ajFileSeek(infdoc,storepos,0); } } } ajStrDel(&line); ajStrDel(&text); ajStrDel(&dirname); ajStrDel(&filename); ajStrDel(&id); ajStrDel(&ac); ajStrDel(&de); ajStrDel(&pa); ajStrDel(&re); ajStrDel(&ps); ajStrDel(&fname); ajFileClose(&infdat); ajFileClose(&infdoc); ajFileClose(&outf); embExit(); return 0; }