/* @funcstatic seqwords_TermsRead ********************************************* ** ** Read the next Terms object from a file in embl-like format. The search ** terms are modified with a leading and trailing space. ** ** @param [r] inf [AjPFile] Input file stream ** @param [w] thys [AjPTerms*] Terms object ** ** @return [AjBool] True on succcess ** @@ *****************************************************************************/ static AjBool seqwords_TermsRead(AjPFile inf, AjPTerms *thys) { AjPStr line =NULL; /* Line of text. */ AjPStr temp =NULL; AjPList list_terms =NULL; /* List of keywords for a scop node*/ AjBool ok =ajFalse; AjPStr type = NULL; /* Memory management */ (*thys)=seqwords_TermsNew(); list_terms = ajListstrNew(); line = ajStrNew(); type = ajStrNew(); /* Read first line. */ ok = ajReadlineTrim(inf,&line); while(ok && !ajStrPrefixC(line,"//")) { if(ajStrPrefixC(line,"XX")) { ok = ajReadlineTrim(inf,&line); continue; } else if(ajStrPrefixC(line,"TY")) { ajFmtScanS(line, "%*s %S", &type); if(ajStrMatchC(type, "SCOP")) (*thys)->Type = ajSCOP; else if(ajStrMatchC(type, "CATH")) (*thys)->Type = ajCATH; } else if(ajStrPrefixC(line,"CL")) { ajStrAssignC(&(*thys)->Class,ajStrGetPtr(line)+3); ajStrRemoveWhiteExcess(&(*thys)->Class); } else if(ajStrPrefixC(line,"AR")) { ajStrAssignC(&(*thys)->Architecture,ajStrGetPtr(line)+3); ajStrRemoveWhiteExcess(&(*thys)->Architecture); } else if(ajStrPrefixC(line,"TP")) { ajStrAssignC(&(*thys)->Topology,ajStrGetPtr(line)+3); ajStrRemoveWhiteExcess(&(*thys)->Topology); } else if(ajStrPrefixC(line,"FO")) { ajStrAssignC(&(*thys)->Fold,ajStrGetPtr(line)+3); while(ajReadlineTrim(inf,&line)) { if(ajStrPrefixC(line,"XX")) break; ajStrAppendC(&(*thys)->Fold,ajStrGetPtr(line)+3); } ajStrRemoveWhiteExcess(&(*thys)->Fold); } else if(ajStrPrefixC(line,"SF")) { ajStrAssignC(&(*thys)->Superfamily,ajStrGetPtr(line)+3); while(ajReadlineTrim(inf,&line)) { if(ajStrPrefixC(line,"XX")) break; 
ajStrAppendC(&(*thys)->Superfamily,ajStrGetPtr(line)+3); } ajStrRemoveWhiteExcess(&(*thys)->Superfamily); } else if(ajStrPrefixC(line,"FA")) { ajStrAssignC(&(*thys)->Family,ajStrGetPtr(line)+3); while(ajReadlineTrim(inf,&line)) { if(ajStrPrefixC(line,"XX")) break; ajStrAppendC(&(*thys)->Family,ajStrGetPtr(line)+3); } ajStrRemoveWhiteExcess(&(*thys)->Family); } else if(ajStrPrefixC(line,"TE")) { /* Copy and clean up term. */ temp = ajStrNew(); ajStrAssignC(&temp,ajStrGetPtr(line)+3); ajStrRemoveWhiteExcess(&temp); /* Append a leading and trailing space to search term*/ ajStrAppendK(&temp, ' '); ajStrInsertC(&temp, 0, " "); /* Add the current term to the list. */ ajListstrPush(list_terms,temp); } ok = ajReadlineTrim(inf,&line); } if(!ok) { /* Clean up. */ ajListstrFree(&list_terms); ajStrDel(&line); /* Return. */ return ajFalse; } /* Convert the AjPList of terms to array of AjPSeq's. */ if(!((*thys)->N=ajListstrToarray((AjPList)list_terms,&(*thys)->Keywords))) ajWarn("Zero sized list of terms passed into seqwords_TermsRead"); /* Clean up. Free the list (not the nodes!). */ ajListstrFree(&list_terms); ajStrDel(&line); ajStrDel(&type); return ajTrue; }
/* @funcstatic acdrelations_readdatfile ***************************************
**
** Read a '|'-delimited data file into an edam object. Lines beginning
** with '#' are skipped. Every other line produces one PEdamdat record:
** field 1 is stored (whitespace removed) as the ACD datatype, field 2 is
** stored as the EDAM relations value, and field 3 is split at ';' into
** attribute strings, each with its whitespace removed.
**
** @param [u] inf [AjPFile] Input data file
** @param [u] P   [PEdam*]  edam object; its n and dat members are written
** @return [void]
** @@
******************************************************************************/

static void acdrelations_readdatfile
            (AjPFile inf,
             PEdam *P)
{
    AjPStr       record    = NULL;   /* Current line of the file        */
    AjPStr       datatype  = NULL;   /* First field of the line         */
    AjPStr       edamvalue = NULL;   /* Second field of the line        */
    AjPStr       attr      = NULL;   /* Copy of one attribute string    */
    const AjPStr attrblock = NULL;   /* Third field of the line         */
    const AjPStr piece     = NULL;   /* One ';'-delimited piece of it   */
    AjPList      attrlist  = NULL;   /* Attribute strings for one line  */
    AjPList      records   = NULL;   /* All PEdamdat records read       */
    PEdamdat     entry     = NULL;   /* Record under construction       */


    if(!P)
        ajFatal("Null arg error 1 in acdrelations_readdatfile");

    if(!inf)
        ajFatal("Null arg error 3 in acdrelations_readdatfile");


    /* Working storage */
    record    = ajStrNew();
    datatype  = ajStrNew();
    edamvalue = ajStrNew();
    records   = ajListNew();


    while(ajReadline(inf, &record))
    {
        /* Comment lines carry no data */
        if(ajStrPrefixC(record, "#"))
            continue;

        /* The three '|'-delimited fields, in order */
        ajStrAssignS(&datatype,  ajStrParseC(record, "|"));
        ajStrAssignS(&edamvalue, ajStrParseC(NULL, "|"));
        attrblock = ajStrParseC(NULL, "|");

        /* Split the third field at ';' and keep a whitespace-free copy
           of every piece */
        attrlist = ajListstrNew();

        for(piece = ajStrParseC(attrblock, ";");
            piece;
            piece = ajStrParseC(NULL, ";"))
        {
            attr = ajStrNew();
            ajStrAssignS(&attr, piece);
            ajStrRemoveWhite(&attr);
            ajListstrPushAppend(attrlist, attr);
        }

        /* Build the record for this line and queue it */
        entry = ajEdamdatNew();
        ajStrRemoveWhite(&datatype);
        ajStrAssignS(&entry->acdtype, datatype);
        ajStrAssignS(&entry->edam, edamvalue);
        entry->n = ajListstrToarray(attrlist, &entry->acdattr);
        ajListPushAppend(records, entry);

        /* Drop the list nodes only; the strings stay in entry->acdattr */
        ajListstrFree(&attrlist);
    }


    /* Hand the collected records over to the edam object */
    ((*P)->n) = ajListToarray(records, (void***) &((*P)->dat));


    /* Release working storage */
    ajStrDel(&record);
    ajStrDel(&datatype);
    ajStrDel(&edamvalue);
    ajListFree(&records);

    return;
}
/* @funcstatic acdrelations_procacdfile ***************************************
**
** Process ACD file and write new ACD file with new relations: attributes
** added (replaced if necessary).
**
** Application and section definitions, variable: and endsection: lines,
** comment lines and blank lines are copied to the output unchanged. For
** any other definition the datatype line is copied, existing relations:
** lines are omitted, and acdrelations_writerelations is called just
** before the closing "]" line is written.
**
** @param [u] inf  [AjPFile] ACD input file
** @param [u] outf [AjPFile] ACD output file
** @param [r] P    [PEdam]   edam object
** @param [r] T    [PKtype]  ktype object
** @return [void]
** @@
******************************************************************************/

static void acdrelations_procacdfile
            (AjPFile inf,
             AjPFile outf,
             PEdam P,
             PKtype T)
{
    AjPStr  line    = NULL;   /* Current input line                       */
    AjPStr  tok     = NULL;   /* First whitespace-delimited token of line */
    AjPStr  acdtype = NULL;   /* Datatype of the current data definition  */
    AjPStr  strtmp  = NULL;   /* Whitespace-free copy of an attribute line*/
    AjPList strlist = NULL;   /* Attribute lines of the current definition*/
    AjPStr *strarr  = NULL;   /* strlist as an array                      */
    ajint   nstr    = 0;      /* Size of strarr                           */


    /* Allocate memory */
    line    = ajStrNew();
    tok     = ajStrNew();
    acdtype = ajStrNew();
    strlist = ajListstrNew();


    /* Read next line */
    while(ajReadline(inf,&line))
    {
        /* Write out blank lines as-is. This is tested first so that tok
           is only examined when the scan actually produced a token for
           the current line. */
        if(!ajFmtScanS(line, "%S", &tok))
        {
            ajFmtPrintF(outf, "%S", line);
            continue;
        }

        /* Write application definition or section definition out as-is */
        if(ajStrMatchC(tok, "application:") ||
           ajStrMatchC(tok, "section:"))
        {
            ajFmtPrintF(outf, "%S", line);

            while(ajReadline(inf,&line))
            {
                ajFmtPrintF(outf, "%S", line);

                if(ajFmtScanS(line, "%S", &tok) && ajStrMatchC(tok, "]"))
                    break;
            }
        }
        /* Write variables, endsection definitions and comments out as-is.
           A comment is any line whose first token starts with '#', so
           that "#text" is handled like "# text" and is not mistaken for
           the start of a data definition. */
        else if(ajStrMatchC(tok, "variable:")   ||
                ajStrMatchC(tok, "endsection:") ||
                ajStrPrefixC(tok, "#"))
            ajFmtPrintF(outf, "%S", line);
        /* Process data definition */
        else
        {
            /* First line of data definition:
               process and write datatype line */
            ajFmtPrintF(outf, "%S", line);
            ajFmtScanS(line, "%S", &acdtype);
            ajStrRemoveSetC(&acdtype, ":");

            /* Process subsequent (attribute) lines */
            while(ajReadline(inf,&line))
            {
                /* Every line of the definition is recorded, with its
                   whitespace removed, for acdrelations_writerelations */
                strtmp = ajStrNew();
                ajStrAssignS(&strtmp, line);
                ajStrRemoveWhite(&strtmp);
                ajListstrPushAppend(strlist, strtmp);

                /* A blank line has no token to test: copy it through
                   rather than judging it by the previous line's token */
                if(!ajFmtScanS(line, "%S", &tok))
                {
                    ajFmtPrintF(outf, "%S", line);
                    continue;
                }

                /* Reached end of data definition */
                if(ajStrMatchC(tok, "]"))
                {
                    nstr = ajListstrToarray(strlist, &strarr);

                    /* Write relations: line */
                    acdrelations_writerelations(outf, acdtype, strarr,
                                                nstr, P, T);

                    /* The array only borrows the strings; they are freed
                       with the list, which is then recreated empty for
                       the next definition */
                    AJFREE(strarr);
                    ajListstrFreeData(&strlist);
                    strlist = ajListstrNew();

                    ajFmtPrintF(outf, "%S", line);
                    break;
                }
                /* Ignore existing relations: lines */
                else if(ajStrMatchC(tok, "relations:"))
                    continue;

                ajFmtPrintF(outf, "%S", line);
            }
        }
    }


    /* Free memory. The list still holds strings if the input ended in
       the middle of a data definition. */
    ajStrDel(&line);
    ajStrDel(&tok);
    ajStrDel(&acdtype);
    ajListstrFreeData(&strlist);

    return;
}
int main(int argc, char **argv) { AjPSeqout outseq = NULL; AjPList list = NULL; AjPSeq seq = NULL; AjPStr insert = NULL; AjPStr seqstr = NULL; AjPStr* seqr = NULL; AjPFile data = NULL; ajint start = 0; ajint length = 0; ajint amount = 0; ajint scmax = 0; ajint extra = 0; embInit("makeprotseq", argc, argv); data = ajAcdGetInfile("pepstatsfile"); insert = ajAcdGetString("insert"); start = ajAcdGetInt("start"); length = ajAcdGetInt("length"); amount = ajAcdGetInt("amount"); outseq = ajAcdGetSeqoutall("outseq"); list = ajListstrNew(); /* this is checked by acd if(amount <=0 || length <= 0) ajFatal("Amount or length is 0 or less. " "Unable to create any sequences"); */ /* if insert, make sure sequence is large enough */ if(ajStrGetLen(insert)) { length -= ajStrGetLen(insert); /* start= start <= 1 ? 0 : --start; */ /* checked in acd */ start--; if(length <= 0) ajFatal("Sequence smaller than inserted part. " "Unable to create sequences."); } /* make the list of AjPStr to be used in sequence creation */ if(data) { ajDebug("Distribution datafile '%s' given checking type\n", ajFileGetPrintnameC(data)); seqstr = ajStrNew(); ajReadlineTrim(data,&seqstr); if(ajStrFindC(seqstr,"PEPSTATS") == 0) { makeprotseq_parse_pepstats(&list,data); } else { ajWarn("Not pepstats file. Making completely random sequences."); makeprotseq_default_chars(&list); } ajStrDel(&seqstr); ajFileClose(&data); } else makeprotseq_default_chars(&list); /* if insert, make sure type is correct */ /* typecheking code is not working, uncomment and test after it is if(ajStrGetLen(insert)) { seqstr = ajStrNew(); if(prot) ajStrAssignC(&seqstr,"pureprotein"); if(!ajSeqTypeCheckS(&insert,seqstr)) ajFatal("Insert not the same sequence type as sequence itself."); ajStrDel(&seqstr); } */ /* array allows fast creation of a sequences */ scmax = (ajuint) ajListstrToarray(list,&seqr); if(!scmax) ajFatal("No strings in list. 
No characters to make the sequence."); ajDebug("Distribution array done.\nscmax '%d', extra '%d', first '%S'\n", scmax,extra,seqr[0]); ajRandomSeed(); while(amount-- > 0) { seqstr = makeprotseq_random_sequence(seqr,scmax,length); if(ajStrGetLen(insert)) ajStrInsertS(&seqstr,start,insert); ajStrFmtLower(&seqstr); seq = ajSeqNew(); ajSeqAssignSeqS(seq, seqstr); ajSeqSetProt(seq); ajSeqoutWriteSeq(outseq, seq); ajSeqDel(&seq); ajStrDel(&seqstr); } ajSeqoutClose(outseq); ajSeqoutDel(&outseq); ajListstrFreeData(&list); ajStrDel(&insert); AJFREE(seqr); embExit(); return 0; }